// SPDX-License-Identifier: GPL-2.0

Chris Masond1310b22008-01-24 16:13:08 -05003#include <linux/bitops.h>
4#include <linux/slab.h>
5#include <linux/bio.h>
6#include <linux/mm.h>
Chris Masond1310b22008-01-24 16:13:08 -05007#include <linux/pagemap.h>
8#include <linux/page-flags.h>
Chris Masond1310b22008-01-24 16:13:08 -05009#include <linux/spinlock.h>
10#include <linux/blkdev.h>
11#include <linux/swap.h>
Chris Masond1310b22008-01-24 16:13:08 -050012#include <linux/writeback.h>
13#include <linux/pagevec.h>
Linus Torvalds268bb0c2011-05-20 12:50:29 -070014#include <linux/prefetch.h>
Dan Magenheimer90a887c2011-05-26 10:01:56 -060015#include <linux/cleancache.h>
Chris Masond1310b22008-01-24 16:13:08 -050016#include "extent_io.h"
17#include "extent_map.h"
David Woodhouse902b22f2008-08-20 08:51:49 -040018#include "ctree.h"
19#include "btrfs_inode.h"
Jan Schmidt4a54c8c2011-07-22 15:41:52 +020020#include "volumes.h"
Stefan Behrens21adbd52011-11-09 13:44:05 +010021#include "check-integrity.h"
Josef Bacik0b32f4b2012-03-13 09:38:00 -040022#include "locking.h"
Josef Bacik606686e2012-06-04 14:03:51 -040023#include "rcu-string.h"
Liu Bofe09e162013-09-22 12:54:23 +080024#include "backref.h"
David Sterba6af49db2017-06-23 04:09:57 +020025#include "disk-io.h"
Chris Masond1310b22008-01-24 16:13:08 -050026
/* Slab cache that backs every struct extent_state allocated in this file. */
static struct kmem_cache *extent_state_cache;
/* Slab cache sized for struct extent_buffer objects. */
static struct kmem_cache *extent_buffer_cache;
/* Bio set set up in extent_io_init() with front padding for btrfs_io_bio. */
static struct bio_set btrfs_bioset;
Chris Masond1310b22008-01-24 16:13:08 -050030
Filipe Manana27a35072014-07-06 20:09:59 +010031static inline bool extent_state_in_tree(const struct extent_state *state)
32{
33 return !RB_EMPTY_NODE(&state->rb_node);
34}
35
#ifdef CONFIG_BTRFS_DEBUG
/* Leak-tracking lists: one for extent buffers, one for extent states. */
static LIST_HEAD(buffers);
static LIST_HEAD(states);

/* Taken by btrfs_leak_debug_add()/btrfs_leak_debug_del() around list edits. */
static DEFINE_SPINLOCK(leak_lock);
Eric Sandeen6d49ba12013-04-22 16:12:31 +000041
42static inline
43void btrfs_leak_debug_add(struct list_head *new, struct list_head *head)
44{
45 unsigned long flags;
46
47 spin_lock_irqsave(&leak_lock, flags);
48 list_add(new, head);
49 spin_unlock_irqrestore(&leak_lock, flags);
50}
51
52static inline
53void btrfs_leak_debug_del(struct list_head *entry)
54{
55 unsigned long flags;
56
57 spin_lock_irqsave(&leak_lock, flags);
58 list_del(entry);
59 spin_unlock_irqrestore(&leak_lock, flags);
60}
61
62static inline
63void btrfs_leak_debug_check(void)
64{
65 struct extent_state *state;
66 struct extent_buffer *eb;
67
68 while (!list_empty(&states)) {
69 state = list_entry(states.next, struct extent_state, leak_list);
David Sterba9ee49a042015-01-14 19:52:13 +010070 pr_err("BTRFS: state leak: start %llu end %llu state %u in tree %d refs %d\n",
Filipe Manana27a35072014-07-06 20:09:59 +010071 state->start, state->end, state->state,
72 extent_state_in_tree(state),
Elena Reshetovab7ac31b2017-03-03 10:55:19 +020073 refcount_read(&state->refs));
Eric Sandeen6d49ba12013-04-22 16:12:31 +000074 list_del(&state->leak_list);
75 kmem_cache_free(extent_state_cache, state);
76 }
77
78 while (!list_empty(&buffers)) {
79 eb = list_entry(buffers.next, struct extent_buffer, leak_list);
Liu Boaf2679e2018-01-25 11:02:48 -070080 pr_err("BTRFS: buffer leak start %llu len %lu refs %d bflags %lu\n",
81 eb->start, eb->len, atomic_read(&eb->refs), eb->bflags);
Eric Sandeen6d49ba12013-04-22 16:12:31 +000082 list_del(&eb->leak_list);
83 kmem_cache_free(extent_buffer_cache, eb);
84 }
85}
David Sterba8d599ae2013-04-30 15:22:23 +000086
Josef Bacika5dee372013-12-13 10:02:44 -050087#define btrfs_debug_check_extent_io_range(tree, start, end) \
88 __btrfs_debug_check_extent_io_range(__func__, (tree), (start), (end))
David Sterba8d599ae2013-04-30 15:22:23 +000089static inline void __btrfs_debug_check_extent_io_range(const char *caller,
Josef Bacika5dee372013-12-13 10:02:44 -050090 struct extent_io_tree *tree, u64 start, u64 end)
David Sterba8d599ae2013-04-30 15:22:23 +000091{
Nikolay Borisov65a680f2018-11-01 14:09:49 +020092 struct inode *inode = tree->private_data;
93 u64 isize;
94
95 if (!inode || !is_data_inode(inode))
96 return;
97
98 isize = i_size_read(inode);
99 if (end >= PAGE_SIZE && (end % 2) == 0 && end != isize - 1) {
100 btrfs_debug_rl(BTRFS_I(inode)->root->fs_info,
101 "%s: ino %llu isize %llu odd range [%llu,%llu]",
102 caller, btrfs_ino(BTRFS_I(inode)), isize, start, end);
103 }
David Sterba8d599ae2013-04-30 15:22:23 +0000104}
#else
/*
 * Without CONFIG_BTRFS_DEBUG the leak tracking and the range sanity check
 * expand to empty statements.
 */
#define btrfs_leak_debug_add(new, head)	do {} while (0)
#define btrfs_leak_debug_del(entry)	do {} while (0)
#define btrfs_leak_debug_check()	do {} while (0)
#define btrfs_debug_check_extent_io_range(c, s, e)	do {} while (0)
#endif
Chris Masond1310b22008-01-24 16:13:08 -0500111
/*
 * Minimal view of a range node: an inclusive [start, end] plus the rb-tree
 * linkage.  The search and insert helpers in this file access tree nodes
 * through this type via rb_entry().
 * NOTE(review): presumably struct extent_state begins with the same three
 * members in the same order -- confirm in extent_io.h.
 */
struct tree_entry {
	u64 start;
	u64 end;
	struct rb_node rb_node;
};
117
/*
 * Context passed along the write path.  @bio is the bio that has not been
 * submitted yet; flush_write_bio() submits it and end_write_bio() ends it
 * with an error status.
 */
struct extent_page_data {
	struct bio *bio;
	struct extent_io_tree *tree;
	/* tells writepage not to lock the state bits for this range
	 * it still does the unlocking
	 */
	unsigned int extent_locked:1;

	/* tells the submit_bio code to use REQ_SYNC */
	unsigned int sync_io:1;
};
129
David Sterba57599c72018-03-01 17:56:34 +0100130static int add_extent_changeset(struct extent_state *state, unsigned bits,
Qu Wenruod38ed272015-10-12 14:53:37 +0800131 struct extent_changeset *changeset,
132 int set)
133{
134 int ret;
135
136 if (!changeset)
David Sterba57599c72018-03-01 17:56:34 +0100137 return 0;
Qu Wenruod38ed272015-10-12 14:53:37 +0800138 if (set && (state->state & bits) == bits)
David Sterba57599c72018-03-01 17:56:34 +0100139 return 0;
Qu Wenruofefdc552015-10-12 15:35:38 +0800140 if (!set && (state->state & bits) == 0)
David Sterba57599c72018-03-01 17:56:34 +0100141 return 0;
Qu Wenruod38ed272015-10-12 14:53:37 +0800142 changeset->bytes_changed += state->end - state->start + 1;
David Sterba53d32352017-02-13 13:42:29 +0100143 ret = ulist_add(&changeset->range_changed, state->start, state->end,
Qu Wenruod38ed272015-10-12 14:53:37 +0800144 GFP_ATOMIC);
David Sterba57599c72018-03-01 17:56:34 +0100145 return ret;
Qu Wenruod38ed272015-10-12 14:53:37 +0800146}
147
Qu Wenruobb58eb92019-01-25 13:09:15 +0800148static int __must_check submit_one_bio(struct bio *bio, int mirror_num,
149 unsigned long bio_flags)
150{
151 blk_status_t ret = 0;
Qu Wenruobb58eb92019-01-25 13:09:15 +0800152 struct extent_io_tree *tree = bio->bi_private;
Qu Wenruobb58eb92019-01-25 13:09:15 +0800153
154 bio->bi_private = NULL;
155
156 if (tree->ops)
157 ret = tree->ops->submit_bio_hook(tree->private_data, bio,
Nikolay Borisov50489a52019-04-10 19:46:04 +0300158 mirror_num, bio_flags);
Qu Wenruobb58eb92019-01-25 13:09:15 +0800159 else
160 btrfsic_submit_bio(bio);
161
162 return blk_status_to_errno(ret);
163}
164
Qu Wenruo30659762019-03-20 14:27:42 +0800165/* Cleanup unsubmitted bios */
166static void end_write_bio(struct extent_page_data *epd, int ret)
167{
168 if (epd->bio) {
169 epd->bio->bi_status = errno_to_blk_status(ret);
170 bio_endio(epd->bio);
171 epd->bio = NULL;
172 }
173}
174
Qu Wenruof4340622019-03-20 14:27:41 +0800175/*
176 * Submit bio from extent page data via submit_one_bio
177 *
178 * Return 0 if everything is OK.
179 * Return <0 for error.
180 */
181static int __must_check flush_write_bio(struct extent_page_data *epd)
Qu Wenruobb58eb92019-01-25 13:09:15 +0800182{
Qu Wenruof4340622019-03-20 14:27:41 +0800183 int ret = 0;
Qu Wenruobb58eb92019-01-25 13:09:15 +0800184
Qu Wenruof4340622019-03-20 14:27:41 +0800185 if (epd->bio) {
Qu Wenruobb58eb92019-01-25 13:09:15 +0800186 ret = submit_one_bio(epd->bio, 0, 0);
Qu Wenruof4340622019-03-20 14:27:41 +0800187 /*
188 * Clean up of epd->bio is handled by its endio function.
189 * And endio is either triggered by successful bio execution
190 * or the error handler of submit bio hook.
191 * So at this point, no matter what happened, we don't need
192 * to clean up epd->bio.
193 */
Qu Wenruobb58eb92019-01-25 13:09:15 +0800194 epd->bio = NULL;
195 }
Qu Wenruof4340622019-03-20 14:27:41 +0800196 return ret;
Qu Wenruobb58eb92019-01-25 13:09:15 +0800197}
David Sterbae2932ee2017-06-23 04:16:17 +0200198
Chris Masond1310b22008-01-24 16:13:08 -0500199int __init extent_io_init(void)
200{
David Sterba837e1972012-09-07 03:00:48 -0600201 extent_state_cache = kmem_cache_create("btrfs_extent_state",
Christoph Hellwig9601e3f2009-04-13 15:33:09 +0200202 sizeof(struct extent_state), 0,
Nikolay Borisovfba4b692016-06-23 21:17:08 +0300203 SLAB_MEM_SPREAD, NULL);
Chris Masond1310b22008-01-24 16:13:08 -0500204 if (!extent_state_cache)
205 return -ENOMEM;
206
David Sterba837e1972012-09-07 03:00:48 -0600207 extent_buffer_cache = kmem_cache_create("btrfs_extent_buffer",
Christoph Hellwig9601e3f2009-04-13 15:33:09 +0200208 sizeof(struct extent_buffer), 0,
Nikolay Borisovfba4b692016-06-23 21:17:08 +0300209 SLAB_MEM_SPREAD, NULL);
Chris Masond1310b22008-01-24 16:13:08 -0500210 if (!extent_buffer_cache)
211 goto free_state_cache;
Chris Mason9be33952013-05-17 18:30:14 -0400212
Kent Overstreet8ac9f7c2018-05-20 18:25:56 -0400213 if (bioset_init(&btrfs_bioset, BIO_POOL_SIZE,
214 offsetof(struct btrfs_io_bio, bio),
215 BIOSET_NEED_BVECS))
Chris Mason9be33952013-05-17 18:30:14 -0400216 goto free_buffer_cache;
Darrick J. Wongb208c2f2013-09-19 20:37:07 -0700217
Kent Overstreet8ac9f7c2018-05-20 18:25:56 -0400218 if (bioset_integrity_create(&btrfs_bioset, BIO_POOL_SIZE))
Darrick J. Wongb208c2f2013-09-19 20:37:07 -0700219 goto free_bioset;
220
Chris Masond1310b22008-01-24 16:13:08 -0500221 return 0;
222
Darrick J. Wongb208c2f2013-09-19 20:37:07 -0700223free_bioset:
Kent Overstreet8ac9f7c2018-05-20 18:25:56 -0400224 bioset_exit(&btrfs_bioset);
Darrick J. Wongb208c2f2013-09-19 20:37:07 -0700225
Chris Mason9be33952013-05-17 18:30:14 -0400226free_buffer_cache:
227 kmem_cache_destroy(extent_buffer_cache);
228 extent_buffer_cache = NULL;
229
Chris Masond1310b22008-01-24 16:13:08 -0500230free_state_cache:
231 kmem_cache_destroy(extent_state_cache);
Chris Mason9be33952013-05-17 18:30:14 -0400232 extent_state_cache = NULL;
Chris Masond1310b22008-01-24 16:13:08 -0500233 return -ENOMEM;
234}
235
/*
 * Tear down what extent_io_init() set up: report (and free) any leaked
 * objects while the caches still exist, then destroy both slab caches and
 * the bio set.
 */
void __cold extent_io_exit(void)
{
	btrfs_leak_debug_check();

	/*
	 * Make sure all delayed rcu free are flushed before we
	 * destroy caches.
	 */
	rcu_barrier();
	kmem_cache_destroy(extent_state_cache);
	kmem_cache_destroy(extent_buffer_cache);
	bioset_exit(&btrfs_bioset);
}
249
Qu Wenruoc258d6e2019-03-01 10:47:58 +0800250void extent_io_tree_init(struct btrfs_fs_info *fs_info,
Qu Wenruo43eb5f22019-03-01 10:47:59 +0800251 struct extent_io_tree *tree, unsigned int owner,
252 void *private_data)
Chris Masond1310b22008-01-24 16:13:08 -0500253{
Qu Wenruoc258d6e2019-03-01 10:47:58 +0800254 tree->fs_info = fs_info;
Eric Paris6bef4d32010-02-23 19:43:04 +0000255 tree->state = RB_ROOT;
Chris Masond1310b22008-01-24 16:13:08 -0500256 tree->ops = NULL;
257 tree->dirty_bytes = 0;
Chris Mason70dec802008-01-29 09:59:12 -0500258 spin_lock_init(&tree->lock);
Josef Bacikc6100a42017-05-05 11:57:13 -0400259 tree->private_data = private_data;
Qu Wenruo43eb5f22019-03-01 10:47:59 +0800260 tree->owner = owner;
Chris Masond1310b22008-01-24 16:13:08 -0500261}
Chris Masond1310b22008-01-24 16:13:08 -0500262
Nikolay Borisov41e7acd2019-03-25 14:31:24 +0200263void extent_io_tree_release(struct extent_io_tree *tree)
264{
265 spin_lock(&tree->lock);
266 /*
267 * Do a single barrier for the waitqueue_active check here, the state
268 * of the waitqueue should not change once extent_io_tree_release is
269 * called.
270 */
271 smp_mb();
272 while (!RB_EMPTY_ROOT(&tree->state)) {
273 struct rb_node *node;
274 struct extent_state *state;
275
276 node = rb_first(&tree->state);
277 state = rb_entry(node, struct extent_state, rb_node);
278 rb_erase(&state->rb_node, &tree->state);
279 RB_CLEAR_NODE(&state->rb_node);
280 /*
281 * btree io trees aren't supposed to have tasks waiting for
282 * changes in the flags of extent states ever.
283 */
284 ASSERT(!waitqueue_active(&state->wq));
285 free_extent_state(state);
286
287 cond_resched_lock(&tree->lock);
288 }
289 spin_unlock(&tree->lock);
290}
291
Christoph Hellwigb2950862008-12-02 09:54:17 -0500292static struct extent_state *alloc_extent_state(gfp_t mask)
Chris Masond1310b22008-01-24 16:13:08 -0500293{
294 struct extent_state *state;
Chris Masond1310b22008-01-24 16:13:08 -0500295
Michal Hocko3ba7ab22017-01-09 15:39:02 +0100296 /*
297 * The given mask might be not appropriate for the slab allocator,
298 * drop the unsupported bits
299 */
300 mask &= ~(__GFP_DMA32|__GFP_HIGHMEM);
Chris Masond1310b22008-01-24 16:13:08 -0500301 state = kmem_cache_alloc(extent_state_cache, mask);
Peter2b114d12008-04-01 11:21:40 -0400302 if (!state)
Chris Masond1310b22008-01-24 16:13:08 -0500303 return state;
304 state->state = 0;
David Sterba47dc1962016-02-11 13:24:13 +0100305 state->failrec = NULL;
Filipe Manana27a35072014-07-06 20:09:59 +0100306 RB_CLEAR_NODE(&state->rb_node);
Eric Sandeen6d49ba12013-04-22 16:12:31 +0000307 btrfs_leak_debug_add(&state->leak_list, &states);
Elena Reshetovab7ac31b2017-03-03 10:55:19 +0200308 refcount_set(&state->refs, 1);
Chris Masond1310b22008-01-24 16:13:08 -0500309 init_waitqueue_head(&state->wq);
Jeff Mahoney143bede2012-03-01 14:56:26 +0100310 trace_alloc_extent_state(state, mask, _RET_IP_);
Chris Masond1310b22008-01-24 16:13:08 -0500311 return state;
312}
Chris Masond1310b22008-01-24 16:13:08 -0500313
Chris Mason4845e442010-05-25 20:56:50 -0400314void free_extent_state(struct extent_state *state)
Chris Masond1310b22008-01-24 16:13:08 -0500315{
Chris Masond1310b22008-01-24 16:13:08 -0500316 if (!state)
317 return;
Elena Reshetovab7ac31b2017-03-03 10:55:19 +0200318 if (refcount_dec_and_test(&state->refs)) {
Filipe Manana27a35072014-07-06 20:09:59 +0100319 WARN_ON(extent_state_in_tree(state));
Eric Sandeen6d49ba12013-04-22 16:12:31 +0000320 btrfs_leak_debug_del(&state->leak_list);
Jeff Mahoney143bede2012-03-01 14:56:26 +0100321 trace_free_extent_state(state, _RET_IP_);
Chris Masond1310b22008-01-24 16:13:08 -0500322 kmem_cache_free(extent_state_cache, state);
323 }
324}
Chris Masond1310b22008-01-24 16:13:08 -0500325
Filipe Mananaf2071b22014-02-12 15:05:53 +0000326static struct rb_node *tree_insert(struct rb_root *root,
327 struct rb_node *search_start,
328 u64 offset,
Filipe David Borba Manana12cfbad2013-11-26 15:41:47 +0000329 struct rb_node *node,
330 struct rb_node ***p_in,
331 struct rb_node **parent_in)
Chris Masond1310b22008-01-24 16:13:08 -0500332{
Filipe Mananaf2071b22014-02-12 15:05:53 +0000333 struct rb_node **p;
Chris Masond3977122009-01-05 21:25:51 -0500334 struct rb_node *parent = NULL;
Chris Masond1310b22008-01-24 16:13:08 -0500335 struct tree_entry *entry;
336
Filipe David Borba Manana12cfbad2013-11-26 15:41:47 +0000337 if (p_in && parent_in) {
338 p = *p_in;
339 parent = *parent_in;
340 goto do_insert;
341 }
342
Filipe Mananaf2071b22014-02-12 15:05:53 +0000343 p = search_start ? &search_start : &root->rb_node;
Chris Masond3977122009-01-05 21:25:51 -0500344 while (*p) {
Chris Masond1310b22008-01-24 16:13:08 -0500345 parent = *p;
346 entry = rb_entry(parent, struct tree_entry, rb_node);
347
348 if (offset < entry->start)
349 p = &(*p)->rb_left;
350 else if (offset > entry->end)
351 p = &(*p)->rb_right;
352 else
353 return parent;
354 }
355
Filipe David Borba Manana12cfbad2013-11-26 15:41:47 +0000356do_insert:
Chris Masond1310b22008-01-24 16:13:08 -0500357 rb_link_node(node, parent, p);
358 rb_insert_color(node, root);
359 return NULL;
360}
361
Nikolay Borisov8666e632019-06-05 14:50:04 +0300362/**
363 * __etree_search - searche @tree for an entry that contains @offset. Such
364 * entry would have entry->start <= offset && entry->end >= offset.
365 *
366 * @tree - the tree to search
367 * @offset - offset that should fall within an entry in @tree
368 * @next_ret - pointer to the first entry whose range ends after @offset
369 * @prev - pointer to the first entry whose range begins before @offset
370 * @p_ret - pointer where new node should be anchored (used when inserting an
371 * entry in the tree)
372 * @parent_ret - points to entry which would have been the parent of the entry,
373 * containing @offset
374 *
375 * This function returns a pointer to the entry that contains @offset byte
376 * address. If no such entry exists, then NULL is returned and the other
377 * pointer arguments to the function are filled, otherwise the found entry is
378 * returned and other pointers are left untouched.
379 */
Chris Mason80ea96b2008-02-01 14:51:59 -0500380static struct rb_node *__etree_search(struct extent_io_tree *tree, u64 offset,
Filipe David Borba Manana12cfbad2013-11-26 15:41:47 +0000381 struct rb_node **next_ret,
Nikolay Borisov352646c2019-01-30 16:51:00 +0200382 struct rb_node **prev_ret,
Filipe David Borba Manana12cfbad2013-11-26 15:41:47 +0000383 struct rb_node ***p_ret,
384 struct rb_node **parent_ret)
Chris Masond1310b22008-01-24 16:13:08 -0500385{
Chris Mason80ea96b2008-02-01 14:51:59 -0500386 struct rb_root *root = &tree->state;
Filipe David Borba Manana12cfbad2013-11-26 15:41:47 +0000387 struct rb_node **n = &root->rb_node;
Chris Masond1310b22008-01-24 16:13:08 -0500388 struct rb_node *prev = NULL;
389 struct rb_node *orig_prev = NULL;
390 struct tree_entry *entry;
391 struct tree_entry *prev_entry = NULL;
392
Filipe David Borba Manana12cfbad2013-11-26 15:41:47 +0000393 while (*n) {
394 prev = *n;
395 entry = rb_entry(prev, struct tree_entry, rb_node);
Chris Masond1310b22008-01-24 16:13:08 -0500396 prev_entry = entry;
397
398 if (offset < entry->start)
Filipe David Borba Manana12cfbad2013-11-26 15:41:47 +0000399 n = &(*n)->rb_left;
Chris Masond1310b22008-01-24 16:13:08 -0500400 else if (offset > entry->end)
Filipe David Borba Manana12cfbad2013-11-26 15:41:47 +0000401 n = &(*n)->rb_right;
Chris Masond3977122009-01-05 21:25:51 -0500402 else
Filipe David Borba Manana12cfbad2013-11-26 15:41:47 +0000403 return *n;
Chris Masond1310b22008-01-24 16:13:08 -0500404 }
405
Filipe David Borba Manana12cfbad2013-11-26 15:41:47 +0000406 if (p_ret)
407 *p_ret = n;
408 if (parent_ret)
409 *parent_ret = prev;
410
Nikolay Borisov352646c2019-01-30 16:51:00 +0200411 if (next_ret) {
Chris Masond1310b22008-01-24 16:13:08 -0500412 orig_prev = prev;
Chris Masond3977122009-01-05 21:25:51 -0500413 while (prev && offset > prev_entry->end) {
Chris Masond1310b22008-01-24 16:13:08 -0500414 prev = rb_next(prev);
415 prev_entry = rb_entry(prev, struct tree_entry, rb_node);
416 }
Nikolay Borisov352646c2019-01-30 16:51:00 +0200417 *next_ret = prev;
Chris Masond1310b22008-01-24 16:13:08 -0500418 prev = orig_prev;
419 }
420
Nikolay Borisov352646c2019-01-30 16:51:00 +0200421 if (prev_ret) {
Chris Masond1310b22008-01-24 16:13:08 -0500422 prev_entry = rb_entry(prev, struct tree_entry, rb_node);
Chris Masond3977122009-01-05 21:25:51 -0500423 while (prev && offset < prev_entry->start) {
Chris Masond1310b22008-01-24 16:13:08 -0500424 prev = rb_prev(prev);
425 prev_entry = rb_entry(prev, struct tree_entry, rb_node);
426 }
Nikolay Borisov352646c2019-01-30 16:51:00 +0200427 *prev_ret = prev;
Chris Masond1310b22008-01-24 16:13:08 -0500428 }
429 return NULL;
430}
431
Filipe David Borba Manana12cfbad2013-11-26 15:41:47 +0000432static inline struct rb_node *
433tree_search_for_insert(struct extent_io_tree *tree,
434 u64 offset,
435 struct rb_node ***p_ret,
436 struct rb_node **parent_ret)
Chris Masond1310b22008-01-24 16:13:08 -0500437{
Nikolay Borisov352646c2019-01-30 16:51:00 +0200438 struct rb_node *next= NULL;
Chris Masond1310b22008-01-24 16:13:08 -0500439 struct rb_node *ret;
Chris Mason70dec802008-01-29 09:59:12 -0500440
Nikolay Borisov352646c2019-01-30 16:51:00 +0200441 ret = __etree_search(tree, offset, &next, NULL, p_ret, parent_ret);
Chris Masond3977122009-01-05 21:25:51 -0500442 if (!ret)
Nikolay Borisov352646c2019-01-30 16:51:00 +0200443 return next;
Chris Masond1310b22008-01-24 16:13:08 -0500444 return ret;
445}
446
/*
 * Plain lookup: same result as tree_search_for_insert() but without asking
 * for the insertion slot or parent.
 */
static inline struct rb_node *tree_search(struct extent_io_tree *tree,
					  u64 offset)
{
	return tree_search_for_insert(tree, offset, NULL, NULL);
}
452
Chris Masond1310b22008-01-24 16:13:08 -0500453/*
454 * utility function to look for merge candidates inside a given range.
455 * Any extents with matching state are merged together into a single
456 * extent in the tree. Extents with EXTENT_IO in their state field
457 * are not merged because the end_io handlers need to be able to do
458 * operations on them without sleeping (or doing allocations/splits).
459 *
460 * This should be called with the tree lock held.
461 */
Jeff Mahoney1bf85042011-07-21 16:56:09 +0000462static void merge_state(struct extent_io_tree *tree,
463 struct extent_state *state)
Chris Masond1310b22008-01-24 16:13:08 -0500464{
465 struct extent_state *other;
466 struct rb_node *other_node;
467
Nikolay Borisov88826792019-03-14 15:28:31 +0200468 if (state->state & (EXTENT_LOCKED | EXTENT_BOUNDARY))
Jeff Mahoney1bf85042011-07-21 16:56:09 +0000469 return;
Chris Masond1310b22008-01-24 16:13:08 -0500470
471 other_node = rb_prev(&state->rb_node);
472 if (other_node) {
473 other = rb_entry(other_node, struct extent_state, rb_node);
474 if (other->end == state->start - 1 &&
475 other->state == state->state) {
Nikolay Borisov5c848192018-11-01 14:09:52 +0200476 if (tree->private_data &&
477 is_data_inode(tree->private_data))
478 btrfs_merge_delalloc_extent(tree->private_data,
479 state, other);
Chris Masond1310b22008-01-24 16:13:08 -0500480 state->start = other->start;
Chris Masond1310b22008-01-24 16:13:08 -0500481 rb_erase(&other->rb_node, &tree->state);
Filipe Manana27a35072014-07-06 20:09:59 +0100482 RB_CLEAR_NODE(&other->rb_node);
Chris Masond1310b22008-01-24 16:13:08 -0500483 free_extent_state(other);
484 }
485 }
486 other_node = rb_next(&state->rb_node);
487 if (other_node) {
488 other = rb_entry(other_node, struct extent_state, rb_node);
489 if (other->start == state->end + 1 &&
490 other->state == state->state) {
Nikolay Borisov5c848192018-11-01 14:09:52 +0200491 if (tree->private_data &&
492 is_data_inode(tree->private_data))
493 btrfs_merge_delalloc_extent(tree->private_data,
494 state, other);
Josef Bacikdf98b6e2011-06-20 14:53:48 -0400495 state->end = other->end;
Josef Bacikdf98b6e2011-06-20 14:53:48 -0400496 rb_erase(&other->rb_node, &tree->state);
Filipe Manana27a35072014-07-06 20:09:59 +0100497 RB_CLEAR_NODE(&other->rb_node);
Josef Bacikdf98b6e2011-06-20 14:53:48 -0400498 free_extent_state(other);
Chris Masond1310b22008-01-24 16:13:08 -0500499 }
500 }
Chris Masond1310b22008-01-24 16:13:08 -0500501}
502
/* Forward declaration: insert_state() below calls this before it is defined. */
static void set_state_bits(struct extent_io_tree *tree,
			   struct extent_state *state, unsigned *bits,
			   struct extent_changeset *changeset);
Xiao Guangrong3150b692011-07-14 03:19:08 +0000506
Chris Masond1310b22008-01-24 16:13:08 -0500507/*
508 * insert an extent_state struct into the tree. 'bits' are set on the
509 * struct before it is inserted.
510 *
511 * This may return -EEXIST if the extent is already there, in which case the
512 * state struct is freed.
513 *
514 * The tree lock is not taken internally. This is a utility function and
515 * probably isn't what you want to call (see set/clear_extent_bit).
516 */
517static int insert_state(struct extent_io_tree *tree,
518 struct extent_state *state, u64 start, u64 end,
Filipe David Borba Manana12cfbad2013-11-26 15:41:47 +0000519 struct rb_node ***p,
520 struct rb_node **parent,
Qu Wenruod38ed272015-10-12 14:53:37 +0800521 unsigned *bits, struct extent_changeset *changeset)
Chris Masond1310b22008-01-24 16:13:08 -0500522{
523 struct rb_node *node;
524
Julia Lawall31b1a2b2012-11-03 10:58:34 +0000525 if (end < start)
Frank Holtonefe120a2013-12-20 11:37:06 -0500526 WARN(1, KERN_ERR "BTRFS: end < start %llu %llu\n",
Geert Uytterhoevenc1c9ff72013-08-20 13:20:07 +0200527 end, start);
Chris Masond1310b22008-01-24 16:13:08 -0500528 state->start = start;
529 state->end = end;
Josef Bacik9ed74f22009-09-11 16:12:44 -0400530
Qu Wenruod38ed272015-10-12 14:53:37 +0800531 set_state_bits(tree, state, bits, changeset);
Xiao Guangrong3150b692011-07-14 03:19:08 +0000532
Filipe Mananaf2071b22014-02-12 15:05:53 +0000533 node = tree_insert(&tree->state, NULL, end, &state->rb_node, p, parent);
Chris Masond1310b22008-01-24 16:13:08 -0500534 if (node) {
535 struct extent_state *found;
536 found = rb_entry(node, struct extent_state, rb_node);
Jeff Mahoney62e85572016-09-20 10:05:01 -0400537 pr_err("BTRFS: found node %llu %llu on insert of %llu %llu\n",
Geert Uytterhoevenc1c9ff72013-08-20 13:20:07 +0200538 found->start, found->end, start, end);
Chris Masond1310b22008-01-24 16:13:08 -0500539 return -EEXIST;
540 }
541 merge_state(tree, state);
542 return 0;
543}
544
545/*
546 * split a given extent state struct in two, inserting the preallocated
547 * struct 'prealloc' as the newly created second half. 'split' indicates an
548 * offset inside 'orig' where it should be split.
549 *
550 * Before calling,
551 * the tree has 'orig' at [orig->start, orig->end]. After calling, there
552 * are two extent state structs in the tree:
553 * prealloc: [orig->start, split - 1]
554 * orig: [ split, orig->end ]
555 *
556 * The tree locks are not taken by this function. They need to be held
557 * by the caller.
558 */
559static int split_state(struct extent_io_tree *tree, struct extent_state *orig,
560 struct extent_state *prealloc, u64 split)
561{
562 struct rb_node *node;
Josef Bacik9ed74f22009-09-11 16:12:44 -0400563
Nikolay Borisovabbb55f2018-11-01 14:09:53 +0200564 if (tree->private_data && is_data_inode(tree->private_data))
565 btrfs_split_delalloc_extent(tree->private_data, orig, split);
Josef Bacik9ed74f22009-09-11 16:12:44 -0400566
Chris Masond1310b22008-01-24 16:13:08 -0500567 prealloc->start = orig->start;
568 prealloc->end = split - 1;
569 prealloc->state = orig->state;
570 orig->start = split;
571
Filipe Mananaf2071b22014-02-12 15:05:53 +0000572 node = tree_insert(&tree->state, &orig->rb_node, prealloc->end,
573 &prealloc->rb_node, NULL, NULL);
Chris Masond1310b22008-01-24 16:13:08 -0500574 if (node) {
Chris Masond1310b22008-01-24 16:13:08 -0500575 free_extent_state(prealloc);
576 return -EEXIST;
577 }
578 return 0;
579}
580
Li Zefancdc6a392012-03-12 16:39:48 +0800581static struct extent_state *next_state(struct extent_state *state)
582{
583 struct rb_node *next = rb_next(&state->rb_node);
584 if (next)
585 return rb_entry(next, struct extent_state, rb_node);
586 else
587 return NULL;
588}
589
Chris Masond1310b22008-01-24 16:13:08 -0500590/*
591 * utility function to clear some bits in an extent state struct.
Andrea Gelmini52042d82018-11-28 12:05:13 +0100592 * it will optionally wake up anyone waiting on this state (wake == 1).
Chris Masond1310b22008-01-24 16:13:08 -0500593 *
594 * If no bits are set on the state struct after clearing things, the
595 * struct is freed and removed from the tree
596 */
Li Zefancdc6a392012-03-12 16:39:48 +0800597static struct extent_state *clear_state_bit(struct extent_io_tree *tree,
598 struct extent_state *state,
Qu Wenruofefdc552015-10-12 15:35:38 +0800599 unsigned *bits, int wake,
600 struct extent_changeset *changeset)
Chris Masond1310b22008-01-24 16:13:08 -0500601{
Li Zefancdc6a392012-03-12 16:39:48 +0800602 struct extent_state *next;
David Sterba9ee49a042015-01-14 19:52:13 +0100603 unsigned bits_to_clear = *bits & ~EXTENT_CTLBITS;
David Sterba57599c72018-03-01 17:56:34 +0100604 int ret;
Chris Masond1310b22008-01-24 16:13:08 -0500605
Yan, Zheng0ca1f7c2010-05-16 10:48:47 -0400606 if ((bits_to_clear & EXTENT_DIRTY) && (state->state & EXTENT_DIRTY)) {
Chris Masond1310b22008-01-24 16:13:08 -0500607 u64 range = state->end - state->start + 1;
608 WARN_ON(range > tree->dirty_bytes);
609 tree->dirty_bytes -= range;
610 }
Nikolay Borisova36bb5f2018-11-01 14:09:51 +0200611
612 if (tree->private_data && is_data_inode(tree->private_data))
613 btrfs_clear_delalloc_extent(tree->private_data, state, bits);
614
David Sterba57599c72018-03-01 17:56:34 +0100615 ret = add_extent_changeset(state, bits_to_clear, changeset, 0);
616 BUG_ON(ret < 0);
Josef Bacik32c00af2009-10-08 13:34:05 -0400617 state->state &= ~bits_to_clear;
Chris Masond1310b22008-01-24 16:13:08 -0500618 if (wake)
619 wake_up(&state->wq);
Yan, Zheng0ca1f7c2010-05-16 10:48:47 -0400620 if (state->state == 0) {
Li Zefancdc6a392012-03-12 16:39:48 +0800621 next = next_state(state);
Filipe Manana27a35072014-07-06 20:09:59 +0100622 if (extent_state_in_tree(state)) {
Chris Masond1310b22008-01-24 16:13:08 -0500623 rb_erase(&state->rb_node, &tree->state);
Filipe Manana27a35072014-07-06 20:09:59 +0100624 RB_CLEAR_NODE(&state->rb_node);
Chris Masond1310b22008-01-24 16:13:08 -0500625 free_extent_state(state);
626 } else {
627 WARN_ON(1);
628 }
629 } else {
630 merge_state(tree, state);
Li Zefancdc6a392012-03-12 16:39:48 +0800631 next = next_state(state);
Chris Masond1310b22008-01-24 16:13:08 -0500632 }
Li Zefancdc6a392012-03-12 16:39:48 +0800633 return next;
Chris Masond1310b22008-01-24 16:13:08 -0500634}
635
Xiao Guangrong82337672011-04-20 06:44:57 +0000636static struct extent_state *
637alloc_extent_state_atomic(struct extent_state *prealloc)
638{
639 if (!prealloc)
640 prealloc = alloc_extent_state(GFP_ATOMIC);
641
642 return prealloc;
643}
644
Eric Sandeen48a3b632013-04-25 20:41:01 +0000645static void extent_io_tree_panic(struct extent_io_tree *tree, int err)
Jeff Mahoneyc2d904e2011-10-03 23:22:32 -0400646{
David Sterba05912a32018-07-18 19:23:45 +0200647 struct inode *inode = tree->private_data;
648
649 btrfs_panic(btrfs_sb(inode->i_sb), err,
650 "locking error: extent tree was modified by another thread while locked");
Jeff Mahoneyc2d904e2011-10-03 23:22:32 -0400651}
652
Chris Masond1310b22008-01-24 16:13:08 -0500653/*
654 * clear some bits on a range in the tree. This may require splitting
655 * or inserting elements in the tree, so the gfp mask is used to
656 * indicate which allocations or sleeping are allowed.
657 *
658 * pass 'wake' == 1 to kick any sleepers, and 'delete' == 1 to remove
659 * the given range from the tree regardless of state (ie for truncate).
660 *
661 * the range [start, end] is inclusive.
662 *
Jeff Mahoney6763af82012-03-01 14:56:29 +0100663 * This takes the tree lock, and returns 0 on success and < 0 on error.
Chris Masond1310b22008-01-24 16:13:08 -0500664 */
David Sterba66b0c882017-10-31 16:30:47 +0100665int __clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
Qu Wenruofefdc552015-10-12 15:35:38 +0800666 unsigned bits, int wake, int delete,
667 struct extent_state **cached_state,
668 gfp_t mask, struct extent_changeset *changeset)
Chris Masond1310b22008-01-24 16:13:08 -0500669{
670 struct extent_state *state;
Chris Mason2c64c532009-09-02 15:04:12 -0400671 struct extent_state *cached;
Chris Masond1310b22008-01-24 16:13:08 -0500672 struct extent_state *prealloc = NULL;
673 struct rb_node *node;
Yan Zheng5c939df2009-05-27 09:16:03 -0400674 u64 last_end;
Chris Masond1310b22008-01-24 16:13:08 -0500675 int err;
Josef Bacik2ac55d42010-02-03 19:33:23 +0000676 int clear = 0;
Chris Masond1310b22008-01-24 16:13:08 -0500677
Josef Bacika5dee372013-12-13 10:02:44 -0500678 btrfs_debug_check_extent_io_range(tree, start, end);
Qu Wenruoa1d19842019-03-01 10:48:00 +0800679 trace_btrfs_clear_extent_bit(tree, start, end - start + 1, bits);
David Sterba8d599ae2013-04-30 15:22:23 +0000680
Josef Bacik7ee9e442013-06-21 16:37:03 -0400681 if (bits & EXTENT_DELALLOC)
682 bits |= EXTENT_NORESERVE;
683
Yan, Zheng0ca1f7c2010-05-16 10:48:47 -0400684 if (delete)
685 bits |= ~EXTENT_CTLBITS;
Yan, Zheng0ca1f7c2010-05-16 10:48:47 -0400686
Nikolay Borisov88826792019-03-14 15:28:31 +0200687 if (bits & (EXTENT_LOCKED | EXTENT_BOUNDARY))
Josef Bacik2ac55d42010-02-03 19:33:23 +0000688 clear = 1;
Chris Masond1310b22008-01-24 16:13:08 -0500689again:
Mel Gormand0164ad2015-11-06 16:28:21 -0800690 if (!prealloc && gfpflags_allow_blocking(mask)) {
Filipe Mananac7bc6312014-11-03 14:12:57 +0000691 /*
692 * Don't care for allocation failure here because we might end
693 * up not needing the pre-allocated extent state at all, which
694 * is the case if we only have in the tree extent states that
695 * cover our input range and don't cover too any other range.
696 * If we end up needing a new extent state we allocate it later.
697 */
Chris Masond1310b22008-01-24 16:13:08 -0500698 prealloc = alloc_extent_state(mask);
Chris Masond1310b22008-01-24 16:13:08 -0500699 }
700
Chris Masoncad321a2008-12-17 14:51:42 -0500701 spin_lock(&tree->lock);
Chris Mason2c64c532009-09-02 15:04:12 -0400702 if (cached_state) {
703 cached = *cached_state;
Josef Bacik2ac55d42010-02-03 19:33:23 +0000704
705 if (clear) {
706 *cached_state = NULL;
707 cached_state = NULL;
708 }
709
Filipe Manana27a35072014-07-06 20:09:59 +0100710 if (cached && extent_state_in_tree(cached) &&
711 cached->start <= start && cached->end > start) {
Josef Bacik2ac55d42010-02-03 19:33:23 +0000712 if (clear)
Elena Reshetovab7ac31b2017-03-03 10:55:19 +0200713 refcount_dec(&cached->refs);
Chris Mason2c64c532009-09-02 15:04:12 -0400714 state = cached;
Chris Mason42daec22009-09-23 19:51:09 -0400715 goto hit_next;
Chris Mason2c64c532009-09-02 15:04:12 -0400716 }
Josef Bacik2ac55d42010-02-03 19:33:23 +0000717 if (clear)
718 free_extent_state(cached);
Chris Mason2c64c532009-09-02 15:04:12 -0400719 }
Chris Masond1310b22008-01-24 16:13:08 -0500720 /*
721 * this search will find the extents that end after
722 * our range starts
723 */
Chris Mason80ea96b2008-02-01 14:51:59 -0500724 node = tree_search(tree, start);
Chris Masond1310b22008-01-24 16:13:08 -0500725 if (!node)
726 goto out;
727 state = rb_entry(node, struct extent_state, rb_node);
Chris Mason2c64c532009-09-02 15:04:12 -0400728hit_next:
Chris Masond1310b22008-01-24 16:13:08 -0500729 if (state->start > end)
730 goto out;
731 WARN_ON(state->end < start);
Yan Zheng5c939df2009-05-27 09:16:03 -0400732 last_end = state->end;
Chris Masond1310b22008-01-24 16:13:08 -0500733
Liu Bo04493142012-02-16 18:34:37 +0800734 /* the state doesn't have the wanted bits, go ahead */
Li Zefancdc6a392012-03-12 16:39:48 +0800735 if (!(state->state & bits)) {
736 state = next_state(state);
Liu Bo04493142012-02-16 18:34:37 +0800737 goto next;
Li Zefancdc6a392012-03-12 16:39:48 +0800738 }
Liu Bo04493142012-02-16 18:34:37 +0800739
Chris Masond1310b22008-01-24 16:13:08 -0500740 /*
741 * | ---- desired range ---- |
742 * | state | or
743 * | ------------- state -------------- |
744 *
745 * We need to split the extent we found, and may flip
746 * bits on second half.
747 *
748 * If the extent we found extends past our range, we
749 * just split and search again. It'll get split again
750 * the next time though.
751 *
752 * If the extent we found is inside our range, we clear
753 * the desired bit on it.
754 */
755
756 if (state->start < start) {
Xiao Guangrong82337672011-04-20 06:44:57 +0000757 prealloc = alloc_extent_state_atomic(prealloc);
758 BUG_ON(!prealloc);
Chris Masond1310b22008-01-24 16:13:08 -0500759 err = split_state(tree, state, prealloc, start);
Jeff Mahoneyc2d904e2011-10-03 23:22:32 -0400760 if (err)
761 extent_io_tree_panic(tree, err);
762
Chris Masond1310b22008-01-24 16:13:08 -0500763 prealloc = NULL;
764 if (err)
765 goto out;
766 if (state->end <= end) {
Qu Wenruofefdc552015-10-12 15:35:38 +0800767 state = clear_state_bit(tree, state, &bits, wake,
768 changeset);
Liu Bod1ac6e42012-05-10 18:10:39 +0800769 goto next;
Chris Masond1310b22008-01-24 16:13:08 -0500770 }
771 goto search_again;
772 }
773 /*
774 * | ---- desired range ---- |
775 * | state |
776 * We need to split the extent, and clear the bit
777 * on the first half
778 */
779 if (state->start <= end && state->end > end) {
Xiao Guangrong82337672011-04-20 06:44:57 +0000780 prealloc = alloc_extent_state_atomic(prealloc);
781 BUG_ON(!prealloc);
Chris Masond1310b22008-01-24 16:13:08 -0500782 err = split_state(tree, state, prealloc, end + 1);
Jeff Mahoneyc2d904e2011-10-03 23:22:32 -0400783 if (err)
784 extent_io_tree_panic(tree, err);
785
Chris Masond1310b22008-01-24 16:13:08 -0500786 if (wake)
787 wake_up(&state->wq);
Chris Mason42daec22009-09-23 19:51:09 -0400788
Qu Wenruofefdc552015-10-12 15:35:38 +0800789 clear_state_bit(tree, prealloc, &bits, wake, changeset);
Josef Bacik9ed74f22009-09-11 16:12:44 -0400790
Chris Masond1310b22008-01-24 16:13:08 -0500791 prealloc = NULL;
792 goto out;
793 }
Chris Mason42daec22009-09-23 19:51:09 -0400794
Qu Wenruofefdc552015-10-12 15:35:38 +0800795 state = clear_state_bit(tree, state, &bits, wake, changeset);
Liu Bo04493142012-02-16 18:34:37 +0800796next:
Yan Zheng5c939df2009-05-27 09:16:03 -0400797 if (last_end == (u64)-1)
798 goto out;
799 start = last_end + 1;
Li Zefancdc6a392012-03-12 16:39:48 +0800800 if (start <= end && state && !need_resched())
Liu Bo692e5752012-02-16 18:34:36 +0800801 goto hit_next;
Chris Masond1310b22008-01-24 16:13:08 -0500802
803search_again:
804 if (start > end)
805 goto out;
Chris Masoncad321a2008-12-17 14:51:42 -0500806 spin_unlock(&tree->lock);
Mel Gormand0164ad2015-11-06 16:28:21 -0800807 if (gfpflags_allow_blocking(mask))
Chris Masond1310b22008-01-24 16:13:08 -0500808 cond_resched();
809 goto again;
David Sterba7ab5cb22016-04-27 01:02:15 +0200810
811out:
812 spin_unlock(&tree->lock);
813 if (prealloc)
814 free_extent_state(prealloc);
815
816 return 0;
817
Chris Masond1310b22008-01-24 16:13:08 -0500818}
Chris Masond1310b22008-01-24 16:13:08 -0500819
Jeff Mahoney143bede2012-03-01 14:56:26 +0100820static void wait_on_state(struct extent_io_tree *tree,
821 struct extent_state *state)
Christoph Hellwig641f5212008-12-02 06:36:10 -0500822 __releases(tree->lock)
823 __acquires(tree->lock)
Chris Masond1310b22008-01-24 16:13:08 -0500824{
825 DEFINE_WAIT(wait);
826 prepare_to_wait(&state->wq, &wait, TASK_UNINTERRUPTIBLE);
Chris Masoncad321a2008-12-17 14:51:42 -0500827 spin_unlock(&tree->lock);
Chris Masond1310b22008-01-24 16:13:08 -0500828 schedule();
Chris Masoncad321a2008-12-17 14:51:42 -0500829 spin_lock(&tree->lock);
Chris Masond1310b22008-01-24 16:13:08 -0500830 finish_wait(&state->wq, &wait);
Chris Masond1310b22008-01-24 16:13:08 -0500831}
832
833/*
834 * waits for one or more bits to clear on a range in the state tree.
835 * The range [start, end] is inclusive.
836 * The tree lock is taken by this function
837 */
David Sterba41074882013-04-29 13:38:46 +0000838static void wait_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
839 unsigned long bits)
Chris Masond1310b22008-01-24 16:13:08 -0500840{
841 struct extent_state *state;
842 struct rb_node *node;
843
Josef Bacika5dee372013-12-13 10:02:44 -0500844 btrfs_debug_check_extent_io_range(tree, start, end);
David Sterba8d599ae2013-04-30 15:22:23 +0000845
Chris Masoncad321a2008-12-17 14:51:42 -0500846 spin_lock(&tree->lock);
Chris Masond1310b22008-01-24 16:13:08 -0500847again:
848 while (1) {
849 /*
850 * this search will find all the extents that end after
851 * our range starts
852 */
Chris Mason80ea96b2008-02-01 14:51:59 -0500853 node = tree_search(tree, start);
Filipe Mananac50d3e72014-03-31 14:53:25 +0100854process_node:
Chris Masond1310b22008-01-24 16:13:08 -0500855 if (!node)
856 break;
857
858 state = rb_entry(node, struct extent_state, rb_node);
859
860 if (state->start > end)
861 goto out;
862
863 if (state->state & bits) {
864 start = state->start;
Elena Reshetovab7ac31b2017-03-03 10:55:19 +0200865 refcount_inc(&state->refs);
Chris Masond1310b22008-01-24 16:13:08 -0500866 wait_on_state(tree, state);
867 free_extent_state(state);
868 goto again;
869 }
870 start = state->end + 1;
871
872 if (start > end)
873 break;
874
Filipe Mananac50d3e72014-03-31 14:53:25 +0100875 if (!cond_resched_lock(&tree->lock)) {
876 node = rb_next(node);
877 goto process_node;
878 }
Chris Masond1310b22008-01-24 16:13:08 -0500879 }
880out:
Chris Masoncad321a2008-12-17 14:51:42 -0500881 spin_unlock(&tree->lock);
Chris Masond1310b22008-01-24 16:13:08 -0500882}
Chris Masond1310b22008-01-24 16:13:08 -0500883
Jeff Mahoney1bf85042011-07-21 16:56:09 +0000884static void set_state_bits(struct extent_io_tree *tree,
Chris Masond1310b22008-01-24 16:13:08 -0500885 struct extent_state *state,
Qu Wenruod38ed272015-10-12 14:53:37 +0800886 unsigned *bits, struct extent_changeset *changeset)
Chris Masond1310b22008-01-24 16:13:08 -0500887{
David Sterba9ee49a042015-01-14 19:52:13 +0100888 unsigned bits_to_set = *bits & ~EXTENT_CTLBITS;
David Sterba57599c72018-03-01 17:56:34 +0100889 int ret;
Josef Bacik9ed74f22009-09-11 16:12:44 -0400890
Nikolay Borisove06a1fc2018-11-01 14:09:50 +0200891 if (tree->private_data && is_data_inode(tree->private_data))
892 btrfs_set_delalloc_extent(tree->private_data, state, bits);
893
Yan, Zheng0ca1f7c2010-05-16 10:48:47 -0400894 if ((bits_to_set & EXTENT_DIRTY) && !(state->state & EXTENT_DIRTY)) {
Chris Masond1310b22008-01-24 16:13:08 -0500895 u64 range = state->end - state->start + 1;
896 tree->dirty_bytes += range;
897 }
David Sterba57599c72018-03-01 17:56:34 +0100898 ret = add_extent_changeset(state, bits_to_set, changeset, 1);
899 BUG_ON(ret < 0);
Yan, Zheng0ca1f7c2010-05-16 10:48:47 -0400900 state->state |= bits_to_set;
Chris Masond1310b22008-01-24 16:13:08 -0500901}
902
Filipe Mananae38e2ed2014-10-13 12:28:38 +0100903static void cache_state_if_flags(struct extent_state *state,
904 struct extent_state **cached_ptr,
David Sterba9ee49a042015-01-14 19:52:13 +0100905 unsigned flags)
Chris Mason2c64c532009-09-02 15:04:12 -0400906{
907 if (cached_ptr && !(*cached_ptr)) {
Filipe Mananae38e2ed2014-10-13 12:28:38 +0100908 if (!flags || (state->state & flags)) {
Chris Mason2c64c532009-09-02 15:04:12 -0400909 *cached_ptr = state;
Elena Reshetovab7ac31b2017-03-03 10:55:19 +0200910 refcount_inc(&state->refs);
Chris Mason2c64c532009-09-02 15:04:12 -0400911 }
912 }
913}
914
Filipe Mananae38e2ed2014-10-13 12:28:38 +0100915static void cache_state(struct extent_state *state,
916 struct extent_state **cached_ptr)
917{
918 return cache_state_if_flags(state, cached_ptr,
Nikolay Borisov88826792019-03-14 15:28:31 +0200919 EXTENT_LOCKED | EXTENT_BOUNDARY);
Filipe Mananae38e2ed2014-10-13 12:28:38 +0100920}
921
Chris Masond1310b22008-01-24 16:13:08 -0500922/*
Chris Mason1edbb732009-09-02 13:24:36 -0400923 * set some bits on a range in the tree. This may require allocations or
924 * sleeping, so the gfp mask is used to indicate what is allowed.
Chris Masond1310b22008-01-24 16:13:08 -0500925 *
Chris Mason1edbb732009-09-02 13:24:36 -0400926 * If any of the exclusive bits are set, this will fail with -EEXIST if some
927 * part of the range already has the desired bits set. The start of the
928 * existing range is returned in failed_start in this case.
Chris Masond1310b22008-01-24 16:13:08 -0500929 *
Chris Mason1edbb732009-09-02 13:24:36 -0400930 * [start, end] is inclusive This takes the tree lock.
Chris Masond1310b22008-01-24 16:13:08 -0500931 */
Chris Mason1edbb732009-09-02 13:24:36 -0400932
Jeff Mahoney3fbe5c02012-03-01 14:57:19 +0100933static int __must_check
934__set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
David Sterba9ee49a042015-01-14 19:52:13 +0100935 unsigned bits, unsigned exclusive_bits,
David Sterba41074882013-04-29 13:38:46 +0000936 u64 *failed_start, struct extent_state **cached_state,
Qu Wenruod38ed272015-10-12 14:53:37 +0800937 gfp_t mask, struct extent_changeset *changeset)
Chris Masond1310b22008-01-24 16:13:08 -0500938{
939 struct extent_state *state;
940 struct extent_state *prealloc = NULL;
941 struct rb_node *node;
Filipe David Borba Manana12cfbad2013-11-26 15:41:47 +0000942 struct rb_node **p;
943 struct rb_node *parent;
Chris Masond1310b22008-01-24 16:13:08 -0500944 int err = 0;
Chris Masond1310b22008-01-24 16:13:08 -0500945 u64 last_start;
946 u64 last_end;
Chris Mason42daec22009-09-23 19:51:09 -0400947
Josef Bacika5dee372013-12-13 10:02:44 -0500948 btrfs_debug_check_extent_io_range(tree, start, end);
Qu Wenruoa1d19842019-03-01 10:48:00 +0800949 trace_btrfs_set_extent_bit(tree, start, end - start + 1, bits);
David Sterba8d599ae2013-04-30 15:22:23 +0000950
Chris Masond1310b22008-01-24 16:13:08 -0500951again:
Mel Gormand0164ad2015-11-06 16:28:21 -0800952 if (!prealloc && gfpflags_allow_blocking(mask)) {
David Sterba059f7912016-04-27 01:03:45 +0200953 /*
954 * Don't care for allocation failure here because we might end
955 * up not needing the pre-allocated extent state at all, which
956 * is the case if we only have in the tree extent states that
957 * cover our input range and don't cover too any other range.
958 * If we end up needing a new extent state we allocate it later.
959 */
Chris Masond1310b22008-01-24 16:13:08 -0500960 prealloc = alloc_extent_state(mask);
Chris Masond1310b22008-01-24 16:13:08 -0500961 }
962
Chris Masoncad321a2008-12-17 14:51:42 -0500963 spin_lock(&tree->lock);
Chris Mason9655d292009-09-02 15:22:30 -0400964 if (cached_state && *cached_state) {
965 state = *cached_state;
Josef Bacikdf98b6e2011-06-20 14:53:48 -0400966 if (state->start <= start && state->end > start &&
Filipe Manana27a35072014-07-06 20:09:59 +0100967 extent_state_in_tree(state)) {
Chris Mason9655d292009-09-02 15:22:30 -0400968 node = &state->rb_node;
969 goto hit_next;
970 }
971 }
Chris Masond1310b22008-01-24 16:13:08 -0500972 /*
973 * this search will find all the extents that end after
974 * our range starts.
975 */
Filipe David Borba Manana12cfbad2013-11-26 15:41:47 +0000976 node = tree_search_for_insert(tree, start, &p, &parent);
Chris Masond1310b22008-01-24 16:13:08 -0500977 if (!node) {
Xiao Guangrong82337672011-04-20 06:44:57 +0000978 prealloc = alloc_extent_state_atomic(prealloc);
979 BUG_ON(!prealloc);
Filipe David Borba Manana12cfbad2013-11-26 15:41:47 +0000980 err = insert_state(tree, prealloc, start, end,
Qu Wenruod38ed272015-10-12 14:53:37 +0800981 &p, &parent, &bits, changeset);
Jeff Mahoneyc2d904e2011-10-03 23:22:32 -0400982 if (err)
983 extent_io_tree_panic(tree, err);
984
Filipe David Borba Mananac42ac0b2013-11-26 15:01:34 +0000985 cache_state(prealloc, cached_state);
Chris Masond1310b22008-01-24 16:13:08 -0500986 prealloc = NULL;
Chris Masond1310b22008-01-24 16:13:08 -0500987 goto out;
988 }
Chris Masond1310b22008-01-24 16:13:08 -0500989 state = rb_entry(node, struct extent_state, rb_node);
Chris Mason40431d62009-08-05 12:57:59 -0400990hit_next:
Chris Masond1310b22008-01-24 16:13:08 -0500991 last_start = state->start;
992 last_end = state->end;
993
994 /*
995 * | ---- desired range ---- |
996 * | state |
997 *
998 * Just lock what we found and keep going
999 */
1000 if (state->start == start && state->end <= end) {
Chris Mason1edbb732009-09-02 13:24:36 -04001001 if (state->state & exclusive_bits) {
Chris Masond1310b22008-01-24 16:13:08 -05001002 *failed_start = state->start;
1003 err = -EEXIST;
1004 goto out;
1005 }
Chris Mason42daec22009-09-23 19:51:09 -04001006
Qu Wenruod38ed272015-10-12 14:53:37 +08001007 set_state_bits(tree, state, &bits, changeset);
Chris Mason2c64c532009-09-02 15:04:12 -04001008 cache_state(state, cached_state);
Chris Masond1310b22008-01-24 16:13:08 -05001009 merge_state(tree, state);
Yan Zheng5c939df2009-05-27 09:16:03 -04001010 if (last_end == (u64)-1)
1011 goto out;
1012 start = last_end + 1;
Liu Bod1ac6e42012-05-10 18:10:39 +08001013 state = next_state(state);
1014 if (start < end && state && state->start == start &&
1015 !need_resched())
1016 goto hit_next;
Chris Masond1310b22008-01-24 16:13:08 -05001017 goto search_again;
1018 }
1019
1020 /*
1021 * | ---- desired range ---- |
1022 * | state |
1023 * or
1024 * | ------------- state -------------- |
1025 *
1026 * We need to split the extent we found, and may flip bits on
1027 * second half.
1028 *
1029 * If the extent we found extends past our
1030 * range, we just split and search again. It'll get split
1031 * again the next time though.
1032 *
1033 * If the extent we found is inside our range, we set the
1034 * desired bit on it.
1035 */
1036 if (state->start < start) {
Chris Mason1edbb732009-09-02 13:24:36 -04001037 if (state->state & exclusive_bits) {
Chris Masond1310b22008-01-24 16:13:08 -05001038 *failed_start = start;
1039 err = -EEXIST;
1040 goto out;
1041 }
Xiao Guangrong82337672011-04-20 06:44:57 +00001042
1043 prealloc = alloc_extent_state_atomic(prealloc);
1044 BUG_ON(!prealloc);
Chris Masond1310b22008-01-24 16:13:08 -05001045 err = split_state(tree, state, prealloc, start);
Jeff Mahoneyc2d904e2011-10-03 23:22:32 -04001046 if (err)
1047 extent_io_tree_panic(tree, err);
1048
Chris Masond1310b22008-01-24 16:13:08 -05001049 prealloc = NULL;
1050 if (err)
1051 goto out;
1052 if (state->end <= end) {
Qu Wenruod38ed272015-10-12 14:53:37 +08001053 set_state_bits(tree, state, &bits, changeset);
Chris Mason2c64c532009-09-02 15:04:12 -04001054 cache_state(state, cached_state);
Chris Masond1310b22008-01-24 16:13:08 -05001055 merge_state(tree, state);
Yan Zheng5c939df2009-05-27 09:16:03 -04001056 if (last_end == (u64)-1)
1057 goto out;
1058 start = last_end + 1;
Liu Bod1ac6e42012-05-10 18:10:39 +08001059 state = next_state(state);
1060 if (start < end && state && state->start == start &&
1061 !need_resched())
1062 goto hit_next;
Chris Masond1310b22008-01-24 16:13:08 -05001063 }
1064 goto search_again;
1065 }
1066 /*
1067 * | ---- desired range ---- |
1068 * | state | or | state |
1069 *
1070 * There's a hole, we need to insert something in it and
1071 * ignore the extent we found.
1072 */
1073 if (state->start > start) {
1074 u64 this_end;
1075 if (end < last_start)
1076 this_end = end;
1077 else
Chris Masond3977122009-01-05 21:25:51 -05001078 this_end = last_start - 1;
Xiao Guangrong82337672011-04-20 06:44:57 +00001079
1080 prealloc = alloc_extent_state_atomic(prealloc);
1081 BUG_ON(!prealloc);
Xiao Guangrongc7f895a2011-04-20 06:45:49 +00001082
1083 /*
1084 * Avoid to free 'prealloc' if it can be merged with
1085 * the later extent.
1086 */
Chris Masond1310b22008-01-24 16:13:08 -05001087 err = insert_state(tree, prealloc, start, this_end,
Qu Wenruod38ed272015-10-12 14:53:37 +08001088 NULL, NULL, &bits, changeset);
Jeff Mahoneyc2d904e2011-10-03 23:22:32 -04001089 if (err)
1090 extent_io_tree_panic(tree, err);
1091
Chris Mason2c64c532009-09-02 15:04:12 -04001092 cache_state(prealloc, cached_state);
Chris Masond1310b22008-01-24 16:13:08 -05001093 prealloc = NULL;
Chris Masond1310b22008-01-24 16:13:08 -05001094 start = this_end + 1;
1095 goto search_again;
1096 }
1097 /*
1098 * | ---- desired range ---- |
1099 * | state |
1100 * We need to split the extent, and set the bit
1101 * on the first half
1102 */
1103 if (state->start <= end && state->end > end) {
Chris Mason1edbb732009-09-02 13:24:36 -04001104 if (state->state & exclusive_bits) {
Chris Masond1310b22008-01-24 16:13:08 -05001105 *failed_start = start;
1106 err = -EEXIST;
1107 goto out;
1108 }
Xiao Guangrong82337672011-04-20 06:44:57 +00001109
1110 prealloc = alloc_extent_state_atomic(prealloc);
1111 BUG_ON(!prealloc);
Chris Masond1310b22008-01-24 16:13:08 -05001112 err = split_state(tree, state, prealloc, end + 1);
Jeff Mahoneyc2d904e2011-10-03 23:22:32 -04001113 if (err)
1114 extent_io_tree_panic(tree, err);
Chris Masond1310b22008-01-24 16:13:08 -05001115
Qu Wenruod38ed272015-10-12 14:53:37 +08001116 set_state_bits(tree, prealloc, &bits, changeset);
Chris Mason2c64c532009-09-02 15:04:12 -04001117 cache_state(prealloc, cached_state);
Chris Masond1310b22008-01-24 16:13:08 -05001118 merge_state(tree, prealloc);
1119 prealloc = NULL;
1120 goto out;
1121 }
1122
David Sterbab5a4ba142016-04-27 01:02:15 +02001123search_again:
1124 if (start > end)
1125 goto out;
1126 spin_unlock(&tree->lock);
1127 if (gfpflags_allow_blocking(mask))
1128 cond_resched();
1129 goto again;
Chris Masond1310b22008-01-24 16:13:08 -05001130
1131out:
Chris Masoncad321a2008-12-17 14:51:42 -05001132 spin_unlock(&tree->lock);
Chris Masond1310b22008-01-24 16:13:08 -05001133 if (prealloc)
1134 free_extent_state(prealloc);
1135
1136 return err;
1137
Chris Masond1310b22008-01-24 16:13:08 -05001138}
Chris Masond1310b22008-01-24 16:13:08 -05001139
David Sterba41074882013-04-29 13:38:46 +00001140int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
David Sterba9ee49a042015-01-14 19:52:13 +01001141 unsigned bits, u64 * failed_start,
David Sterba41074882013-04-29 13:38:46 +00001142 struct extent_state **cached_state, gfp_t mask)
Jeff Mahoney3fbe5c02012-03-01 14:57:19 +01001143{
1144 return __set_extent_bit(tree, start, end, bits, 0, failed_start,
Qu Wenruod38ed272015-10-12 14:53:37 +08001145 cached_state, mask, NULL);
Jeff Mahoney3fbe5c02012-03-01 14:57:19 +01001146}
1147
1148
Josef Bacik462d6fa2011-09-26 13:56:12 -04001149/**
Liu Bo10983f22012-07-11 15:26:19 +08001150 * convert_extent_bit - convert all bits in a given range from one bit to
1151 * another
Josef Bacik462d6fa2011-09-26 13:56:12 -04001152 * @tree: the io tree to search
1153 * @start: the start offset in bytes
1154 * @end: the end offset in bytes (inclusive)
1155 * @bits: the bits to set in this range
1156 * @clear_bits: the bits to clear in this range
Josef Bacike6138872012-09-27 17:07:30 -04001157 * @cached_state: state that we're going to cache
Josef Bacik462d6fa2011-09-26 13:56:12 -04001158 *
1159 * This will go through and set bits for the given range. If any states exist
1160 * already in this range they are set with the given bit and cleared of the
1161 * clear_bits. This is only meant to be used by things that are mergeable, ie
1162 * converting from say DELALLOC to DIRTY. This is not meant to be used with
1163 * boundary bits like LOCK.
David Sterba210aa272016-04-26 23:54:39 +02001164 *
1165 * All allocations are done with GFP_NOFS.
Josef Bacik462d6fa2011-09-26 13:56:12 -04001166 */
1167int convert_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
David Sterba9ee49a042015-01-14 19:52:13 +01001168 unsigned bits, unsigned clear_bits,
David Sterba210aa272016-04-26 23:54:39 +02001169 struct extent_state **cached_state)
Josef Bacik462d6fa2011-09-26 13:56:12 -04001170{
1171 struct extent_state *state;
1172 struct extent_state *prealloc = NULL;
1173 struct rb_node *node;
Filipe David Borba Manana12cfbad2013-11-26 15:41:47 +00001174 struct rb_node **p;
1175 struct rb_node *parent;
Josef Bacik462d6fa2011-09-26 13:56:12 -04001176 int err = 0;
1177 u64 last_start;
1178 u64 last_end;
Filipe Mananac8fd3de2014-10-13 12:28:39 +01001179 bool first_iteration = true;
Josef Bacik462d6fa2011-09-26 13:56:12 -04001180
Josef Bacika5dee372013-12-13 10:02:44 -05001181 btrfs_debug_check_extent_io_range(tree, start, end);
Qu Wenruoa1d19842019-03-01 10:48:00 +08001182 trace_btrfs_convert_extent_bit(tree, start, end - start + 1, bits,
1183 clear_bits);
David Sterba8d599ae2013-04-30 15:22:23 +00001184
Josef Bacik462d6fa2011-09-26 13:56:12 -04001185again:
David Sterba210aa272016-04-26 23:54:39 +02001186 if (!prealloc) {
Filipe Mananac8fd3de2014-10-13 12:28:39 +01001187 /*
1188 * Best effort, don't worry if extent state allocation fails
1189 * here for the first iteration. We might have a cached state
1190 * that matches exactly the target range, in which case no
1191 * extent state allocations are needed. We'll only know this
1192 * after locking the tree.
1193 */
David Sterba210aa272016-04-26 23:54:39 +02001194 prealloc = alloc_extent_state(GFP_NOFS);
Filipe Mananac8fd3de2014-10-13 12:28:39 +01001195 if (!prealloc && !first_iteration)
Josef Bacik462d6fa2011-09-26 13:56:12 -04001196 return -ENOMEM;
1197 }
1198
1199 spin_lock(&tree->lock);
Josef Bacike6138872012-09-27 17:07:30 -04001200 if (cached_state && *cached_state) {
1201 state = *cached_state;
1202 if (state->start <= start && state->end > start &&
Filipe Manana27a35072014-07-06 20:09:59 +01001203 extent_state_in_tree(state)) {
Josef Bacike6138872012-09-27 17:07:30 -04001204 node = &state->rb_node;
1205 goto hit_next;
1206 }
1207 }
1208
Josef Bacik462d6fa2011-09-26 13:56:12 -04001209 /*
1210 * this search will find all the extents that end after
1211 * our range starts.
1212 */
Filipe David Borba Manana12cfbad2013-11-26 15:41:47 +00001213 node = tree_search_for_insert(tree, start, &p, &parent);
Josef Bacik462d6fa2011-09-26 13:56:12 -04001214 if (!node) {
1215 prealloc = alloc_extent_state_atomic(prealloc);
Liu Bo1cf4ffd2011-12-07 20:08:40 -05001216 if (!prealloc) {
1217 err = -ENOMEM;
1218 goto out;
1219 }
Filipe David Borba Manana12cfbad2013-11-26 15:41:47 +00001220 err = insert_state(tree, prealloc, start, end,
Qu Wenruod38ed272015-10-12 14:53:37 +08001221 &p, &parent, &bits, NULL);
Jeff Mahoneyc2d904e2011-10-03 23:22:32 -04001222 if (err)
1223 extent_io_tree_panic(tree, err);
Filipe David Borba Mananac42ac0b2013-11-26 15:01:34 +00001224 cache_state(prealloc, cached_state);
1225 prealloc = NULL;
Josef Bacik462d6fa2011-09-26 13:56:12 -04001226 goto out;
1227 }
1228 state = rb_entry(node, struct extent_state, rb_node);
1229hit_next:
1230 last_start = state->start;
1231 last_end = state->end;
1232
1233 /*
1234 * | ---- desired range ---- |
1235 * | state |
1236 *
1237 * Just lock what we found and keep going
1238 */
1239 if (state->start == start && state->end <= end) {
Qu Wenruod38ed272015-10-12 14:53:37 +08001240 set_state_bits(tree, state, &bits, NULL);
Josef Bacike6138872012-09-27 17:07:30 -04001241 cache_state(state, cached_state);
Qu Wenruofefdc552015-10-12 15:35:38 +08001242 state = clear_state_bit(tree, state, &clear_bits, 0, NULL);
Josef Bacik462d6fa2011-09-26 13:56:12 -04001243 if (last_end == (u64)-1)
1244 goto out;
Josef Bacik462d6fa2011-09-26 13:56:12 -04001245 start = last_end + 1;
Liu Bod1ac6e42012-05-10 18:10:39 +08001246 if (start < end && state && state->start == start &&
1247 !need_resched())
1248 goto hit_next;
Josef Bacik462d6fa2011-09-26 13:56:12 -04001249 goto search_again;
1250 }
1251
1252 /*
1253 * | ---- desired range ---- |
1254 * | state |
1255 * or
1256 * | ------------- state -------------- |
1257 *
1258 * We need to split the extent we found, and may flip bits on
1259 * second half.
1260 *
1261 * If the extent we found extends past our
1262 * range, we just split and search again. It'll get split
1263 * again the next time though.
1264 *
1265 * If the extent we found is inside our range, we set the
1266 * desired bit on it.
1267 */
1268 if (state->start < start) {
1269 prealloc = alloc_extent_state_atomic(prealloc);
Liu Bo1cf4ffd2011-12-07 20:08:40 -05001270 if (!prealloc) {
1271 err = -ENOMEM;
1272 goto out;
1273 }
Josef Bacik462d6fa2011-09-26 13:56:12 -04001274 err = split_state(tree, state, prealloc, start);
Jeff Mahoneyc2d904e2011-10-03 23:22:32 -04001275 if (err)
1276 extent_io_tree_panic(tree, err);
Josef Bacik462d6fa2011-09-26 13:56:12 -04001277 prealloc = NULL;
1278 if (err)
1279 goto out;
1280 if (state->end <= end) {
Qu Wenruod38ed272015-10-12 14:53:37 +08001281 set_state_bits(tree, state, &bits, NULL);
Josef Bacike6138872012-09-27 17:07:30 -04001282 cache_state(state, cached_state);
Qu Wenruofefdc552015-10-12 15:35:38 +08001283 state = clear_state_bit(tree, state, &clear_bits, 0,
1284 NULL);
Josef Bacik462d6fa2011-09-26 13:56:12 -04001285 if (last_end == (u64)-1)
1286 goto out;
1287 start = last_end + 1;
Liu Bod1ac6e42012-05-10 18:10:39 +08001288 if (start < end && state && state->start == start &&
1289 !need_resched())
1290 goto hit_next;
Josef Bacik462d6fa2011-09-26 13:56:12 -04001291 }
1292 goto search_again;
1293 }
1294 /*
1295 * | ---- desired range ---- |
1296 * | state | or | state |
1297 *
1298 * There's a hole, we need to insert something in it and
1299 * ignore the extent we found.
1300 */
1301 if (state->start > start) {
1302 u64 this_end;
1303 if (end < last_start)
1304 this_end = end;
1305 else
1306 this_end = last_start - 1;
1307
1308 prealloc = alloc_extent_state_atomic(prealloc);
Liu Bo1cf4ffd2011-12-07 20:08:40 -05001309 if (!prealloc) {
1310 err = -ENOMEM;
1311 goto out;
1312 }
Josef Bacik462d6fa2011-09-26 13:56:12 -04001313
1314 /*
1315 * Avoid to free 'prealloc' if it can be merged with
1316 * the later extent.
1317 */
1318 err = insert_state(tree, prealloc, start, this_end,
Qu Wenruod38ed272015-10-12 14:53:37 +08001319 NULL, NULL, &bits, NULL);
Jeff Mahoneyc2d904e2011-10-03 23:22:32 -04001320 if (err)
1321 extent_io_tree_panic(tree, err);
Josef Bacike6138872012-09-27 17:07:30 -04001322 cache_state(prealloc, cached_state);
Josef Bacik462d6fa2011-09-26 13:56:12 -04001323 prealloc = NULL;
1324 start = this_end + 1;
1325 goto search_again;
1326 }
1327 /*
1328 * | ---- desired range ---- |
1329 * | state |
1330 * We need to split the extent, and set the bit
1331 * on the first half
1332 */
1333 if (state->start <= end && state->end > end) {
1334 prealloc = alloc_extent_state_atomic(prealloc);
Liu Bo1cf4ffd2011-12-07 20:08:40 -05001335 if (!prealloc) {
1336 err = -ENOMEM;
1337 goto out;
1338 }
Josef Bacik462d6fa2011-09-26 13:56:12 -04001339
1340 err = split_state(tree, state, prealloc, end + 1);
Jeff Mahoneyc2d904e2011-10-03 23:22:32 -04001341 if (err)
1342 extent_io_tree_panic(tree, err);
Josef Bacik462d6fa2011-09-26 13:56:12 -04001343
Qu Wenruod38ed272015-10-12 14:53:37 +08001344 set_state_bits(tree, prealloc, &bits, NULL);
Josef Bacike6138872012-09-27 17:07:30 -04001345 cache_state(prealloc, cached_state);
Qu Wenruofefdc552015-10-12 15:35:38 +08001346 clear_state_bit(tree, prealloc, &clear_bits, 0, NULL);
Josef Bacik462d6fa2011-09-26 13:56:12 -04001347 prealloc = NULL;
1348 goto out;
1349 }
1350
Josef Bacik462d6fa2011-09-26 13:56:12 -04001351search_again:
1352 if (start > end)
1353 goto out;
1354 spin_unlock(&tree->lock);
David Sterba210aa272016-04-26 23:54:39 +02001355 cond_resched();
Filipe Mananac8fd3de2014-10-13 12:28:39 +01001356 first_iteration = false;
Josef Bacik462d6fa2011-09-26 13:56:12 -04001357 goto again;
Josef Bacik462d6fa2011-09-26 13:56:12 -04001358
1359out:
1360 spin_unlock(&tree->lock);
1361 if (prealloc)
1362 free_extent_state(prealloc);
1363
1364 return err;
1365}
1366
Chris Masond1310b22008-01-24 16:13:08 -05001367/* wrappers around set/clear extent bit */
Qu Wenruod38ed272015-10-12 14:53:37 +08001368int set_record_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
David Sterba2c53b912016-04-26 23:54:39 +02001369 unsigned bits, struct extent_changeset *changeset)
Qu Wenruod38ed272015-10-12 14:53:37 +08001370{
1371 /*
1372 * We don't support EXTENT_LOCKED yet, as current changeset will
1373 * record any bits changed, so for EXTENT_LOCKED case, it will
1374 * either fail with -EEXIST or changeset will record the whole
1375 * range.
1376 */
1377 BUG_ON(bits & EXTENT_LOCKED);
1378
David Sterba2c53b912016-04-26 23:54:39 +02001379 return __set_extent_bit(tree, start, end, bits, 0, NULL, NULL, GFP_NOFS,
Qu Wenruod38ed272015-10-12 14:53:37 +08001380 changeset);
1381}
1382
Nikolay Borisov4ca73652019-03-27 14:24:10 +02001383int set_extent_bits_nowait(struct extent_io_tree *tree, u64 start, u64 end,
1384 unsigned bits)
1385{
1386 return __set_extent_bit(tree, start, end, bits, 0, NULL, NULL,
1387 GFP_NOWAIT, NULL);
1388}
1389
Qu Wenruofefdc552015-10-12 15:35:38 +08001390int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
1391 unsigned bits, int wake, int delete,
David Sterbaae0f1622017-10-31 16:37:52 +01001392 struct extent_state **cached)
Qu Wenruofefdc552015-10-12 15:35:38 +08001393{
1394 return __clear_extent_bit(tree, start, end, bits, wake, delete,
David Sterbaae0f1622017-10-31 16:37:52 +01001395 cached, GFP_NOFS, NULL);
Qu Wenruofefdc552015-10-12 15:35:38 +08001396}
1397
Qu Wenruofefdc552015-10-12 15:35:38 +08001398int clear_record_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
David Sterbaf734c442016-04-26 23:54:39 +02001399 unsigned bits, struct extent_changeset *changeset)
Qu Wenruofefdc552015-10-12 15:35:38 +08001400{
1401 /*
1402 * Don't support EXTENT_LOCKED case, same reason as
1403 * set_record_extent_bits().
1404 */
1405 BUG_ON(bits & EXTENT_LOCKED);
1406
David Sterbaf734c442016-04-26 23:54:39 +02001407 return __clear_extent_bit(tree, start, end, bits, 0, 0, NULL, GFP_NOFS,
Qu Wenruofefdc552015-10-12 15:35:38 +08001408 changeset);
1409}
1410
Chris Masond352ac62008-09-29 15:18:18 -04001411/*
1412 * either insert or lock state struct between start and end use mask to tell
1413 * us if waiting is desired.
1414 */
Chris Mason1edbb732009-09-02 13:24:36 -04001415int lock_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
David Sterbaff13db42015-12-03 14:30:40 +01001416 struct extent_state **cached_state)
Chris Masond1310b22008-01-24 16:13:08 -05001417{
1418 int err;
1419 u64 failed_start;
David Sterba9ee49a042015-01-14 19:52:13 +01001420
Chris Masond1310b22008-01-24 16:13:08 -05001421 while (1) {
David Sterbaff13db42015-12-03 14:30:40 +01001422 err = __set_extent_bit(tree, start, end, EXTENT_LOCKED,
Jeff Mahoney3fbe5c02012-03-01 14:57:19 +01001423 EXTENT_LOCKED, &failed_start,
Qu Wenruod38ed272015-10-12 14:53:37 +08001424 cached_state, GFP_NOFS, NULL);
Jeff Mahoneyd0082372012-03-01 14:57:19 +01001425 if (err == -EEXIST) {
Chris Masond1310b22008-01-24 16:13:08 -05001426 wait_extent_bit(tree, failed_start, end, EXTENT_LOCKED);
1427 start = failed_start;
Jeff Mahoneyd0082372012-03-01 14:57:19 +01001428 } else
Chris Masond1310b22008-01-24 16:13:08 -05001429 break;
Chris Masond1310b22008-01-24 16:13:08 -05001430 WARN_ON(start > end);
1431 }
1432 return err;
1433}
Chris Masond1310b22008-01-24 16:13:08 -05001434
Jeff Mahoneyd0082372012-03-01 14:57:19 +01001435int try_lock_extent(struct extent_io_tree *tree, u64 start, u64 end)
Josef Bacik25179202008-10-29 14:49:05 -04001436{
1437 int err;
1438 u64 failed_start;
1439
Jeff Mahoney3fbe5c02012-03-01 14:57:19 +01001440 err = __set_extent_bit(tree, start, end, EXTENT_LOCKED, EXTENT_LOCKED,
Qu Wenruod38ed272015-10-12 14:53:37 +08001441 &failed_start, NULL, GFP_NOFS, NULL);
Yan Zheng66435582008-10-30 14:19:50 -04001442 if (err == -EEXIST) {
1443 if (failed_start > start)
1444 clear_extent_bit(tree, start, failed_start - 1,
David Sterbaae0f1622017-10-31 16:37:52 +01001445 EXTENT_LOCKED, 1, 0, NULL);
Josef Bacik25179202008-10-29 14:49:05 -04001446 return 0;
Yan Zheng66435582008-10-30 14:19:50 -04001447 }
Josef Bacik25179202008-10-29 14:49:05 -04001448 return 1;
1449}
Josef Bacik25179202008-10-29 14:49:05 -04001450
David Sterbabd1fa4f2015-12-03 13:08:59 +01001451void extent_range_clear_dirty_for_io(struct inode *inode, u64 start, u64 end)
Chris Mason4adaa612013-03-26 13:07:00 -04001452{
Kirill A. Shutemov09cbfea2016-04-01 15:29:47 +03001453 unsigned long index = start >> PAGE_SHIFT;
1454 unsigned long end_index = end >> PAGE_SHIFT;
Chris Mason4adaa612013-03-26 13:07:00 -04001455 struct page *page;
1456
1457 while (index <= end_index) {
1458 page = find_get_page(inode->i_mapping, index);
1459 BUG_ON(!page); /* Pages should be in the extent_io_tree */
1460 clear_page_dirty_for_io(page);
Kirill A. Shutemov09cbfea2016-04-01 15:29:47 +03001461 put_page(page);
Chris Mason4adaa612013-03-26 13:07:00 -04001462 index++;
1463 }
Chris Mason4adaa612013-03-26 13:07:00 -04001464}
1465
David Sterbaf6311572015-12-03 13:08:59 +01001466void extent_range_redirty_for_io(struct inode *inode, u64 start, u64 end)
Chris Mason4adaa612013-03-26 13:07:00 -04001467{
Kirill A. Shutemov09cbfea2016-04-01 15:29:47 +03001468 unsigned long index = start >> PAGE_SHIFT;
1469 unsigned long end_index = end >> PAGE_SHIFT;
Chris Mason4adaa612013-03-26 13:07:00 -04001470 struct page *page;
1471
1472 while (index <= end_index) {
1473 page = find_get_page(inode->i_mapping, index);
1474 BUG_ON(!page); /* Pages should be in the extent_io_tree */
Chris Mason4adaa612013-03-26 13:07:00 -04001475 __set_page_dirty_nobuffers(page);
Konstantin Khebnikov8d386332015-02-11 15:26:55 -08001476 account_page_redirty(page);
Kirill A. Shutemov09cbfea2016-04-01 15:29:47 +03001477 put_page(page);
Chris Mason4adaa612013-03-26 13:07:00 -04001478 index++;
1479 }
Chris Mason4adaa612013-03-26 13:07:00 -04001480}
1481
Chris Masond352ac62008-09-29 15:18:18 -04001482/* find the first state struct with 'bits' set after 'start', and
1483 * return it. tree->lock must be held. NULL will returned if
1484 * nothing was found after 'start'
1485 */
Eric Sandeen48a3b632013-04-25 20:41:01 +00001486static struct extent_state *
1487find_first_extent_bit_state(struct extent_io_tree *tree,
David Sterba9ee49a042015-01-14 19:52:13 +01001488 u64 start, unsigned bits)
Chris Masond7fc6402008-02-18 12:12:38 -05001489{
1490 struct rb_node *node;
1491 struct extent_state *state;
1492
1493 /*
1494 * this search will find all the extents that end after
1495 * our range starts.
1496 */
1497 node = tree_search(tree, start);
Chris Masond3977122009-01-05 21:25:51 -05001498 if (!node)
Chris Masond7fc6402008-02-18 12:12:38 -05001499 goto out;
Chris Masond7fc6402008-02-18 12:12:38 -05001500
Chris Masond3977122009-01-05 21:25:51 -05001501 while (1) {
Chris Masond7fc6402008-02-18 12:12:38 -05001502 state = rb_entry(node, struct extent_state, rb_node);
Chris Masond3977122009-01-05 21:25:51 -05001503 if (state->end >= start && (state->state & bits))
Chris Masond7fc6402008-02-18 12:12:38 -05001504 return state;
Chris Masond3977122009-01-05 21:25:51 -05001505
Chris Masond7fc6402008-02-18 12:12:38 -05001506 node = rb_next(node);
1507 if (!node)
1508 break;
1509 }
1510out:
1511 return NULL;
1512}
Chris Masond7fc6402008-02-18 12:12:38 -05001513
Chris Masond352ac62008-09-29 15:18:18 -04001514/*
Xiao Guangrong69261c42011-07-14 03:19:45 +00001515 * find the first offset in the io tree with 'bits' set. zero is
1516 * returned if we find something, and *start_ret and *end_ret are
1517 * set to reflect the state struct that was found.
1518 *
Wang Sheng-Hui477d7ea2012-04-06 14:35:47 +08001519 * If nothing was found, 1 is returned. If found something, return 0.
Xiao Guangrong69261c42011-07-14 03:19:45 +00001520 */
1521int find_first_extent_bit(struct extent_io_tree *tree, u64 start,
David Sterba9ee49a042015-01-14 19:52:13 +01001522 u64 *start_ret, u64 *end_ret, unsigned bits,
Josef Bacike6138872012-09-27 17:07:30 -04001523 struct extent_state **cached_state)
Xiao Guangrong69261c42011-07-14 03:19:45 +00001524{
1525 struct extent_state *state;
1526 int ret = 1;
1527
1528 spin_lock(&tree->lock);
Josef Bacike6138872012-09-27 17:07:30 -04001529 if (cached_state && *cached_state) {
1530 state = *cached_state;
Filipe Manana27a35072014-07-06 20:09:59 +01001531 if (state->end == start - 1 && extent_state_in_tree(state)) {
Liu Bo9688e9a2018-08-23 03:14:53 +08001532 while ((state = next_state(state)) != NULL) {
Josef Bacike6138872012-09-27 17:07:30 -04001533 if (state->state & bits)
1534 goto got_it;
Josef Bacike6138872012-09-27 17:07:30 -04001535 }
1536 free_extent_state(*cached_state);
1537 *cached_state = NULL;
1538 goto out;
1539 }
1540 free_extent_state(*cached_state);
1541 *cached_state = NULL;
1542 }
1543
Xiao Guangrong69261c42011-07-14 03:19:45 +00001544 state = find_first_extent_bit_state(tree, start, bits);
Josef Bacike6138872012-09-27 17:07:30 -04001545got_it:
Xiao Guangrong69261c42011-07-14 03:19:45 +00001546 if (state) {
Filipe Mananae38e2ed2014-10-13 12:28:38 +01001547 cache_state_if_flags(state, cached_state, 0);
Xiao Guangrong69261c42011-07-14 03:19:45 +00001548 *start_ret = state->start;
1549 *end_ret = state->end;
1550 ret = 0;
1551 }
Josef Bacike6138872012-09-27 17:07:30 -04001552out:
Xiao Guangrong69261c42011-07-14 03:19:45 +00001553 spin_unlock(&tree->lock);
1554 return ret;
1555}
1556
Nikolay Borisov45bfcfc2019-03-27 14:24:17 +02001557/**
Nikolay Borisov1eaebb32019-06-03 13:06:02 +03001558 * find_first_clear_extent_bit - find the first range that has @bits not set.
1559 * This range could start before @start.
Nikolay Borisov45bfcfc2019-03-27 14:24:17 +02001560 *
1561 * @tree - the tree to search
1562 * @start - the offset at/after which the found extent should start
1563 * @start_ret - records the beginning of the range
1564 * @end_ret - records the end of the range (inclusive)
1565 * @bits - the set of bits which must be unset
1566 *
1567 * Since unallocated range is also considered one which doesn't have the bits
1568 * set it's possible that @end_ret contains -1, this happens in case the range
1569 * spans (last_range_end, end of device]. In this case it's up to the caller to
1570 * trim @end_ret to the appropriate size.
1571 */
1572void find_first_clear_extent_bit(struct extent_io_tree *tree, u64 start,
1573 u64 *start_ret, u64 *end_ret, unsigned bits)
1574{
1575 struct extent_state *state;
1576 struct rb_node *node, *prev = NULL, *next;
1577
1578 spin_lock(&tree->lock);
1579
1580 /* Find first extent with bits cleared */
1581 while (1) {
1582 node = __etree_search(tree, start, &next, &prev, NULL, NULL);
1583 if (!node) {
1584 node = next;
1585 if (!node) {
1586 /*
1587 * We are past the last allocated chunk,
1588 * set start at the end of the last extent. The
1589 * device alloc tree should never be empty so
1590 * prev is always set.
1591 */
1592 ASSERT(prev);
1593 state = rb_entry(prev, struct extent_state, rb_node);
1594 *start_ret = state->end + 1;
1595 *end_ret = -1;
1596 goto out;
1597 }
1598 }
Nikolay Borisov1eaebb32019-06-03 13:06:02 +03001599 /*
1600 * At this point 'node' either contains 'start' or start is
1601 * before 'node'
1602 */
Nikolay Borisov45bfcfc2019-03-27 14:24:17 +02001603 state = rb_entry(node, struct extent_state, rb_node);
Nikolay Borisov1eaebb32019-06-03 13:06:02 +03001604
1605 if (in_range(start, state->start, state->end - state->start + 1)) {
1606 if (state->state & bits) {
1607 /*
1608 * |--range with bits sets--|
1609 * |
1610 * start
1611 */
1612 start = state->end + 1;
1613 } else {
1614 /*
1615 * 'start' falls within a range that doesn't
1616 * have the bits set, so take its start as
1617 * the beginning of the desired range
1618 *
1619 * |--range with bits cleared----|
1620 * |
1621 * start
1622 */
1623 *start_ret = state->start;
1624 break;
1625 }
Nikolay Borisov45bfcfc2019-03-27 14:24:17 +02001626 } else {
Nikolay Borisov1eaebb32019-06-03 13:06:02 +03001627 /*
1628 * |---prev range---|---hole/unset---|---node range---|
1629 * |
1630 * start
1631 *
1632 * or
1633 *
1634 * |---hole/unset--||--first node--|
1635 * 0 |
1636 * start
1637 */
1638 if (prev) {
1639 state = rb_entry(prev, struct extent_state,
1640 rb_node);
1641 *start_ret = state->end + 1;
1642 } else {
1643 *start_ret = 0;
1644 }
Nikolay Borisov45bfcfc2019-03-27 14:24:17 +02001645 break;
1646 }
1647 }
1648
1649 /*
1650 * Find the longest stretch from start until an entry which has the
1651 * bits set
1652 */
1653 while (1) {
1654 state = rb_entry(node, struct extent_state, rb_node);
1655 if (state->end >= start && !(state->state & bits)) {
1656 *end_ret = state->end;
1657 } else {
1658 *end_ret = state->start - 1;
1659 break;
1660 }
1661
1662 node = rb_next(node);
1663 if (!node)
1664 break;
1665 }
1666out:
1667 spin_unlock(&tree->lock);
1668}
1669
Xiao Guangrong69261c42011-07-14 03:19:45 +00001670/*
Chris Masond352ac62008-09-29 15:18:18 -04001671 * find a contiguous range of bytes in the file marked as delalloc, not
1672 * more than 'max_bytes'. start and end are used to return the range,
1673 *
Lu Fengqi3522e902018-11-29 11:33:38 +08001674 * true is returned if we find something, false if nothing was in the tree
Chris Masond352ac62008-09-29 15:18:18 -04001675 */
Lu Fengqi3522e902018-11-29 11:33:38 +08001676static noinline bool find_delalloc_range(struct extent_io_tree *tree,
Josef Bacikc2a128d2010-02-02 21:19:11 +00001677 u64 *start, u64 *end, u64 max_bytes,
1678 struct extent_state **cached_state)
Chris Masond1310b22008-01-24 16:13:08 -05001679{
1680 struct rb_node *node;
1681 struct extent_state *state;
1682 u64 cur_start = *start;
Lu Fengqi3522e902018-11-29 11:33:38 +08001683 bool found = false;
Chris Masond1310b22008-01-24 16:13:08 -05001684 u64 total_bytes = 0;
1685
Chris Masoncad321a2008-12-17 14:51:42 -05001686 spin_lock(&tree->lock);
Chris Masonc8b97812008-10-29 14:49:59 -04001687
Chris Masond1310b22008-01-24 16:13:08 -05001688 /*
1689 * this search will find all the extents that end after
1690 * our range starts.
1691 */
Chris Mason80ea96b2008-02-01 14:51:59 -05001692 node = tree_search(tree, cur_start);
Peter2b114d12008-04-01 11:21:40 -04001693 if (!node) {
Lu Fengqi3522e902018-11-29 11:33:38 +08001694 *end = (u64)-1;
Chris Masond1310b22008-01-24 16:13:08 -05001695 goto out;
1696 }
1697
Chris Masond3977122009-01-05 21:25:51 -05001698 while (1) {
Chris Masond1310b22008-01-24 16:13:08 -05001699 state = rb_entry(node, struct extent_state, rb_node);
Zheng Yan5b21f2e2008-09-26 10:05:38 -04001700 if (found && (state->start != cur_start ||
1701 (state->state & EXTENT_BOUNDARY))) {
Chris Masond1310b22008-01-24 16:13:08 -05001702 goto out;
1703 }
1704 if (!(state->state & EXTENT_DELALLOC)) {
1705 if (!found)
1706 *end = state->end;
1707 goto out;
1708 }
Josef Bacikc2a128d2010-02-02 21:19:11 +00001709 if (!found) {
Chris Masond1310b22008-01-24 16:13:08 -05001710 *start = state->start;
Josef Bacikc2a128d2010-02-02 21:19:11 +00001711 *cached_state = state;
Elena Reshetovab7ac31b2017-03-03 10:55:19 +02001712 refcount_inc(&state->refs);
Josef Bacikc2a128d2010-02-02 21:19:11 +00001713 }
Lu Fengqi3522e902018-11-29 11:33:38 +08001714 found = true;
Chris Masond1310b22008-01-24 16:13:08 -05001715 *end = state->end;
1716 cur_start = state->end + 1;
1717 node = rb_next(node);
Chris Masond1310b22008-01-24 16:13:08 -05001718 total_bytes += state->end - state->start + 1;
Josef Bacik7bf811a52013-10-07 22:11:09 -04001719 if (total_bytes >= max_bytes)
Josef Bacik573aeca2013-08-30 14:38:49 -04001720 break;
Josef Bacik573aeca2013-08-30 14:38:49 -04001721 if (!node)
Chris Masond1310b22008-01-24 16:13:08 -05001722 break;
1723 }
1724out:
Chris Masoncad321a2008-12-17 14:51:42 -05001725 spin_unlock(&tree->lock);
Chris Masond1310b22008-01-24 16:13:08 -05001726 return found;
1727}
1728
Liu Boda2c7002017-02-10 16:41:05 +01001729static int __process_pages_contig(struct address_space *mapping,
1730 struct page *locked_page,
1731 pgoff_t start_index, pgoff_t end_index,
1732 unsigned long page_ops, pgoff_t *index_ret);
1733
Jeff Mahoney143bede2012-03-01 14:56:26 +01001734static noinline void __unlock_for_delalloc(struct inode *inode,
1735 struct page *locked_page,
1736 u64 start, u64 end)
Chris Masonc8b97812008-10-29 14:49:59 -04001737{
Kirill A. Shutemov09cbfea2016-04-01 15:29:47 +03001738 unsigned long index = start >> PAGE_SHIFT;
1739 unsigned long end_index = end >> PAGE_SHIFT;
Chris Masonc8b97812008-10-29 14:49:59 -04001740
Liu Bo76c00212017-02-10 16:42:14 +01001741 ASSERT(locked_page);
Chris Masonc8b97812008-10-29 14:49:59 -04001742 if (index == locked_page->index && end_index == index)
Jeff Mahoney143bede2012-03-01 14:56:26 +01001743 return;
Chris Masonc8b97812008-10-29 14:49:59 -04001744
Liu Bo76c00212017-02-10 16:42:14 +01001745 __process_pages_contig(inode->i_mapping, locked_page, index, end_index,
1746 PAGE_UNLOCK, NULL);
Chris Masonc8b97812008-10-29 14:49:59 -04001747}
1748
1749static noinline int lock_delalloc_pages(struct inode *inode,
1750 struct page *locked_page,
1751 u64 delalloc_start,
1752 u64 delalloc_end)
1753{
Kirill A. Shutemov09cbfea2016-04-01 15:29:47 +03001754 unsigned long index = delalloc_start >> PAGE_SHIFT;
Liu Bo76c00212017-02-10 16:42:14 +01001755 unsigned long index_ret = index;
Kirill A. Shutemov09cbfea2016-04-01 15:29:47 +03001756 unsigned long end_index = delalloc_end >> PAGE_SHIFT;
Chris Masonc8b97812008-10-29 14:49:59 -04001757 int ret;
Chris Masonc8b97812008-10-29 14:49:59 -04001758
Liu Bo76c00212017-02-10 16:42:14 +01001759 ASSERT(locked_page);
Chris Masonc8b97812008-10-29 14:49:59 -04001760 if (index == locked_page->index && index == end_index)
1761 return 0;
1762
Liu Bo76c00212017-02-10 16:42:14 +01001763 ret = __process_pages_contig(inode->i_mapping, locked_page, index,
1764 end_index, PAGE_LOCK, &index_ret);
1765 if (ret == -EAGAIN)
1766 __unlock_for_delalloc(inode, locked_page, delalloc_start,
1767 (u64)index_ret << PAGE_SHIFT);
Chris Masonc8b97812008-10-29 14:49:59 -04001768 return ret;
1769}
1770
1771/*
Lu Fengqi3522e902018-11-29 11:33:38 +08001772 * Find and lock a contiguous range of bytes in the file marked as delalloc, no
1773 * more than @max_bytes. @Start and @end are used to return the range,
Chris Masonc8b97812008-10-29 14:49:59 -04001774 *
Lu Fengqi3522e902018-11-29 11:33:38 +08001775 * Return: true if we find something
1776 * false if nothing was in the tree
Chris Masonc8b97812008-10-29 14:49:59 -04001777 */
Johannes Thumshirnce9f9672018-11-19 10:38:17 +01001778EXPORT_FOR_TESTS
Lu Fengqi3522e902018-11-29 11:33:38 +08001779noinline_for_stack bool find_lock_delalloc_range(struct inode *inode,
Josef Bacik294e30f2013-10-09 12:00:56 -04001780 struct extent_io_tree *tree,
1781 struct page *locked_page, u64 *start,
Nikolay Borisov917aace2018-10-26 14:43:20 +03001782 u64 *end)
Chris Masonc8b97812008-10-29 14:49:59 -04001783{
Nikolay Borisov917aace2018-10-26 14:43:20 +03001784 u64 max_bytes = BTRFS_MAX_EXTENT_SIZE;
Chris Masonc8b97812008-10-29 14:49:59 -04001785 u64 delalloc_start;
1786 u64 delalloc_end;
Lu Fengqi3522e902018-11-29 11:33:38 +08001787 bool found;
Chris Mason9655d292009-09-02 15:22:30 -04001788 struct extent_state *cached_state = NULL;
Chris Masonc8b97812008-10-29 14:49:59 -04001789 int ret;
1790 int loops = 0;
1791
1792again:
1793 /* step one, find a bunch of delalloc bytes starting at start */
1794 delalloc_start = *start;
1795 delalloc_end = 0;
1796 found = find_delalloc_range(tree, &delalloc_start, &delalloc_end,
Josef Bacikc2a128d2010-02-02 21:19:11 +00001797 max_bytes, &cached_state);
Chris Mason70b99e62008-10-31 12:46:39 -04001798 if (!found || delalloc_end <= *start) {
Chris Masonc8b97812008-10-29 14:49:59 -04001799 *start = delalloc_start;
1800 *end = delalloc_end;
Josef Bacikc2a128d2010-02-02 21:19:11 +00001801 free_extent_state(cached_state);
Lu Fengqi3522e902018-11-29 11:33:38 +08001802 return false;
Chris Masonc8b97812008-10-29 14:49:59 -04001803 }
1804
1805 /*
Chris Mason70b99e62008-10-31 12:46:39 -04001806 * start comes from the offset of locked_page. We have to lock
1807 * pages in order, so we can't process delalloc bytes before
1808 * locked_page
1809 */
Chris Masond3977122009-01-05 21:25:51 -05001810 if (delalloc_start < *start)
Chris Mason70b99e62008-10-31 12:46:39 -04001811 delalloc_start = *start;
Chris Mason70b99e62008-10-31 12:46:39 -04001812
1813 /*
Chris Masonc8b97812008-10-29 14:49:59 -04001814 * make sure to limit the number of pages we try to lock down
Chris Masonc8b97812008-10-29 14:49:59 -04001815 */
Josef Bacik7bf811a52013-10-07 22:11:09 -04001816 if (delalloc_end + 1 - delalloc_start > max_bytes)
1817 delalloc_end = delalloc_start + max_bytes - 1;
Chris Masond3977122009-01-05 21:25:51 -05001818
Chris Masonc8b97812008-10-29 14:49:59 -04001819 /* step two, lock all the pages after the page that has start */
1820 ret = lock_delalloc_pages(inode, locked_page,
1821 delalloc_start, delalloc_end);
Nikolay Borisov9bfd61d2018-10-26 14:43:21 +03001822 ASSERT(!ret || ret == -EAGAIN);
Chris Masonc8b97812008-10-29 14:49:59 -04001823 if (ret == -EAGAIN) {
1824 /* some of the pages are gone, lets avoid looping by
1825 * shortening the size of the delalloc range we're searching
1826 */
Chris Mason9655d292009-09-02 15:22:30 -04001827 free_extent_state(cached_state);
Chris Mason7d788742014-05-21 05:49:54 -07001828 cached_state = NULL;
Chris Masonc8b97812008-10-29 14:49:59 -04001829 if (!loops) {
Kirill A. Shutemov09cbfea2016-04-01 15:29:47 +03001830 max_bytes = PAGE_SIZE;
Chris Masonc8b97812008-10-29 14:49:59 -04001831 loops = 1;
1832 goto again;
1833 } else {
Lu Fengqi3522e902018-11-29 11:33:38 +08001834 found = false;
Chris Masonc8b97812008-10-29 14:49:59 -04001835 goto out_failed;
1836 }
1837 }
Chris Masonc8b97812008-10-29 14:49:59 -04001838
1839 /* step three, lock the state bits for the whole range */
David Sterbaff13db42015-12-03 14:30:40 +01001840 lock_extent_bits(tree, delalloc_start, delalloc_end, &cached_state);
Chris Masonc8b97812008-10-29 14:49:59 -04001841
1842 /* then test to make sure it is all still delalloc */
1843 ret = test_range_bit(tree, delalloc_start, delalloc_end,
Chris Mason9655d292009-09-02 15:22:30 -04001844 EXTENT_DELALLOC, 1, cached_state);
Chris Masonc8b97812008-10-29 14:49:59 -04001845 if (!ret) {
Chris Mason9655d292009-09-02 15:22:30 -04001846 unlock_extent_cached(tree, delalloc_start, delalloc_end,
David Sterbae43bbe52017-12-12 21:43:52 +01001847 &cached_state);
Chris Masonc8b97812008-10-29 14:49:59 -04001848 __unlock_for_delalloc(inode, locked_page,
1849 delalloc_start, delalloc_end);
1850 cond_resched();
1851 goto again;
1852 }
Chris Mason9655d292009-09-02 15:22:30 -04001853 free_extent_state(cached_state);
Chris Masonc8b97812008-10-29 14:49:59 -04001854 *start = delalloc_start;
1855 *end = delalloc_end;
1856out_failed:
1857 return found;
1858}
1859
Liu Boda2c7002017-02-10 16:41:05 +01001860static int __process_pages_contig(struct address_space *mapping,
1861 struct page *locked_page,
1862 pgoff_t start_index, pgoff_t end_index,
1863 unsigned long page_ops, pgoff_t *index_ret)
Chris Masonc8b97812008-10-29 14:49:59 -04001864{
Liu Bo873695b2017-02-02 17:49:22 -08001865 unsigned long nr_pages = end_index - start_index + 1;
Liu Boda2c7002017-02-10 16:41:05 +01001866 unsigned long pages_locked = 0;
Liu Bo873695b2017-02-02 17:49:22 -08001867 pgoff_t index = start_index;
Chris Masonc8b97812008-10-29 14:49:59 -04001868 struct page *pages[16];
Liu Bo873695b2017-02-02 17:49:22 -08001869 unsigned ret;
Liu Boda2c7002017-02-10 16:41:05 +01001870 int err = 0;
Chris Masonc8b97812008-10-29 14:49:59 -04001871 int i;
Chris Mason771ed682008-11-06 22:02:51 -05001872
Liu Boda2c7002017-02-10 16:41:05 +01001873 if (page_ops & PAGE_LOCK) {
1874 ASSERT(page_ops == PAGE_LOCK);
1875 ASSERT(index_ret && *index_ret == start_index);
1876 }
1877
Filipe Manana704de492014-10-06 22:14:22 +01001878 if ((page_ops & PAGE_SET_ERROR) && nr_pages > 0)
Liu Bo873695b2017-02-02 17:49:22 -08001879 mapping_set_error(mapping, -EIO);
Filipe Manana704de492014-10-06 22:14:22 +01001880
Chris Masond3977122009-01-05 21:25:51 -05001881 while (nr_pages > 0) {
Liu Bo873695b2017-02-02 17:49:22 -08001882 ret = find_get_pages_contig(mapping, index,
Chris Mason5b050f02008-11-11 09:34:41 -05001883 min_t(unsigned long,
1884 nr_pages, ARRAY_SIZE(pages)), pages);
Liu Boda2c7002017-02-10 16:41:05 +01001885 if (ret == 0) {
1886 /*
1887 * Only if we're going to lock these pages,
1888 * can we find nothing at @index.
1889 */
1890 ASSERT(page_ops & PAGE_LOCK);
Liu Bo49d4a332017-03-06 18:20:56 -08001891 err = -EAGAIN;
1892 goto out;
Liu Boda2c7002017-02-10 16:41:05 +01001893 }
Chris Mason8b62b722009-09-02 16:53:46 -04001894
Liu Boda2c7002017-02-10 16:41:05 +01001895 for (i = 0; i < ret; i++) {
Josef Bacikc2790a22013-07-29 11:20:47 -04001896 if (page_ops & PAGE_SET_PRIVATE2)
Chris Mason8b62b722009-09-02 16:53:46 -04001897 SetPagePrivate2(pages[i]);
1898
Chris Masonc8b97812008-10-29 14:49:59 -04001899 if (pages[i] == locked_page) {
Kirill A. Shutemov09cbfea2016-04-01 15:29:47 +03001900 put_page(pages[i]);
Liu Boda2c7002017-02-10 16:41:05 +01001901 pages_locked++;
Chris Masonc8b97812008-10-29 14:49:59 -04001902 continue;
1903 }
Josef Bacikc2790a22013-07-29 11:20:47 -04001904 if (page_ops & PAGE_CLEAR_DIRTY)
Chris Masonc8b97812008-10-29 14:49:59 -04001905 clear_page_dirty_for_io(pages[i]);
Josef Bacikc2790a22013-07-29 11:20:47 -04001906 if (page_ops & PAGE_SET_WRITEBACK)
Chris Masonc8b97812008-10-29 14:49:59 -04001907 set_page_writeback(pages[i]);
Filipe Manana704de492014-10-06 22:14:22 +01001908 if (page_ops & PAGE_SET_ERROR)
1909 SetPageError(pages[i]);
Josef Bacikc2790a22013-07-29 11:20:47 -04001910 if (page_ops & PAGE_END_WRITEBACK)
Chris Masonc8b97812008-10-29 14:49:59 -04001911 end_page_writeback(pages[i]);
Josef Bacikc2790a22013-07-29 11:20:47 -04001912 if (page_ops & PAGE_UNLOCK)
Chris Mason771ed682008-11-06 22:02:51 -05001913 unlock_page(pages[i]);
Liu Boda2c7002017-02-10 16:41:05 +01001914 if (page_ops & PAGE_LOCK) {
1915 lock_page(pages[i]);
1916 if (!PageDirty(pages[i]) ||
1917 pages[i]->mapping != mapping) {
1918 unlock_page(pages[i]);
1919 put_page(pages[i]);
1920 err = -EAGAIN;
1921 goto out;
1922 }
1923 }
Kirill A. Shutemov09cbfea2016-04-01 15:29:47 +03001924 put_page(pages[i]);
Liu Boda2c7002017-02-10 16:41:05 +01001925 pages_locked++;
Chris Masonc8b97812008-10-29 14:49:59 -04001926 }
1927 nr_pages -= ret;
1928 index += ret;
1929 cond_resched();
1930 }
Liu Boda2c7002017-02-10 16:41:05 +01001931out:
1932 if (err && index_ret)
1933 *index_ret = start_index + pages_locked - 1;
1934 return err;
Chris Masonc8b97812008-10-29 14:49:59 -04001935}
Chris Masonc8b97812008-10-29 14:49:59 -04001936
Liu Bo873695b2017-02-02 17:49:22 -08001937void extent_clear_unlock_delalloc(struct inode *inode, u64 start, u64 end,
1938 u64 delalloc_end, struct page *locked_page,
1939 unsigned clear_bits,
1940 unsigned long page_ops)
1941{
1942 clear_extent_bit(&BTRFS_I(inode)->io_tree, start, end, clear_bits, 1, 0,
David Sterbaae0f1622017-10-31 16:37:52 +01001943 NULL);
Liu Bo873695b2017-02-02 17:49:22 -08001944
1945 __process_pages_contig(inode->i_mapping, locked_page,
1946 start >> PAGE_SHIFT, end >> PAGE_SHIFT,
Liu Boda2c7002017-02-10 16:41:05 +01001947 page_ops, NULL);
Liu Bo873695b2017-02-02 17:49:22 -08001948}
1949
Chris Masond352ac62008-09-29 15:18:18 -04001950/*
1951 * count the number of bytes in the tree that have a given bit(s)
1952 * set. This can be fairly slow, except for EXTENT_DIRTY which is
1953 * cached. The total number found is returned.
1954 */
Chris Masond1310b22008-01-24 16:13:08 -05001955u64 count_range_bits(struct extent_io_tree *tree,
1956 u64 *start, u64 search_end, u64 max_bytes,
David Sterba9ee49a042015-01-14 19:52:13 +01001957 unsigned bits, int contig)
Chris Masond1310b22008-01-24 16:13:08 -05001958{
1959 struct rb_node *node;
1960 struct extent_state *state;
1961 u64 cur_start = *start;
1962 u64 total_bytes = 0;
Chris Masonec29ed52011-02-23 16:23:20 -05001963 u64 last = 0;
Chris Masond1310b22008-01-24 16:13:08 -05001964 int found = 0;
1965
Dulshani Gunawardhanafae7f212013-10-31 10:30:08 +05301966 if (WARN_ON(search_end <= cur_start))
Chris Masond1310b22008-01-24 16:13:08 -05001967 return 0;
Chris Masond1310b22008-01-24 16:13:08 -05001968
Chris Masoncad321a2008-12-17 14:51:42 -05001969 spin_lock(&tree->lock);
Chris Masond1310b22008-01-24 16:13:08 -05001970 if (cur_start == 0 && bits == EXTENT_DIRTY) {
1971 total_bytes = tree->dirty_bytes;
1972 goto out;
1973 }
1974 /*
1975 * this search will find all the extents that end after
1976 * our range starts.
1977 */
Chris Mason80ea96b2008-02-01 14:51:59 -05001978 node = tree_search(tree, cur_start);
Chris Masond3977122009-01-05 21:25:51 -05001979 if (!node)
Chris Masond1310b22008-01-24 16:13:08 -05001980 goto out;
Chris Masond1310b22008-01-24 16:13:08 -05001981
Chris Masond3977122009-01-05 21:25:51 -05001982 while (1) {
Chris Masond1310b22008-01-24 16:13:08 -05001983 state = rb_entry(node, struct extent_state, rb_node);
1984 if (state->start > search_end)
1985 break;
Chris Masonec29ed52011-02-23 16:23:20 -05001986 if (contig && found && state->start > last + 1)
1987 break;
1988 if (state->end >= cur_start && (state->state & bits) == bits) {
Chris Masond1310b22008-01-24 16:13:08 -05001989 total_bytes += min(search_end, state->end) + 1 -
1990 max(cur_start, state->start);
1991 if (total_bytes >= max_bytes)
1992 break;
1993 if (!found) {
Josef Bacikaf60bed2011-05-04 11:11:17 -04001994 *start = max(cur_start, state->start);
Chris Masond1310b22008-01-24 16:13:08 -05001995 found = 1;
1996 }
Chris Masonec29ed52011-02-23 16:23:20 -05001997 last = state->end;
1998 } else if (contig && found) {
1999 break;
Chris Masond1310b22008-01-24 16:13:08 -05002000 }
2001 node = rb_next(node);
2002 if (!node)
2003 break;
2004 }
2005out:
Chris Masoncad321a2008-12-17 14:51:42 -05002006 spin_unlock(&tree->lock);
Chris Masond1310b22008-01-24 16:13:08 -05002007 return total_bytes;
2008}
Christoph Hellwigb2950862008-12-02 09:54:17 -05002009
Chris Masond352ac62008-09-29 15:18:18 -04002010/*
2011 * set the private field for a given byte offset in the tree. If there isn't
2012 * an extent_state there already, this does nothing.
2013 */
Arnd Bergmannf827ba92016-02-22 22:53:20 +01002014static noinline int set_state_failrec(struct extent_io_tree *tree, u64 start,
David Sterba47dc1962016-02-11 13:24:13 +01002015 struct io_failure_record *failrec)
Chris Masond1310b22008-01-24 16:13:08 -05002016{
2017 struct rb_node *node;
2018 struct extent_state *state;
2019 int ret = 0;
2020
Chris Masoncad321a2008-12-17 14:51:42 -05002021 spin_lock(&tree->lock);
Chris Masond1310b22008-01-24 16:13:08 -05002022 /*
2023 * this search will find all the extents that end after
2024 * our range starts.
2025 */
Chris Mason80ea96b2008-02-01 14:51:59 -05002026 node = tree_search(tree, start);
Peter2b114d12008-04-01 11:21:40 -04002027 if (!node) {
Chris Masond1310b22008-01-24 16:13:08 -05002028 ret = -ENOENT;
2029 goto out;
2030 }
2031 state = rb_entry(node, struct extent_state, rb_node);
2032 if (state->start != start) {
2033 ret = -ENOENT;
2034 goto out;
2035 }
David Sterba47dc1962016-02-11 13:24:13 +01002036 state->failrec = failrec;
Chris Masond1310b22008-01-24 16:13:08 -05002037out:
Chris Masoncad321a2008-12-17 14:51:42 -05002038 spin_unlock(&tree->lock);
Chris Masond1310b22008-01-24 16:13:08 -05002039 return ret;
2040}
2041
Arnd Bergmannf827ba92016-02-22 22:53:20 +01002042static noinline int get_state_failrec(struct extent_io_tree *tree, u64 start,
David Sterba47dc1962016-02-11 13:24:13 +01002043 struct io_failure_record **failrec)
Chris Masond1310b22008-01-24 16:13:08 -05002044{
2045 struct rb_node *node;
2046 struct extent_state *state;
2047 int ret = 0;
2048
Chris Masoncad321a2008-12-17 14:51:42 -05002049 spin_lock(&tree->lock);
Chris Masond1310b22008-01-24 16:13:08 -05002050 /*
2051 * this search will find all the extents that end after
2052 * our range starts.
2053 */
Chris Mason80ea96b2008-02-01 14:51:59 -05002054 node = tree_search(tree, start);
Peter2b114d12008-04-01 11:21:40 -04002055 if (!node) {
Chris Masond1310b22008-01-24 16:13:08 -05002056 ret = -ENOENT;
2057 goto out;
2058 }
2059 state = rb_entry(node, struct extent_state, rb_node);
2060 if (state->start != start) {
2061 ret = -ENOENT;
2062 goto out;
2063 }
David Sterba47dc1962016-02-11 13:24:13 +01002064 *failrec = state->failrec;
Chris Masond1310b22008-01-24 16:13:08 -05002065out:
Chris Masoncad321a2008-12-17 14:51:42 -05002066 spin_unlock(&tree->lock);
Chris Masond1310b22008-01-24 16:13:08 -05002067 return ret;
2068}
2069
2070/*
2071 * searches a range in the state tree for a given mask.
Chris Mason70dec802008-01-29 09:59:12 -05002072 * If 'filled' == 1, this returns 1 only if every extent in the tree
Chris Masond1310b22008-01-24 16:13:08 -05002073 * has the bits set. Otherwise, 1 is returned if any bit in the
2074 * range is found set.
2075 */
2076int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end,
David Sterba9ee49a042015-01-14 19:52:13 +01002077 unsigned bits, int filled, struct extent_state *cached)
Chris Masond1310b22008-01-24 16:13:08 -05002078{
2079 struct extent_state *state = NULL;
2080 struct rb_node *node;
2081 int bitset = 0;
Chris Masond1310b22008-01-24 16:13:08 -05002082
Chris Masoncad321a2008-12-17 14:51:42 -05002083 spin_lock(&tree->lock);
Filipe Manana27a35072014-07-06 20:09:59 +01002084 if (cached && extent_state_in_tree(cached) && cached->start <= start &&
Josef Bacikdf98b6e2011-06-20 14:53:48 -04002085 cached->end > start)
Chris Mason9655d292009-09-02 15:22:30 -04002086 node = &cached->rb_node;
2087 else
2088 node = tree_search(tree, start);
Chris Masond1310b22008-01-24 16:13:08 -05002089 while (node && start <= end) {
2090 state = rb_entry(node, struct extent_state, rb_node);
2091
2092 if (filled && state->start > start) {
2093 bitset = 0;
2094 break;
2095 }
2096
2097 if (state->start > end)
2098 break;
2099
2100 if (state->state & bits) {
2101 bitset = 1;
2102 if (!filled)
2103 break;
2104 } else if (filled) {
2105 bitset = 0;
2106 break;
2107 }
Chris Mason46562ce2009-09-23 20:23:16 -04002108
2109 if (state->end == (u64)-1)
2110 break;
2111
Chris Masond1310b22008-01-24 16:13:08 -05002112 start = state->end + 1;
2113 if (start > end)
2114 break;
2115 node = rb_next(node);
2116 if (!node) {
2117 if (filled)
2118 bitset = 0;
2119 break;
2120 }
2121 }
Chris Masoncad321a2008-12-17 14:51:42 -05002122 spin_unlock(&tree->lock);
Chris Masond1310b22008-01-24 16:13:08 -05002123 return bitset;
2124}
Chris Masond1310b22008-01-24 16:13:08 -05002125
2126/*
2127 * helper function to set a given page up to date if all the
2128 * extents in the tree for that page are up to date
2129 */
Jeff Mahoney143bede2012-03-01 14:56:26 +01002130static void check_page_uptodate(struct extent_io_tree *tree, struct page *page)
Chris Masond1310b22008-01-24 16:13:08 -05002131{
Miao Xie4eee4fa2012-12-21 09:17:45 +00002132 u64 start = page_offset(page);
Kirill A. Shutemov09cbfea2016-04-01 15:29:47 +03002133 u64 end = start + PAGE_SIZE - 1;
Chris Mason9655d292009-09-02 15:22:30 -04002134 if (test_range_bit(tree, start, end, EXTENT_UPTODATE, 1, NULL))
Chris Masond1310b22008-01-24 16:13:08 -05002135 SetPageUptodate(page);
Chris Masond1310b22008-01-24 16:13:08 -05002136}
2137
Josef Bacik7870d082017-05-05 11:57:15 -04002138int free_io_failure(struct extent_io_tree *failure_tree,
2139 struct extent_io_tree *io_tree,
2140 struct io_failure_record *rec)
Jan Schmidt4a54c8c2011-07-22 15:41:52 +02002141{
2142 int ret;
2143 int err = 0;
Jan Schmidt4a54c8c2011-07-22 15:41:52 +02002144
David Sterba47dc1962016-02-11 13:24:13 +01002145 set_state_failrec(failure_tree, rec->start, NULL);
Jan Schmidt4a54c8c2011-07-22 15:41:52 +02002146 ret = clear_extent_bits(failure_tree, rec->start,
2147 rec->start + rec->len - 1,
David Sterba91166212016-04-26 23:54:39 +02002148 EXTENT_LOCKED | EXTENT_DIRTY);
Jan Schmidt4a54c8c2011-07-22 15:41:52 +02002149 if (ret)
2150 err = ret;
2151
Josef Bacik7870d082017-05-05 11:57:15 -04002152 ret = clear_extent_bits(io_tree, rec->start,
David Woodhouse53b381b2013-01-29 18:40:14 -05002153 rec->start + rec->len - 1,
David Sterba91166212016-04-26 23:54:39 +02002154 EXTENT_DAMAGED);
David Woodhouse53b381b2013-01-29 18:40:14 -05002155 if (ret && !err)
2156 err = ret;
Jan Schmidt4a54c8c2011-07-22 15:41:52 +02002157
2158 kfree(rec);
2159 return err;
2160}
2161
Jan Schmidt4a54c8c2011-07-22 15:41:52 +02002162/*
2163 * this bypasses the standard btrfs submit functions deliberately, as
2164 * the standard behavior is to write all copies in a raid setup. here we only
2165 * want to write the one bad copy. so we do the mapping for ourselves and issue
2166 * submit_bio directly.
Stefan Behrens3ec706c2012-11-05 15:46:42 +01002167 * to avoid any synchronization issues, wait for the data after writing, which
Jan Schmidt4a54c8c2011-07-22 15:41:52 +02002168 * actually prevents the read that triggered the error from finishing.
2169 * currently, there can be no more than two copies of every data bit. thus,
2170 * exactly one rewrite is required.
2171 */
Josef Bacik6ec656b2017-05-05 11:57:14 -04002172int repair_io_failure(struct btrfs_fs_info *fs_info, u64 ino, u64 start,
2173 u64 length, u64 logical, struct page *page,
2174 unsigned int pg_offset, int mirror_num)
Jan Schmidt4a54c8c2011-07-22 15:41:52 +02002175{
2176 struct bio *bio;
2177 struct btrfs_device *dev;
Jan Schmidt4a54c8c2011-07-22 15:41:52 +02002178 u64 map_length = 0;
2179 u64 sector;
2180 struct btrfs_bio *bbio = NULL;
2181 int ret;
2182
Linus Torvalds1751e8a2017-11-27 13:05:09 -08002183 ASSERT(!(fs_info->sb->s_flags & SB_RDONLY));
Jan Schmidt4a54c8c2011-07-22 15:41:52 +02002184 BUG_ON(!mirror_num);
2185
David Sterbac5e4c3d2017-06-12 17:29:41 +02002186 bio = btrfs_io_bio_alloc(1);
Kent Overstreet4f024f32013-10-11 15:44:27 -07002187 bio->bi_iter.bi_size = 0;
Jan Schmidt4a54c8c2011-07-22 15:41:52 +02002188 map_length = length;
2189
Filipe Mananab5de8d02016-05-27 22:21:27 +01002190 /*
2191 * Avoid races with device replace and make sure our bbio has devices
2192 * associated to its stripes that don't go away while we are doing the
2193 * read repair operation.
2194 */
2195 btrfs_bio_counter_inc_blocked(fs_info);
Nikolay Borisove4ff5fb2017-07-19 10:48:42 +03002196 if (btrfs_is_parity_mirror(fs_info, logical, length)) {
Liu Boc7253282017-03-29 10:53:58 -07002197 /*
2198 * Note that we don't use BTRFS_MAP_WRITE because it's supposed
2199 * to update all raid stripes, but here we just want to correct
2200 * bad stripe, thus BTRFS_MAP_READ is abused to only get the bad
2201 * stripe's dev and sector.
2202 */
2203 ret = btrfs_map_block(fs_info, BTRFS_MAP_READ, logical,
2204 &map_length, &bbio, 0);
2205 if (ret) {
2206 btrfs_bio_counter_dec(fs_info);
2207 bio_put(bio);
2208 return -EIO;
2209 }
2210 ASSERT(bbio->mirror_num == 1);
2211 } else {
2212 ret = btrfs_map_block(fs_info, BTRFS_MAP_WRITE, logical,
2213 &map_length, &bbio, mirror_num);
2214 if (ret) {
2215 btrfs_bio_counter_dec(fs_info);
2216 bio_put(bio);
2217 return -EIO;
2218 }
2219 BUG_ON(mirror_num != bbio->mirror_num);
Jan Schmidt4a54c8c2011-07-22 15:41:52 +02002220 }
Liu Boc7253282017-03-29 10:53:58 -07002221
2222 sector = bbio->stripes[bbio->mirror_num - 1].physical >> 9;
Kent Overstreet4f024f32013-10-11 15:44:27 -07002223 bio->bi_iter.bi_sector = sector;
Liu Boc7253282017-03-29 10:53:58 -07002224 dev = bbio->stripes[bbio->mirror_num - 1].dev;
Zhao Lei6e9606d2015-01-20 15:11:34 +08002225 btrfs_put_bbio(bbio);
Anand Jainebbede42017-12-04 12:54:52 +08002226 if (!dev || !dev->bdev ||
2227 !test_bit(BTRFS_DEV_STATE_WRITEABLE, &dev->dev_state)) {
Filipe Mananab5de8d02016-05-27 22:21:27 +01002228 btrfs_bio_counter_dec(fs_info);
Jan Schmidt4a54c8c2011-07-22 15:41:52 +02002229 bio_put(bio);
2230 return -EIO;
2231 }
Christoph Hellwig74d46992017-08-23 19:10:32 +02002232 bio_set_dev(bio, dev->bdev);
Christoph Hellwig70fd7612016-11-01 07:40:10 -06002233 bio->bi_opf = REQ_OP_WRITE | REQ_SYNC;
Miao Xieffdd2012014-09-12 18:44:00 +08002234 bio_add_page(bio, page, length, pg_offset);
Jan Schmidt4a54c8c2011-07-22 15:41:52 +02002235
Mike Christie4e49ea42016-06-05 14:31:41 -05002236 if (btrfsic_submit_bio_wait(bio)) {
Jan Schmidt4a54c8c2011-07-22 15:41:52 +02002237 /* try to remap that extent elsewhere? */
Filipe Mananab5de8d02016-05-27 22:21:27 +01002238 btrfs_bio_counter_dec(fs_info);
Jan Schmidt4a54c8c2011-07-22 15:41:52 +02002239 bio_put(bio);
Stefan Behrens442a4f62012-05-25 16:06:08 +02002240 btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_WRITE_ERRS);
Jan Schmidt4a54c8c2011-07-22 15:41:52 +02002241 return -EIO;
2242 }
2243
David Sterbab14af3b2015-10-08 10:43:10 +02002244 btrfs_info_rl_in_rcu(fs_info,
2245 "read error corrected: ino %llu off %llu (dev %s sector %llu)",
Josef Bacik6ec656b2017-05-05 11:57:14 -04002246 ino, start,
Miao Xie1203b682014-09-12 18:44:01 +08002247 rcu_str_deref(dev->name), sector);
Filipe Mananab5de8d02016-05-27 22:21:27 +01002248 btrfs_bio_counter_dec(fs_info);
Jan Schmidt4a54c8c2011-07-22 15:41:52 +02002249 bio_put(bio);
2250 return 0;
2251}
2252
David Sterba20a1fbf92019-03-20 11:23:44 +01002253int btrfs_repair_eb_io_failure(struct extent_buffer *eb, int mirror_num)
Josef Bacikea466792012-03-26 21:57:36 -04002254{
David Sterba20a1fbf92019-03-20 11:23:44 +01002255 struct btrfs_fs_info *fs_info = eb->fs_info;
Josef Bacikea466792012-03-26 21:57:36 -04002256 u64 start = eb->start;
David Sterbacc5e31a2018-03-01 18:20:27 +01002257 int i, num_pages = num_extent_pages(eb);
Chris Masond95603b2012-04-12 15:55:15 -04002258 int ret = 0;
Josef Bacikea466792012-03-26 21:57:36 -04002259
David Howellsbc98a422017-07-17 08:45:34 +01002260 if (sb_rdonly(fs_info->sb))
Ilya Dryomov908960c2013-11-03 19:06:39 +02002261 return -EROFS;
2262
Josef Bacikea466792012-03-26 21:57:36 -04002263 for (i = 0; i < num_pages; i++) {
David Sterbafb85fc92014-07-31 01:03:53 +02002264 struct page *p = eb->pages[i];
Miao Xie1203b682014-09-12 18:44:01 +08002265
Josef Bacik6ec656b2017-05-05 11:57:14 -04002266 ret = repair_io_failure(fs_info, 0, start, PAGE_SIZE, start, p,
Miao Xie1203b682014-09-12 18:44:01 +08002267 start - page_offset(p), mirror_num);
Josef Bacikea466792012-03-26 21:57:36 -04002268 if (ret)
2269 break;
Kirill A. Shutemov09cbfea2016-04-01 15:29:47 +03002270 start += PAGE_SIZE;
Josef Bacikea466792012-03-26 21:57:36 -04002271 }
2272
2273 return ret;
2274}
2275
Jan Schmidt4a54c8c2011-07-22 15:41:52 +02002276/*
2277 * each time an IO finishes, we do a fast check in the IO failure tree
2278 * to see if we need to process or clean up an io_failure_record
2279 */
Josef Bacik7870d082017-05-05 11:57:15 -04002280int clean_io_failure(struct btrfs_fs_info *fs_info,
2281 struct extent_io_tree *failure_tree,
2282 struct extent_io_tree *io_tree, u64 start,
2283 struct page *page, u64 ino, unsigned int pg_offset)
Jan Schmidt4a54c8c2011-07-22 15:41:52 +02002284{
2285 u64 private;
Jan Schmidt4a54c8c2011-07-22 15:41:52 +02002286 struct io_failure_record *failrec;
Jan Schmidt4a54c8c2011-07-22 15:41:52 +02002287 struct extent_state *state;
2288 int num_copies;
Jan Schmidt4a54c8c2011-07-22 15:41:52 +02002289 int ret;
Jan Schmidt4a54c8c2011-07-22 15:41:52 +02002290
2291 private = 0;
Josef Bacik7870d082017-05-05 11:57:15 -04002292 ret = count_range_bits(failure_tree, &private, (u64)-1, 1,
2293 EXTENT_DIRTY, 0);
Jan Schmidt4a54c8c2011-07-22 15:41:52 +02002294 if (!ret)
2295 return 0;
2296
Josef Bacik7870d082017-05-05 11:57:15 -04002297 ret = get_state_failrec(failure_tree, start, &failrec);
Jan Schmidt4a54c8c2011-07-22 15:41:52 +02002298 if (ret)
2299 return 0;
2300
Jan Schmidt4a54c8c2011-07-22 15:41:52 +02002301 BUG_ON(!failrec->this_mirror);
2302
2303 if (failrec->in_validation) {
2304 /* there was no real error, just free the record */
Jeff Mahoneyab8d0fc2016-09-20 10:05:02 -04002305 btrfs_debug(fs_info,
2306 "clean_io_failure: freeing dummy error at %llu",
2307 failrec->start);
Jan Schmidt4a54c8c2011-07-22 15:41:52 +02002308 goto out;
2309 }
David Howellsbc98a422017-07-17 08:45:34 +01002310 if (sb_rdonly(fs_info->sb))
Ilya Dryomov908960c2013-11-03 19:06:39 +02002311 goto out;
Jan Schmidt4a54c8c2011-07-22 15:41:52 +02002312
Josef Bacik7870d082017-05-05 11:57:15 -04002313 spin_lock(&io_tree->lock);
2314 state = find_first_extent_bit_state(io_tree,
Jan Schmidt4a54c8c2011-07-22 15:41:52 +02002315 failrec->start,
2316 EXTENT_LOCKED);
Josef Bacik7870d082017-05-05 11:57:15 -04002317 spin_unlock(&io_tree->lock);
Jan Schmidt4a54c8c2011-07-22 15:41:52 +02002318
Miao Xie883d0de2013-07-25 19:22:35 +08002319 if (state && state->start <= failrec->start &&
2320 state->end >= failrec->start + failrec->len - 1) {
Stefan Behrens3ec706c2012-11-05 15:46:42 +01002321 num_copies = btrfs_num_copies(fs_info, failrec->logical,
2322 failrec->len);
Jan Schmidt4a54c8c2011-07-22 15:41:52 +02002323 if (num_copies > 1) {
Josef Bacik7870d082017-05-05 11:57:15 -04002324 repair_io_failure(fs_info, ino, start, failrec->len,
2325 failrec->logical, page, pg_offset,
2326 failrec->failed_mirror);
Jan Schmidt4a54c8c2011-07-22 15:41:52 +02002327 }
2328 }
2329
2330out:
Josef Bacik7870d082017-05-05 11:57:15 -04002331 free_io_failure(failure_tree, io_tree, failrec);
Jan Schmidt4a54c8c2011-07-22 15:41:52 +02002332
Miao Xie454ff3d2014-09-12 18:43:58 +08002333 return 0;
Jan Schmidt4a54c8c2011-07-22 15:41:52 +02002334}
2335
Miao Xief6124962014-09-12 18:44:04 +08002336/*
2337 * Can be called when
2338 * - hold extent lock
2339 * - under ordered extent
2340 * - the inode is freeing
2341 */
Nikolay Borisov7ab79562017-02-20 13:50:57 +02002342void btrfs_free_io_failure_record(struct btrfs_inode *inode, u64 start, u64 end)
Miao Xief6124962014-09-12 18:44:04 +08002343{
Nikolay Borisov7ab79562017-02-20 13:50:57 +02002344 struct extent_io_tree *failure_tree = &inode->io_failure_tree;
Miao Xief6124962014-09-12 18:44:04 +08002345 struct io_failure_record *failrec;
2346 struct extent_state *state, *next;
2347
2348 if (RB_EMPTY_ROOT(&failure_tree->state))
2349 return;
2350
2351 spin_lock(&failure_tree->lock);
2352 state = find_first_extent_bit_state(failure_tree, start, EXTENT_DIRTY);
2353 while (state) {
2354 if (state->start > end)
2355 break;
2356
2357 ASSERT(state->end <= end);
2358
2359 next = next_state(state);
2360
David Sterba47dc1962016-02-11 13:24:13 +01002361 failrec = state->failrec;
Miao Xief6124962014-09-12 18:44:04 +08002362 free_extent_state(state);
2363 kfree(failrec);
2364
2365 state = next;
2366 }
2367 spin_unlock(&failure_tree->lock);
2368}
2369
Miao Xie2fe63032014-09-12 18:43:59 +08002370int btrfs_get_io_failure_record(struct inode *inode, u64 start, u64 end,
David Sterba47dc1962016-02-11 13:24:13 +01002371 struct io_failure_record **failrec_ret)
Jan Schmidt4a54c8c2011-07-22 15:41:52 +02002372{
Jeff Mahoneyab8d0fc2016-09-20 10:05:02 -04002373 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
Miao Xie2fe63032014-09-12 18:43:59 +08002374 struct io_failure_record *failrec;
Jan Schmidt4a54c8c2011-07-22 15:41:52 +02002375 struct extent_map *em;
Jan Schmidt4a54c8c2011-07-22 15:41:52 +02002376 struct extent_io_tree *failure_tree = &BTRFS_I(inode)->io_failure_tree;
2377 struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree;
2378 struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
Jan Schmidt4a54c8c2011-07-22 15:41:52 +02002379 int ret;
Jan Schmidt4a54c8c2011-07-22 15:41:52 +02002380 u64 logical;
2381
David Sterba47dc1962016-02-11 13:24:13 +01002382 ret = get_state_failrec(failure_tree, start, &failrec);
Jan Schmidt4a54c8c2011-07-22 15:41:52 +02002383 if (ret) {
2384 failrec = kzalloc(sizeof(*failrec), GFP_NOFS);
2385 if (!failrec)
2386 return -ENOMEM;
Miao Xie2fe63032014-09-12 18:43:59 +08002387
Jan Schmidt4a54c8c2011-07-22 15:41:52 +02002388 failrec->start = start;
2389 failrec->len = end - start + 1;
2390 failrec->this_mirror = 0;
2391 failrec->bio_flags = 0;
2392 failrec->in_validation = 0;
2393
2394 read_lock(&em_tree->lock);
2395 em = lookup_extent_mapping(em_tree, start, failrec->len);
2396 if (!em) {
2397 read_unlock(&em_tree->lock);
2398 kfree(failrec);
2399 return -EIO;
2400 }
2401
Filipe David Borba Manana68ba9902013-11-25 03:22:07 +00002402 if (em->start > start || em->start + em->len <= start) {
Jan Schmidt4a54c8c2011-07-22 15:41:52 +02002403 free_extent_map(em);
2404 em = NULL;
2405 }
2406 read_unlock(&em_tree->lock);
Tsutomu Itoh7a2d6a62012-10-01 03:07:15 -06002407 if (!em) {
Jan Schmidt4a54c8c2011-07-22 15:41:52 +02002408 kfree(failrec);
2409 return -EIO;
2410 }
Miao Xie2fe63032014-09-12 18:43:59 +08002411
Jan Schmidt4a54c8c2011-07-22 15:41:52 +02002412 logical = start - em->start;
2413 logical = em->block_start + logical;
2414 if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) {
2415 logical = em->block_start;
2416 failrec->bio_flags = EXTENT_BIO_COMPRESSED;
2417 extent_set_compress_type(&failrec->bio_flags,
2418 em->compress_type);
2419 }
Miao Xie2fe63032014-09-12 18:43:59 +08002420
Jeff Mahoneyab8d0fc2016-09-20 10:05:02 -04002421 btrfs_debug(fs_info,
2422 "Get IO Failure Record: (new) logical=%llu, start=%llu, len=%llu",
2423 logical, start, failrec->len);
Miao Xie2fe63032014-09-12 18:43:59 +08002424
Jan Schmidt4a54c8c2011-07-22 15:41:52 +02002425 failrec->logical = logical;
2426 free_extent_map(em);
2427
2428 /* set the bits in the private failure tree */
2429 ret = set_extent_bits(failure_tree, start, end,
David Sterbaceeb0ae2016-04-26 23:54:39 +02002430 EXTENT_LOCKED | EXTENT_DIRTY);
Jan Schmidt4a54c8c2011-07-22 15:41:52 +02002431 if (ret >= 0)
David Sterba47dc1962016-02-11 13:24:13 +01002432 ret = set_state_failrec(failure_tree, start, failrec);
Jan Schmidt4a54c8c2011-07-22 15:41:52 +02002433 /* set the bits in the inode's tree */
2434 if (ret >= 0)
David Sterbaceeb0ae2016-04-26 23:54:39 +02002435 ret = set_extent_bits(tree, start, end, EXTENT_DAMAGED);
Jan Schmidt4a54c8c2011-07-22 15:41:52 +02002436 if (ret < 0) {
2437 kfree(failrec);
2438 return ret;
2439 }
2440 } else {
Jeff Mahoneyab8d0fc2016-09-20 10:05:02 -04002441 btrfs_debug(fs_info,
2442 "Get IO Failure Record: (found) logical=%llu, start=%llu, len=%llu, validation=%d",
2443 failrec->logical, failrec->start, failrec->len,
2444 failrec->in_validation);
Jan Schmidt4a54c8c2011-07-22 15:41:52 +02002445 /*
2446 * when data can be on disk more than twice, add to failrec here
2447 * (e.g. with a list for failed_mirror) to make
2448 * clean_io_failure() clean all those errors at once.
2449 */
2450 }
Miao Xie2fe63032014-09-12 18:43:59 +08002451
2452 *failrec_ret = failrec;
2453
2454 return 0;
2455}
2456
Ming Leia0b60d72017-12-18 20:22:11 +08002457bool btrfs_check_repairable(struct inode *inode, unsigned failed_bio_pages,
Miao Xie2fe63032014-09-12 18:43:59 +08002458 struct io_failure_record *failrec, int failed_mirror)
2459{
Jeff Mahoneyab8d0fc2016-09-20 10:05:02 -04002460 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
Miao Xie2fe63032014-09-12 18:43:59 +08002461 int num_copies;
2462
Jeff Mahoneyab8d0fc2016-09-20 10:05:02 -04002463 num_copies = btrfs_num_copies(fs_info, failrec->logical, failrec->len);
Jan Schmidt4a54c8c2011-07-22 15:41:52 +02002464 if (num_copies == 1) {
2465 /*
2466 * we only have a single copy of the data, so don't bother with
2467 * all the retry and error correction code that follows. no
2468 * matter what the error is, it is very likely to persist.
2469 */
Jeff Mahoneyab8d0fc2016-09-20 10:05:02 -04002470 btrfs_debug(fs_info,
2471 "Check Repairable: cannot repair, num_copies=%d, next_mirror %d, failed_mirror %d",
2472 num_copies, failrec->this_mirror, failed_mirror);
Liu Boc3cfb652017-07-13 15:00:50 -07002473 return false;
Jan Schmidt4a54c8c2011-07-22 15:41:52 +02002474 }
2475
Jan Schmidt4a54c8c2011-07-22 15:41:52 +02002476 /*
2477 * there are two premises:
2478 * a) deliver good data to the caller
2479 * b) correct the bad sectors on disk
2480 */
Ming Leia0b60d72017-12-18 20:22:11 +08002481 if (failed_bio_pages > 1) {
Jan Schmidt4a54c8c2011-07-22 15:41:52 +02002482 /*
2483 * to fulfill b), we need to know the exact failing sectors, as
2484 * we don't want to rewrite any more than the failed ones. thus,
2485 * we need separate read requests for the failed bio
2486 *
2487 * if the following BUG_ON triggers, our validation request got
2488 * merged. we need separate requests for our algorithm to work.
2489 */
2490 BUG_ON(failrec->in_validation);
2491 failrec->in_validation = 1;
2492 failrec->this_mirror = failed_mirror;
Jan Schmidt4a54c8c2011-07-22 15:41:52 +02002493 } else {
2494 /*
2495 * we're ready to fulfill a) and b) alongside. get a good copy
2496 * of the failed sector and if we succeed, we have setup
2497 * everything for repair_io_failure to do the rest for us.
2498 */
2499 if (failrec->in_validation) {
2500 BUG_ON(failrec->this_mirror != failed_mirror);
2501 failrec->in_validation = 0;
2502 failrec->this_mirror = 0;
2503 }
2504 failrec->failed_mirror = failed_mirror;
2505 failrec->this_mirror++;
2506 if (failrec->this_mirror == failed_mirror)
2507 failrec->this_mirror++;
Jan Schmidt4a54c8c2011-07-22 15:41:52 +02002508 }
2509
Miao Xiefacc8a222013-07-25 19:22:34 +08002510 if (failrec->this_mirror > num_copies) {
Jeff Mahoneyab8d0fc2016-09-20 10:05:02 -04002511 btrfs_debug(fs_info,
2512 "Check Repairable: (fail) num_copies=%d, next_mirror %d, failed_mirror %d",
2513 num_copies, failrec->this_mirror, failed_mirror);
Liu Boc3cfb652017-07-13 15:00:50 -07002514 return false;
Jan Schmidt4a54c8c2011-07-22 15:41:52 +02002515 }
2516
Liu Boc3cfb652017-07-13 15:00:50 -07002517 return true;
Miao Xie2fe63032014-09-12 18:43:59 +08002518}
2519
2520
2521struct bio *btrfs_create_repair_bio(struct inode *inode, struct bio *failed_bio,
2522 struct io_failure_record *failrec,
2523 struct page *page, int pg_offset, int icsum,
Miao Xie8b110e32014-09-12 18:44:03 +08002524 bio_end_io_t *endio_func, void *data)
Miao Xie2fe63032014-09-12 18:43:59 +08002525{
Jeff Mahoney0b246af2016-06-22 18:54:23 -04002526 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
Miao Xie2fe63032014-09-12 18:43:59 +08002527 struct bio *bio;
2528 struct btrfs_io_bio *btrfs_failed_bio;
2529 struct btrfs_io_bio *btrfs_bio;
2530
David Sterbac5e4c3d2017-06-12 17:29:41 +02002531 bio = btrfs_io_bio_alloc(1);
Miao Xie2fe63032014-09-12 18:43:59 +08002532 bio->bi_end_io = endio_func;
Kent Overstreet4f024f32013-10-11 15:44:27 -07002533 bio->bi_iter.bi_sector = failrec->logical >> 9;
Christoph Hellwig74d46992017-08-23 19:10:32 +02002534 bio_set_dev(bio, fs_info->fs_devices->latest_bdev);
Kent Overstreet4f024f32013-10-11 15:44:27 -07002535 bio->bi_iter.bi_size = 0;
Miao Xie8b110e32014-09-12 18:44:03 +08002536 bio->bi_private = data;
Jan Schmidt4a54c8c2011-07-22 15:41:52 +02002537
Miao Xiefacc8a222013-07-25 19:22:34 +08002538 btrfs_failed_bio = btrfs_io_bio(failed_bio);
2539 if (btrfs_failed_bio->csum) {
Miao Xiefacc8a222013-07-25 19:22:34 +08002540 u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
2541
2542 btrfs_bio = btrfs_io_bio(bio);
2543 btrfs_bio->csum = btrfs_bio->csum_inline;
Miao Xie2fe63032014-09-12 18:43:59 +08002544 icsum *= csum_size;
2545 memcpy(btrfs_bio->csum, btrfs_failed_bio->csum + icsum,
Miao Xiefacc8a222013-07-25 19:22:34 +08002546 csum_size);
2547 }
2548
Miao Xie2fe63032014-09-12 18:43:59 +08002549 bio_add_page(bio, page, failrec->len, pg_offset);
Jan Schmidt4a54c8c2011-07-22 15:41:52 +02002550
Miao Xie2fe63032014-09-12 18:43:59 +08002551 return bio;
2552}
2553
2554/*
Nikolay Borisov78e62c02018-11-22 10:17:49 +02002555 * This is a generic handler for readpage errors. If other copies exist, read
2556 * those and write back good data to the failed position. Does not investigate
2557 * in remapping the failed extent elsewhere, hoping the device will be smart
2558 * enough to do this as needed
Miao Xie2fe63032014-09-12 18:43:59 +08002559 */
Miao Xie2fe63032014-09-12 18:43:59 +08002560static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset,
2561 struct page *page, u64 start, u64 end,
2562 int failed_mirror)
2563{
2564 struct io_failure_record *failrec;
2565 struct inode *inode = page->mapping->host;
2566 struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree;
Josef Bacik7870d082017-05-05 11:57:15 -04002567 struct extent_io_tree *failure_tree = &BTRFS_I(inode)->io_failure_tree;
Miao Xie2fe63032014-09-12 18:43:59 +08002568 struct bio *bio;
Christoph Hellwig70fd7612016-11-01 07:40:10 -06002569 int read_mode = 0;
Christoph Hellwig4e4cbee2017-06-03 09:38:06 +02002570 blk_status_t status;
Miao Xie2fe63032014-09-12 18:43:59 +08002571 int ret;
Christoph Hellwig8a2ee442019-02-15 19:13:07 +08002572 unsigned failed_bio_pages = failed_bio->bi_iter.bi_size >> PAGE_SHIFT;
Miao Xie2fe63032014-09-12 18:43:59 +08002573
Mike Christie1f7ad752016-06-05 14:31:51 -05002574 BUG_ON(bio_op(failed_bio) == REQ_OP_WRITE);
Miao Xie2fe63032014-09-12 18:43:59 +08002575
2576 ret = btrfs_get_io_failure_record(inode, start, end, &failrec);
2577 if (ret)
2578 return ret;
2579
Ming Leia0b60d72017-12-18 20:22:11 +08002580 if (!btrfs_check_repairable(inode, failed_bio_pages, failrec,
Liu Boc3cfb652017-07-13 15:00:50 -07002581 failed_mirror)) {
Josef Bacik7870d082017-05-05 11:57:15 -04002582 free_io_failure(failure_tree, tree, failrec);
Miao Xie2fe63032014-09-12 18:43:59 +08002583 return -EIO;
2584 }
2585
Ming Leia0b60d72017-12-18 20:22:11 +08002586 if (failed_bio_pages > 1)
Christoph Hellwig70fd7612016-11-01 07:40:10 -06002587 read_mode |= REQ_FAILFAST_DEV;
Miao Xie2fe63032014-09-12 18:43:59 +08002588
2589 phy_offset >>= inode->i_sb->s_blocksize_bits;
2590 bio = btrfs_create_repair_bio(inode, failed_bio, failrec, page,
2591 start - page_offset(page),
Miao Xie8b110e32014-09-12 18:44:03 +08002592 (int)phy_offset, failed_bio->bi_end_io,
2593 NULL);
David Sterbaebcc3262018-06-29 10:56:53 +02002594 bio->bi_opf = REQ_OP_READ | read_mode;
Miao Xie2fe63032014-09-12 18:43:59 +08002595
Jeff Mahoneyab8d0fc2016-09-20 10:05:02 -04002596 btrfs_debug(btrfs_sb(inode->i_sb),
2597 "Repair Read Error: submitting new read[%#x] to this_mirror=%d, in_validation=%d",
2598 read_mode, failrec->this_mirror, failrec->in_validation);
Jan Schmidt4a54c8c2011-07-22 15:41:52 +02002599
Linus Torvalds8c27cb32017-07-05 16:41:23 -07002600 status = tree->ops->submit_bio_hook(tree->private_data, bio, failrec->this_mirror,
Nikolay Borisov50489a52019-04-10 19:46:04 +03002601 failrec->bio_flags);
Christoph Hellwig4e4cbee2017-06-03 09:38:06 +02002602 if (status) {
Josef Bacik7870d082017-05-05 11:57:15 -04002603 free_io_failure(failure_tree, tree, failrec);
Miao Xie6c387ab2014-09-12 18:43:57 +08002604 bio_put(bio);
Christoph Hellwig4e4cbee2017-06-03 09:38:06 +02002605 ret = blk_status_to_errno(status);
Miao Xie6c387ab2014-09-12 18:43:57 +08002606 }
2607
Tsutomu Itoh013bd4c2012-02-16 10:11:40 +09002608 return ret;
Jan Schmidt4a54c8c2011-07-22 15:41:52 +02002609}
2610
Chris Masond1310b22008-01-24 16:13:08 -05002611/* lots and lots of room for performance fixes in the end_bio funcs */
2612
David Sterbab5227c02015-12-03 13:08:59 +01002613void end_extent_writepage(struct page *page, int err, u64 start, u64 end)
Jeff Mahoney87826df2012-02-15 16:23:57 +01002614{
2615 int uptodate = (err == 0);
Eric Sandeen3e2426b2014-06-12 00:39:58 -05002616 int ret = 0;
Jeff Mahoney87826df2012-02-15 16:23:57 +01002617
Nikolay Borisovc6297322018-11-08 10:18:08 +02002618 btrfs_writepage_endio_finish_ordered(page, start, end, uptodate);
Jeff Mahoney87826df2012-02-15 16:23:57 +01002619
Jeff Mahoney87826df2012-02-15 16:23:57 +01002620 if (!uptodate) {
Jeff Mahoney87826df2012-02-15 16:23:57 +01002621 ClearPageUptodate(page);
2622 SetPageError(page);
Colin Ian Kingbff5baf2017-05-09 18:14:01 +01002623 ret = err < 0 ? err : -EIO;
Liu Bo5dca6ee2014-05-12 12:47:36 +08002624 mapping_set_error(page->mapping, ret);
Jeff Mahoney87826df2012-02-15 16:23:57 +01002625 }
Jeff Mahoney87826df2012-02-15 16:23:57 +01002626}
2627
Chris Masond1310b22008-01-24 16:13:08 -05002628/*
2629 * after a writepage IO is done, we need to:
2630 * clear the uptodate bits on error
2631 * clear the writeback bits in the extent tree for this IO
2632 * end_page_writeback if the page has no more pending IO
2633 *
2634 * Scheduling is not allowed, so the extent state tree is expected
2635 * to have one and only one object corresponding to this IO.
2636 */
Christoph Hellwig4246a0b2015-07-20 15:29:37 +02002637static void end_bio_extent_writepage(struct bio *bio)
Chris Masond1310b22008-01-24 16:13:08 -05002638{
Christoph Hellwig4e4cbee2017-06-03 09:38:06 +02002639 int error = blk_status_to_errno(bio->bi_status);
Kent Overstreet2c30c712013-11-07 12:20:26 -08002640 struct bio_vec *bvec;
Chris Masond1310b22008-01-24 16:13:08 -05002641 u64 start;
2642 u64 end;
Ming Lei6dc4f102019-02-15 19:13:19 +08002643 struct bvec_iter_all iter_all;
Chris Masond1310b22008-01-24 16:13:08 -05002644
David Sterbac09abff2017-07-13 18:10:07 +02002645 ASSERT(!bio_flagged(bio, BIO_CLONED));
Christoph Hellwig2b070cf2019-04-25 09:03:00 +02002646 bio_for_each_segment_all(bvec, bio, iter_all) {
Chris Masond1310b22008-01-24 16:13:08 -05002647 struct page *page = bvec->bv_page;
Jeff Mahoney0b246af2016-06-22 18:54:23 -04002648 struct inode *inode = page->mapping->host;
2649 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
David Woodhouse902b22f2008-08-20 08:51:49 -04002650
Alexandre Oliva17a5adc2013-05-15 11:38:55 -04002651 /* We always issue full-page reads, but if some block
2652 * in a page fails to read, blk_update_request() will
2653 * advance bv_offset and adjust bv_len to compensate.
2654 * Print a warning for nonzero offsets, and an error
2655 * if they don't add up to a full page. */
Kirill A. Shutemov09cbfea2016-04-01 15:29:47 +03002656 if (bvec->bv_offset || bvec->bv_len != PAGE_SIZE) {
2657 if (bvec->bv_offset + bvec->bv_len != PAGE_SIZE)
Jeff Mahoney0b246af2016-06-22 18:54:23 -04002658 btrfs_err(fs_info,
Frank Holtonefe120a2013-12-20 11:37:06 -05002659 "partial page write in btrfs with offset %u and length %u",
2660 bvec->bv_offset, bvec->bv_len);
2661 else
Jeff Mahoney0b246af2016-06-22 18:54:23 -04002662 btrfs_info(fs_info,
Jeff Mahoney5d163e02016-09-20 10:05:00 -04002663 "incomplete page write in btrfs with offset %u and length %u",
Frank Holtonefe120a2013-12-20 11:37:06 -05002664 bvec->bv_offset, bvec->bv_len);
2665 }
Chris Masond1310b22008-01-24 16:13:08 -05002666
Alexandre Oliva17a5adc2013-05-15 11:38:55 -04002667 start = page_offset(page);
2668 end = start + bvec->bv_offset + bvec->bv_len - 1;
Chris Masond1310b22008-01-24 16:13:08 -05002669
Christoph Hellwig4e4cbee2017-06-03 09:38:06 +02002670 end_extent_writepage(page, error, start, end);
Alexandre Oliva17a5adc2013-05-15 11:38:55 -04002671 end_page_writeback(page);
Kent Overstreet2c30c712013-11-07 12:20:26 -08002672 }
Chris Mason2b1f55b2008-09-24 11:48:04 -04002673
Chris Masond1310b22008-01-24 16:13:08 -05002674 bio_put(bio);
Chris Masond1310b22008-01-24 16:13:08 -05002675}
2676
Miao Xie883d0de2013-07-25 19:22:35 +08002677static void
2678endio_readpage_release_extent(struct extent_io_tree *tree, u64 start, u64 len,
2679 int uptodate)
2680{
2681 struct extent_state *cached = NULL;
2682 u64 end = start + len - 1;
2683
2684 if (uptodate && tree->track_uptodate)
2685 set_extent_uptodate(tree, start, end, &cached, GFP_ATOMIC);
David Sterbad810a4b2017-12-07 18:52:54 +01002686 unlock_extent_cached_atomic(tree, start, end, &cached);
Miao Xie883d0de2013-07-25 19:22:35 +08002687}
2688
Chris Masond1310b22008-01-24 16:13:08 -05002689/*
2690 * after a readpage IO is done, we need to:
2691 * clear the uptodate bits on error
2692 * set the uptodate bits if things worked
2693 * set the page up to date if all extents in the tree are uptodate
2694 * clear the lock bit in the extent tree
2695 * unlock the page if there are no other extents locked for it
2696 *
2697 * Scheduling is not allowed, so the extent state tree is expected
2698 * to have one and only one object corresponding to this IO.
2699 */
Christoph Hellwig4246a0b2015-07-20 15:29:37 +02002700static void end_bio_extent_readpage(struct bio *bio)
Chris Masond1310b22008-01-24 16:13:08 -05002701{
Kent Overstreet2c30c712013-11-07 12:20:26 -08002702 struct bio_vec *bvec;
Christoph Hellwig4e4cbee2017-06-03 09:38:06 +02002703 int uptodate = !bio->bi_status;
Miao Xiefacc8a222013-07-25 19:22:34 +08002704 struct btrfs_io_bio *io_bio = btrfs_io_bio(bio);
Josef Bacik7870d082017-05-05 11:57:15 -04002705 struct extent_io_tree *tree, *failure_tree;
Miao Xiefacc8a222013-07-25 19:22:34 +08002706 u64 offset = 0;
Chris Masond1310b22008-01-24 16:13:08 -05002707 u64 start;
2708 u64 end;
Miao Xiefacc8a222013-07-25 19:22:34 +08002709 u64 len;
Miao Xie883d0de2013-07-25 19:22:35 +08002710 u64 extent_start = 0;
2711 u64 extent_len = 0;
Josef Bacik5cf1ab52012-04-16 09:42:26 -04002712 int mirror;
Chris Masond1310b22008-01-24 16:13:08 -05002713 int ret;
Ming Lei6dc4f102019-02-15 19:13:19 +08002714 struct bvec_iter_all iter_all;
Chris Masond1310b22008-01-24 16:13:08 -05002715
David Sterbac09abff2017-07-13 18:10:07 +02002716 ASSERT(!bio_flagged(bio, BIO_CLONED));
Christoph Hellwig2b070cf2019-04-25 09:03:00 +02002717 bio_for_each_segment_all(bvec, bio, iter_all) {
Chris Masond1310b22008-01-24 16:13:08 -05002718 struct page *page = bvec->bv_page;
Josef Bacika71754f2013-06-17 17:14:39 -04002719 struct inode *inode = page->mapping->host;
Jeff Mahoneyab8d0fc2016-09-20 10:05:02 -04002720 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
Nikolay Borisov78e62c02018-11-22 10:17:49 +02002721 bool data_inode = btrfs_ino(BTRFS_I(inode))
2722 != BTRFS_BTREE_INODE_OBJECTID;
Arne Jansen507903b2011-04-06 10:02:20 +00002723
Jeff Mahoneyab8d0fc2016-09-20 10:05:02 -04002724 btrfs_debug(fs_info,
2725 "end_bio_extent_readpage: bi_sector=%llu, err=%d, mirror=%u",
Christoph Hellwig4e4cbee2017-06-03 09:38:06 +02002726 (u64)bio->bi_iter.bi_sector, bio->bi_status,
Jeff Mahoneyab8d0fc2016-09-20 10:05:02 -04002727 io_bio->mirror_num);
Josef Bacika71754f2013-06-17 17:14:39 -04002728 tree = &BTRFS_I(inode)->io_tree;
Josef Bacik7870d082017-05-05 11:57:15 -04002729 failure_tree = &BTRFS_I(inode)->io_failure_tree;
David Woodhouse902b22f2008-08-20 08:51:49 -04002730
Alexandre Oliva17a5adc2013-05-15 11:38:55 -04002731 /* We always issue full-page reads, but if some block
2732 * in a page fails to read, blk_update_request() will
2733 * advance bv_offset and adjust bv_len to compensate.
2734 * Print a warning for nonzero offsets, and an error
2735 * if they don't add up to a full page. */
Kirill A. Shutemov09cbfea2016-04-01 15:29:47 +03002736 if (bvec->bv_offset || bvec->bv_len != PAGE_SIZE) {
2737 if (bvec->bv_offset + bvec->bv_len != PAGE_SIZE)
Jeff Mahoneyab8d0fc2016-09-20 10:05:02 -04002738 btrfs_err(fs_info,
2739 "partial page read in btrfs with offset %u and length %u",
Frank Holtonefe120a2013-12-20 11:37:06 -05002740 bvec->bv_offset, bvec->bv_len);
2741 else
Jeff Mahoneyab8d0fc2016-09-20 10:05:02 -04002742 btrfs_info(fs_info,
2743 "incomplete page read in btrfs with offset %u and length %u",
Frank Holtonefe120a2013-12-20 11:37:06 -05002744 bvec->bv_offset, bvec->bv_len);
2745 }
Chris Masond1310b22008-01-24 16:13:08 -05002746
Alexandre Oliva17a5adc2013-05-15 11:38:55 -04002747 start = page_offset(page);
2748 end = start + bvec->bv_offset + bvec->bv_len - 1;
Miao Xiefacc8a222013-07-25 19:22:34 +08002749 len = bvec->bv_len;
Chris Masond1310b22008-01-24 16:13:08 -05002750
Chris Mason9be33952013-05-17 18:30:14 -04002751 mirror = io_bio->mirror_num;
Nikolay Borisov78e62c02018-11-22 10:17:49 +02002752 if (likely(uptodate)) {
Miao Xiefacc8a222013-07-25 19:22:34 +08002753 ret = tree->ops->readpage_end_io_hook(io_bio, offset,
2754 page, start, end,
2755 mirror);
Stefan Behrens5ee08442012-08-27 08:30:03 -06002756 if (ret)
Chris Masond1310b22008-01-24 16:13:08 -05002757 uptodate = 0;
Stefan Behrens5ee08442012-08-27 08:30:03 -06002758 else
Josef Bacik7870d082017-05-05 11:57:15 -04002759 clean_io_failure(BTRFS_I(inode)->root->fs_info,
2760 failure_tree, tree, start,
2761 page,
2762 btrfs_ino(BTRFS_I(inode)), 0);
Chris Masond1310b22008-01-24 16:13:08 -05002763 }
Josef Bacikea466792012-03-26 21:57:36 -04002764
Miao Xief2a09da2013-07-25 19:22:33 +08002765 if (likely(uptodate))
2766 goto readpage_ok;
2767
Nikolay Borisov78e62c02018-11-22 10:17:49 +02002768 if (data_inode) {
Liu Bo9d0d1c82017-03-24 15:04:50 -07002769
2770 /*
Nikolay Borisov78e62c02018-11-22 10:17:49 +02002771 * The generic bio_readpage_error handles errors the
2772 * following way: If possible, new read requests are
2773 * created and submitted and will end up in
2774 * end_bio_extent_readpage as well (if we're lucky,
2775 * not in the !uptodate case). In that case it returns
2776 * 0 and we just go on with the next page in our bio.
2777 * If it can't handle the error it will return -EIO and
2778 * we remain responsible for that page.
Liu Bo9d0d1c82017-03-24 15:04:50 -07002779 */
Nikolay Borisov78e62c02018-11-22 10:17:49 +02002780 ret = bio_readpage_error(bio, offset, page, start, end,
2781 mirror);
2782 if (ret == 0) {
2783 uptodate = !bio->bi_status;
2784 offset += len;
2785 continue;
2786 }
2787 } else {
2788 struct extent_buffer *eb;
2789
2790 eb = (struct extent_buffer *)page->private;
2791 set_bit(EXTENT_BUFFER_READ_ERR, &eb->bflags);
2792 eb->read_mirror = mirror;
2793 atomic_dec(&eb->io_pages);
2794 if (test_and_clear_bit(EXTENT_BUFFER_READAHEAD,
2795 &eb->bflags))
2796 btree_readahead_hook(eb, -EIO);
Chris Mason7e383262008-04-09 16:28:12 -04002797 }
Miao Xief2a09da2013-07-25 19:22:33 +08002798readpage_ok:
Miao Xie883d0de2013-07-25 19:22:35 +08002799 if (likely(uptodate)) {
Josef Bacika71754f2013-06-17 17:14:39 -04002800 loff_t i_size = i_size_read(inode);
Kirill A. Shutemov09cbfea2016-04-01 15:29:47 +03002801 pgoff_t end_index = i_size >> PAGE_SHIFT;
Liu Boa583c022014-08-19 23:32:22 +08002802 unsigned off;
Josef Bacika71754f2013-06-17 17:14:39 -04002803
2804 /* Zero out the end if this page straddles i_size */
Johannes Thumshirn70730172018-12-05 15:23:03 +01002805 off = offset_in_page(i_size);
Liu Boa583c022014-08-19 23:32:22 +08002806 if (page->index == end_index && off)
Kirill A. Shutemov09cbfea2016-04-01 15:29:47 +03002807 zero_user_segment(page, off, PAGE_SIZE);
Alexandre Oliva17a5adc2013-05-15 11:38:55 -04002808 SetPageUptodate(page);
Chris Mason70dec802008-01-29 09:59:12 -05002809 } else {
Alexandre Oliva17a5adc2013-05-15 11:38:55 -04002810 ClearPageUptodate(page);
2811 SetPageError(page);
Chris Mason70dec802008-01-29 09:59:12 -05002812 }
Alexandre Oliva17a5adc2013-05-15 11:38:55 -04002813 unlock_page(page);
Miao Xiefacc8a222013-07-25 19:22:34 +08002814 offset += len;
Miao Xie883d0de2013-07-25 19:22:35 +08002815
2816 if (unlikely(!uptodate)) {
2817 if (extent_len) {
2818 endio_readpage_release_extent(tree,
2819 extent_start,
2820 extent_len, 1);
2821 extent_start = 0;
2822 extent_len = 0;
2823 }
2824 endio_readpage_release_extent(tree, start,
2825 end - start + 1, 0);
2826 } else if (!extent_len) {
2827 extent_start = start;
2828 extent_len = end + 1 - start;
2829 } else if (extent_start + extent_len == start) {
2830 extent_len += end + 1 - start;
2831 } else {
2832 endio_readpage_release_extent(tree, extent_start,
2833 extent_len, uptodate);
2834 extent_start = start;
2835 extent_len = end + 1 - start;
2836 }
Kent Overstreet2c30c712013-11-07 12:20:26 -08002837 }
Chris Masond1310b22008-01-24 16:13:08 -05002838
Miao Xie883d0de2013-07-25 19:22:35 +08002839 if (extent_len)
2840 endio_readpage_release_extent(tree, extent_start, extent_len,
2841 uptodate);
David Sterbab3a0dd52018-11-22 17:16:49 +01002842 btrfs_io_bio_free_csum(io_bio);
Chris Masond1310b22008-01-24 16:13:08 -05002843 bio_put(bio);
Chris Masond1310b22008-01-24 16:13:08 -05002844}
2845
Chris Mason9be33952013-05-17 18:30:14 -04002846/*
David Sterba184f9992017-06-12 17:29:39 +02002847 * Initialize the members up to but not including 'bio'. Use after allocating a
2848 * new bio by bio_alloc_bioset as it does not initialize the bytes outside of
2849 * 'bio' because use of __GFP_ZERO is not supported.
Chris Mason9be33952013-05-17 18:30:14 -04002850 */
David Sterba184f9992017-06-12 17:29:39 +02002851static inline void btrfs_io_bio_init(struct btrfs_io_bio *btrfs_bio)
Chris Masond1310b22008-01-24 16:13:08 -05002852{
David Sterba184f9992017-06-12 17:29:39 +02002853 memset(btrfs_bio, 0, offsetof(struct btrfs_io_bio, bio));
2854}
2855
2856/*
David Sterba6e707bc2017-06-02 17:26:26 +02002857 * The following helpers allocate a bio. As it's backed by a bioset, it'll
2858 * never fail. We're returning a bio right now but you can call btrfs_io_bio
2859 * for the appropriate container_of magic
Chris Masond1310b22008-01-24 16:13:08 -05002860 */
David Sterbac821e7f32017-06-02 18:35:36 +02002861struct bio *btrfs_bio_alloc(struct block_device *bdev, u64 first_byte)
Chris Masond1310b22008-01-24 16:13:08 -05002862{
2863 struct bio *bio;
2864
Kent Overstreet8ac9f7c2018-05-20 18:25:56 -04002865 bio = bio_alloc_bioset(GFP_NOFS, BIO_MAX_PAGES, &btrfs_bioset);
Christoph Hellwig74d46992017-08-23 19:10:32 +02002866 bio_set_dev(bio, bdev);
David Sterbac821e7f32017-06-02 18:35:36 +02002867 bio->bi_iter.bi_sector = first_byte >> 9;
David Sterba184f9992017-06-12 17:29:39 +02002868 btrfs_io_bio_init(btrfs_io_bio(bio));
Chris Masond1310b22008-01-24 16:13:08 -05002869 return bio;
2870}
2871
David Sterba8b6c1d52017-06-02 17:48:13 +02002872struct bio *btrfs_bio_clone(struct bio *bio)
Chris Mason9be33952013-05-17 18:30:14 -04002873{
Miao Xie23ea8e52014-09-12 18:43:54 +08002874 struct btrfs_io_bio *btrfs_bio;
2875 struct bio *new;
Chris Mason9be33952013-05-17 18:30:14 -04002876
David Sterba6e707bc2017-06-02 17:26:26 +02002877 /* Bio allocation backed by a bioset does not fail */
Kent Overstreet8ac9f7c2018-05-20 18:25:56 -04002878 new = bio_clone_fast(bio, GFP_NOFS, &btrfs_bioset);
David Sterba6e707bc2017-06-02 17:26:26 +02002879 btrfs_bio = btrfs_io_bio(new);
David Sterba184f9992017-06-12 17:29:39 +02002880 btrfs_io_bio_init(btrfs_bio);
David Sterba6e707bc2017-06-02 17:26:26 +02002881 btrfs_bio->iter = bio->bi_iter;
Miao Xie23ea8e52014-09-12 18:43:54 +08002882 return new;
2883}
Chris Mason9be33952013-05-17 18:30:14 -04002884
David Sterbac5e4c3d2017-06-12 17:29:41 +02002885struct bio *btrfs_io_bio_alloc(unsigned int nr_iovecs)
Chris Mason9be33952013-05-17 18:30:14 -04002886{
Miao Xiefacc8a222013-07-25 19:22:34 +08002887 struct bio *bio;
2888
David Sterba6e707bc2017-06-02 17:26:26 +02002889 /* Bio allocation backed by a bioset does not fail */
Kent Overstreet8ac9f7c2018-05-20 18:25:56 -04002890 bio = bio_alloc_bioset(GFP_NOFS, nr_iovecs, &btrfs_bioset);
David Sterba184f9992017-06-12 17:29:39 +02002891 btrfs_io_bio_init(btrfs_io_bio(bio));
Miao Xiefacc8a222013-07-25 19:22:34 +08002892 return bio;
Chris Mason9be33952013-05-17 18:30:14 -04002893}
2894
Liu Boe4770942017-05-16 10:57:14 -07002895struct bio *btrfs_bio_clone_partial(struct bio *orig, int offset, int size)
Liu Bo2f8e9142017-05-15 17:43:31 -07002896{
2897 struct bio *bio;
2898 struct btrfs_io_bio *btrfs_bio;
2899
2900 /* this will never fail when it's backed by a bioset */
Kent Overstreet8ac9f7c2018-05-20 18:25:56 -04002901 bio = bio_clone_fast(orig, GFP_NOFS, &btrfs_bioset);
Liu Bo2f8e9142017-05-15 17:43:31 -07002902 ASSERT(bio);
2903
2904 btrfs_bio = btrfs_io_bio(bio);
David Sterba184f9992017-06-12 17:29:39 +02002905 btrfs_io_bio_init(btrfs_bio);
Liu Bo2f8e9142017-05-15 17:43:31 -07002906
2907 bio_trim(bio, offset >> 9, size >> 9);
Liu Bo17347ce2017-05-15 15:33:27 -07002908 btrfs_bio->iter = bio->bi_iter;
Liu Bo2f8e9142017-05-15 17:43:31 -07002909 return bio;
2910}
Chris Mason9be33952013-05-17 18:30:14 -04002911
David Sterba4b81ba42017-06-06 19:14:26 +02002912/*
2913 * @opf: bio REQ_OP_* and REQ_* flags as one value
David Sterbab8b3d622017-06-12 19:50:41 +02002914 * @tree: tree so we can call our merge_bio hook
2915 * @wbc: optional writeback control for io accounting
2916 * @page: page to add to the bio
2917 * @pg_offset: offset of the new bio or to check whether we are adding
2918 * a contiguous page to the previous one
2919 * @size: portion of page that we want to write
2920 * @offset: starting offset in the page
2921 * @bdev: attach newly created bios to this bdev
David Sterba5c2b1fd2017-06-06 19:22:55 +02002922 * @bio_ret: must be valid pointer, newly allocated bio will be stored there
David Sterbab8b3d622017-06-12 19:50:41 +02002923 * @end_io_func: end_io callback for new bio
2924 * @mirror_num: desired mirror to read/write
2925 * @prev_bio_flags: flags of previous bio to see if we can merge the current one
2926 * @bio_flags: flags of the current bio to see if we can merge them
David Sterba4b81ba42017-06-06 19:14:26 +02002927 */
2928static int submit_extent_page(unsigned int opf, struct extent_io_tree *tree,
Chris Masonda2f0f72015-07-02 13:57:22 -07002929 struct writeback_control *wbc,
David Sterba6273b7f2017-10-04 17:30:11 +02002930 struct page *page, u64 offset,
David Sterba6c5a4e22017-10-04 17:10:34 +02002931 size_t size, unsigned long pg_offset,
Chris Masond1310b22008-01-24 16:13:08 -05002932 struct block_device *bdev,
2933 struct bio **bio_ret,
Chris Masonf1885912008-04-09 16:28:12 -04002934 bio_end_io_t end_io_func,
Chris Masonc8b97812008-10-29 14:49:59 -04002935 int mirror_num,
2936 unsigned long prev_bio_flags,
Filipe Manana005efed2015-09-14 09:09:31 +01002937 unsigned long bio_flags,
2938 bool force_bio_submit)
Chris Masond1310b22008-01-24 16:13:08 -05002939{
2940 int ret = 0;
2941 struct bio *bio;
Kirill A. Shutemov09cbfea2016-04-01 15:29:47 +03002942 size_t page_size = min_t(size_t, size, PAGE_SIZE);
David Sterba6273b7f2017-10-04 17:30:11 +02002943 sector_t sector = offset >> 9;
Chris Masond1310b22008-01-24 16:13:08 -05002944
David Sterba5c2b1fd2017-06-06 19:22:55 +02002945 ASSERT(bio_ret);
2946
2947 if (*bio_ret) {
David Sterba0c8508a2017-06-12 20:00:43 +02002948 bool contig;
2949 bool can_merge = true;
2950
Chris Masond1310b22008-01-24 16:13:08 -05002951 bio = *bio_ret;
David Sterba0c8508a2017-06-12 20:00:43 +02002952 if (prev_bio_flags & EXTENT_BIO_COMPRESSED)
Kent Overstreet4f024f32013-10-11 15:44:27 -07002953 contig = bio->bi_iter.bi_sector == sector;
Chris Masonc8b97812008-10-29 14:49:59 -04002954 else
Kent Overstreetf73a1c72012-09-25 15:05:12 -07002955 contig = bio_end_sector(bio) == sector;
Chris Masonc8b97812008-10-29 14:49:59 -04002956
Nikolay Borisovda12fe52018-11-27 20:57:58 +02002957 ASSERT(tree->ops);
2958 if (btrfs_bio_fits_in_stripe(page, page_size, bio, bio_flags))
David Sterba0c8508a2017-06-12 20:00:43 +02002959 can_merge = false;
2960
2961 if (prev_bio_flags != bio_flags || !contig || !can_merge ||
Filipe Manana005efed2015-09-14 09:09:31 +01002962 force_bio_submit ||
David Sterba6c5a4e22017-10-04 17:10:34 +02002963 bio_add_page(bio, page, page_size, pg_offset) < page_size) {
Mike Christie1f7ad752016-06-05 14:31:51 -05002964 ret = submit_one_bio(bio, mirror_num, prev_bio_flags);
Naohiro Aota289454a2015-01-06 01:01:03 +09002965 if (ret < 0) {
2966 *bio_ret = NULL;
Jeff Mahoney79787ea2012-03-12 16:03:00 +01002967 return ret;
Naohiro Aota289454a2015-01-06 01:01:03 +09002968 }
Chris Masond1310b22008-01-24 16:13:08 -05002969 bio = NULL;
2970 } else {
Chris Masonda2f0f72015-07-02 13:57:22 -07002971 if (wbc)
2972 wbc_account_io(wbc, page, page_size);
Chris Masond1310b22008-01-24 16:13:08 -05002973 return 0;
2974 }
2975 }
Chris Masonc8b97812008-10-29 14:49:59 -04002976
David Sterba6273b7f2017-10-04 17:30:11 +02002977 bio = btrfs_bio_alloc(bdev, offset);
David Sterba6c5a4e22017-10-04 17:10:34 +02002978 bio_add_page(bio, page, page_size, pg_offset);
Chris Masond1310b22008-01-24 16:13:08 -05002979 bio->bi_end_io = end_io_func;
2980 bio->bi_private = tree;
Jens Axboee6959b92017-06-27 11:51:28 -06002981 bio->bi_write_hint = page->mapping->host->i_write_hint;
David Sterba4b81ba42017-06-06 19:14:26 +02002982 bio->bi_opf = opf;
Chris Masonda2f0f72015-07-02 13:57:22 -07002983 if (wbc) {
2984 wbc_init_bio(wbc, bio);
2985 wbc_account_io(wbc, page, page_size);
2986 }
Chris Mason70dec802008-01-29 09:59:12 -05002987
David Sterba5c2b1fd2017-06-06 19:22:55 +02002988 *bio_ret = bio;
Chris Masond1310b22008-01-24 16:13:08 -05002989
2990 return ret;
2991}
2992
Eric Sandeen48a3b632013-04-25 20:41:01 +00002993static void attach_extent_buffer_page(struct extent_buffer *eb,
2994 struct page *page)
Josef Bacik4f2de97a2012-03-07 16:20:05 -05002995{
2996 if (!PagePrivate(page)) {
2997 SetPagePrivate(page);
Kirill A. Shutemov09cbfea2016-04-01 15:29:47 +03002998 get_page(page);
Josef Bacik4f2de97a2012-03-07 16:20:05 -05002999 set_page_private(page, (unsigned long)eb);
3000 } else {
3001 WARN_ON(page->private != (unsigned long)eb);
3002 }
3003}
3004
Chris Masond1310b22008-01-24 16:13:08 -05003005void set_page_extent_mapped(struct page *page)
3006{
3007 if (!PagePrivate(page)) {
3008 SetPagePrivate(page);
Kirill A. Shutemov09cbfea2016-04-01 15:29:47 +03003009 get_page(page);
Chris Mason6af118ce2008-07-22 11:18:07 -04003010 set_page_private(page, EXTENT_PAGE_PRIVATE);
Chris Masond1310b22008-01-24 16:13:08 -05003011 }
3012}
3013
Miao Xie125bac012013-07-25 19:22:37 +08003014static struct extent_map *
3015__get_extent_map(struct inode *inode, struct page *page, size_t pg_offset,
3016 u64 start, u64 len, get_extent_t *get_extent,
3017 struct extent_map **em_cached)
3018{
3019 struct extent_map *em;
3020
3021 if (em_cached && *em_cached) {
3022 em = *em_cached;
Filipe Mananacbc0e922014-02-25 14:15:12 +00003023 if (extent_map_in_tree(em) && start >= em->start &&
Miao Xie125bac012013-07-25 19:22:37 +08003024 start < extent_map_end(em)) {
Elena Reshetova490b54d2017-03-03 10:55:12 +02003025 refcount_inc(&em->refs);
Miao Xie125bac012013-07-25 19:22:37 +08003026 return em;
3027 }
3028
3029 free_extent_map(em);
3030 *em_cached = NULL;
3031 }
3032
Nikolay Borisovfc4f21b12017-02-20 13:51:06 +02003033 em = get_extent(BTRFS_I(inode), page, pg_offset, start, len, 0);
Miao Xie125bac012013-07-25 19:22:37 +08003034 if (em_cached && !IS_ERR_OR_NULL(em)) {
3035 BUG_ON(*em_cached);
Elena Reshetova490b54d2017-03-03 10:55:12 +02003036 refcount_inc(&em->refs);
Miao Xie125bac012013-07-25 19:22:37 +08003037 *em_cached = em;
3038 }
3039 return em;
3040}
Chris Masond1310b22008-01-24 16:13:08 -05003041/*
3042 * basic readpage implementation. Locked extent state structs are inserted
3043 * into the tree that are removed when the IO is done (by the end_io
3044 * handlers)
Jeff Mahoney79787ea2012-03-12 16:03:00 +01003045 * XXX JDM: This needs looking at to ensure proper page locking
Liu Bobaf863b2016-07-11 10:39:07 -07003046 * return 0 on success, otherwise return error
Chris Masond1310b22008-01-24 16:13:08 -05003047 */
Miao Xie99740902013-07-25 19:22:36 +08003048static int __do_readpage(struct extent_io_tree *tree,
3049 struct page *page,
3050 get_extent_t *get_extent,
Miao Xie125bac012013-07-25 19:22:37 +08003051 struct extent_map **em_cached,
Miao Xie99740902013-07-25 19:22:36 +08003052 struct bio **bio, int mirror_num,
David Sterbaf1c77c52017-06-06 19:03:49 +02003053 unsigned long *bio_flags, unsigned int read_flags,
Filipe Manana005efed2015-09-14 09:09:31 +01003054 u64 *prev_em_start)
Chris Masond1310b22008-01-24 16:13:08 -05003055{
3056 struct inode *inode = page->mapping->host;
Miao Xie4eee4fa2012-12-21 09:17:45 +00003057 u64 start = page_offset(page);
David Sterba8eec8292017-06-06 19:50:13 +02003058 const u64 end = start + PAGE_SIZE - 1;
Chris Masond1310b22008-01-24 16:13:08 -05003059 u64 cur = start;
3060 u64 extent_offset;
3061 u64 last_byte = i_size_read(inode);
3062 u64 block_start;
3063 u64 cur_end;
Chris Masond1310b22008-01-24 16:13:08 -05003064 struct extent_map *em;
3065 struct block_device *bdev;
Liu Bobaf863b2016-07-11 10:39:07 -07003066 int ret = 0;
Chris Masond1310b22008-01-24 16:13:08 -05003067 int nr = 0;
David Sterba306e16c2011-04-19 14:29:38 +02003068 size_t pg_offset = 0;
Chris Masond1310b22008-01-24 16:13:08 -05003069 size_t iosize;
Chris Masonc8b97812008-10-29 14:49:59 -04003070 size_t disk_io_size;
Chris Masond1310b22008-01-24 16:13:08 -05003071 size_t blocksize = inode->i_sb->s_blocksize;
Filipe Manana7f042a82016-01-27 19:17:20 +00003072 unsigned long this_bio_flag = 0;
Chris Masond1310b22008-01-24 16:13:08 -05003073
3074 set_page_extent_mapped(page);
3075
Dan Magenheimer90a887c2011-05-26 10:01:56 -06003076 if (!PageUptodate(page)) {
3077 if (cleancache_get_page(page) == 0) {
3078 BUG_ON(blocksize != PAGE_SIZE);
Miao Xie99740902013-07-25 19:22:36 +08003079 unlock_extent(tree, start, end);
Dan Magenheimer90a887c2011-05-26 10:01:56 -06003080 goto out;
3081 }
3082 }
3083
Kirill A. Shutemov09cbfea2016-04-01 15:29:47 +03003084 if (page->index == last_byte >> PAGE_SHIFT) {
Chris Masonc8b97812008-10-29 14:49:59 -04003085 char *userpage;
Johannes Thumshirn70730172018-12-05 15:23:03 +01003086 size_t zero_offset = offset_in_page(last_byte);
Chris Masonc8b97812008-10-29 14:49:59 -04003087
3088 if (zero_offset) {
Kirill A. Shutemov09cbfea2016-04-01 15:29:47 +03003089 iosize = PAGE_SIZE - zero_offset;
Cong Wang7ac687d2011-11-25 23:14:28 +08003090 userpage = kmap_atomic(page);
Chris Masonc8b97812008-10-29 14:49:59 -04003091 memset(userpage + zero_offset, 0, iosize);
3092 flush_dcache_page(page);
Cong Wang7ac687d2011-11-25 23:14:28 +08003093 kunmap_atomic(userpage);
Chris Masonc8b97812008-10-29 14:49:59 -04003094 }
3095 }
Chris Masond1310b22008-01-24 16:13:08 -05003096 while (cur <= end) {
Filipe Manana005efed2015-09-14 09:09:31 +01003097 bool force_bio_submit = false;
David Sterba6273b7f2017-10-04 17:30:11 +02003098 u64 offset;
Josef Bacikc8f2f242013-02-11 11:33:00 -05003099
Chris Masond1310b22008-01-24 16:13:08 -05003100 if (cur >= last_byte) {
3101 char *userpage;
Arne Jansen507903b2011-04-06 10:02:20 +00003102 struct extent_state *cached = NULL;
3103
Kirill A. Shutemov09cbfea2016-04-01 15:29:47 +03003104 iosize = PAGE_SIZE - pg_offset;
Cong Wang7ac687d2011-11-25 23:14:28 +08003105 userpage = kmap_atomic(page);
David Sterba306e16c2011-04-19 14:29:38 +02003106 memset(userpage + pg_offset, 0, iosize);
Chris Masond1310b22008-01-24 16:13:08 -05003107 flush_dcache_page(page);
Cong Wang7ac687d2011-11-25 23:14:28 +08003108 kunmap_atomic(userpage);
Chris Masond1310b22008-01-24 16:13:08 -05003109 set_extent_uptodate(tree, cur, cur + iosize - 1,
Arne Jansen507903b2011-04-06 10:02:20 +00003110 &cached, GFP_NOFS);
Filipe Manana7f042a82016-01-27 19:17:20 +00003111 unlock_extent_cached(tree, cur,
David Sterbae43bbe52017-12-12 21:43:52 +01003112 cur + iosize - 1, &cached);
Chris Masond1310b22008-01-24 16:13:08 -05003113 break;
3114 }
Miao Xie125bac012013-07-25 19:22:37 +08003115 em = __get_extent_map(inode, page, pg_offset, cur,
3116 end - cur + 1, get_extent, em_cached);
David Sterbac7040052011-04-19 18:00:01 +02003117 if (IS_ERR_OR_NULL(em)) {
Chris Masond1310b22008-01-24 16:13:08 -05003118 SetPageError(page);
Filipe Manana7f042a82016-01-27 19:17:20 +00003119 unlock_extent(tree, cur, end);
Chris Masond1310b22008-01-24 16:13:08 -05003120 break;
3121 }
Chris Masond1310b22008-01-24 16:13:08 -05003122 extent_offset = cur - em->start;
3123 BUG_ON(extent_map_end(em) <= cur);
3124 BUG_ON(end < cur);
3125
Li Zefan261507a02010-12-17 14:21:50 +08003126 if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) {
Mark Fasheh4b384312013-08-06 11:42:50 -07003127 this_bio_flag |= EXTENT_BIO_COMPRESSED;
Li Zefan261507a02010-12-17 14:21:50 +08003128 extent_set_compress_type(&this_bio_flag,
3129 em->compress_type);
3130 }
Chris Masonc8b97812008-10-29 14:49:59 -04003131
Chris Masond1310b22008-01-24 16:13:08 -05003132 iosize = min(extent_map_end(em) - cur, end - cur + 1);
3133 cur_end = min(extent_map_end(em) - 1, end);
Qu Wenruofda28322013-02-26 08:10:22 +00003134 iosize = ALIGN(iosize, blocksize);
Chris Masonc8b97812008-10-29 14:49:59 -04003135 if (this_bio_flag & EXTENT_BIO_COMPRESSED) {
3136 disk_io_size = em->block_len;
David Sterba6273b7f2017-10-04 17:30:11 +02003137 offset = em->block_start;
Chris Masonc8b97812008-10-29 14:49:59 -04003138 } else {
David Sterba6273b7f2017-10-04 17:30:11 +02003139 offset = em->block_start + extent_offset;
Chris Masonc8b97812008-10-29 14:49:59 -04003140 disk_io_size = iosize;
3141 }
Chris Masond1310b22008-01-24 16:13:08 -05003142 bdev = em->bdev;
3143 block_start = em->block_start;
Yan Zhengd899e052008-10-30 14:25:28 -04003144 if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags))
3145 block_start = EXTENT_MAP_HOLE;
Filipe Manana005efed2015-09-14 09:09:31 +01003146
3147 /*
3148 * If we have a file range that points to a compressed extent
3149 * and it's followed by a consecutive file range that points to
3150 * to the same compressed extent (possibly with a different
3151 * offset and/or length, so it either points to the whole extent
3152 * or only part of it), we must make sure we do not submit a
3153 * single bio to populate the pages for the 2 ranges because
3154 * this makes the compressed extent read zero out the pages
3155 * belonging to the 2nd range. Imagine the following scenario:
3156 *
3157 * File layout
3158 * [0 - 8K] [8K - 24K]
3159 * | |
3160 * | |
3161 * points to extent X, points to extent X,
3162 * offset 4K, length of 8K offset 0, length 16K
3163 *
3164 * [extent X, compressed length = 4K uncompressed length = 16K]
3165 *
3166 * If the bio to read the compressed extent covers both ranges,
3167 * it will decompress extent X into the pages belonging to the
3168 * first range and then it will stop, zeroing out the remaining
3169 * pages that belong to the other range that points to extent X.
3170 * So here we make sure we submit 2 bios, one for the first
3171 * range and another one for the third range. Both will target
3172 * the same physical extent from disk, but we can't currently
3173 * make the compressed bio endio callback populate the pages
3174 * for both ranges because each compressed bio is tightly
3175 * coupled with a single extent map, and each range can have
3176 * an extent map with a different offset value relative to the
3177 * uncompressed data of our extent and different lengths. This
3178 * is a corner case so we prioritize correctness over
3179 * non-optimal behavior (submitting 2 bios for the same extent).
3180 */
3181 if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags) &&
3182 prev_em_start && *prev_em_start != (u64)-1 &&
Filipe Manana8e928212019-02-14 15:17:20 +00003183 *prev_em_start != em->start)
Filipe Manana005efed2015-09-14 09:09:31 +01003184 force_bio_submit = true;
3185
3186 if (prev_em_start)
Filipe Manana8e928212019-02-14 15:17:20 +00003187 *prev_em_start = em->start;
Filipe Manana005efed2015-09-14 09:09:31 +01003188
Chris Masond1310b22008-01-24 16:13:08 -05003189 free_extent_map(em);
3190 em = NULL;
3191
3192 /* we've found a hole, just zero and go on */
3193 if (block_start == EXTENT_MAP_HOLE) {
3194 char *userpage;
Arne Jansen507903b2011-04-06 10:02:20 +00003195 struct extent_state *cached = NULL;
3196
Cong Wang7ac687d2011-11-25 23:14:28 +08003197 userpage = kmap_atomic(page);
David Sterba306e16c2011-04-19 14:29:38 +02003198 memset(userpage + pg_offset, 0, iosize);
Chris Masond1310b22008-01-24 16:13:08 -05003199 flush_dcache_page(page);
Cong Wang7ac687d2011-11-25 23:14:28 +08003200 kunmap_atomic(userpage);
Chris Masond1310b22008-01-24 16:13:08 -05003201
3202 set_extent_uptodate(tree, cur, cur + iosize - 1,
Arne Jansen507903b2011-04-06 10:02:20 +00003203 &cached, GFP_NOFS);
Filipe Manana7f042a82016-01-27 19:17:20 +00003204 unlock_extent_cached(tree, cur,
David Sterbae43bbe52017-12-12 21:43:52 +01003205 cur + iosize - 1, &cached);
Chris Masond1310b22008-01-24 16:13:08 -05003206 cur = cur + iosize;
David Sterba306e16c2011-04-19 14:29:38 +02003207 pg_offset += iosize;
Chris Masond1310b22008-01-24 16:13:08 -05003208 continue;
3209 }
3210 /* the get_extent function already copied into the page */
Chris Mason9655d292009-09-02 15:22:30 -04003211 if (test_range_bit(tree, cur, cur_end,
3212 EXTENT_UPTODATE, 1, NULL)) {
Chris Masona1b32a52008-09-05 16:09:51 -04003213 check_page_uptodate(tree, page);
Filipe Manana7f042a82016-01-27 19:17:20 +00003214 unlock_extent(tree, cur, cur + iosize - 1);
Chris Masond1310b22008-01-24 16:13:08 -05003215 cur = cur + iosize;
David Sterba306e16c2011-04-19 14:29:38 +02003216 pg_offset += iosize;
Chris Masond1310b22008-01-24 16:13:08 -05003217 continue;
3218 }
Chris Mason70dec802008-01-29 09:59:12 -05003219 /* we have an inline extent but it didn't get marked up
3220 * to date. Error out
3221 */
3222 if (block_start == EXTENT_MAP_INLINE) {
3223 SetPageError(page);
Filipe Manana7f042a82016-01-27 19:17:20 +00003224 unlock_extent(tree, cur, cur + iosize - 1);
Chris Mason70dec802008-01-29 09:59:12 -05003225 cur = cur + iosize;
David Sterba306e16c2011-04-19 14:29:38 +02003226 pg_offset += iosize;
Chris Mason70dec802008-01-29 09:59:12 -05003227 continue;
3228 }
Chris Masond1310b22008-01-24 16:13:08 -05003229
David Sterba4b81ba42017-06-06 19:14:26 +02003230 ret = submit_extent_page(REQ_OP_READ | read_flags, tree, NULL,
David Sterba6273b7f2017-10-04 17:30:11 +02003231 page, offset, disk_io_size,
3232 pg_offset, bdev, bio,
Chris Masonc8b97812008-10-29 14:49:59 -04003233 end_bio_extent_readpage, mirror_num,
3234 *bio_flags,
Filipe Manana005efed2015-09-14 09:09:31 +01003235 this_bio_flag,
3236 force_bio_submit);
Josef Bacikc8f2f242013-02-11 11:33:00 -05003237 if (!ret) {
3238 nr++;
3239 *bio_flags = this_bio_flag;
3240 } else {
Chris Masond1310b22008-01-24 16:13:08 -05003241 SetPageError(page);
Filipe Manana7f042a82016-01-27 19:17:20 +00003242 unlock_extent(tree, cur, cur + iosize - 1);
Liu Bobaf863b2016-07-11 10:39:07 -07003243 goto out;
Josef Bacikedd33c92012-10-05 16:40:32 -04003244 }
Chris Masond1310b22008-01-24 16:13:08 -05003245 cur = cur + iosize;
David Sterba306e16c2011-04-19 14:29:38 +02003246 pg_offset += iosize;
Chris Masond1310b22008-01-24 16:13:08 -05003247 }
Dan Magenheimer90a887c2011-05-26 10:01:56 -06003248out:
Chris Masond1310b22008-01-24 16:13:08 -05003249 if (!nr) {
3250 if (!PageError(page))
3251 SetPageUptodate(page);
3252 unlock_page(page);
3253 }
Liu Bobaf863b2016-07-11 10:39:07 -07003254 return ret;
Chris Masond1310b22008-01-24 16:13:08 -05003255}
3256
Nikolay Borisove65ef212019-03-11 09:55:38 +02003257static inline void contiguous_readpages(struct extent_io_tree *tree,
Miao Xie99740902013-07-25 19:22:36 +08003258 struct page *pages[], int nr_pages,
3259 u64 start, u64 end,
Miao Xie125bac012013-07-25 19:22:37 +08003260 struct extent_map **em_cached,
Nikolay Borisovd3fac6b2017-10-24 11:50:39 +03003261 struct bio **bio,
Mike Christie1f7ad752016-06-05 14:31:51 -05003262 unsigned long *bio_flags,
Filipe Manana808f80b2015-09-28 09:56:26 +01003263 u64 *prev_em_start)
Miao Xie99740902013-07-25 19:22:36 +08003264{
Nikolay Borisov23d31bd2019-05-07 10:19:23 +03003265 struct btrfs_inode *inode = BTRFS_I(pages[0]->mapping->host);
Miao Xie99740902013-07-25 19:22:36 +08003266 int index;
3267
Nikolay Borisov23d31bd2019-05-07 10:19:23 +03003268 btrfs_lock_and_flush_ordered_range(tree, inode, start, end, NULL);
Miao Xie99740902013-07-25 19:22:36 +08003269
3270 for (index = 0; index < nr_pages; index++) {
David Sterba4ef77692017-06-23 04:09:57 +02003271 __do_readpage(tree, pages[index], btrfs_get_extent, em_cached,
Jens Axboe5e9d3982018-08-17 15:45:39 -07003272 bio, 0, bio_flags, REQ_RAHEAD, prev_em_start);
Kirill A. Shutemov09cbfea2016-04-01 15:29:47 +03003273 put_page(pages[index]);
Miao Xie99740902013-07-25 19:22:36 +08003274 }
3275}
3276
Miao Xie99740902013-07-25 19:22:36 +08003277static int __extent_read_full_page(struct extent_io_tree *tree,
3278 struct page *page,
3279 get_extent_t *get_extent,
3280 struct bio **bio, int mirror_num,
David Sterbaf1c77c52017-06-06 19:03:49 +02003281 unsigned long *bio_flags,
3282 unsigned int read_flags)
Miao Xie99740902013-07-25 19:22:36 +08003283{
Nikolay Borisov23d31bd2019-05-07 10:19:23 +03003284 struct btrfs_inode *inode = BTRFS_I(page->mapping->host);
Miao Xie99740902013-07-25 19:22:36 +08003285 u64 start = page_offset(page);
Kirill A. Shutemov09cbfea2016-04-01 15:29:47 +03003286 u64 end = start + PAGE_SIZE - 1;
Miao Xie99740902013-07-25 19:22:36 +08003287 int ret;
3288
Nikolay Borisov23d31bd2019-05-07 10:19:23 +03003289 btrfs_lock_and_flush_ordered_range(tree, inode, start, end, NULL);
Miao Xie99740902013-07-25 19:22:36 +08003290
Miao Xie125bac012013-07-25 19:22:37 +08003291 ret = __do_readpage(tree, page, get_extent, NULL, bio, mirror_num,
Mike Christie1f7ad752016-06-05 14:31:51 -05003292 bio_flags, read_flags, NULL);
Miao Xie99740902013-07-25 19:22:36 +08003293 return ret;
3294}
3295
Chris Masond1310b22008-01-24 16:13:08 -05003296int extent_read_full_page(struct extent_io_tree *tree, struct page *page,
Jan Schmidt8ddc7d92011-06-13 20:02:58 +02003297 get_extent_t *get_extent, int mirror_num)
Chris Masond1310b22008-01-24 16:13:08 -05003298{
3299 struct bio *bio = NULL;
Chris Masonc8b97812008-10-29 14:49:59 -04003300 unsigned long bio_flags = 0;
Chris Masond1310b22008-01-24 16:13:08 -05003301 int ret;
3302
Jan Schmidt8ddc7d92011-06-13 20:02:58 +02003303 ret = __extent_read_full_page(tree, page, get_extent, &bio, mirror_num,
Mike Christie1f7ad752016-06-05 14:31:51 -05003304 &bio_flags, 0);
Chris Masond1310b22008-01-24 16:13:08 -05003305 if (bio)
Mike Christie1f7ad752016-06-05 14:31:51 -05003306 ret = submit_one_bio(bio, mirror_num, bio_flags);
Chris Masond1310b22008-01-24 16:13:08 -05003307 return ret;
3308}
Chris Masond1310b22008-01-24 16:13:08 -05003309
David Sterba3d4b9492017-02-10 19:33:41 +01003310static void update_nr_written(struct writeback_control *wbc,
Liu Boa91326672016-03-07 16:56:21 -08003311 unsigned long nr_written)
Chris Mason11c83492009-04-20 15:50:09 -04003312{
3313 wbc->nr_to_write -= nr_written;
Chris Mason11c83492009-04-20 15:50:09 -04003314}
3315
Chris Masond1310b22008-01-24 16:13:08 -05003316/*
Chris Mason40f76582014-05-21 13:35:51 -07003317 * helper for __extent_writepage, doing all of the delayed allocation setup.
3318 *
Nikolay Borisov5eaad972018-11-01 14:09:46 +02003319 * This returns 1 if btrfs_run_delalloc_range function did all the work required
Chris Mason40f76582014-05-21 13:35:51 -07003320 * to write the page (copy into inline extent). In this case the IO has
3321 * been started and the page is already unlocked.
3322 *
3323 * This returns 0 if all went well (page still locked)
3324 * This returns < 0 if there were errors (page still locked)
Chris Masond1310b22008-01-24 16:13:08 -05003325 */
Chris Mason40f76582014-05-21 13:35:51 -07003326static noinline_for_stack int writepage_delalloc(struct inode *inode,
Nikolay Borisov8cc02372018-11-08 10:18:07 +02003327 struct page *page, struct writeback_control *wbc,
3328 u64 delalloc_start, unsigned long *nr_written)
Chris Masond1310b22008-01-24 16:13:08 -05003329{
Nikolay Borisov8cc02372018-11-08 10:18:07 +02003330 struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree;
Kirill A. Shutemov09cbfea2016-04-01 15:29:47 +03003331 u64 page_end = delalloc_start + PAGE_SIZE - 1;
Lu Fengqi3522e902018-11-29 11:33:38 +08003332 bool found;
Chris Mason40f76582014-05-21 13:35:51 -07003333 u64 delalloc_to_write = 0;
3334 u64 delalloc_end = 0;
3335 int ret;
3336 int page_started = 0;
3337
Chris Mason40f76582014-05-21 13:35:51 -07003338
3339 while (delalloc_end < page_end) {
Lu Fengqi3522e902018-11-29 11:33:38 +08003340 found = find_lock_delalloc_range(inode, tree,
Chris Mason40f76582014-05-21 13:35:51 -07003341 page,
3342 &delalloc_start,
Nikolay Borisov917aace2018-10-26 14:43:20 +03003343 &delalloc_end);
Lu Fengqi3522e902018-11-29 11:33:38 +08003344 if (!found) {
Chris Mason40f76582014-05-21 13:35:51 -07003345 delalloc_start = delalloc_end + 1;
3346 continue;
3347 }
Nikolay Borisov5eaad972018-11-01 14:09:46 +02003348 ret = btrfs_run_delalloc_range(inode, page, delalloc_start,
3349 delalloc_end, &page_started, nr_written, wbc);
Chris Mason40f76582014-05-21 13:35:51 -07003350 if (ret) {
3351 SetPageError(page);
Nikolay Borisov5eaad972018-11-01 14:09:46 +02003352 /*
3353 * btrfs_run_delalloc_range should return < 0 for error
3354 * but just in case, we use > 0 here meaning the IO is
3355 * started, so we don't want to return > 0 unless
3356 * things are going well.
Chris Mason40f76582014-05-21 13:35:51 -07003357 */
3358 ret = ret < 0 ? ret : -EIO;
3359 goto done;
3360 }
3361 /*
Kirill A. Shutemovea1754a2016-04-01 15:29:48 +03003362 * delalloc_end is already one less than the total length, so
3363 * we don't subtract one from PAGE_SIZE
Chris Mason40f76582014-05-21 13:35:51 -07003364 */
3365 delalloc_to_write += (delalloc_end - delalloc_start +
Kirill A. Shutemovea1754a2016-04-01 15:29:48 +03003366 PAGE_SIZE) >> PAGE_SHIFT;
Chris Mason40f76582014-05-21 13:35:51 -07003367 delalloc_start = delalloc_end + 1;
3368 }
3369 if (wbc->nr_to_write < delalloc_to_write) {
3370 int thresh = 8192;
3371
3372 if (delalloc_to_write < thresh * 2)
3373 thresh = delalloc_to_write;
3374 wbc->nr_to_write = min_t(u64, delalloc_to_write,
3375 thresh);
3376 }
3377
3378 /* did the fill delalloc function already unlock and start
3379 * the IO?
3380 */
3381 if (page_started) {
3382 /*
3383 * we've unlocked the page, so we can't update
3384 * the mapping's writeback index, just update
3385 * nr_to_write.
3386 */
3387 wbc->nr_to_write -= *nr_written;
3388 return 1;
3389 }
3390
3391 ret = 0;
3392
3393done:
3394 return ret;
3395}
3396
3397/*
3398 * helper for __extent_writepage. This calls the writepage start hooks,
3399 * and does the loop to map the page into extents and bios.
3400 *
3401 * We return 1 if the IO is started and the page is unlocked,
3402 * 0 if all went well (page still locked)
3403 * < 0 if there were errors (page still locked)
3404 */
3405static noinline_for_stack int __extent_writepage_io(struct inode *inode,
3406 struct page *page,
3407 struct writeback_control *wbc,
3408 struct extent_page_data *epd,
3409 loff_t i_size,
3410 unsigned long nr_written,
David Sterbaf1c77c52017-06-06 19:03:49 +02003411 unsigned int write_flags, int *nr_ret)
Chris Mason40f76582014-05-21 13:35:51 -07003412{
Chris Masond1310b22008-01-24 16:13:08 -05003413 struct extent_io_tree *tree = epd->tree;
Miao Xie4eee4fa2012-12-21 09:17:45 +00003414 u64 start = page_offset(page);
Kirill A. Shutemov09cbfea2016-04-01 15:29:47 +03003415 u64 page_end = start + PAGE_SIZE - 1;
Chris Masond1310b22008-01-24 16:13:08 -05003416 u64 end;
3417 u64 cur = start;
3418 u64 extent_offset;
Chris Masond1310b22008-01-24 16:13:08 -05003419 u64 block_start;
3420 u64 iosize;
Chris Masond1310b22008-01-24 16:13:08 -05003421 struct extent_map *em;
3422 struct block_device *bdev;
Chris Mason7f3c74f2008-07-18 12:01:11 -04003423 size_t pg_offset = 0;
Chris Masond1310b22008-01-24 16:13:08 -05003424 size_t blocksize;
Chris Mason40f76582014-05-21 13:35:51 -07003425 int ret = 0;
3426 int nr = 0;
3427 bool compressed;
Chris Masond1310b22008-01-24 16:13:08 -05003428
Nikolay Borisovd75855b2018-11-01 14:09:47 +02003429 ret = btrfs_writepage_cow_fixup(page, start, page_end);
3430 if (ret) {
3431 /* Fixup worker will requeue */
3432 if (ret == -EBUSY)
3433 wbc->pages_skipped++;
3434 else
3435 redirty_page_for_writepage(wbc, page);
Chris Mason40f76582014-05-21 13:35:51 -07003436
Nikolay Borisovd75855b2018-11-01 14:09:47 +02003437 update_nr_written(wbc, nr_written);
3438 unlock_page(page);
3439 return 1;
Chris Mason247e7432008-07-17 12:53:51 -04003440 }
3441
Chris Mason11c83492009-04-20 15:50:09 -04003442 /*
3443 * we don't want to touch the inode after unlocking the page,
3444 * so we update the mapping writeback index now
3445 */
David Sterba3d4b9492017-02-10 19:33:41 +01003446 update_nr_written(wbc, nr_written + 1);
Chris Mason771ed682008-11-06 22:02:51 -05003447
Chris Masond1310b22008-01-24 16:13:08 -05003448 end = page_end;
Chris Mason40f76582014-05-21 13:35:51 -07003449 if (i_size <= start) {
Nikolay Borisovc6297322018-11-08 10:18:08 +02003450 btrfs_writepage_endio_finish_ordered(page, start, page_end, 1);
Chris Masond1310b22008-01-24 16:13:08 -05003451 goto done;
3452 }
3453
Chris Masond1310b22008-01-24 16:13:08 -05003454 blocksize = inode->i_sb->s_blocksize;
3455
3456 while (cur <= end) {
Chris Mason40f76582014-05-21 13:35:51 -07003457 u64 em_end;
David Sterba6273b7f2017-10-04 17:30:11 +02003458 u64 offset;
David Sterba58409ed2016-05-04 11:46:10 +02003459
Chris Mason40f76582014-05-21 13:35:51 -07003460 if (cur >= i_size) {
Nikolay Borisov7087a9d2018-11-01 14:09:48 +02003461 btrfs_writepage_endio_finish_ordered(page, cur,
Nikolay Borisovc6297322018-11-08 10:18:08 +02003462 page_end, 1);
Chris Masond1310b22008-01-24 16:13:08 -05003463 break;
3464 }
David Sterba3c98c622017-06-23 04:01:08 +02003465 em = btrfs_get_extent(BTRFS_I(inode), page, pg_offset, cur,
Chris Masond1310b22008-01-24 16:13:08 -05003466 end - cur + 1, 1);
David Sterbac7040052011-04-19 18:00:01 +02003467 if (IS_ERR_OR_NULL(em)) {
Chris Masond1310b22008-01-24 16:13:08 -05003468 SetPageError(page);
Filipe Manana61391d52014-05-09 17:17:40 +01003469 ret = PTR_ERR_OR_ZERO(em);
Chris Masond1310b22008-01-24 16:13:08 -05003470 break;
3471 }
3472
3473 extent_offset = cur - em->start;
Chris Mason40f76582014-05-21 13:35:51 -07003474 em_end = extent_map_end(em);
3475 BUG_ON(em_end <= cur);
Chris Masond1310b22008-01-24 16:13:08 -05003476 BUG_ON(end < cur);
Chris Mason40f76582014-05-21 13:35:51 -07003477 iosize = min(em_end - cur, end - cur + 1);
Qu Wenruofda28322013-02-26 08:10:22 +00003478 iosize = ALIGN(iosize, blocksize);
David Sterba6273b7f2017-10-04 17:30:11 +02003479 offset = em->block_start + extent_offset;
Chris Masond1310b22008-01-24 16:13:08 -05003480 bdev = em->bdev;
3481 block_start = em->block_start;
Chris Masonc8b97812008-10-29 14:49:59 -04003482 compressed = test_bit(EXTENT_FLAG_COMPRESSED, &em->flags);
Chris Masond1310b22008-01-24 16:13:08 -05003483 free_extent_map(em);
3484 em = NULL;
3485
Chris Masonc8b97812008-10-29 14:49:59 -04003486 /*
3487 * compressed and inline extents are written through other
3488 * paths in the FS
3489 */
3490 if (compressed || block_start == EXTENT_MAP_HOLE ||
Chris Masond1310b22008-01-24 16:13:08 -05003491 block_start == EXTENT_MAP_INLINE) {
Chris Masonc8b97812008-10-29 14:49:59 -04003492 /*
3493 * end_io notification does not happen here for
3494 * compressed extents
3495 */
Nikolay Borisov7087a9d2018-11-01 14:09:48 +02003496 if (!compressed)
3497 btrfs_writepage_endio_finish_ordered(page, cur,
3498 cur + iosize - 1,
Nikolay Borisovc6297322018-11-08 10:18:08 +02003499 1);
Chris Masonc8b97812008-10-29 14:49:59 -04003500 else if (compressed) {
3501 /* we don't want to end_page_writeback on
3502 * a compressed extent. this happens
3503 * elsewhere
3504 */
3505 nr++;
3506 }
3507
3508 cur += iosize;
Chris Mason7f3c74f2008-07-18 12:01:11 -04003509 pg_offset += iosize;
Chris Masond1310b22008-01-24 16:13:08 -05003510 continue;
3511 }
Chris Masonc8b97812008-10-29 14:49:59 -04003512
David Sterba5cdc84b2018-07-18 20:32:52 +02003513 btrfs_set_range_writeback(tree, cur, cur + iosize - 1);
David Sterba58409ed2016-05-04 11:46:10 +02003514 if (!PageWriteback(page)) {
3515 btrfs_err(BTRFS_I(inode)->root->fs_info,
3516 "page %lu not writeback, cur %llu end %llu",
3517 page->index, cur, end);
Chris Masond1310b22008-01-24 16:13:08 -05003518 }
David Sterba58409ed2016-05-04 11:46:10 +02003519
David Sterba4b81ba42017-06-06 19:14:26 +02003520 ret = submit_extent_page(REQ_OP_WRITE | write_flags, tree, wbc,
David Sterba6273b7f2017-10-04 17:30:11 +02003521 page, offset, iosize, pg_offset,
David Sterbac2df8bb2017-02-10 19:29:38 +01003522 bdev, &epd->bio,
David Sterba58409ed2016-05-04 11:46:10 +02003523 end_bio_extent_writepage,
3524 0, 0, 0, false);
Takafumi Kubotafe01aa62017-02-09 17:24:33 +09003525 if (ret) {
Chris Masond1310b22008-01-24 16:13:08 -05003526 SetPageError(page);
Takafumi Kubotafe01aa62017-02-09 17:24:33 +09003527 if (PageWriteback(page))
3528 end_page_writeback(page);
3529 }
Chris Mason7f3c74f2008-07-18 12:01:11 -04003530
Chris Masond1310b22008-01-24 16:13:08 -05003531 cur = cur + iosize;
Chris Mason7f3c74f2008-07-18 12:01:11 -04003532 pg_offset += iosize;
Chris Masond1310b22008-01-24 16:13:08 -05003533 nr++;
3534 }
3535done:
Chris Mason40f76582014-05-21 13:35:51 -07003536 *nr_ret = nr;
Chris Mason40f76582014-05-21 13:35:51 -07003537 return ret;
3538}
3539
3540/*
3541 * the writepage semantics are similar to regular writepage. extent
3542 * records are inserted to lock ranges in the tree, and as dirty areas
3543 * are found, they are marked writeback. Then the lock bits are removed
3544 * and the end_io handler clears the writeback ranges
Qu Wenruo30659762019-03-20 14:27:42 +08003545 *
3546 * Return 0 if everything goes well.
3547 * Return <0 for error.
Chris Mason40f76582014-05-21 13:35:51 -07003548 */
3549static int __extent_writepage(struct page *page, struct writeback_control *wbc,
David Sterbaaab6e9e2017-11-30 18:00:02 +01003550 struct extent_page_data *epd)
Chris Mason40f76582014-05-21 13:35:51 -07003551{
3552 struct inode *inode = page->mapping->host;
Chris Mason40f76582014-05-21 13:35:51 -07003553 u64 start = page_offset(page);
Kirill A. Shutemov09cbfea2016-04-01 15:29:47 +03003554 u64 page_end = start + PAGE_SIZE - 1;
Chris Mason40f76582014-05-21 13:35:51 -07003555 int ret;
3556 int nr = 0;
3557 size_t pg_offset = 0;
3558 loff_t i_size = i_size_read(inode);
Kirill A. Shutemov09cbfea2016-04-01 15:29:47 +03003559 unsigned long end_index = i_size >> PAGE_SHIFT;
David Sterbaf1c77c52017-06-06 19:03:49 +02003560 unsigned int write_flags = 0;
Chris Mason40f76582014-05-21 13:35:51 -07003561 unsigned long nr_written = 0;
3562
Liu Boff40adf2017-08-24 18:19:48 -06003563 write_flags = wbc_to_write_flags(wbc);
Chris Mason40f76582014-05-21 13:35:51 -07003564
3565 trace___extent_writepage(page, inode, wbc);
3566
3567 WARN_ON(!PageLocked(page));
3568
3569 ClearPageError(page);
3570
Johannes Thumshirn70730172018-12-05 15:23:03 +01003571 pg_offset = offset_in_page(i_size);
Chris Mason40f76582014-05-21 13:35:51 -07003572 if (page->index > end_index ||
3573 (page->index == end_index && !pg_offset)) {
Kirill A. Shutemov09cbfea2016-04-01 15:29:47 +03003574 page->mapping->a_ops->invalidatepage(page, 0, PAGE_SIZE);
Chris Mason40f76582014-05-21 13:35:51 -07003575 unlock_page(page);
3576 return 0;
3577 }
3578
3579 if (page->index == end_index) {
3580 char *userpage;
3581
3582 userpage = kmap_atomic(page);
3583 memset(userpage + pg_offset, 0,
Kirill A. Shutemov09cbfea2016-04-01 15:29:47 +03003584 PAGE_SIZE - pg_offset);
Chris Mason40f76582014-05-21 13:35:51 -07003585 kunmap_atomic(userpage);
3586 flush_dcache_page(page);
3587 }
3588
3589 pg_offset = 0;
3590
3591 set_page_extent_mapped(page);
3592
Nikolay Borisov7789a552018-11-08 10:18:06 +02003593 if (!epd->extent_locked) {
Nikolay Borisov8cc02372018-11-08 10:18:07 +02003594 ret = writepage_delalloc(inode, page, wbc, start, &nr_written);
Nikolay Borisov7789a552018-11-08 10:18:06 +02003595 if (ret == 1)
3596 goto done_unlocked;
3597 if (ret)
3598 goto done;
3599 }
Chris Mason40f76582014-05-21 13:35:51 -07003600
3601 ret = __extent_writepage_io(inode, page, wbc, epd,
3602 i_size, nr_written, write_flags, &nr);
3603 if (ret == 1)
3604 goto done_unlocked;
3605
3606done:
Chris Masond1310b22008-01-24 16:13:08 -05003607 if (nr == 0) {
3608 /* make sure the mapping tag for page dirty gets cleared */
3609 set_page_writeback(page);
3610 end_page_writeback(page);
3611 }
Filipe Manana61391d52014-05-09 17:17:40 +01003612 if (PageError(page)) {
3613 ret = ret < 0 ? ret : -EIO;
3614 end_extent_writepage(page, ret, start, page_end);
3615 }
Chris Masond1310b22008-01-24 16:13:08 -05003616 unlock_page(page);
Qu Wenruo30659762019-03-20 14:27:42 +08003617 ASSERT(ret <= 0);
Chris Mason40f76582014-05-21 13:35:51 -07003618 return ret;
Chris Mason771ed682008-11-06 22:02:51 -05003619
Chris Mason11c83492009-04-20 15:50:09 -04003620done_unlocked:
Chris Masond1310b22008-01-24 16:13:08 -05003621 return 0;
3622}
3623
Josef Bacikfd8b2b62013-04-24 16:41:19 -04003624void wait_on_extent_buffer_writeback(struct extent_buffer *eb)
Josef Bacik0b32f4b2012-03-13 09:38:00 -04003625{
NeilBrown74316202014-07-07 15:16:04 +10003626 wait_on_bit_io(&eb->bflags, EXTENT_BUFFER_WRITEBACK,
3627 TASK_UNINTERRUPTIBLE);
Josef Bacik0b32f4b2012-03-13 09:38:00 -04003628}
3629
Qu Wenruo2e3c2512019-03-20 14:27:46 +08003630/*
3631 * Lock eb pages and flush the bio if we can't the locks
3632 *
3633 * Return 0 if nothing went wrong
3634 * Return >0 is same as 0, except bio is not submitted
3635 * Return <0 if something went wrong, no page is locked
3636 */
David Sterba9df76fb2019-03-20 11:21:41 +01003637static noinline_for_stack int lock_extent_buffer_for_io(struct extent_buffer *eb,
Chris Mason0e378df2014-05-19 20:55:27 -07003638 struct extent_page_data *epd)
Josef Bacik0b32f4b2012-03-13 09:38:00 -04003639{
David Sterba9df76fb2019-03-20 11:21:41 +01003640 struct btrfs_fs_info *fs_info = eb->fs_info;
Qu Wenruo2e3c2512019-03-20 14:27:46 +08003641 int i, num_pages, failed_page_nr;
Josef Bacik0b32f4b2012-03-13 09:38:00 -04003642 int flush = 0;
3643 int ret = 0;
3644
3645 if (!btrfs_try_tree_write_lock(eb)) {
Qu Wenruof4340622019-03-20 14:27:41 +08003646 ret = flush_write_bio(epd);
Qu Wenruo2e3c2512019-03-20 14:27:46 +08003647 if (ret < 0)
3648 return ret;
3649 flush = 1;
Josef Bacik0b32f4b2012-03-13 09:38:00 -04003650 btrfs_tree_lock(eb);
3651 }
3652
3653 if (test_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags)) {
3654 btrfs_tree_unlock(eb);
3655 if (!epd->sync_io)
3656 return 0;
3657 if (!flush) {
Qu Wenruof4340622019-03-20 14:27:41 +08003658 ret = flush_write_bio(epd);
Qu Wenruo2e3c2512019-03-20 14:27:46 +08003659 if (ret < 0)
3660 return ret;
Josef Bacik0b32f4b2012-03-13 09:38:00 -04003661 flush = 1;
3662 }
Chris Masona098d8e82012-03-21 12:09:56 -04003663 while (1) {
3664 wait_on_extent_buffer_writeback(eb);
3665 btrfs_tree_lock(eb);
3666 if (!test_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags))
3667 break;
Josef Bacik0b32f4b2012-03-13 09:38:00 -04003668 btrfs_tree_unlock(eb);
Josef Bacik0b32f4b2012-03-13 09:38:00 -04003669 }
3670 }
3671
Josef Bacik51561ff2012-07-20 16:25:24 -04003672 /*
3673 * We need to do this to prevent races in people who check if the eb is
3674 * under IO since we can end up having no IO bits set for a short period
3675 * of time.
3676 */
3677 spin_lock(&eb->refs_lock);
Josef Bacik0b32f4b2012-03-13 09:38:00 -04003678 if (test_and_clear_bit(EXTENT_BUFFER_DIRTY, &eb->bflags)) {
3679 set_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags);
Josef Bacik51561ff2012-07-20 16:25:24 -04003680 spin_unlock(&eb->refs_lock);
Josef Bacik0b32f4b2012-03-13 09:38:00 -04003681 btrfs_set_header_flag(eb, BTRFS_HEADER_FLAG_WRITTEN);
Nikolay Borisov104b4e52017-06-20 21:01:20 +03003682 percpu_counter_add_batch(&fs_info->dirty_metadata_bytes,
3683 -eb->len,
3684 fs_info->dirty_metadata_batch);
Josef Bacik0b32f4b2012-03-13 09:38:00 -04003685 ret = 1;
Josef Bacik51561ff2012-07-20 16:25:24 -04003686 } else {
3687 spin_unlock(&eb->refs_lock);
Josef Bacik0b32f4b2012-03-13 09:38:00 -04003688 }
3689
3690 btrfs_tree_unlock(eb);
3691
3692 if (!ret)
3693 return ret;
3694
David Sterba65ad0102018-06-29 10:56:49 +02003695 num_pages = num_extent_pages(eb);
Josef Bacik0b32f4b2012-03-13 09:38:00 -04003696 for (i = 0; i < num_pages; i++) {
David Sterbafb85fc92014-07-31 01:03:53 +02003697 struct page *p = eb->pages[i];
Josef Bacik0b32f4b2012-03-13 09:38:00 -04003698
3699 if (!trylock_page(p)) {
3700 if (!flush) {
Qu Wenruof4340622019-03-20 14:27:41 +08003701 ret = flush_write_bio(epd);
Qu Wenruo2e3c2512019-03-20 14:27:46 +08003702 if (ret < 0) {
3703 failed_page_nr = i;
3704 goto err_unlock;
3705 }
Josef Bacik0b32f4b2012-03-13 09:38:00 -04003706 flush = 1;
3707 }
3708 lock_page(p);
3709 }
3710 }
3711
3712 return ret;
Qu Wenruo2e3c2512019-03-20 14:27:46 +08003713err_unlock:
3714 /* Unlock already locked pages */
3715 for (i = 0; i < failed_page_nr; i++)
3716 unlock_page(eb->pages[i]);
3717 return ret;
Josef Bacik0b32f4b2012-03-13 09:38:00 -04003718}
3719
3720static void end_extent_buffer_writeback(struct extent_buffer *eb)
3721{
3722 clear_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags);
Peter Zijlstra4e857c52014-03-17 18:06:10 +01003723 smp_mb__after_atomic();
Josef Bacik0b32f4b2012-03-13 09:38:00 -04003724 wake_up_bit(&eb->bflags, EXTENT_BUFFER_WRITEBACK);
3725}
3726
Filipe Manana656f30d2014-09-26 12:25:56 +01003727static void set_btree_ioerr(struct page *page)
3728{
3729 struct extent_buffer *eb = (struct extent_buffer *)page->private;
Filipe Manana656f30d2014-09-26 12:25:56 +01003730
3731 SetPageError(page);
3732 if (test_and_set_bit(EXTENT_BUFFER_WRITE_ERR, &eb->bflags))
3733 return;
3734
3735 /*
3736 * If writeback for a btree extent that doesn't belong to a log tree
3737 * failed, increment the counter transaction->eb_write_errors.
3738 * We do this because while the transaction is running and before it's
3739 * committing (when we call filemap_fdata[write|wait]_range against
3740 * the btree inode), we might have
3741 * btree_inode->i_mapping->a_ops->writepages() called by the VM - if it
3742 * returns an error or an error happens during writeback, when we're
3743 * committing the transaction we wouldn't know about it, since the pages
3744 * can be no longer dirty nor marked anymore for writeback (if a
3745 * subsequent modification to the extent buffer didn't happen before the
3746 * transaction commit), which makes filemap_fdata[write|wait]_range not
3747 * able to find the pages tagged with SetPageError at transaction
3748 * commit time. So if this happens we must abort the transaction,
3749 * otherwise we commit a super block with btree roots that point to
3750 * btree nodes/leafs whose content on disk is invalid - either garbage
3751 * or the content of some node/leaf from a past generation that got
3752 * cowed or deleted and is no longer valid.
3753 *
3754 * Note: setting AS_EIO/AS_ENOSPC in the btree inode's i_mapping would
3755 * not be enough - we need to distinguish between log tree extents vs
3756 * non-log tree extents, and the next filemap_fdatawait_range() call
3757 * will catch and clear such errors in the mapping - and that call might
3758 * be from a log sync and not from a transaction commit. Also, checking
3759 * for the eb flag EXTENT_BUFFER_WRITE_ERR at transaction commit time is
3760 * not done and would not be reliable - the eb might have been released
3761 * from memory and reading it back again means that flag would not be
3762 * set (since it's a runtime flag, not persisted on disk).
3763 *
3764 * Using the flags below in the btree inode also makes us achieve the
3765 * goal of AS_EIO/AS_ENOSPC when writepages() returns success, started
3766 * writeback for all dirty pages and before filemap_fdatawait_range()
3767 * is called, the writeback for all dirty pages had already finished
3768 * with errors - because we were not using AS_EIO/AS_ENOSPC,
3769 * filemap_fdatawait_range() would return success, as it could not know
3770 * that writeback errors happened (the pages were no longer tagged for
3771 * writeback).
3772 */
3773 switch (eb->log_index) {
3774 case -1:
Josef Bacikafcdd122016-09-02 15:40:02 -04003775 set_bit(BTRFS_FS_BTREE_ERR, &eb->fs_info->flags);
Filipe Manana656f30d2014-09-26 12:25:56 +01003776 break;
3777 case 0:
Josef Bacikafcdd122016-09-02 15:40:02 -04003778 set_bit(BTRFS_FS_LOG1_ERR, &eb->fs_info->flags);
Filipe Manana656f30d2014-09-26 12:25:56 +01003779 break;
3780 case 1:
Josef Bacikafcdd122016-09-02 15:40:02 -04003781 set_bit(BTRFS_FS_LOG2_ERR, &eb->fs_info->flags);
Filipe Manana656f30d2014-09-26 12:25:56 +01003782 break;
3783 default:
3784 BUG(); /* unexpected, logic error */
3785 }
3786}
3787
Christoph Hellwig4246a0b2015-07-20 15:29:37 +02003788static void end_bio_extent_buffer_writepage(struct bio *bio)
Josef Bacik0b32f4b2012-03-13 09:38:00 -04003789{
Kent Overstreet2c30c712013-11-07 12:20:26 -08003790 struct bio_vec *bvec;
Josef Bacik0b32f4b2012-03-13 09:38:00 -04003791 struct extent_buffer *eb;
Christoph Hellwig2b070cf2019-04-25 09:03:00 +02003792 int done;
Ming Lei6dc4f102019-02-15 19:13:19 +08003793 struct bvec_iter_all iter_all;
Josef Bacik0b32f4b2012-03-13 09:38:00 -04003794
David Sterbac09abff2017-07-13 18:10:07 +02003795 ASSERT(!bio_flagged(bio, BIO_CLONED));
Christoph Hellwig2b070cf2019-04-25 09:03:00 +02003796 bio_for_each_segment_all(bvec, bio, iter_all) {
Josef Bacik0b32f4b2012-03-13 09:38:00 -04003797 struct page *page = bvec->bv_page;
3798
Josef Bacik0b32f4b2012-03-13 09:38:00 -04003799 eb = (struct extent_buffer *)page->private;
3800 BUG_ON(!eb);
3801 done = atomic_dec_and_test(&eb->io_pages);
3802
Christoph Hellwig4e4cbee2017-06-03 09:38:06 +02003803 if (bio->bi_status ||
Christoph Hellwig4246a0b2015-07-20 15:29:37 +02003804 test_bit(EXTENT_BUFFER_WRITE_ERR, &eb->bflags)) {
Josef Bacik0b32f4b2012-03-13 09:38:00 -04003805 ClearPageUptodate(page);
Filipe Manana656f30d2014-09-26 12:25:56 +01003806 set_btree_ioerr(page);
Josef Bacik0b32f4b2012-03-13 09:38:00 -04003807 }
3808
3809 end_page_writeback(page);
3810
3811 if (!done)
3812 continue;
3813
3814 end_extent_buffer_writeback(eb);
Kent Overstreet2c30c712013-11-07 12:20:26 -08003815 }
Josef Bacik0b32f4b2012-03-13 09:38:00 -04003816
3817 bio_put(bio);
Josef Bacik0b32f4b2012-03-13 09:38:00 -04003818}
3819
Chris Mason0e378df2014-05-19 20:55:27 -07003820static noinline_for_stack int write_one_eb(struct extent_buffer *eb,
Josef Bacik0b32f4b2012-03-13 09:38:00 -04003821 struct writeback_control *wbc,
3822 struct extent_page_data *epd)
3823{
David Sterba0ab02062019-03-20 11:27:57 +01003824 struct btrfs_fs_info *fs_info = eb->fs_info;
Josef Bacik0b32f4b2012-03-13 09:38:00 -04003825 struct block_device *bdev = fs_info->fs_devices->latest_bdev;
Josef Bacikf28491e2013-12-16 13:24:27 -05003826 struct extent_io_tree *tree = &BTRFS_I(fs_info->btree_inode)->io_tree;
Josef Bacik0b32f4b2012-03-13 09:38:00 -04003827 u64 offset = eb->start;
Liu Bo851cd172016-09-23 13:44:44 -07003828 u32 nritems;
David Sterbacc5e31a2018-03-01 18:20:27 +01003829 int i, num_pages;
Liu Bo851cd172016-09-23 13:44:44 -07003830 unsigned long start, end;
Liu Boff40adf2017-08-24 18:19:48 -06003831 unsigned int write_flags = wbc_to_write_flags(wbc) | REQ_META;
Josef Bacikd7dbe9e2012-04-23 14:00:51 -04003832 int ret = 0;
Josef Bacik0b32f4b2012-03-13 09:38:00 -04003833
Filipe Manana656f30d2014-09-26 12:25:56 +01003834 clear_bit(EXTENT_BUFFER_WRITE_ERR, &eb->bflags);
David Sterba65ad0102018-06-29 10:56:49 +02003835 num_pages = num_extent_pages(eb);
Josef Bacik0b32f4b2012-03-13 09:38:00 -04003836 atomic_set(&eb->io_pages, num_pages);
Josef Bacikde0022b2012-09-25 14:25:58 -04003837
Liu Bo851cd172016-09-23 13:44:44 -07003838 /* set btree blocks beyond nritems with 0 to avoid stale content. */
3839 nritems = btrfs_header_nritems(eb);
Liu Bo3eb548e2016-09-14 17:22:57 -07003840 if (btrfs_header_level(eb) > 0) {
Liu Bo3eb548e2016-09-14 17:22:57 -07003841 end = btrfs_node_key_ptr_offset(nritems);
3842
David Sterbab159fa22016-11-08 18:09:03 +01003843 memzero_extent_buffer(eb, end, eb->len - end);
Liu Bo851cd172016-09-23 13:44:44 -07003844 } else {
3845 /*
3846 * leaf:
3847 * header 0 1 2 .. N ... data_N .. data_2 data_1 data_0
3848 */
3849 start = btrfs_item_nr_offset(nritems);
David Sterba8f881e82019-03-20 11:33:10 +01003850 end = BTRFS_LEAF_DATA_OFFSET + leaf_data_end(eb);
David Sterbab159fa22016-11-08 18:09:03 +01003851 memzero_extent_buffer(eb, start, end - start);
Liu Bo3eb548e2016-09-14 17:22:57 -07003852 }
3853
Josef Bacik0b32f4b2012-03-13 09:38:00 -04003854 for (i = 0; i < num_pages; i++) {
David Sterbafb85fc92014-07-31 01:03:53 +02003855 struct page *p = eb->pages[i];
Josef Bacik0b32f4b2012-03-13 09:38:00 -04003856
3857 clear_page_dirty_for_io(p);
3858 set_page_writeback(p);
David Sterba4b81ba42017-06-06 19:14:26 +02003859 ret = submit_extent_page(REQ_OP_WRITE | write_flags, tree, wbc,
David Sterba6273b7f2017-10-04 17:30:11 +02003860 p, offset, PAGE_SIZE, 0, bdev,
David Sterbac2df8bb2017-02-10 19:29:38 +01003861 &epd->bio,
Mike Christie1f7ad752016-06-05 14:31:51 -05003862 end_bio_extent_buffer_writepage,
Liu Bo18fdc672017-09-13 12:18:22 -06003863 0, 0, 0, false);
Josef Bacik0b32f4b2012-03-13 09:38:00 -04003864 if (ret) {
Filipe Manana656f30d2014-09-26 12:25:56 +01003865 set_btree_ioerr(p);
Takafumi Kubotafe01aa62017-02-09 17:24:33 +09003866 if (PageWriteback(p))
3867 end_page_writeback(p);
Josef Bacik0b32f4b2012-03-13 09:38:00 -04003868 if (atomic_sub_and_test(num_pages - i, &eb->io_pages))
3869 end_extent_buffer_writeback(eb);
3870 ret = -EIO;
3871 break;
3872 }
Kirill A. Shutemov09cbfea2016-04-01 15:29:47 +03003873 offset += PAGE_SIZE;
David Sterba3d4b9492017-02-10 19:33:41 +01003874 update_nr_written(wbc, 1);
Josef Bacik0b32f4b2012-03-13 09:38:00 -04003875 unlock_page(p);
3876 }
3877
3878 if (unlikely(ret)) {
3879 for (; i < num_pages; i++) {
Chris Masonbbf65cf2014-10-04 09:56:45 -07003880 struct page *p = eb->pages[i];
Liu Bo81465022014-09-23 22:22:33 +08003881 clear_page_dirty_for_io(p);
Josef Bacik0b32f4b2012-03-13 09:38:00 -04003882 unlock_page(p);
3883 }
3884 }
3885
3886 return ret;
3887}
3888
3889int btree_write_cache_pages(struct address_space *mapping,
3890 struct writeback_control *wbc)
3891{
3892 struct extent_io_tree *tree = &BTRFS_I(mapping->host)->io_tree;
Josef Bacik0b32f4b2012-03-13 09:38:00 -04003893 struct extent_buffer *eb, *prev_eb = NULL;
3894 struct extent_page_data epd = {
3895 .bio = NULL,
3896 .tree = tree,
3897 .extent_locked = 0,
3898 .sync_io = wbc->sync_mode == WB_SYNC_ALL,
3899 };
3900 int ret = 0;
3901 int done = 0;
3902 int nr_to_write_done = 0;
3903 struct pagevec pvec;
3904 int nr_pages;
3905 pgoff_t index;
3906 pgoff_t end; /* Inclusive */
3907 int scanned = 0;
Matthew Wilcox10bbd232017-12-05 17:30:38 -05003908 xa_mark_t tag;
Josef Bacik0b32f4b2012-03-13 09:38:00 -04003909
Mel Gorman86679822017-11-15 17:37:52 -08003910 pagevec_init(&pvec);
Josef Bacik0b32f4b2012-03-13 09:38:00 -04003911 if (wbc->range_cyclic) {
3912 index = mapping->writeback_index; /* Start from prev offset */
3913 end = -1;
3914 } else {
Kirill A. Shutemov09cbfea2016-04-01 15:29:47 +03003915 index = wbc->range_start >> PAGE_SHIFT;
3916 end = wbc->range_end >> PAGE_SHIFT;
Josef Bacik0b32f4b2012-03-13 09:38:00 -04003917 scanned = 1;
3918 }
3919 if (wbc->sync_mode == WB_SYNC_ALL)
3920 tag = PAGECACHE_TAG_TOWRITE;
3921 else
3922 tag = PAGECACHE_TAG_DIRTY;
3923retry:
3924 if (wbc->sync_mode == WB_SYNC_ALL)
3925 tag_pages_for_writeback(mapping, index, end);
3926 while (!done && !nr_to_write_done && (index <= end) &&
Jan Kara4006f432017-11-15 17:34:37 -08003927 (nr_pages = pagevec_lookup_range_tag(&pvec, mapping, &index, end,
Jan Kara67fd7072017-11-15 17:35:19 -08003928 tag))) {
Josef Bacik0b32f4b2012-03-13 09:38:00 -04003929 unsigned i;
3930
3931 scanned = 1;
3932 for (i = 0; i < nr_pages; i++) {
3933 struct page *page = pvec.pages[i];
3934
3935 if (!PagePrivate(page))
3936 continue;
3937
Josef Bacikb5bae262012-09-14 13:43:01 -04003938 spin_lock(&mapping->private_lock);
3939 if (!PagePrivate(page)) {
3940 spin_unlock(&mapping->private_lock);
3941 continue;
3942 }
3943
Josef Bacik0b32f4b2012-03-13 09:38:00 -04003944 eb = (struct extent_buffer *)page->private;
Josef Bacikb5bae262012-09-14 13:43:01 -04003945
3946 /*
3947 * Shouldn't happen and normally this would be a BUG_ON
3948 * but no sense in crashing the users box for something
3949 * we can survive anyway.
3950 */
Dulshani Gunawardhanafae7f212013-10-31 10:30:08 +05303951 if (WARN_ON(!eb)) {
Josef Bacikb5bae262012-09-14 13:43:01 -04003952 spin_unlock(&mapping->private_lock);
Josef Bacik0b32f4b2012-03-13 09:38:00 -04003953 continue;
3954 }
3955
Josef Bacikb5bae262012-09-14 13:43:01 -04003956 if (eb == prev_eb) {
3957 spin_unlock(&mapping->private_lock);
Josef Bacik0b32f4b2012-03-13 09:38:00 -04003958 continue;
3959 }
3960
Josef Bacikb5bae262012-09-14 13:43:01 -04003961 ret = atomic_inc_not_zero(&eb->refs);
3962 spin_unlock(&mapping->private_lock);
3963 if (!ret)
3964 continue;
3965
Josef Bacik0b32f4b2012-03-13 09:38:00 -04003966 prev_eb = eb;
David Sterba9df76fb2019-03-20 11:21:41 +01003967 ret = lock_extent_buffer_for_io(eb, &epd);
Josef Bacik0b32f4b2012-03-13 09:38:00 -04003968 if (!ret) {
3969 free_extent_buffer(eb);
3970 continue;
3971 }
3972
David Sterba0ab02062019-03-20 11:27:57 +01003973 ret = write_one_eb(eb, wbc, &epd);
Josef Bacik0b32f4b2012-03-13 09:38:00 -04003974 if (ret) {
3975 done = 1;
3976 free_extent_buffer(eb);
3977 break;
3978 }
3979 free_extent_buffer(eb);
3980
3981 /*
3982 * the filesystem may choose to bump up nr_to_write.
3983 * We have to make sure to honor the new nr_to_write
3984 * at any time
3985 */
3986 nr_to_write_done = wbc->nr_to_write <= 0;
3987 }
3988 pagevec_release(&pvec);
3989 cond_resched();
3990 }
3991 if (!scanned && !done) {
3992 /*
3993 * We hit the last page and there is more work to be done: wrap
3994 * back to the start of the file
3995 */
3996 scanned = 1;
3997 index = 0;
3998 goto retry;
3999 }
Qu Wenruo2b952ee2019-03-20 14:27:43 +08004000 ASSERT(ret <= 0);
4001 if (ret < 0) {
4002 end_write_bio(&epd, ret);
4003 return ret;
4004 }
4005 ret = flush_write_bio(&epd);
Josef Bacik0b32f4b2012-03-13 09:38:00 -04004006 return ret;
4007}
4008
Chris Masond1310b22008-01-24 16:13:08 -05004009/**
Chris Mason4bef0842008-09-08 11:18:08 -04004010 * write_cache_pages - walk the list of dirty pages of the given address space and write all of them.
Chris Masond1310b22008-01-24 16:13:08 -05004011 * @mapping: address space structure to write
4012 * @wbc: subtract the number of written pages from *@wbc->nr_to_write
David Sterba935db852017-06-23 04:30:28 +02004013 * @data: data passed to __extent_writepage function
Chris Masond1310b22008-01-24 16:13:08 -05004014 *
4015 * If a page is already under I/O, write_cache_pages() skips it, even
4016 * if it's dirty. This is desirable behaviour for memory-cleaning writeback,
4017 * but it is INCORRECT for data-integrity system calls such as fsync(). fsync()
4018 * and msync() need to guarantee that all the data which was dirty at the time
4019 * the call was made get new I/O started against them. If wbc->sync_mode is
4020 * WB_SYNC_ALL then we were called for data integrity and we must wait for
4021 * existing IO to complete.
4022 */
David Sterba4242b642017-02-10 19:38:24 +01004023static int extent_write_cache_pages(struct address_space *mapping,
Chris Mason4bef0842008-09-08 11:18:08 -04004024 struct writeback_control *wbc,
David Sterbaaab6e9e2017-11-30 18:00:02 +01004025 struct extent_page_data *epd)
Chris Masond1310b22008-01-24 16:13:08 -05004026{
Josef Bacik7fd1a3f2012-06-27 17:18:41 -04004027 struct inode *inode = mapping->host;
Chris Masond1310b22008-01-24 16:13:08 -05004028 int ret = 0;
4029 int done = 0;
Chris Masonf85d7d6c2009-09-18 16:03:16 -04004030 int nr_to_write_done = 0;
Chris Masond1310b22008-01-24 16:13:08 -05004031 struct pagevec pvec;
4032 int nr_pages;
4033 pgoff_t index;
4034 pgoff_t end; /* Inclusive */
Liu Boa91326672016-03-07 16:56:21 -08004035 pgoff_t done_index;
4036 int range_whole = 0;
Chris Masond1310b22008-01-24 16:13:08 -05004037 int scanned = 0;
Matthew Wilcox10bbd232017-12-05 17:30:38 -05004038 xa_mark_t tag;
Chris Masond1310b22008-01-24 16:13:08 -05004039
Josef Bacik7fd1a3f2012-06-27 17:18:41 -04004040 /*
4041 * We have to hold onto the inode so that ordered extents can do their
4042 * work when the IO finishes. The alternative to this is failing to add
4043 * an ordered extent if the igrab() fails there and that is a huge pain
4044 * to deal with, so instead just hold onto the inode throughout the
4045 * writepages operation. If it fails here we are freeing up the inode
4046 * anyway and we'd rather not waste our time writing out stuff that is
4047 * going to be truncated anyway.
4048 */
4049 if (!igrab(inode))
4050 return 0;
4051
Mel Gorman86679822017-11-15 17:37:52 -08004052 pagevec_init(&pvec);
Chris Masond1310b22008-01-24 16:13:08 -05004053 if (wbc->range_cyclic) {
4054 index = mapping->writeback_index; /* Start from prev offset */
4055 end = -1;
4056 } else {
Kirill A. Shutemov09cbfea2016-04-01 15:29:47 +03004057 index = wbc->range_start >> PAGE_SHIFT;
4058 end = wbc->range_end >> PAGE_SHIFT;
Liu Boa91326672016-03-07 16:56:21 -08004059 if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
4060 range_whole = 1;
Chris Masond1310b22008-01-24 16:13:08 -05004061 scanned = 1;
4062 }
Ethan Lien3cd24c62018-11-01 14:49:03 +08004063
4064 /*
4065 * We do the tagged writepage as long as the snapshot flush bit is set
4066 * and we are the first one who do the filemap_flush() on this inode.
4067 *
4068 * The nr_to_write == LONG_MAX is needed to make sure other flushers do
4069 * not race in and drop the bit.
4070 */
4071 if (range_whole && wbc->nr_to_write == LONG_MAX &&
4072 test_and_clear_bit(BTRFS_INODE_SNAPSHOT_FLUSH,
4073 &BTRFS_I(inode)->runtime_flags))
4074 wbc->tagged_writepages = 1;
4075
4076 if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
Josef Bacikf7aaa062011-07-15 21:26:38 +00004077 tag = PAGECACHE_TAG_TOWRITE;
4078 else
4079 tag = PAGECACHE_TAG_DIRTY;
Chris Masond1310b22008-01-24 16:13:08 -05004080retry:
Ethan Lien3cd24c62018-11-01 14:49:03 +08004081 if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
Josef Bacikf7aaa062011-07-15 21:26:38 +00004082 tag_pages_for_writeback(mapping, index, end);
Liu Boa91326672016-03-07 16:56:21 -08004083 done_index = index;
Chris Masonf85d7d6c2009-09-18 16:03:16 -04004084 while (!done && !nr_to_write_done && (index <= end) &&
Jan Kara67fd7072017-11-15 17:35:19 -08004085 (nr_pages = pagevec_lookup_range_tag(&pvec, mapping,
4086 &index, end, tag))) {
Chris Masond1310b22008-01-24 16:13:08 -05004087 unsigned i;
4088
4089 scanned = 1;
4090 for (i = 0; i < nr_pages; i++) {
4091 struct page *page = pvec.pages[i];
4092
Liu Boa91326672016-03-07 16:56:21 -08004093 done_index = page->index;
Chris Masond1310b22008-01-24 16:13:08 -05004094 /*
Matthew Wilcoxb93b0162018-04-10 16:36:56 -07004095 * At this point we hold neither the i_pages lock nor
4096 * the page lock: the page may be truncated or
4097 * invalidated (changing page->mapping to NULL),
4098 * or even swizzled back from swapper_space to
4099 * tmpfs file mapping
Chris Masond1310b22008-01-24 16:13:08 -05004100 */
Josef Bacikc8f2f242013-02-11 11:33:00 -05004101 if (!trylock_page(page)) {
Qu Wenruof4340622019-03-20 14:27:41 +08004102 ret = flush_write_bio(epd);
4103 BUG_ON(ret < 0);
Josef Bacikc8f2f242013-02-11 11:33:00 -05004104 lock_page(page);
Chris Mason01d658f2011-11-01 10:08:06 -04004105 }
Chris Masond1310b22008-01-24 16:13:08 -05004106
4107 if (unlikely(page->mapping != mapping)) {
4108 unlock_page(page);
4109 continue;
4110 }
4111
Chris Masond2c3f4f2008-11-19 12:44:22 -05004112 if (wbc->sync_mode != WB_SYNC_NONE) {
Qu Wenruof4340622019-03-20 14:27:41 +08004113 if (PageWriteback(page)) {
4114 ret = flush_write_bio(epd);
4115 BUG_ON(ret < 0);
4116 }
Chris Masond1310b22008-01-24 16:13:08 -05004117 wait_on_page_writeback(page);
Chris Masond2c3f4f2008-11-19 12:44:22 -05004118 }
Chris Masond1310b22008-01-24 16:13:08 -05004119
4120 if (PageWriteback(page) ||
4121 !clear_page_dirty_for_io(page)) {
4122 unlock_page(page);
4123 continue;
4124 }
4125
David Sterbaaab6e9e2017-11-30 18:00:02 +01004126 ret = __extent_writepage(page, wbc, epd);
Liu Boa91326672016-03-07 16:56:21 -08004127 if (ret < 0) {
4128 /*
4129 * done_index is set past this page,
4130 * so media errors will not choke
4131 * background writeout for the entire
4132 * file. This has consequences for
4133 * range_cyclic semantics (ie. it may
4134 * not be suitable for data integrity
4135 * writeout).
4136 */
4137 done_index = page->index + 1;
4138 done = 1;
4139 break;
4140 }
Chris Masonf85d7d6c2009-09-18 16:03:16 -04004141
4142 /*
4143 * the filesystem may choose to bump up nr_to_write.
4144 * We have to make sure to honor the new nr_to_write
4145 * at any time
4146 */
4147 nr_to_write_done = wbc->nr_to_write <= 0;
Chris Masond1310b22008-01-24 16:13:08 -05004148 }
4149 pagevec_release(&pvec);
4150 cond_resched();
4151 }
Liu Bo894b36e2016-03-07 16:56:22 -08004152 if (!scanned && !done) {
Chris Masond1310b22008-01-24 16:13:08 -05004153 /*
4154 * We hit the last page and there is more work to be done: wrap
4155 * back to the start of the file
4156 */
4157 scanned = 1;
4158 index = 0;
4159 goto retry;
4160 }
Liu Boa91326672016-03-07 16:56:21 -08004161
4162 if (wbc->range_cyclic || (wbc->nr_to_write > 0 && range_whole))
4163 mapping->writeback_index = done_index;
4164
Josef Bacik7fd1a3f2012-06-27 17:18:41 -04004165 btrfs_add_delayed_iput(inode);
Liu Bo894b36e2016-03-07 16:56:22 -08004166 return ret;
Chris Masond1310b22008-01-24 16:13:08 -05004167}
Chris Masond1310b22008-01-24 16:13:08 -05004168
Nikolay Borisov0a9b0e52017-12-08 15:55:59 +02004169int extent_write_full_page(struct page *page, struct writeback_control *wbc)
Chris Masond1310b22008-01-24 16:13:08 -05004170{
4171 int ret;
Chris Masond1310b22008-01-24 16:13:08 -05004172 struct extent_page_data epd = {
4173 .bio = NULL,
Nikolay Borisov0a9b0e52017-12-08 15:55:59 +02004174 .tree = &BTRFS_I(page->mapping->host)->io_tree,
Chris Mason771ed682008-11-06 22:02:51 -05004175 .extent_locked = 0,
Chris Masonffbd5172009-04-20 15:50:09 -04004176 .sync_io = wbc->sync_mode == WB_SYNC_ALL,
Chris Masond1310b22008-01-24 16:13:08 -05004177 };
Chris Masond1310b22008-01-24 16:13:08 -05004178
Chris Masond1310b22008-01-24 16:13:08 -05004179 ret = __extent_writepage(page, wbc, &epd);
Qu Wenruo30659762019-03-20 14:27:42 +08004180 ASSERT(ret <= 0);
4181 if (ret < 0) {
4182 end_write_bio(&epd, ret);
4183 return ret;
4184 }
Chris Masond1310b22008-01-24 16:13:08 -05004185
Qu Wenruo30659762019-03-20 14:27:42 +08004186 ret = flush_write_bio(&epd);
4187 ASSERT(ret <= 0);
Chris Masond1310b22008-01-24 16:13:08 -05004188 return ret;
4189}
Chris Masond1310b22008-01-24 16:13:08 -05004190
Nikolay Borisov5e3ee232017-12-08 15:55:58 +02004191int extent_write_locked_range(struct inode *inode, u64 start, u64 end,
Chris Mason771ed682008-11-06 22:02:51 -05004192 int mode)
4193{
4194 int ret = 0;
4195 struct address_space *mapping = inode->i_mapping;
Nikolay Borisov5e3ee232017-12-08 15:55:58 +02004196 struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree;
Chris Mason771ed682008-11-06 22:02:51 -05004197 struct page *page;
Kirill A. Shutemov09cbfea2016-04-01 15:29:47 +03004198 unsigned long nr_pages = (end - start + PAGE_SIZE) >>
4199 PAGE_SHIFT;
Chris Mason771ed682008-11-06 22:02:51 -05004200
4201 struct extent_page_data epd = {
4202 .bio = NULL,
4203 .tree = tree,
Chris Mason771ed682008-11-06 22:02:51 -05004204 .extent_locked = 1,
Chris Masonffbd5172009-04-20 15:50:09 -04004205 .sync_io = mode == WB_SYNC_ALL,
Chris Mason771ed682008-11-06 22:02:51 -05004206 };
4207 struct writeback_control wbc_writepages = {
Chris Mason771ed682008-11-06 22:02:51 -05004208 .sync_mode = mode,
Chris Mason771ed682008-11-06 22:02:51 -05004209 .nr_to_write = nr_pages * 2,
4210 .range_start = start,
4211 .range_end = end + 1,
4212 };
4213
Chris Masond3977122009-01-05 21:25:51 -05004214 while (start <= end) {
Kirill A. Shutemov09cbfea2016-04-01 15:29:47 +03004215 page = find_get_page(mapping, start >> PAGE_SHIFT);
Chris Mason771ed682008-11-06 22:02:51 -05004216 if (clear_page_dirty_for_io(page))
4217 ret = __extent_writepage(page, &wbc_writepages, &epd);
4218 else {
Nikolay Borisov7087a9d2018-11-01 14:09:48 +02004219 btrfs_writepage_endio_finish_ordered(page, start,
Nikolay Borisovc6297322018-11-08 10:18:08 +02004220 start + PAGE_SIZE - 1, 1);
Chris Mason771ed682008-11-06 22:02:51 -05004221 unlock_page(page);
4222 }
Kirill A. Shutemov09cbfea2016-04-01 15:29:47 +03004223 put_page(page);
4224 start += PAGE_SIZE;
Chris Mason771ed682008-11-06 22:02:51 -05004225 }
4226
Qu Wenruo02c6db42019-03-20 14:27:45 +08004227 ASSERT(ret <= 0);
4228 if (ret < 0) {
4229 end_write_bio(&epd, ret);
4230 return ret;
4231 }
4232 ret = flush_write_bio(&epd);
Chris Mason771ed682008-11-06 22:02:51 -05004233 return ret;
4234}
Chris Masond1310b22008-01-24 16:13:08 -05004235
Nikolay Borisov8ae225a2018-04-19 10:46:38 +03004236int extent_writepages(struct address_space *mapping,
Chris Masond1310b22008-01-24 16:13:08 -05004237 struct writeback_control *wbc)
4238{
4239 int ret = 0;
4240 struct extent_page_data epd = {
4241 .bio = NULL,
Nikolay Borisov8ae225a2018-04-19 10:46:38 +03004242 .tree = &BTRFS_I(mapping->host)->io_tree,
Chris Mason771ed682008-11-06 22:02:51 -05004243 .extent_locked = 0,
Chris Masonffbd5172009-04-20 15:50:09 -04004244 .sync_io = wbc->sync_mode == WB_SYNC_ALL,
Chris Masond1310b22008-01-24 16:13:08 -05004245 };
4246
David Sterba935db852017-06-23 04:30:28 +02004247 ret = extent_write_cache_pages(mapping, wbc, &epd);
Qu Wenruoa2a72fb2019-03-20 14:27:48 +08004248 ASSERT(ret <= 0);
4249 if (ret < 0) {
4250 end_write_bio(&epd, ret);
4251 return ret;
4252 }
4253 ret = flush_write_bio(&epd);
Chris Masond1310b22008-01-24 16:13:08 -05004254 return ret;
4255}
Chris Masond1310b22008-01-24 16:13:08 -05004256
Nikolay Borisov2a3ff0a2018-04-19 10:46:36 +03004257int extent_readpages(struct address_space *mapping, struct list_head *pages,
4258 unsigned nr_pages)
Chris Masond1310b22008-01-24 16:13:08 -05004259{
4260 struct bio *bio = NULL;
Chris Masonc8b97812008-10-29 14:49:59 -04004261 unsigned long bio_flags = 0;
Liu Bo67c96842012-07-20 21:43:09 -06004262 struct page *pagepool[16];
Miao Xie125bac012013-07-25 19:22:37 +08004263 struct extent_map *em_cached = NULL;
Nikolay Borisov2a3ff0a2018-04-19 10:46:36 +03004264 struct extent_io_tree *tree = &BTRFS_I(mapping->host)->io_tree;
Liu Bo67c96842012-07-20 21:43:09 -06004265 int nr = 0;
Filipe Manana808f80b2015-09-28 09:56:26 +01004266 u64 prev_em_start = (u64)-1;
Chris Masond1310b22008-01-24 16:13:08 -05004267
Nikolay Borisov61ed3a12018-11-29 18:41:31 +02004268 while (!list_empty(pages)) {
Nikolay Borisove65ef212019-03-11 09:55:38 +02004269 u64 contig_end = 0;
4270
Nikolay Borisov61ed3a12018-11-29 18:41:31 +02004271 for (nr = 0; nr < ARRAY_SIZE(pagepool) && !list_empty(pages);) {
Nikolay Borisovf86196e2019-01-03 15:29:02 -08004272 struct page *page = lru_to_page(pages);
Chris Masond1310b22008-01-24 16:13:08 -05004273
Nikolay Borisov61ed3a12018-11-29 18:41:31 +02004274 prefetchw(&page->flags);
4275 list_del(&page->lru);
4276 if (add_to_page_cache_lru(page, mapping, page->index,
4277 readahead_gfp_mask(mapping))) {
4278 put_page(page);
Nikolay Borisove65ef212019-03-11 09:55:38 +02004279 break;
Nikolay Borisov61ed3a12018-11-29 18:41:31 +02004280 }
4281
4282 pagepool[nr++] = page;
Nikolay Borisove65ef212019-03-11 09:55:38 +02004283 contig_end = page_offset(page) + PAGE_SIZE - 1;
Chris Masond1310b22008-01-24 16:13:08 -05004284 }
Liu Bo67c96842012-07-20 21:43:09 -06004285
Nikolay Borisove65ef212019-03-11 09:55:38 +02004286 if (nr) {
4287 u64 contig_start = page_offset(pagepool[0]);
4288
4289 ASSERT(contig_start + nr * PAGE_SIZE - 1 == contig_end);
4290
4291 contiguous_readpages(tree, pagepool, nr, contig_start,
4292 contig_end, &em_cached, &bio, &bio_flags,
4293 &prev_em_start);
4294 }
Chris Masond1310b22008-01-24 16:13:08 -05004295 }
Liu Bo67c96842012-07-20 21:43:09 -06004296
Miao Xie125bac012013-07-25 19:22:37 +08004297 if (em_cached)
4298 free_extent_map(em_cached);
4299
Chris Masond1310b22008-01-24 16:13:08 -05004300 if (bio)
Mike Christie1f7ad752016-06-05 14:31:51 -05004301 return submit_one_bio(bio, 0, bio_flags);
Chris Masond1310b22008-01-24 16:13:08 -05004302 return 0;
4303}
Chris Masond1310b22008-01-24 16:13:08 -05004304
4305/*
4306 * basic invalidatepage code, this waits on any locked or writeback
4307 * ranges corresponding to the page, and then deletes any extent state
4308 * records from the tree
4309 */
4310int extent_invalidatepage(struct extent_io_tree *tree,
4311 struct page *page, unsigned long offset)
4312{
Josef Bacik2ac55d42010-02-03 19:33:23 +00004313 struct extent_state *cached_state = NULL;
Miao Xie4eee4fa2012-12-21 09:17:45 +00004314 u64 start = page_offset(page);
Kirill A. Shutemov09cbfea2016-04-01 15:29:47 +03004315 u64 end = start + PAGE_SIZE - 1;
Chris Masond1310b22008-01-24 16:13:08 -05004316 size_t blocksize = page->mapping->host->i_sb->s_blocksize;
4317
Qu Wenruofda28322013-02-26 08:10:22 +00004318 start += ALIGN(offset, blocksize);
Chris Masond1310b22008-01-24 16:13:08 -05004319 if (start > end)
4320 return 0;
4321
David Sterbaff13db42015-12-03 14:30:40 +01004322 lock_extent_bits(tree, start, end, &cached_state);
Chris Mason1edbb732009-09-02 13:24:36 -04004323 wait_on_page_writeback(page);
Chris Masond1310b22008-01-24 16:13:08 -05004324 clear_extent_bit(tree, start, end,
Josef Bacik32c00af2009-10-08 13:34:05 -04004325 EXTENT_LOCKED | EXTENT_DIRTY | EXTENT_DELALLOC |
4326 EXTENT_DO_ACCOUNTING,
David Sterbaae0f1622017-10-31 16:37:52 +01004327 1, 1, &cached_state);
Chris Masond1310b22008-01-24 16:13:08 -05004328 return 0;
4329}
Chris Masond1310b22008-01-24 16:13:08 -05004330
4331/*
Chris Mason7b13b7b2008-04-18 10:29:50 -04004332 * a helper for releasepage, this tests for areas of the page that
4333 * are locked or under IO and drops the related state bits if it is safe
4334 * to drop the page.
4335 */
Nikolay Borisov29c68b2d2018-04-19 10:46:35 +03004336static int try_release_extent_state(struct extent_io_tree *tree,
Eric Sandeen48a3b632013-04-25 20:41:01 +00004337 struct page *page, gfp_t mask)
Chris Mason7b13b7b2008-04-18 10:29:50 -04004338{
Miao Xie4eee4fa2012-12-21 09:17:45 +00004339 u64 start = page_offset(page);
Kirill A. Shutemov09cbfea2016-04-01 15:29:47 +03004340 u64 end = start + PAGE_SIZE - 1;
Chris Mason7b13b7b2008-04-18 10:29:50 -04004341 int ret = 1;
4342
Nikolay Borisov88826792019-03-14 15:28:31 +02004343 if (test_range_bit(tree, start, end, EXTENT_LOCKED, 0, NULL)) {
Chris Mason7b13b7b2008-04-18 10:29:50 -04004344 ret = 0;
Nikolay Borisov88826792019-03-14 15:28:31 +02004345 } else {
Chris Mason11ef1602009-09-23 20:28:46 -04004346 /*
4347 * at this point we can safely clear everything except the
4348 * locked bit and the nodatasum bit
4349 */
David Sterba66b0c882017-10-31 16:30:47 +01004350 ret = __clear_extent_bit(tree, start, end,
Chris Mason11ef1602009-09-23 20:28:46 -04004351 ~(EXTENT_LOCKED | EXTENT_NODATASUM),
David Sterba66b0c882017-10-31 16:30:47 +01004352 0, 0, NULL, mask, NULL);
Chris Masone3f24cc2011-02-14 12:52:08 -05004353
4354 /* if clear_extent_bit failed for enomem reasons,
4355 * we can't allow the release to continue.
4356 */
4357 if (ret < 0)
4358 ret = 0;
4359 else
4360 ret = 1;
Chris Mason7b13b7b2008-04-18 10:29:50 -04004361 }
4362 return ret;
4363}
Chris Mason7b13b7b2008-04-18 10:29:50 -04004364
4365/*
Chris Masond1310b22008-01-24 16:13:08 -05004366 * a helper for releasepage. As long as there are no locked extents
4367 * in the range corresponding to the page, both state records and extent
4368 * map records are removed
4369 */
Nikolay Borisov477a30b2018-04-19 10:46:34 +03004370int try_release_extent_mapping(struct page *page, gfp_t mask)
Chris Masond1310b22008-01-24 16:13:08 -05004371{
4372 struct extent_map *em;
Miao Xie4eee4fa2012-12-21 09:17:45 +00004373 u64 start = page_offset(page);
Kirill A. Shutemov09cbfea2016-04-01 15:29:47 +03004374 u64 end = start + PAGE_SIZE - 1;
Filipe Mananabd3599a2018-07-12 01:36:43 +01004375 struct btrfs_inode *btrfs_inode = BTRFS_I(page->mapping->host);
4376 struct extent_io_tree *tree = &btrfs_inode->io_tree;
4377 struct extent_map_tree *map = &btrfs_inode->extent_tree;
Chris Mason7b13b7b2008-04-18 10:29:50 -04004378
Mel Gormand0164ad2015-11-06 16:28:21 -08004379 if (gfpflags_allow_blocking(mask) &&
Byongho Leeee221842015-12-15 01:42:10 +09004380 page->mapping->host->i_size > SZ_16M) {
Yan39b56372008-02-15 10:40:50 -05004381 u64 len;
Chris Mason70dec802008-01-29 09:59:12 -05004382 while (start <= end) {
Yan39b56372008-02-15 10:40:50 -05004383 len = end - start + 1;
Chris Mason890871b2009-09-02 16:24:52 -04004384 write_lock(&map->lock);
Yan39b56372008-02-15 10:40:50 -05004385 em = lookup_extent_mapping(map, start, len);
Tsutomu Itoh285190d2012-02-16 16:23:58 +09004386 if (!em) {
Chris Mason890871b2009-09-02 16:24:52 -04004387 write_unlock(&map->lock);
Chris Mason70dec802008-01-29 09:59:12 -05004388 break;
4389 }
Chris Mason7f3c74f2008-07-18 12:01:11 -04004390 if (test_bit(EXTENT_FLAG_PINNED, &em->flags) ||
4391 em->start != start) {
Chris Mason890871b2009-09-02 16:24:52 -04004392 write_unlock(&map->lock);
Chris Mason70dec802008-01-29 09:59:12 -05004393 free_extent_map(em);
4394 break;
4395 }
4396 if (!test_range_bit(tree, em->start,
4397 extent_map_end(em) - 1,
Nikolay Borisov4e586ca2019-03-14 15:28:30 +02004398 EXTENT_LOCKED, 0, NULL)) {
Filipe Mananabd3599a2018-07-12 01:36:43 +01004399 set_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
4400 &btrfs_inode->runtime_flags);
Chris Mason70dec802008-01-29 09:59:12 -05004401 remove_extent_mapping(map, em);
4402 /* once for the rb tree */
4403 free_extent_map(em);
4404 }
4405 start = extent_map_end(em);
Chris Mason890871b2009-09-02 16:24:52 -04004406 write_unlock(&map->lock);
Chris Mason70dec802008-01-29 09:59:12 -05004407
4408 /* once for us */
Chris Masond1310b22008-01-24 16:13:08 -05004409 free_extent_map(em);
4410 }
Chris Masond1310b22008-01-24 16:13:08 -05004411 }
Nikolay Borisov29c68b2d2018-04-19 10:46:35 +03004412 return try_release_extent_state(tree, page, mask);
Chris Masond1310b22008-01-24 16:13:08 -05004413}
Chris Masond1310b22008-01-24 16:13:08 -05004414
Chris Masonec29ed52011-02-23 16:23:20 -05004415/*
4416 * helper function for fiemap, which doesn't want to see any holes.
4417 * This maps until we find something past 'last'
4418 */
4419static struct extent_map *get_extent_skip_holes(struct inode *inode,
David Sterbae3350e12017-06-23 04:09:57 +02004420 u64 offset, u64 last)
Chris Masonec29ed52011-02-23 16:23:20 -05004421{
Jeff Mahoneyda170662016-06-15 09:22:56 -04004422 u64 sectorsize = btrfs_inode_sectorsize(inode);
Chris Masonec29ed52011-02-23 16:23:20 -05004423 struct extent_map *em;
4424 u64 len;
4425
4426 if (offset >= last)
4427 return NULL;
4428
Dulshani Gunawardhana67871252013-10-31 10:33:04 +05304429 while (1) {
Chris Masonec29ed52011-02-23 16:23:20 -05004430 len = last - offset;
4431 if (len == 0)
4432 break;
Qu Wenruofda28322013-02-26 08:10:22 +00004433 len = ALIGN(len, sectorsize);
Nikolay Borisov4ab47a82018-12-12 09:42:32 +02004434 em = btrfs_get_extent_fiemap(BTRFS_I(inode), offset, len);
David Sterbac7040052011-04-19 18:00:01 +02004435 if (IS_ERR_OR_NULL(em))
Chris Masonec29ed52011-02-23 16:23:20 -05004436 return em;
4437
4438 /* if this isn't a hole return it */
Nikolay Borisov4a2d25c2017-11-23 10:51:43 +02004439 if (em->block_start != EXTENT_MAP_HOLE)
Chris Masonec29ed52011-02-23 16:23:20 -05004440 return em;
Chris Masonec29ed52011-02-23 16:23:20 -05004441
4442 /* this is a hole, advance to the next extent */
4443 offset = extent_map_end(em);
4444 free_extent_map(em);
4445 if (offset >= last)
4446 break;
4447 }
4448 return NULL;
4449}
4450
Qu Wenruo47518322017-04-07 10:43:15 +08004451/*
4452 * To cache previous fiemap extent
4453 *
4454 * Will be used for merging fiemap extent
4455 */
4456struct fiemap_cache {
4457 u64 offset;
4458 u64 phys;
4459 u64 len;
4460 u32 flags;
4461 bool cached;
4462};
4463
4464/*
4465 * Helper to submit fiemap extent.
4466 *
4467 * Will try to merge current fiemap extent specified by @offset, @phys,
4468 * @len and @flags with cached one.
4469 * And only when we fails to merge, cached one will be submitted as
4470 * fiemap extent.
4471 *
4472 * Return value is the same as fiemap_fill_next_extent().
4473 */
4474static int emit_fiemap_extent(struct fiemap_extent_info *fieinfo,
4475 struct fiemap_cache *cache,
4476 u64 offset, u64 phys, u64 len, u32 flags)
4477{
4478 int ret = 0;
4479
4480 if (!cache->cached)
4481 goto assign;
4482
4483 /*
4484 * Sanity check, extent_fiemap() should have ensured that new
Andrea Gelmini52042d82018-11-28 12:05:13 +01004485 * fiemap extent won't overlap with cached one.
Qu Wenruo47518322017-04-07 10:43:15 +08004486 * Not recoverable.
4487 *
4488 * NOTE: Physical address can overlap, due to compression
4489 */
4490 if (cache->offset + cache->len > offset) {
4491 WARN_ON(1);
4492 return -EINVAL;
4493 }
4494
4495 /*
4496 * Only merges fiemap extents if
4497 * 1) Their logical addresses are continuous
4498 *
4499 * 2) Their physical addresses are continuous
4500 * So truly compressed (physical size smaller than logical size)
4501 * extents won't get merged with each other
4502 *
4503 * 3) Share same flags except FIEMAP_EXTENT_LAST
4504 * So regular extent won't get merged with prealloc extent
4505 */
4506 if (cache->offset + cache->len == offset &&
4507 cache->phys + cache->len == phys &&
4508 (cache->flags & ~FIEMAP_EXTENT_LAST) ==
4509 (flags & ~FIEMAP_EXTENT_LAST)) {
4510 cache->len += len;
4511 cache->flags |= flags;
4512 goto try_submit_last;
4513 }
4514
4515 /* Not mergeable, need to submit cached one */
4516 ret = fiemap_fill_next_extent(fieinfo, cache->offset, cache->phys,
4517 cache->len, cache->flags);
4518 cache->cached = false;
4519 if (ret)
4520 return ret;
4521assign:
4522 cache->cached = true;
4523 cache->offset = offset;
4524 cache->phys = phys;
4525 cache->len = len;
4526 cache->flags = flags;
4527try_submit_last:
4528 if (cache->flags & FIEMAP_EXTENT_LAST) {
4529 ret = fiemap_fill_next_extent(fieinfo, cache->offset,
4530 cache->phys, cache->len, cache->flags);
4531 cache->cached = false;
4532 }
4533 return ret;
4534}
4535
4536/*
Qu Wenruo848c23b2017-06-22 10:01:21 +08004537 * Emit last fiemap cache
Qu Wenruo47518322017-04-07 10:43:15 +08004538 *
Qu Wenruo848c23b2017-06-22 10:01:21 +08004539 * The last fiemap cache may still be cached in the following case:
4540 * 0 4k 8k
4541 * |<- Fiemap range ->|
4542 * |<------------ First extent ----------->|
4543 *
4544 * In this case, the first extent range will be cached but not emitted.
4545 * So we must emit it before ending extent_fiemap().
Qu Wenruo47518322017-04-07 10:43:15 +08004546 */
David Sterba5c5aff92019-03-20 11:29:46 +01004547static int emit_last_fiemap_cache(struct fiemap_extent_info *fieinfo,
Qu Wenruo848c23b2017-06-22 10:01:21 +08004548 struct fiemap_cache *cache)
Qu Wenruo47518322017-04-07 10:43:15 +08004549{
4550 int ret;
4551
4552 if (!cache->cached)
4553 return 0;
4554
Qu Wenruo47518322017-04-07 10:43:15 +08004555 ret = fiemap_fill_next_extent(fieinfo, cache->offset, cache->phys,
4556 cache->len, cache->flags);
4557 cache->cached = false;
4558 if (ret > 0)
4559 ret = 0;
4560 return ret;
4561}
4562
Yehuda Sadeh1506fcc2009-01-21 14:39:14 -05004563int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
David Sterba2135fb92017-06-23 04:09:57 +02004564 __u64 start, __u64 len)
Yehuda Sadeh1506fcc2009-01-21 14:39:14 -05004565{
Josef Bacik975f84f2010-11-23 19:36:57 +00004566 int ret = 0;
Yehuda Sadeh1506fcc2009-01-21 14:39:14 -05004567 u64 off = start;
4568 u64 max = start + len;
4569 u32 flags = 0;
Josef Bacik975f84f2010-11-23 19:36:57 +00004570 u32 found_type;
4571 u64 last;
Chris Masonec29ed52011-02-23 16:23:20 -05004572 u64 last_for_get_extent = 0;
Yehuda Sadeh1506fcc2009-01-21 14:39:14 -05004573 u64 disko = 0;
Chris Masonec29ed52011-02-23 16:23:20 -05004574 u64 isize = i_size_read(inode);
Josef Bacik975f84f2010-11-23 19:36:57 +00004575 struct btrfs_key found_key;
Yehuda Sadeh1506fcc2009-01-21 14:39:14 -05004576 struct extent_map *em = NULL;
Josef Bacik2ac55d42010-02-03 19:33:23 +00004577 struct extent_state *cached_state = NULL;
Josef Bacik975f84f2010-11-23 19:36:57 +00004578 struct btrfs_path *path;
Josef Bacikdc046b12014-09-10 16:20:45 -04004579 struct btrfs_root *root = BTRFS_I(inode)->root;
Qu Wenruo47518322017-04-07 10:43:15 +08004580 struct fiemap_cache cache = { 0 };
David Sterba5911c8f2019-05-15 15:31:04 +02004581 struct ulist *roots;
4582 struct ulist *tmp_ulist;
Yehuda Sadeh1506fcc2009-01-21 14:39:14 -05004583 int end = 0;
Chris Masonec29ed52011-02-23 16:23:20 -05004584 u64 em_start = 0;
4585 u64 em_len = 0;
4586 u64 em_end = 0;
Yehuda Sadeh1506fcc2009-01-21 14:39:14 -05004587
4588 if (len == 0)
4589 return -EINVAL;
4590
Josef Bacik975f84f2010-11-23 19:36:57 +00004591 path = btrfs_alloc_path();
4592 if (!path)
4593 return -ENOMEM;
4594 path->leave_spinning = 1;
4595
David Sterba5911c8f2019-05-15 15:31:04 +02004596 roots = ulist_alloc(GFP_KERNEL);
4597 tmp_ulist = ulist_alloc(GFP_KERNEL);
4598 if (!roots || !tmp_ulist) {
4599 ret = -ENOMEM;
4600 goto out_free_ulist;
4601 }
4602
Jeff Mahoneyda170662016-06-15 09:22:56 -04004603 start = round_down(start, btrfs_inode_sectorsize(inode));
4604 len = round_up(max, btrfs_inode_sectorsize(inode)) - start;
Josef Bacik4d479cf2011-11-17 11:34:31 -05004605
Chris Masonec29ed52011-02-23 16:23:20 -05004606 /*
4607 * lookup the last file extent. We're not using i_size here
4608 * because there might be preallocation past i_size
4609 */
David Sterbaf85b7372017-01-20 14:54:07 +01004610 ret = btrfs_lookup_file_extent(NULL, root, path,
4611 btrfs_ino(BTRFS_I(inode)), -1, 0);
Josef Bacik975f84f2010-11-23 19:36:57 +00004612 if (ret < 0) {
4613 btrfs_free_path(path);
David Sterba5911c8f2019-05-15 15:31:04 +02004614 goto out_free_ulist;
Liu Bo2d324f52016-05-17 17:21:48 -07004615 } else {
4616 WARN_ON(!ret);
4617 if (ret == 1)
4618 ret = 0;
Josef Bacik975f84f2010-11-23 19:36:57 +00004619 }
Liu Bo2d324f52016-05-17 17:21:48 -07004620
Josef Bacik975f84f2010-11-23 19:36:57 +00004621 path->slots[0]--;
Josef Bacik975f84f2010-11-23 19:36:57 +00004622 btrfs_item_key_to_cpu(path->nodes[0], &found_key, path->slots[0]);
David Sterba962a2982014-06-04 18:41:45 +02004623 found_type = found_key.type;
Josef Bacik975f84f2010-11-23 19:36:57 +00004624
Chris Masonec29ed52011-02-23 16:23:20 -05004625 /* No extents, but there might be delalloc bits */
Nikolay Borisov4a0cc7c2017-01-10 20:35:31 +02004626 if (found_key.objectid != btrfs_ino(BTRFS_I(inode)) ||
Josef Bacik975f84f2010-11-23 19:36:57 +00004627 found_type != BTRFS_EXTENT_DATA_KEY) {
Chris Masonec29ed52011-02-23 16:23:20 -05004628 /* have to trust i_size as the end */
4629 last = (u64)-1;
4630 last_for_get_extent = isize;
4631 } else {
4632 /*
4633 * remember the start of the last extent. There are a
4634 * bunch of different factors that go into the length of the
4635 * extent, so its much less complex to remember where it started
4636 */
4637 last = found_key.offset;
4638 last_for_get_extent = last + 1;
Josef Bacik975f84f2010-11-23 19:36:57 +00004639 }
Liu Bofe09e162013-09-22 12:54:23 +08004640 btrfs_release_path(path);
Josef Bacik975f84f2010-11-23 19:36:57 +00004641
Chris Masonec29ed52011-02-23 16:23:20 -05004642 /*
4643 * we might have some extents allocated but more delalloc past those
4644 * extents. so, we trust isize unless the start of the last extent is
4645 * beyond isize
4646 */
4647 if (last < isize) {
4648 last = (u64)-1;
4649 last_for_get_extent = isize;
4650 }
4651
David Sterbaff13db42015-12-03 14:30:40 +01004652 lock_extent_bits(&BTRFS_I(inode)->io_tree, start, start + len - 1,
Jeff Mahoneyd0082372012-03-01 14:57:19 +01004653 &cached_state);
Chris Masonec29ed52011-02-23 16:23:20 -05004654
David Sterbae3350e12017-06-23 04:09:57 +02004655 em = get_extent_skip_holes(inode, start, last_for_get_extent);
Yehuda Sadeh1506fcc2009-01-21 14:39:14 -05004656 if (!em)
4657 goto out;
4658 if (IS_ERR(em)) {
4659 ret = PTR_ERR(em);
4660 goto out;
4661 }
Josef Bacik975f84f2010-11-23 19:36:57 +00004662
Yehuda Sadeh1506fcc2009-01-21 14:39:14 -05004663 while (!end) {
Josef Bacikb76bb702013-07-05 13:52:51 -04004664 u64 offset_in_extent = 0;
Yehuda Sadeh1506fcc2009-01-21 14:39:14 -05004665
Chris Masonea8efc72011-03-08 11:54:40 -05004666 /* break if the extent we found is outside the range */
4667 if (em->start >= max || extent_map_end(em) < off)
4668 break;
4669
4670 /*
4671 * get_extent may return an extent that starts before our
4672 * requested range. We have to make sure the ranges
4673 * we return to fiemap always move forward and don't
4674 * overlap, so adjust the offsets here
4675 */
4676 em_start = max(em->start, off);
4677
4678 /*
4679 * record the offset from the start of the extent
Josef Bacikb76bb702013-07-05 13:52:51 -04004680 * for adjusting the disk offset below. Only do this if the
4681 * extent isn't compressed since our in ram offset may be past
4682 * what we have actually allocated on disk.
Chris Masonea8efc72011-03-08 11:54:40 -05004683 */
Josef Bacikb76bb702013-07-05 13:52:51 -04004684 if (!test_bit(EXTENT_FLAG_COMPRESSED, &em->flags))
4685 offset_in_extent = em_start - em->start;
Chris Masonec29ed52011-02-23 16:23:20 -05004686 em_end = extent_map_end(em);
Chris Masonea8efc72011-03-08 11:54:40 -05004687 em_len = em_end - em_start;
Yehuda Sadeh1506fcc2009-01-21 14:39:14 -05004688 flags = 0;
Filipe Mananaf0986312018-06-20 10:02:30 +01004689 if (em->block_start < EXTENT_MAP_LAST_BYTE)
4690 disko = em->block_start + offset_in_extent;
4691 else
4692 disko = 0;
Yehuda Sadeh1506fcc2009-01-21 14:39:14 -05004693
Chris Masonea8efc72011-03-08 11:54:40 -05004694 /*
4695 * bump off for our next call to get_extent
4696 */
4697 off = extent_map_end(em);
4698 if (off >= max)
4699 end = 1;
4700
Heiko Carstens93dbfad2009-04-03 10:33:45 -04004701 if (em->block_start == EXTENT_MAP_LAST_BYTE) {
Yehuda Sadeh1506fcc2009-01-21 14:39:14 -05004702 end = 1;
4703 flags |= FIEMAP_EXTENT_LAST;
Heiko Carstens93dbfad2009-04-03 10:33:45 -04004704 } else if (em->block_start == EXTENT_MAP_INLINE) {
Yehuda Sadeh1506fcc2009-01-21 14:39:14 -05004705 flags |= (FIEMAP_EXTENT_DATA_INLINE |
4706 FIEMAP_EXTENT_NOT_ALIGNED);
Heiko Carstens93dbfad2009-04-03 10:33:45 -04004707 } else if (em->block_start == EXTENT_MAP_DELALLOC) {
Yehuda Sadeh1506fcc2009-01-21 14:39:14 -05004708 flags |= (FIEMAP_EXTENT_DELALLOC |
4709 FIEMAP_EXTENT_UNKNOWN);
Josef Bacikdc046b12014-09-10 16:20:45 -04004710 } else if (fieinfo->fi_extents_max) {
4711 u64 bytenr = em->block_start -
4712 (em->start - em->orig_start);
Liu Bofe09e162013-09-22 12:54:23 +08004713
Liu Bofe09e162013-09-22 12:54:23 +08004714 /*
4715 * As btrfs supports shared space, this information
4716 * can be exported to userspace tools via
Josef Bacikdc046b12014-09-10 16:20:45 -04004717 * flag FIEMAP_EXTENT_SHARED. If fi_extents_max == 0
4718 * then we're just getting a count and we can skip the
4719 * lookup stuff.
Liu Bofe09e162013-09-22 12:54:23 +08004720 */
Edmund Nadolskibb739cf2017-06-28 21:56:58 -06004721 ret = btrfs_check_shared(root,
4722 btrfs_ino(BTRFS_I(inode)),
David Sterba5911c8f2019-05-15 15:31:04 +02004723 bytenr, roots, tmp_ulist);
Josef Bacikdc046b12014-09-10 16:20:45 -04004724 if (ret < 0)
Liu Bofe09e162013-09-22 12:54:23 +08004725 goto out_free;
Josef Bacikdc046b12014-09-10 16:20:45 -04004726 if (ret)
Liu Bofe09e162013-09-22 12:54:23 +08004727 flags |= FIEMAP_EXTENT_SHARED;
Josef Bacikdc046b12014-09-10 16:20:45 -04004728 ret = 0;
Yehuda Sadeh1506fcc2009-01-21 14:39:14 -05004729 }
4730 if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags))
4731 flags |= FIEMAP_EXTENT_ENCODED;
Josef Bacik0d2b2372015-05-19 10:44:04 -04004732 if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags))
4733 flags |= FIEMAP_EXTENT_UNWRITTEN;
Yehuda Sadeh1506fcc2009-01-21 14:39:14 -05004734
Yehuda Sadeh1506fcc2009-01-21 14:39:14 -05004735 free_extent_map(em);
4736 em = NULL;
Chris Masonec29ed52011-02-23 16:23:20 -05004737 if ((em_start >= last) || em_len == (u64)-1 ||
4738 (last == (u64)-1 && isize <= em_end)) {
Yehuda Sadeh1506fcc2009-01-21 14:39:14 -05004739 flags |= FIEMAP_EXTENT_LAST;
4740 end = 1;
4741 }
4742
Chris Masonec29ed52011-02-23 16:23:20 -05004743 /* now scan forward to see if this is really the last extent. */
David Sterbae3350e12017-06-23 04:09:57 +02004744 em = get_extent_skip_holes(inode, off, last_for_get_extent);
Chris Masonec29ed52011-02-23 16:23:20 -05004745 if (IS_ERR(em)) {
4746 ret = PTR_ERR(em);
4747 goto out;
4748 }
4749 if (!em) {
Josef Bacik975f84f2010-11-23 19:36:57 +00004750 flags |= FIEMAP_EXTENT_LAST;
4751 end = 1;
4752 }
Qu Wenruo47518322017-04-07 10:43:15 +08004753 ret = emit_fiemap_extent(fieinfo, &cache, em_start, disko,
4754 em_len, flags);
Chengyu Song26e726a2015-03-24 18:12:56 -04004755 if (ret) {
4756 if (ret == 1)
4757 ret = 0;
Chris Masonec29ed52011-02-23 16:23:20 -05004758 goto out_free;
Chengyu Song26e726a2015-03-24 18:12:56 -04004759 }
Yehuda Sadeh1506fcc2009-01-21 14:39:14 -05004760 }
4761out_free:
Qu Wenruo47518322017-04-07 10:43:15 +08004762 if (!ret)
David Sterba5c5aff92019-03-20 11:29:46 +01004763 ret = emit_last_fiemap_cache(fieinfo, &cache);
Yehuda Sadeh1506fcc2009-01-21 14:39:14 -05004764 free_extent_map(em);
4765out:
Liu Bofe09e162013-09-22 12:54:23 +08004766 btrfs_free_path(path);
Liu Boa52f4cd2013-05-01 16:23:41 +00004767 unlock_extent_cached(&BTRFS_I(inode)->io_tree, start, start + len - 1,
David Sterbae43bbe52017-12-12 21:43:52 +01004768 &cached_state);
David Sterba5911c8f2019-05-15 15:31:04 +02004769
4770out_free_ulist:
4771 ulist_free(roots);
4772 ulist_free(tmp_ulist);
Yehuda Sadeh1506fcc2009-01-21 14:39:14 -05004773 return ret;
4774}
4775
Chris Mason727011e2010-08-06 13:21:20 -04004776static void __free_extent_buffer(struct extent_buffer *eb)
4777{
Eric Sandeen6d49ba12013-04-22 16:12:31 +00004778 btrfs_leak_debug_del(&eb->leak_list);
Chris Mason727011e2010-08-06 13:21:20 -04004779 kmem_cache_free(extent_buffer_cache, eb);
4780}
4781
Josef Bacika26e8c92014-03-28 17:07:27 -04004782int extent_buffer_under_io(struct extent_buffer *eb)
Chris Masond1310b22008-01-24 16:13:08 -05004783{
Josef Bacik0b32f4b2012-03-13 09:38:00 -04004784 return (atomic_read(&eb->io_pages) ||
4785 test_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags) ||
4786 test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags));
Chris Masond1310b22008-01-24 16:13:08 -05004787}
4788
Miao Xie897ca6e92010-10-26 20:57:29 -04004789/*
David Sterba55ac0132018-07-19 17:24:32 +02004790 * Release all pages attached to the extent buffer.
Miao Xie897ca6e92010-10-26 20:57:29 -04004791 */
David Sterba55ac0132018-07-19 17:24:32 +02004792static void btrfs_release_extent_buffer_pages(struct extent_buffer *eb)
Miao Xie897ca6e92010-10-26 20:57:29 -04004793{
Nikolay Borisovd64766f2018-06-27 16:38:22 +03004794 int i;
4795 int num_pages;
Nikolay Borisovb0132a32018-06-27 16:38:24 +03004796 int mapped = !test_bit(EXTENT_BUFFER_UNMAPPED, &eb->bflags);
Miao Xie897ca6e92010-10-26 20:57:29 -04004797
Josef Bacik0b32f4b2012-03-13 09:38:00 -04004798 BUG_ON(extent_buffer_under_io(eb));
Miao Xie897ca6e92010-10-26 20:57:29 -04004799
Nikolay Borisovd64766f2018-06-27 16:38:22 +03004800 num_pages = num_extent_pages(eb);
4801 for (i = 0; i < num_pages; i++) {
4802 struct page *page = eb->pages[i];
Miao Xie897ca6e92010-10-26 20:57:29 -04004803
Forrest Liu5d2361d2015-02-09 17:31:45 +08004804 if (!page)
4805 continue;
4806 if (mapped)
Josef Bacik4f2de97a2012-03-07 16:20:05 -05004807 spin_lock(&page->mapping->private_lock);
Forrest Liu5d2361d2015-02-09 17:31:45 +08004808 /*
4809 * We do this since we'll remove the pages after we've
4810 * removed the eb from the radix tree, so we could race
4811 * and have this page now attached to the new eb. So
4812 * only clear page_private if it's still connected to
4813 * this eb.
4814 */
4815 if (PagePrivate(page) &&
4816 page->private == (unsigned long)eb) {
4817 BUG_ON(test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags));
4818 BUG_ON(PageDirty(page));
4819 BUG_ON(PageWriteback(page));
Josef Bacik4f2de97a2012-03-07 16:20:05 -05004820 /*
Forrest Liu5d2361d2015-02-09 17:31:45 +08004821 * We need to make sure we haven't be attached
4822 * to a new eb.
Josef Bacik4f2de97a2012-03-07 16:20:05 -05004823 */
Forrest Liu5d2361d2015-02-09 17:31:45 +08004824 ClearPagePrivate(page);
4825 set_page_private(page, 0);
4826 /* One for the page private */
Kirill A. Shutemov09cbfea2016-04-01 15:29:47 +03004827 put_page(page);
Josef Bacik4f2de97a2012-03-07 16:20:05 -05004828 }
Forrest Liu5d2361d2015-02-09 17:31:45 +08004829
4830 if (mapped)
4831 spin_unlock(&page->mapping->private_lock);
4832
Nicholas D Steeves01327612016-05-19 21:18:45 -04004833 /* One for when we allocated the page */
Kirill A. Shutemov09cbfea2016-04-01 15:29:47 +03004834 put_page(page);
Nikolay Borisovd64766f2018-06-27 16:38:22 +03004835 }
Miao Xie897ca6e92010-10-26 20:57:29 -04004836}
4837
/*
 * Release an extent buffer completely: first drop all of its pages, then
 * free the extent buffer structure.
 */
static inline void btrfs_release_extent_buffer(struct extent_buffer *eb)
{
	/* Pages must go first, the structure is gone after the second call. */
	btrfs_release_extent_buffer_pages(eb);

	__free_extent_buffer(eb);
}
4846
Josef Bacikf28491e2013-12-16 13:24:27 -05004847static struct extent_buffer *
4848__alloc_extent_buffer(struct btrfs_fs_info *fs_info, u64 start,
David Sterba23d79d82014-06-15 02:55:29 +02004849 unsigned long len)
Josef Bacikdb7f3432013-08-07 14:54:37 -04004850{
4851 struct extent_buffer *eb = NULL;
4852
Michal Hockod1b5c562015-08-19 14:17:40 +02004853 eb = kmem_cache_zalloc(extent_buffer_cache, GFP_NOFS|__GFP_NOFAIL);
Josef Bacikdb7f3432013-08-07 14:54:37 -04004854 eb->start = start;
4855 eb->len = len;
Josef Bacikf28491e2013-12-16 13:24:27 -05004856 eb->fs_info = fs_info;
Josef Bacikdb7f3432013-08-07 14:54:37 -04004857 eb->bflags = 0;
4858 rwlock_init(&eb->lock);
Josef Bacikdb7f3432013-08-07 14:54:37 -04004859 atomic_set(&eb->blocking_readers, 0);
4860 atomic_set(&eb->blocking_writers, 0);
David Sterbaed1b4ed2018-08-24 16:31:17 +02004861 eb->lock_nested = false;
Josef Bacikdb7f3432013-08-07 14:54:37 -04004862 init_waitqueue_head(&eb->write_lock_wq);
4863 init_waitqueue_head(&eb->read_lock_wq);
4864
4865 btrfs_leak_debug_add(&eb->leak_list, &buffers);
4866
4867 spin_lock_init(&eb->refs_lock);
4868 atomic_set(&eb->refs, 1);
4869 atomic_set(&eb->io_pages, 0);
4870
4871 /*
4872 * Sanity checks, currently the maximum is 64k covered by 16x 4k pages
4873 */
4874 BUILD_BUG_ON(BTRFS_MAX_METADATA_BLOCKSIZE
4875 > MAX_INLINE_EXTENT_BUFFER_SIZE);
4876 BUG_ON(len > MAX_INLINE_EXTENT_BUFFER_SIZE);
4877
David Sterba843ccf92018-08-24 14:56:28 +02004878#ifdef CONFIG_BTRFS_DEBUG
4879 atomic_set(&eb->spinning_writers, 0);
David Sterbaafd495a2018-08-24 15:57:38 +02004880 atomic_set(&eb->spinning_readers, 0);
David Sterba5c9c7992018-08-24 16:15:51 +02004881 atomic_set(&eb->read_locks, 0);
David Sterbac79adfc2018-08-24 16:24:26 +02004882 atomic_set(&eb->write_locks, 0);
David Sterba843ccf92018-08-24 14:56:28 +02004883#endif
4884
Josef Bacikdb7f3432013-08-07 14:54:37 -04004885 return eb;
4886}
4887
4888struct extent_buffer *btrfs_clone_extent_buffer(struct extent_buffer *src)
4889{
David Sterbacc5e31a2018-03-01 18:20:27 +01004890 int i;
Josef Bacikdb7f3432013-08-07 14:54:37 -04004891 struct page *p;
4892 struct extent_buffer *new;
David Sterbacc5e31a2018-03-01 18:20:27 +01004893 int num_pages = num_extent_pages(src);
Josef Bacikdb7f3432013-08-07 14:54:37 -04004894
David Sterba3f556f72014-06-15 03:20:26 +02004895 new = __alloc_extent_buffer(src->fs_info, src->start, src->len);
Josef Bacikdb7f3432013-08-07 14:54:37 -04004896 if (new == NULL)
4897 return NULL;
4898
4899 for (i = 0; i < num_pages; i++) {
Josef Bacik9ec72672013-08-07 16:57:23 -04004900 p = alloc_page(GFP_NOFS);
Josef Bacikdb7f3432013-08-07 14:54:37 -04004901 if (!p) {
4902 btrfs_release_extent_buffer(new);
4903 return NULL;
4904 }
4905 attach_extent_buffer_page(new, p);
4906 WARN_ON(PageDirty(p));
4907 SetPageUptodate(p);
4908 new->pages[i] = p;
David Sterbafba1acf2016-11-08 17:56:24 +01004909 copy_page(page_address(p), page_address(src->pages[i]));
Josef Bacikdb7f3432013-08-07 14:54:37 -04004910 }
4911
Josef Bacikdb7f3432013-08-07 14:54:37 -04004912 set_bit(EXTENT_BUFFER_UPTODATE, &new->bflags);
Nikolay Borisovb0132a32018-06-27 16:38:24 +03004913 set_bit(EXTENT_BUFFER_UNMAPPED, &new->bflags);
Josef Bacikdb7f3432013-08-07 14:54:37 -04004914
4915 return new;
4916}
4917
Omar Sandoval0f331222015-09-29 20:50:31 -07004918struct extent_buffer *__alloc_dummy_extent_buffer(struct btrfs_fs_info *fs_info,
4919 u64 start, unsigned long len)
Josef Bacikdb7f3432013-08-07 14:54:37 -04004920{
4921 struct extent_buffer *eb;
David Sterbacc5e31a2018-03-01 18:20:27 +01004922 int num_pages;
4923 int i;
Josef Bacikdb7f3432013-08-07 14:54:37 -04004924
David Sterba3f556f72014-06-15 03:20:26 +02004925 eb = __alloc_extent_buffer(fs_info, start, len);
Josef Bacikdb7f3432013-08-07 14:54:37 -04004926 if (!eb)
4927 return NULL;
4928
David Sterba65ad0102018-06-29 10:56:49 +02004929 num_pages = num_extent_pages(eb);
Josef Bacikdb7f3432013-08-07 14:54:37 -04004930 for (i = 0; i < num_pages; i++) {
Josef Bacik9ec72672013-08-07 16:57:23 -04004931 eb->pages[i] = alloc_page(GFP_NOFS);
Josef Bacikdb7f3432013-08-07 14:54:37 -04004932 if (!eb->pages[i])
4933 goto err;
4934 }
4935 set_extent_buffer_uptodate(eb);
4936 btrfs_set_header_nritems(eb, 0);
Nikolay Borisovb0132a32018-06-27 16:38:24 +03004937 set_bit(EXTENT_BUFFER_UNMAPPED, &eb->bflags);
Josef Bacikdb7f3432013-08-07 14:54:37 -04004938
4939 return eb;
4940err:
4941 for (; i > 0; i--)
4942 __free_page(eb->pages[i - 1]);
4943 __free_extent_buffer(eb);
4944 return NULL;
4945}
4946
Omar Sandoval0f331222015-09-29 20:50:31 -07004947struct extent_buffer *alloc_dummy_extent_buffer(struct btrfs_fs_info *fs_info,
Jeff Mahoneyda170662016-06-15 09:22:56 -04004948 u64 start)
Omar Sandoval0f331222015-09-29 20:50:31 -07004949{
Jeff Mahoneyda170662016-06-15 09:22:56 -04004950 return __alloc_dummy_extent_buffer(fs_info, start, fs_info->nodesize);
Omar Sandoval0f331222015-09-29 20:50:31 -07004951}
4952
Josef Bacik0b32f4b2012-03-13 09:38:00 -04004953static void check_buffer_tree_ref(struct extent_buffer *eb)
4954{
Chris Mason242e18c2013-01-29 17:49:37 -05004955 int refs;
Josef Bacik0b32f4b2012-03-13 09:38:00 -04004956 /* the ref bit is tricky. We have to make sure it is set
4957 * if we have the buffer dirty. Otherwise the
4958 * code to free a buffer can end up dropping a dirty
4959 * page
4960 *
4961 * Once the ref bit is set, it won't go away while the
4962 * buffer is dirty or in writeback, and it also won't
4963 * go away while we have the reference count on the
4964 * eb bumped.
4965 *
4966 * We can't just set the ref bit without bumping the
4967 * ref on the eb because free_extent_buffer might
4968 * see the ref bit and try to clear it. If this happens
4969 * free_extent_buffer might end up dropping our original
4970 * ref by mistake and freeing the page before we are able
4971 * to add one more ref.
4972 *
4973 * So bump the ref count first, then set the bit. If someone
4974 * beat us to it, drop the ref we added.
4975 */
Chris Mason242e18c2013-01-29 17:49:37 -05004976 refs = atomic_read(&eb->refs);
4977 if (refs >= 2 && test_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags))
4978 return;
4979
Josef Bacik594831c2012-07-20 16:11:08 -04004980 spin_lock(&eb->refs_lock);
4981 if (!test_and_set_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags))
Josef Bacik0b32f4b2012-03-13 09:38:00 -04004982 atomic_inc(&eb->refs);
Josef Bacik594831c2012-07-20 16:11:08 -04004983 spin_unlock(&eb->refs_lock);
Josef Bacik0b32f4b2012-03-13 09:38:00 -04004984}
4985
Mel Gorman2457aec2014-06-04 16:10:31 -07004986static void mark_extent_buffer_accessed(struct extent_buffer *eb,
4987 struct page *accessed)
Josef Bacik5df42352012-03-15 18:24:42 -04004988{
David Sterbacc5e31a2018-03-01 18:20:27 +01004989 int num_pages, i;
Josef Bacik5df42352012-03-15 18:24:42 -04004990
Josef Bacik0b32f4b2012-03-13 09:38:00 -04004991 check_buffer_tree_ref(eb);
4992
David Sterba65ad0102018-06-29 10:56:49 +02004993 num_pages = num_extent_pages(eb);
Josef Bacik5df42352012-03-15 18:24:42 -04004994 for (i = 0; i < num_pages; i++) {
David Sterbafb85fc92014-07-31 01:03:53 +02004995 struct page *p = eb->pages[i];
4996
Mel Gorman2457aec2014-06-04 16:10:31 -07004997 if (p != accessed)
4998 mark_page_accessed(p);
Josef Bacik5df42352012-03-15 18:24:42 -04004999 }
5000}
5001
Josef Bacikf28491e2013-12-16 13:24:27 -05005002struct extent_buffer *find_extent_buffer(struct btrfs_fs_info *fs_info,
5003 u64 start)
Chandra Seetharaman452c75c2013-10-07 10:45:25 -05005004{
5005 struct extent_buffer *eb;
5006
5007 rcu_read_lock();
Josef Bacikf28491e2013-12-16 13:24:27 -05005008 eb = radix_tree_lookup(&fs_info->buffer_radix,
Kirill A. Shutemov09cbfea2016-04-01 15:29:47 +03005009 start >> PAGE_SHIFT);
Chandra Seetharaman452c75c2013-10-07 10:45:25 -05005010 if (eb && atomic_inc_not_zero(&eb->refs)) {
5011 rcu_read_unlock();
Filipe Manana062c19e2015-04-23 11:28:48 +01005012 /*
5013 * Lock our eb's refs_lock to avoid races with
5014 * free_extent_buffer. When we get our eb it might be flagged
5015 * with EXTENT_BUFFER_STALE and another task running
5016 * free_extent_buffer might have seen that flag set,
5017 * eb->refs == 2, that the buffer isn't under IO (dirty and
5018 * writeback flags not set) and it's still in the tree (flag
5019 * EXTENT_BUFFER_TREE_REF set), therefore being in the process
5020 * of decrementing the extent buffer's reference count twice.
5021 * So here we could race and increment the eb's reference count,
5022 * clear its stale flag, mark it as dirty and drop our reference
5023 * before the other task finishes executing free_extent_buffer,
5024 * which would later result in an attempt to free an extent
5025 * buffer that is dirty.
5026 */
5027 if (test_bit(EXTENT_BUFFER_STALE, &eb->bflags)) {
5028 spin_lock(&eb->refs_lock);
5029 spin_unlock(&eb->refs_lock);
5030 }
Mel Gorman2457aec2014-06-04 16:10:31 -07005031 mark_extent_buffer_accessed(eb, NULL);
Chandra Seetharaman452c75c2013-10-07 10:45:25 -05005032 return eb;
5033 }
5034 rcu_read_unlock();
5035
5036 return NULL;
5037}
5038
#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
/*
 * Sanity-test variant of extent buffer allocation: return the buffer already
 * registered for @start if there is one, otherwise allocate a dummy buffer
 * and insert it into fs_info->buffer_radix.
 *
 * Returns the buffer that ends up registered for @start.  Returns NULL when
 * the dummy buffer cannot be allocated or radix_tree_preload() fails.
 */
struct extent_buffer *alloc_test_extent_buffer(struct btrfs_fs_info *fs_info,
					u64 start)
{
	struct extent_buffer *eb;
	struct extent_buffer *found = NULL;
	int ret;

	eb = find_extent_buffer(fs_info, start);
	if (eb)
		return eb;

	eb = alloc_dummy_extent_buffer(fs_info, start);
	if (!eb)
		return NULL;
	eb->fs_info = fs_info;

	for (;;) {
		ret = radix_tree_preload(GFP_NOFS);
		if (ret)
			goto free_eb;

		spin_lock(&fs_info->buffer_lock);
		ret = radix_tree_insert(&fs_info->buffer_radix,
					start >> PAGE_SHIFT, eb);
		spin_unlock(&fs_info->buffer_lock);
		radix_tree_preload_end();

		if (ret != -EEXIST)
			break;

		/*
		 * Somebody else inserted a buffer first.  Use theirs if a
		 * reference can still be obtained, otherwise try the insert
		 * again.
		 */
		found = find_extent_buffer(fs_info, start);
		if (found)
			goto free_eb;
	}

	check_buffer_tree_ref(eb);
	set_bit(EXTENT_BUFFER_IN_TREE, &eb->bflags);

	return eb;

free_eb:
	btrfs_release_extent_buffer(eb);
	return found;
}
#endif
5078
Josef Bacikf28491e2013-12-16 13:24:27 -05005079struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
David Sterbace3e6982014-06-15 03:00:04 +02005080 u64 start)
Chris Masond1310b22008-01-24 16:13:08 -05005081{
Jeff Mahoneyda170662016-06-15 09:22:56 -04005082 unsigned long len = fs_info->nodesize;
David Sterbacc5e31a2018-03-01 18:20:27 +01005083 int num_pages;
5084 int i;
Kirill A. Shutemov09cbfea2016-04-01 15:29:47 +03005085 unsigned long index = start >> PAGE_SHIFT;
Chris Masond1310b22008-01-24 16:13:08 -05005086 struct extent_buffer *eb;
Chris Mason6af118ce2008-07-22 11:18:07 -04005087 struct extent_buffer *exists = NULL;
Chris Masond1310b22008-01-24 16:13:08 -05005088 struct page *p;
Josef Bacikf28491e2013-12-16 13:24:27 -05005089 struct address_space *mapping = fs_info->btree_inode->i_mapping;
Chris Masond1310b22008-01-24 16:13:08 -05005090 int uptodate = 1;
Miao Xie19fe0a82010-10-26 20:57:29 -04005091 int ret;
Chris Masond1310b22008-01-24 16:13:08 -05005092
Jeff Mahoneyda170662016-06-15 09:22:56 -04005093 if (!IS_ALIGNED(start, fs_info->sectorsize)) {
Liu Boc871b0f2016-06-06 12:01:23 -07005094 btrfs_err(fs_info, "bad tree block start %llu", start);
5095 return ERR_PTR(-EINVAL);
5096 }
5097
Josef Bacikf28491e2013-12-16 13:24:27 -05005098 eb = find_extent_buffer(fs_info, start);
Chandra Seetharaman452c75c2013-10-07 10:45:25 -05005099 if (eb)
Chris Mason6af118ce2008-07-22 11:18:07 -04005100 return eb;
Chris Mason6af118ce2008-07-22 11:18:07 -04005101
David Sterba23d79d82014-06-15 02:55:29 +02005102 eb = __alloc_extent_buffer(fs_info, start, len);
Peter2b114d12008-04-01 11:21:40 -04005103 if (!eb)
Liu Boc871b0f2016-06-06 12:01:23 -07005104 return ERR_PTR(-ENOMEM);
Chris Masond1310b22008-01-24 16:13:08 -05005105
David Sterba65ad0102018-06-29 10:56:49 +02005106 num_pages = num_extent_pages(eb);
Chris Mason727011e2010-08-06 13:21:20 -04005107 for (i = 0; i < num_pages; i++, index++) {
Michal Hockod1b5c562015-08-19 14:17:40 +02005108 p = find_or_create_page(mapping, index, GFP_NOFS|__GFP_NOFAIL);
Liu Boc871b0f2016-06-06 12:01:23 -07005109 if (!p) {
5110 exists = ERR_PTR(-ENOMEM);
Chris Mason6af118ce2008-07-22 11:18:07 -04005111 goto free_eb;
Liu Boc871b0f2016-06-06 12:01:23 -07005112 }
Josef Bacik4f2de97a2012-03-07 16:20:05 -05005113
5114 spin_lock(&mapping->private_lock);
5115 if (PagePrivate(p)) {
5116 /*
5117 * We could have already allocated an eb for this page
5118 * and attached one so lets see if we can get a ref on
5119 * the existing eb, and if we can we know it's good and
5120 * we can just return that one, else we know we can just
5121 * overwrite page->private.
5122 */
5123 exists = (struct extent_buffer *)p->private;
5124 if (atomic_inc_not_zero(&exists->refs)) {
5125 spin_unlock(&mapping->private_lock);
5126 unlock_page(p);
Kirill A. Shutemov09cbfea2016-04-01 15:29:47 +03005127 put_page(p);
Mel Gorman2457aec2014-06-04 16:10:31 -07005128 mark_extent_buffer_accessed(exists, p);
Josef Bacik4f2de97a2012-03-07 16:20:05 -05005129 goto free_eb;
5130 }
Omar Sandoval5ca64f42015-02-24 02:47:05 -08005131 exists = NULL;
Josef Bacik4f2de97a2012-03-07 16:20:05 -05005132
Josef Bacik0b32f4b2012-03-13 09:38:00 -04005133 /*
Josef Bacik4f2de97a2012-03-07 16:20:05 -05005134 * Do this so attach doesn't complain and we need to
5135 * drop the ref the old guy had.
5136 */
5137 ClearPagePrivate(p);
Josef Bacik0b32f4b2012-03-13 09:38:00 -04005138 WARN_ON(PageDirty(p));
Kirill A. Shutemov09cbfea2016-04-01 15:29:47 +03005139 put_page(p);
Chris Masond1310b22008-01-24 16:13:08 -05005140 }
Josef Bacik4f2de97a2012-03-07 16:20:05 -05005141 attach_extent_buffer_page(eb, p);
5142 spin_unlock(&mapping->private_lock);
Josef Bacik0b32f4b2012-03-13 09:38:00 -04005143 WARN_ON(PageDirty(p));
Chris Mason727011e2010-08-06 13:21:20 -04005144 eb->pages[i] = p;
Chris Masond1310b22008-01-24 16:13:08 -05005145 if (!PageUptodate(p))
5146 uptodate = 0;
Chris Masoneb14ab82011-02-10 12:35:00 -05005147
5148 /*
Nikolay Borisovb16d0112018-07-04 10:24:52 +03005149 * We can't unlock the pages just yet since the extent buffer
5150 * hasn't been properly inserted in the radix tree, this
5151 * opens a race with btree_releasepage which can free a page
5152 * while we are still filling in all pages for the buffer and
5153 * we could crash.
Chris Masoneb14ab82011-02-10 12:35:00 -05005154 */
Chris Masond1310b22008-01-24 16:13:08 -05005155 }
5156 if (uptodate)
Chris Masonb4ce94d2009-02-04 09:25:08 -05005157 set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
Josef Bacik115391d2012-03-09 09:51:43 -05005158again:
David Sterbae1860a72016-05-09 14:11:38 +02005159 ret = radix_tree_preload(GFP_NOFS);
Liu Boc871b0f2016-06-06 12:01:23 -07005160 if (ret) {
5161 exists = ERR_PTR(ret);
Miao Xie19fe0a82010-10-26 20:57:29 -04005162 goto free_eb;
Liu Boc871b0f2016-06-06 12:01:23 -07005163 }
Miao Xie19fe0a82010-10-26 20:57:29 -04005164
Josef Bacikf28491e2013-12-16 13:24:27 -05005165 spin_lock(&fs_info->buffer_lock);
5166 ret = radix_tree_insert(&fs_info->buffer_radix,
Kirill A. Shutemov09cbfea2016-04-01 15:29:47 +03005167 start >> PAGE_SHIFT, eb);
Josef Bacikf28491e2013-12-16 13:24:27 -05005168 spin_unlock(&fs_info->buffer_lock);
Chandra Seetharaman452c75c2013-10-07 10:45:25 -05005169 radix_tree_preload_end();
Miao Xie19fe0a82010-10-26 20:57:29 -04005170 if (ret == -EEXIST) {
Josef Bacikf28491e2013-12-16 13:24:27 -05005171 exists = find_extent_buffer(fs_info, start);
Chandra Seetharaman452c75c2013-10-07 10:45:25 -05005172 if (exists)
5173 goto free_eb;
5174 else
Josef Bacik115391d2012-03-09 09:51:43 -05005175 goto again;
Chris Mason6af118ce2008-07-22 11:18:07 -04005176 }
Chris Mason6af118ce2008-07-22 11:18:07 -04005177 /* add one reference for the tree */
Josef Bacik0b32f4b2012-03-13 09:38:00 -04005178 check_buffer_tree_ref(eb);
Josef Bacik34b41ac2013-12-13 10:41:51 -05005179 set_bit(EXTENT_BUFFER_IN_TREE, &eb->bflags);
Chris Masoneb14ab82011-02-10 12:35:00 -05005180
5181 /*
Nikolay Borisovb16d0112018-07-04 10:24:52 +03005182 * Now it's safe to unlock the pages because any calls to
5183 * btree_releasepage will correctly detect that a page belongs to a
5184 * live buffer and won't free them prematurely.
Chris Masoneb14ab82011-02-10 12:35:00 -05005185 */
Nikolay Borisov28187ae2018-07-04 10:24:51 +03005186 for (i = 0; i < num_pages; i++)
5187 unlock_page(eb->pages[i]);
Chris Masond1310b22008-01-24 16:13:08 -05005188 return eb;
5189
Chris Mason6af118ce2008-07-22 11:18:07 -04005190free_eb:
Omar Sandoval5ca64f42015-02-24 02:47:05 -08005191 WARN_ON(!atomic_dec_and_test(&eb->refs));
Chris Mason727011e2010-08-06 13:21:20 -04005192 for (i = 0; i < num_pages; i++) {
5193 if (eb->pages[i])
5194 unlock_page(eb->pages[i]);
5195 }
Chris Masoneb14ab82011-02-10 12:35:00 -05005196
Miao Xie897ca6e92010-10-26 20:57:29 -04005197 btrfs_release_extent_buffer(eb);
Chris Mason6af118ce2008-07-22 11:18:07 -04005198 return exists;
Chris Masond1310b22008-01-24 16:13:08 -05005199}
Chris Masond1310b22008-01-24 16:13:08 -05005200
Josef Bacik3083ee22012-03-09 16:01:49 -05005201static inline void btrfs_release_extent_buffer_rcu(struct rcu_head *head)
5202{
5203 struct extent_buffer *eb =
5204 container_of(head, struct extent_buffer, rcu_head);
5205
5206 __free_extent_buffer(eb);
5207}
5208
David Sterbaf7a52a42013-04-26 14:56:29 +00005209static int release_extent_buffer(struct extent_buffer *eb)
Josef Bacik3083ee22012-03-09 16:01:49 -05005210{
Nikolay Borisov07e21c42018-06-27 16:38:23 +03005211 lockdep_assert_held(&eb->refs_lock);
5212
Josef Bacik3083ee22012-03-09 16:01:49 -05005213 WARN_ON(atomic_read(&eb->refs) == 0);
5214 if (atomic_dec_and_test(&eb->refs)) {
Josef Bacik34b41ac2013-12-13 10:41:51 -05005215 if (test_and_clear_bit(EXTENT_BUFFER_IN_TREE, &eb->bflags)) {
Josef Bacikf28491e2013-12-16 13:24:27 -05005216 struct btrfs_fs_info *fs_info = eb->fs_info;
Josef Bacik3083ee22012-03-09 16:01:49 -05005217
Jan Schmidt815a51c2012-05-16 17:00:02 +02005218 spin_unlock(&eb->refs_lock);
Josef Bacik3083ee22012-03-09 16:01:49 -05005219
Josef Bacikf28491e2013-12-16 13:24:27 -05005220 spin_lock(&fs_info->buffer_lock);
5221 radix_tree_delete(&fs_info->buffer_radix,
Kirill A. Shutemov09cbfea2016-04-01 15:29:47 +03005222 eb->start >> PAGE_SHIFT);
Josef Bacikf28491e2013-12-16 13:24:27 -05005223 spin_unlock(&fs_info->buffer_lock);
Josef Bacik34b41ac2013-12-13 10:41:51 -05005224 } else {
5225 spin_unlock(&eb->refs_lock);
Jan Schmidt815a51c2012-05-16 17:00:02 +02005226 }
Josef Bacik3083ee22012-03-09 16:01:49 -05005227
5228 /* Should be safe to release our pages at this point */
David Sterba55ac0132018-07-19 17:24:32 +02005229 btrfs_release_extent_buffer_pages(eb);
Josef Bacikbcb7e442015-03-16 17:38:02 -04005230#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
Nikolay Borisovb0132a32018-06-27 16:38:24 +03005231 if (unlikely(test_bit(EXTENT_BUFFER_UNMAPPED, &eb->bflags))) {
Josef Bacikbcb7e442015-03-16 17:38:02 -04005232 __free_extent_buffer(eb);
5233 return 1;
5234 }
5235#endif
Josef Bacik3083ee22012-03-09 16:01:49 -05005236 call_rcu(&eb->rcu_head, btrfs_release_extent_buffer_rcu);
Josef Bacike64860a2012-07-20 16:05:36 -04005237 return 1;
Josef Bacik3083ee22012-03-09 16:01:49 -05005238 }
5239 spin_unlock(&eb->refs_lock);
Josef Bacike64860a2012-07-20 16:05:36 -04005240
5241 return 0;
Josef Bacik3083ee22012-03-09 16:01:49 -05005242}
5243
Chris Masond1310b22008-01-24 16:13:08 -05005244void free_extent_buffer(struct extent_buffer *eb)
5245{
Chris Mason242e18c2013-01-29 17:49:37 -05005246 int refs;
5247 int old;
Chris Masond1310b22008-01-24 16:13:08 -05005248 if (!eb)
5249 return;
5250
Chris Mason242e18c2013-01-29 17:49:37 -05005251 while (1) {
5252 refs = atomic_read(&eb->refs);
Nikolay Borisov46cc7752018-10-15 17:04:01 +03005253 if ((!test_bit(EXTENT_BUFFER_UNMAPPED, &eb->bflags) && refs <= 3)
5254 || (test_bit(EXTENT_BUFFER_UNMAPPED, &eb->bflags) &&
5255 refs == 1))
Chris Mason242e18c2013-01-29 17:49:37 -05005256 break;
5257 old = atomic_cmpxchg(&eb->refs, refs, refs - 1);
5258 if (old == refs)
5259 return;
5260 }
5261
Josef Bacik3083ee22012-03-09 16:01:49 -05005262 spin_lock(&eb->refs_lock);
5263 if (atomic_read(&eb->refs) == 2 &&
5264 test_bit(EXTENT_BUFFER_STALE, &eb->bflags) &&
Josef Bacik0b32f4b2012-03-13 09:38:00 -04005265 !extent_buffer_under_io(eb) &&
Josef Bacik3083ee22012-03-09 16:01:49 -05005266 test_and_clear_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags))
5267 atomic_dec(&eb->refs);
Chris Masond1310b22008-01-24 16:13:08 -05005268
Josef Bacik3083ee22012-03-09 16:01:49 -05005269 /*
5270 * I know this is terrible, but it's temporary until we stop tracking
5271 * the uptodate bits and such for the extent buffers.
5272 */
David Sterbaf7a52a42013-04-26 14:56:29 +00005273 release_extent_buffer(eb);
Chris Masond1310b22008-01-24 16:13:08 -05005274}
Chris Masond1310b22008-01-24 16:13:08 -05005275
Josef Bacik3083ee22012-03-09 16:01:49 -05005276void free_extent_buffer_stale(struct extent_buffer *eb)
5277{
5278 if (!eb)
Chris Masond1310b22008-01-24 16:13:08 -05005279 return;
5280
Josef Bacik3083ee22012-03-09 16:01:49 -05005281 spin_lock(&eb->refs_lock);
5282 set_bit(EXTENT_BUFFER_STALE, &eb->bflags);
5283
Josef Bacik0b32f4b2012-03-13 09:38:00 -04005284 if (atomic_read(&eb->refs) == 2 && !extent_buffer_under_io(eb) &&
Josef Bacik3083ee22012-03-09 16:01:49 -05005285 test_and_clear_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags))
5286 atomic_dec(&eb->refs);
David Sterbaf7a52a42013-04-26 14:56:29 +00005287 release_extent_buffer(eb);
Chris Masond1310b22008-01-24 16:13:08 -05005288}
5289
Chris Mason1d4284b2012-03-28 20:31:37 -04005290void clear_extent_buffer_dirty(struct extent_buffer *eb)
Chris Masond1310b22008-01-24 16:13:08 -05005291{
David Sterbacc5e31a2018-03-01 18:20:27 +01005292 int i;
5293 int num_pages;
Chris Masond1310b22008-01-24 16:13:08 -05005294 struct page *page;
5295
David Sterba65ad0102018-06-29 10:56:49 +02005296 num_pages = num_extent_pages(eb);
Chris Masond1310b22008-01-24 16:13:08 -05005297
5298 for (i = 0; i < num_pages; i++) {
David Sterbafb85fc92014-07-31 01:03:53 +02005299 page = eb->pages[i];
Chris Masonb9473432009-03-13 11:00:37 -04005300 if (!PageDirty(page))
Chris Masond2c3f4f2008-11-19 12:44:22 -05005301 continue;
5302
Chris Masona61e6f22008-07-22 11:18:08 -04005303 lock_page(page);
Chris Masoneb14ab82011-02-10 12:35:00 -05005304 WARN_ON(!PagePrivate(page));
5305
Chris Masond1310b22008-01-24 16:13:08 -05005306 clear_page_dirty_for_io(page);
Matthew Wilcoxb93b0162018-04-10 16:36:56 -07005307 xa_lock_irq(&page->mapping->i_pages);
Matthew Wilcox0a943c62017-12-04 10:37:22 -05005308 if (!PageDirty(page))
5309 __xa_clear_mark(&page->mapping->i_pages,
5310 page_index(page), PAGECACHE_TAG_DIRTY);
Matthew Wilcoxb93b0162018-04-10 16:36:56 -07005311 xa_unlock_irq(&page->mapping->i_pages);
Chris Masonbf0da8c2011-11-04 12:29:37 -04005312 ClearPageError(page);
Chris Masona61e6f22008-07-22 11:18:08 -04005313 unlock_page(page);
Chris Masond1310b22008-01-24 16:13:08 -05005314 }
Josef Bacik0b32f4b2012-03-13 09:38:00 -04005315 WARN_ON(atomic_read(&eb->refs) == 0);
Chris Masond1310b22008-01-24 16:13:08 -05005316}
Chris Masond1310b22008-01-24 16:13:08 -05005317
Liu Boabb57ef2018-09-14 01:44:42 +08005318bool set_extent_buffer_dirty(struct extent_buffer *eb)
Chris Masond1310b22008-01-24 16:13:08 -05005319{
David Sterbacc5e31a2018-03-01 18:20:27 +01005320 int i;
5321 int num_pages;
Liu Boabb57ef2018-09-14 01:44:42 +08005322 bool was_dirty;
Chris Masond1310b22008-01-24 16:13:08 -05005323
Josef Bacik0b32f4b2012-03-13 09:38:00 -04005324 check_buffer_tree_ref(eb);
5325
Chris Masonb9473432009-03-13 11:00:37 -04005326 was_dirty = test_and_set_bit(EXTENT_BUFFER_DIRTY, &eb->bflags);
Josef Bacik0b32f4b2012-03-13 09:38:00 -04005327
David Sterba65ad0102018-06-29 10:56:49 +02005328 num_pages = num_extent_pages(eb);
Josef Bacik3083ee22012-03-09 16:01:49 -05005329 WARN_ON(atomic_read(&eb->refs) == 0);
Josef Bacik0b32f4b2012-03-13 09:38:00 -04005330 WARN_ON(!test_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags));
5331
Liu Boabb57ef2018-09-14 01:44:42 +08005332 if (!was_dirty)
5333 for (i = 0; i < num_pages; i++)
5334 set_page_dirty(eb->pages[i]);
Liu Bo51995c32018-09-14 01:46:08 +08005335
5336#ifdef CONFIG_BTRFS_DEBUG
5337 for (i = 0; i < num_pages; i++)
5338 ASSERT(PageDirty(eb->pages[i]));
5339#endif
5340
Chris Masonb9473432009-03-13 11:00:37 -04005341 return was_dirty;
Chris Masond1310b22008-01-24 16:13:08 -05005342}
Chris Masond1310b22008-01-24 16:13:08 -05005343
David Sterba69ba3922015-12-03 13:08:59 +01005344void clear_extent_buffer_uptodate(struct extent_buffer *eb)
Chris Mason1259ab72008-05-12 13:39:03 -04005345{
David Sterbacc5e31a2018-03-01 18:20:27 +01005346 int i;
Chris Mason1259ab72008-05-12 13:39:03 -04005347 struct page *page;
David Sterbacc5e31a2018-03-01 18:20:27 +01005348 int num_pages;
Chris Mason1259ab72008-05-12 13:39:03 -04005349
Chris Masonb4ce94d2009-02-04 09:25:08 -05005350 clear_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
David Sterba65ad0102018-06-29 10:56:49 +02005351 num_pages = num_extent_pages(eb);
Chris Mason1259ab72008-05-12 13:39:03 -04005352 for (i = 0; i < num_pages; i++) {
David Sterbafb85fc92014-07-31 01:03:53 +02005353 page = eb->pages[i];
Chris Mason33958dc2008-07-30 10:29:12 -04005354 if (page)
5355 ClearPageUptodate(page);
Chris Mason1259ab72008-05-12 13:39:03 -04005356 }
Chris Mason1259ab72008-05-12 13:39:03 -04005357}
5358
David Sterba09c25a82015-12-03 13:08:59 +01005359void set_extent_buffer_uptodate(struct extent_buffer *eb)
Chris Masond1310b22008-01-24 16:13:08 -05005360{
David Sterbacc5e31a2018-03-01 18:20:27 +01005361 int i;
Chris Masond1310b22008-01-24 16:13:08 -05005362 struct page *page;
David Sterbacc5e31a2018-03-01 18:20:27 +01005363 int num_pages;
Chris Masond1310b22008-01-24 16:13:08 -05005364
Josef Bacik0b32f4b2012-03-13 09:38:00 -04005365 set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
David Sterba65ad0102018-06-29 10:56:49 +02005366 num_pages = num_extent_pages(eb);
Chris Masond1310b22008-01-24 16:13:08 -05005367 for (i = 0; i < num_pages; i++) {
David Sterbafb85fc92014-07-31 01:03:53 +02005368 page = eb->pages[i];
Chris Masond1310b22008-01-24 16:13:08 -05005369 SetPageUptodate(page);
5370 }
Chris Masond1310b22008-01-24 16:13:08 -05005371}
Chris Masond1310b22008-01-24 16:13:08 -05005372
Nikolay Borisovc2ccfbc2019-04-10 17:24:40 +03005373int read_extent_buffer_pages(struct extent_buffer *eb, int wait, int mirror_num)
Chris Masond1310b22008-01-24 16:13:08 -05005374{
David Sterbacc5e31a2018-03-01 18:20:27 +01005375 int i;
Chris Masond1310b22008-01-24 16:13:08 -05005376 struct page *page;
5377 int err;
5378 int ret = 0;
Chris Masonce9adaa2008-04-09 16:28:12 -04005379 int locked_pages = 0;
5380 int all_uptodate = 1;
David Sterbacc5e31a2018-03-01 18:20:27 +01005381 int num_pages;
Chris Mason727011e2010-08-06 13:21:20 -04005382 unsigned long num_reads = 0;
Chris Masona86c12c2008-02-07 10:50:54 -05005383 struct bio *bio = NULL;
Chris Masonc8b97812008-10-29 14:49:59 -04005384 unsigned long bio_flags = 0;
Nikolay Borisovc2ccfbc2019-04-10 17:24:40 +03005385 struct extent_io_tree *tree = &BTRFS_I(eb->fs_info->btree_inode)->io_tree;
Chris Masona86c12c2008-02-07 10:50:54 -05005386
Chris Masonb4ce94d2009-02-04 09:25:08 -05005387 if (test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags))
Chris Masond1310b22008-01-24 16:13:08 -05005388 return 0;
5389
David Sterba65ad0102018-06-29 10:56:49 +02005390 num_pages = num_extent_pages(eb);
Josef Bacik8436ea912016-09-02 15:40:03 -04005391 for (i = 0; i < num_pages; i++) {
David Sterbafb85fc92014-07-31 01:03:53 +02005392 page = eb->pages[i];
Arne Jansenbb82ab82011-06-10 14:06:53 +02005393 if (wait == WAIT_NONE) {
David Woodhouse2db04962008-08-07 11:19:43 -04005394 if (!trylock_page(page))
Chris Masonce9adaa2008-04-09 16:28:12 -04005395 goto unlock_exit;
Chris Masond1310b22008-01-24 16:13:08 -05005396 } else {
5397 lock_page(page);
5398 }
Chris Masonce9adaa2008-04-09 16:28:12 -04005399 locked_pages++;
Liu Bo2571e732016-08-03 12:33:01 -07005400 }
5401 /*
5402 * We need to firstly lock all pages to make sure that
5403 * the uptodate bit of our pages won't be affected by
5404 * clear_extent_buffer_uptodate().
5405 */
Josef Bacik8436ea912016-09-02 15:40:03 -04005406 for (i = 0; i < num_pages; i++) {
Liu Bo2571e732016-08-03 12:33:01 -07005407 page = eb->pages[i];
Chris Mason727011e2010-08-06 13:21:20 -04005408 if (!PageUptodate(page)) {
5409 num_reads++;
Chris Masonce9adaa2008-04-09 16:28:12 -04005410 all_uptodate = 0;
Chris Mason727011e2010-08-06 13:21:20 -04005411 }
Chris Masonce9adaa2008-04-09 16:28:12 -04005412 }
Liu Bo2571e732016-08-03 12:33:01 -07005413
Chris Masonce9adaa2008-04-09 16:28:12 -04005414 if (all_uptodate) {
Josef Bacik8436ea912016-09-02 15:40:03 -04005415 set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
Chris Masonce9adaa2008-04-09 16:28:12 -04005416 goto unlock_exit;
5417 }
5418
Filipe Manana656f30d2014-09-26 12:25:56 +01005419 clear_bit(EXTENT_BUFFER_READ_ERR, &eb->bflags);
Josef Bacik5cf1ab52012-04-16 09:42:26 -04005420 eb->read_mirror = 0;
Josef Bacik0b32f4b2012-03-13 09:38:00 -04005421 atomic_set(&eb->io_pages, num_reads);
Josef Bacik8436ea912016-09-02 15:40:03 -04005422 for (i = 0; i < num_pages; i++) {
David Sterbafb85fc92014-07-31 01:03:53 +02005423 page = eb->pages[i];
Liu Bobaf863b2016-07-11 10:39:07 -07005424
Chris Masonce9adaa2008-04-09 16:28:12 -04005425 if (!PageUptodate(page)) {
Liu Bobaf863b2016-07-11 10:39:07 -07005426 if (ret) {
5427 atomic_dec(&eb->io_pages);
5428 unlock_page(page);
5429 continue;
5430 }
5431
Chris Masonf1885912008-04-09 16:28:12 -04005432 ClearPageError(page);
Chris Masona86c12c2008-02-07 10:50:54 -05005433 err = __extent_read_full_page(tree, page,
David Sterba6af49db2017-06-23 04:09:57 +02005434 btree_get_extent, &bio,
Josef Bacikd4c7ca82013-04-19 19:49:09 -04005435 mirror_num, &bio_flags,
Mike Christie1f7ad752016-06-05 14:31:51 -05005436 REQ_META);
Liu Bobaf863b2016-07-11 10:39:07 -07005437 if (err) {
Chris Masond1310b22008-01-24 16:13:08 -05005438 ret = err;
Liu Bobaf863b2016-07-11 10:39:07 -07005439 /*
5440 * We use &bio in above __extent_read_full_page,
5441 * so we ensure that if it returns error, the
5442 * current page fails to add itself to bio and
5443 * it's been unlocked.
5444 *
5445 * We must dec io_pages by ourselves.
5446 */
5447 atomic_dec(&eb->io_pages);
5448 }
Chris Masond1310b22008-01-24 16:13:08 -05005449 } else {
5450 unlock_page(page);
5451 }
5452 }
5453
Jeff Mahoney355808c2011-10-03 23:23:14 -04005454 if (bio) {
Mike Christie1f7ad752016-06-05 14:31:51 -05005455 err = submit_one_bio(bio, mirror_num, bio_flags);
Jeff Mahoney79787ea2012-03-12 16:03:00 +01005456 if (err)
5457 return err;
Jeff Mahoney355808c2011-10-03 23:23:14 -04005458 }
Chris Masona86c12c2008-02-07 10:50:54 -05005459
Arne Jansenbb82ab82011-06-10 14:06:53 +02005460 if (ret || wait != WAIT_COMPLETE)
Chris Masond1310b22008-01-24 16:13:08 -05005461 return ret;
Chris Masond3977122009-01-05 21:25:51 -05005462
Josef Bacik8436ea912016-09-02 15:40:03 -04005463 for (i = 0; i < num_pages; i++) {
David Sterbafb85fc92014-07-31 01:03:53 +02005464 page = eb->pages[i];
Chris Masond1310b22008-01-24 16:13:08 -05005465 wait_on_page_locked(page);
Chris Masond3977122009-01-05 21:25:51 -05005466 if (!PageUptodate(page))
Chris Masond1310b22008-01-24 16:13:08 -05005467 ret = -EIO;
Chris Masond1310b22008-01-24 16:13:08 -05005468 }
Chris Masond3977122009-01-05 21:25:51 -05005469
Chris Masond1310b22008-01-24 16:13:08 -05005470 return ret;
Chris Masonce9adaa2008-04-09 16:28:12 -04005471
5472unlock_exit:
Chris Masond3977122009-01-05 21:25:51 -05005473 while (locked_pages > 0) {
Chris Masonce9adaa2008-04-09 16:28:12 -04005474 locked_pages--;
Josef Bacik8436ea912016-09-02 15:40:03 -04005475 page = eb->pages[locked_pages];
5476 unlock_page(page);
Chris Masonce9adaa2008-04-09 16:28:12 -04005477 }
5478 return ret;
Chris Masond1310b22008-01-24 16:13:08 -05005479}
Chris Masond1310b22008-01-24 16:13:08 -05005480
Jeff Mahoney1cbb1f42017-06-28 21:56:53 -06005481void read_extent_buffer(const struct extent_buffer *eb, void *dstv,
5482 unsigned long start, unsigned long len)
Chris Masond1310b22008-01-24 16:13:08 -05005483{
5484 size_t cur;
5485 size_t offset;
5486 struct page *page;
5487 char *kaddr;
5488 char *dst = (char *)dstv;
Johannes Thumshirn70730172018-12-05 15:23:03 +01005489 size_t start_offset = offset_in_page(eb->start);
Kirill A. Shutemov09cbfea2016-04-01 15:29:47 +03005490 unsigned long i = (start_offset + start) >> PAGE_SHIFT;
Chris Masond1310b22008-01-24 16:13:08 -05005491
Liu Bof716abd2017-08-09 11:10:16 -06005492 if (start + len > eb->len) {
5493 WARN(1, KERN_ERR "btrfs bad mapping eb start %llu len %lu, wanted %lu %lu\n",
5494 eb->start, eb->len, start, len);
5495 memset(dst, 0, len);
5496 return;
5497 }
Chris Masond1310b22008-01-24 16:13:08 -05005498
Johannes Thumshirn70730172018-12-05 15:23:03 +01005499 offset = offset_in_page(start_offset + start);
Chris Masond1310b22008-01-24 16:13:08 -05005500
Chris Masond3977122009-01-05 21:25:51 -05005501 while (len > 0) {
David Sterbafb85fc92014-07-31 01:03:53 +02005502 page = eb->pages[i];
Chris Masond1310b22008-01-24 16:13:08 -05005503
Kirill A. Shutemov09cbfea2016-04-01 15:29:47 +03005504 cur = min(len, (PAGE_SIZE - offset));
Chris Masona6591712011-07-19 12:04:14 -04005505 kaddr = page_address(page);
Chris Masond1310b22008-01-24 16:13:08 -05005506 memcpy(dst, kaddr + offset, cur);
Chris Masond1310b22008-01-24 16:13:08 -05005507
5508 dst += cur;
5509 len -= cur;
5510 offset = 0;
5511 i++;
5512 }
5513}
Chris Masond1310b22008-01-24 16:13:08 -05005514
Jeff Mahoney1cbb1f42017-06-28 21:56:53 -06005515int read_extent_buffer_to_user(const struct extent_buffer *eb,
5516 void __user *dstv,
5517 unsigned long start, unsigned long len)
Gerhard Heift550ac1d2014-01-30 16:24:01 +01005518{
5519 size_t cur;
5520 size_t offset;
5521 struct page *page;
5522 char *kaddr;
5523 char __user *dst = (char __user *)dstv;
Johannes Thumshirn70730172018-12-05 15:23:03 +01005524 size_t start_offset = offset_in_page(eb->start);
Kirill A. Shutemov09cbfea2016-04-01 15:29:47 +03005525 unsigned long i = (start_offset + start) >> PAGE_SHIFT;
Gerhard Heift550ac1d2014-01-30 16:24:01 +01005526 int ret = 0;
5527
5528 WARN_ON(start > eb->len);
5529 WARN_ON(start + len > eb->start + eb->len);
5530
Johannes Thumshirn70730172018-12-05 15:23:03 +01005531 offset = offset_in_page(start_offset + start);
Gerhard Heift550ac1d2014-01-30 16:24:01 +01005532
5533 while (len > 0) {
David Sterbafb85fc92014-07-31 01:03:53 +02005534 page = eb->pages[i];
Gerhard Heift550ac1d2014-01-30 16:24:01 +01005535
Kirill A. Shutemov09cbfea2016-04-01 15:29:47 +03005536 cur = min(len, (PAGE_SIZE - offset));
Gerhard Heift550ac1d2014-01-30 16:24:01 +01005537 kaddr = page_address(page);
5538 if (copy_to_user(dst, kaddr + offset, cur)) {
5539 ret = -EFAULT;
5540 break;
5541 }
5542
5543 dst += cur;
5544 len -= cur;
5545 offset = 0;
5546 i++;
5547 }
5548
5549 return ret;
5550}
5551
Liu Bo415b35a2016-06-17 19:16:21 -07005552/*
5553 * return 0 if the item is found within a page.
5554 * return 1 if the item spans two pages.
5555 * return -EINVAL otherwise.
5556 */
Jeff Mahoney1cbb1f42017-06-28 21:56:53 -06005557int map_private_extent_buffer(const struct extent_buffer *eb,
5558 unsigned long start, unsigned long min_len,
5559 char **map, unsigned long *map_start,
5560 unsigned long *map_len)
Chris Masond1310b22008-01-24 16:13:08 -05005561{
Johannes Thumshirncc2c39d2018-11-28 09:54:54 +01005562 size_t offset;
Chris Masond1310b22008-01-24 16:13:08 -05005563 char *kaddr;
5564 struct page *p;
Johannes Thumshirn70730172018-12-05 15:23:03 +01005565 size_t start_offset = offset_in_page(eb->start);
Kirill A. Shutemov09cbfea2016-04-01 15:29:47 +03005566 unsigned long i = (start_offset + start) >> PAGE_SHIFT;
Chris Masond1310b22008-01-24 16:13:08 -05005567 unsigned long end_i = (start_offset + start + min_len - 1) >>
Kirill A. Shutemov09cbfea2016-04-01 15:29:47 +03005568 PAGE_SHIFT;
Chris Masond1310b22008-01-24 16:13:08 -05005569
Liu Bof716abd2017-08-09 11:10:16 -06005570 if (start + min_len > eb->len) {
5571 WARN(1, KERN_ERR "btrfs bad mapping eb start %llu len %lu, wanted %lu %lu\n",
5572 eb->start, eb->len, start, min_len);
5573 return -EINVAL;
5574 }
5575
Chris Masond1310b22008-01-24 16:13:08 -05005576 if (i != end_i)
Liu Bo415b35a2016-06-17 19:16:21 -07005577 return 1;
Chris Masond1310b22008-01-24 16:13:08 -05005578
5579 if (i == 0) {
5580 offset = start_offset;
5581 *map_start = 0;
5582 } else {
5583 offset = 0;
Kirill A. Shutemov09cbfea2016-04-01 15:29:47 +03005584 *map_start = ((u64)i << PAGE_SHIFT) - start_offset;
Chris Masond1310b22008-01-24 16:13:08 -05005585 }
Chris Masond3977122009-01-05 21:25:51 -05005586
David Sterbafb85fc92014-07-31 01:03:53 +02005587 p = eb->pages[i];
Chris Masona6591712011-07-19 12:04:14 -04005588 kaddr = page_address(p);
Chris Masond1310b22008-01-24 16:13:08 -05005589 *map = kaddr + offset;
Kirill A. Shutemov09cbfea2016-04-01 15:29:47 +03005590 *map_len = PAGE_SIZE - offset;
Chris Masond1310b22008-01-24 16:13:08 -05005591 return 0;
5592}
Chris Masond1310b22008-01-24 16:13:08 -05005593
Jeff Mahoney1cbb1f42017-06-28 21:56:53 -06005594int memcmp_extent_buffer(const struct extent_buffer *eb, const void *ptrv,
5595 unsigned long start, unsigned long len)
Chris Masond1310b22008-01-24 16:13:08 -05005596{
5597 size_t cur;
5598 size_t offset;
5599 struct page *page;
5600 char *kaddr;
5601 char *ptr = (char *)ptrv;
Johannes Thumshirn70730172018-12-05 15:23:03 +01005602 size_t start_offset = offset_in_page(eb->start);
Kirill A. Shutemov09cbfea2016-04-01 15:29:47 +03005603 unsigned long i = (start_offset + start) >> PAGE_SHIFT;
Chris Masond1310b22008-01-24 16:13:08 -05005604 int ret = 0;
5605
5606 WARN_ON(start > eb->len);
5607 WARN_ON(start + len > eb->start + eb->len);
5608
Johannes Thumshirn70730172018-12-05 15:23:03 +01005609 offset = offset_in_page(start_offset + start);
Chris Masond1310b22008-01-24 16:13:08 -05005610
Chris Masond3977122009-01-05 21:25:51 -05005611 while (len > 0) {
David Sterbafb85fc92014-07-31 01:03:53 +02005612 page = eb->pages[i];
Chris Masond1310b22008-01-24 16:13:08 -05005613
Kirill A. Shutemov09cbfea2016-04-01 15:29:47 +03005614 cur = min(len, (PAGE_SIZE - offset));
Chris Masond1310b22008-01-24 16:13:08 -05005615
Chris Masona6591712011-07-19 12:04:14 -04005616 kaddr = page_address(page);
Chris Masond1310b22008-01-24 16:13:08 -05005617 ret = memcmp(ptr, kaddr + offset, cur);
Chris Masond1310b22008-01-24 16:13:08 -05005618 if (ret)
5619 break;
5620
5621 ptr += cur;
5622 len -= cur;
5623 offset = 0;
5624 i++;
5625 }
5626 return ret;
5627}
Chris Masond1310b22008-01-24 16:13:08 -05005628
David Sterbaf157bf72016-11-09 17:43:38 +01005629void write_extent_buffer_chunk_tree_uuid(struct extent_buffer *eb,
5630 const void *srcv)
5631{
5632 char *kaddr;
5633
5634 WARN_ON(!PageUptodate(eb->pages[0]));
5635 kaddr = page_address(eb->pages[0]);
5636 memcpy(kaddr + offsetof(struct btrfs_header, chunk_tree_uuid), srcv,
5637 BTRFS_FSID_SIZE);
5638}
5639
5640void write_extent_buffer_fsid(struct extent_buffer *eb, const void *srcv)
5641{
5642 char *kaddr;
5643
5644 WARN_ON(!PageUptodate(eb->pages[0]));
5645 kaddr = page_address(eb->pages[0]);
5646 memcpy(kaddr + offsetof(struct btrfs_header, fsid), srcv,
5647 BTRFS_FSID_SIZE);
5648}
5649
Chris Masond1310b22008-01-24 16:13:08 -05005650void write_extent_buffer(struct extent_buffer *eb, const void *srcv,
5651 unsigned long start, unsigned long len)
5652{
5653 size_t cur;
5654 size_t offset;
5655 struct page *page;
5656 char *kaddr;
5657 char *src = (char *)srcv;
Johannes Thumshirn70730172018-12-05 15:23:03 +01005658 size_t start_offset = offset_in_page(eb->start);
Kirill A. Shutemov09cbfea2016-04-01 15:29:47 +03005659 unsigned long i = (start_offset + start) >> PAGE_SHIFT;
Chris Masond1310b22008-01-24 16:13:08 -05005660
5661 WARN_ON(start > eb->len);
5662 WARN_ON(start + len > eb->start + eb->len);
5663
Johannes Thumshirn70730172018-12-05 15:23:03 +01005664 offset = offset_in_page(start_offset + start);
Chris Masond1310b22008-01-24 16:13:08 -05005665
Chris Masond3977122009-01-05 21:25:51 -05005666 while (len > 0) {
David Sterbafb85fc92014-07-31 01:03:53 +02005667 page = eb->pages[i];
Chris Masond1310b22008-01-24 16:13:08 -05005668 WARN_ON(!PageUptodate(page));
5669
Kirill A. Shutemov09cbfea2016-04-01 15:29:47 +03005670 cur = min(len, PAGE_SIZE - offset);
Chris Masona6591712011-07-19 12:04:14 -04005671 kaddr = page_address(page);
Chris Masond1310b22008-01-24 16:13:08 -05005672 memcpy(kaddr + offset, src, cur);
Chris Masond1310b22008-01-24 16:13:08 -05005673
5674 src += cur;
5675 len -= cur;
5676 offset = 0;
5677 i++;
5678 }
5679}
Chris Masond1310b22008-01-24 16:13:08 -05005680
David Sterbab159fa22016-11-08 18:09:03 +01005681void memzero_extent_buffer(struct extent_buffer *eb, unsigned long start,
5682 unsigned long len)
Chris Masond1310b22008-01-24 16:13:08 -05005683{
5684 size_t cur;
5685 size_t offset;
5686 struct page *page;
5687 char *kaddr;
Johannes Thumshirn70730172018-12-05 15:23:03 +01005688 size_t start_offset = offset_in_page(eb->start);
Kirill A. Shutemov09cbfea2016-04-01 15:29:47 +03005689 unsigned long i = (start_offset + start) >> PAGE_SHIFT;
Chris Masond1310b22008-01-24 16:13:08 -05005690
5691 WARN_ON(start > eb->len);
5692 WARN_ON(start + len > eb->start + eb->len);
5693
Johannes Thumshirn70730172018-12-05 15:23:03 +01005694 offset = offset_in_page(start_offset + start);
Chris Masond1310b22008-01-24 16:13:08 -05005695
Chris Masond3977122009-01-05 21:25:51 -05005696 while (len > 0) {
David Sterbafb85fc92014-07-31 01:03:53 +02005697 page = eb->pages[i];
Chris Masond1310b22008-01-24 16:13:08 -05005698 WARN_ON(!PageUptodate(page));
5699
Kirill A. Shutemov09cbfea2016-04-01 15:29:47 +03005700 cur = min(len, PAGE_SIZE - offset);
Chris Masona6591712011-07-19 12:04:14 -04005701 kaddr = page_address(page);
David Sterbab159fa22016-11-08 18:09:03 +01005702 memset(kaddr + offset, 0, cur);
Chris Masond1310b22008-01-24 16:13:08 -05005703
5704 len -= cur;
5705 offset = 0;
5706 i++;
5707 }
5708}
Chris Masond1310b22008-01-24 16:13:08 -05005709
David Sterba58e80122016-11-08 18:30:31 +01005710void copy_extent_buffer_full(struct extent_buffer *dst,
5711 struct extent_buffer *src)
5712{
5713 int i;
David Sterbacc5e31a2018-03-01 18:20:27 +01005714 int num_pages;
David Sterba58e80122016-11-08 18:30:31 +01005715
5716 ASSERT(dst->len == src->len);
5717
David Sterba65ad0102018-06-29 10:56:49 +02005718 num_pages = num_extent_pages(dst);
David Sterba58e80122016-11-08 18:30:31 +01005719 for (i = 0; i < num_pages; i++)
5720 copy_page(page_address(dst->pages[i]),
5721 page_address(src->pages[i]));
5722}
5723
Chris Masond1310b22008-01-24 16:13:08 -05005724void copy_extent_buffer(struct extent_buffer *dst, struct extent_buffer *src,
5725 unsigned long dst_offset, unsigned long src_offset,
5726 unsigned long len)
5727{
5728 u64 dst_len = dst->len;
5729 size_t cur;
5730 size_t offset;
5731 struct page *page;
5732 char *kaddr;
Johannes Thumshirn70730172018-12-05 15:23:03 +01005733 size_t start_offset = offset_in_page(dst->start);
Kirill A. Shutemov09cbfea2016-04-01 15:29:47 +03005734 unsigned long i = (start_offset + dst_offset) >> PAGE_SHIFT;
Chris Masond1310b22008-01-24 16:13:08 -05005735
5736 WARN_ON(src->len != dst_len);
5737
Johannes Thumshirn70730172018-12-05 15:23:03 +01005738 offset = offset_in_page(start_offset + dst_offset);
Chris Masond1310b22008-01-24 16:13:08 -05005739
Chris Masond3977122009-01-05 21:25:51 -05005740 while (len > 0) {
David Sterbafb85fc92014-07-31 01:03:53 +02005741 page = dst->pages[i];
Chris Masond1310b22008-01-24 16:13:08 -05005742 WARN_ON(!PageUptodate(page));
5743
Kirill A. Shutemov09cbfea2016-04-01 15:29:47 +03005744 cur = min(len, (unsigned long)(PAGE_SIZE - offset));
Chris Masond1310b22008-01-24 16:13:08 -05005745
Chris Masona6591712011-07-19 12:04:14 -04005746 kaddr = page_address(page);
Chris Masond1310b22008-01-24 16:13:08 -05005747 read_extent_buffer(src, kaddr + offset, src_offset, cur);
Chris Masond1310b22008-01-24 16:13:08 -05005748
5749 src_offset += cur;
5750 len -= cur;
5751 offset = 0;
5752 i++;
5753 }
5754}
Chris Masond1310b22008-01-24 16:13:08 -05005755
Omar Sandoval3e1e8bb2015-09-29 20:50:30 -07005756/*
5757 * eb_bitmap_offset() - calculate the page and offset of the byte containing the
5758 * given bit number
5759 * @eb: the extent buffer
5760 * @start: offset of the bitmap item in the extent buffer
5761 * @nr: bit number
5762 * @page_index: return index of the page in the extent buffer that contains the
5763 * given bit number
5764 * @page_offset: return offset into the page given by page_index
5765 *
5766 * This helper hides the ugliness of finding the byte in an extent buffer which
5767 * contains a given bit.
5768 */
5769static inline void eb_bitmap_offset(struct extent_buffer *eb,
5770 unsigned long start, unsigned long nr,
5771 unsigned long *page_index,
5772 size_t *page_offset)
5773{
Johannes Thumshirn70730172018-12-05 15:23:03 +01005774 size_t start_offset = offset_in_page(eb->start);
Omar Sandoval3e1e8bb2015-09-29 20:50:30 -07005775 size_t byte_offset = BIT_BYTE(nr);
5776 size_t offset;
5777
5778 /*
5779 * The byte we want is the offset of the extent buffer + the offset of
5780 * the bitmap item in the extent buffer + the offset of the byte in the
5781 * bitmap item.
5782 */
5783 offset = start_offset + start + byte_offset;
5784
Kirill A. Shutemov09cbfea2016-04-01 15:29:47 +03005785 *page_index = offset >> PAGE_SHIFT;
Johannes Thumshirn70730172018-12-05 15:23:03 +01005786 *page_offset = offset_in_page(offset);
Omar Sandoval3e1e8bb2015-09-29 20:50:30 -07005787}
5788
5789/**
5790 * extent_buffer_test_bit - determine whether a bit in a bitmap item is set
5791 * @eb: the extent buffer
5792 * @start: offset of the bitmap item in the extent buffer
5793 * @nr: bit number to test
5794 */
5795int extent_buffer_test_bit(struct extent_buffer *eb, unsigned long start,
5796 unsigned long nr)
5797{
Omar Sandoval2fe1d552016-09-22 17:24:20 -07005798 u8 *kaddr;
Omar Sandoval3e1e8bb2015-09-29 20:50:30 -07005799 struct page *page;
5800 unsigned long i;
5801 size_t offset;
5802
5803 eb_bitmap_offset(eb, start, nr, &i, &offset);
5804 page = eb->pages[i];
5805 WARN_ON(!PageUptodate(page));
5806 kaddr = page_address(page);
5807 return 1U & (kaddr[offset] >> (nr & (BITS_PER_BYTE - 1)));
5808}
5809
5810/**
5811 * extent_buffer_bitmap_set - set an area of a bitmap
5812 * @eb: the extent buffer
5813 * @start: offset of the bitmap item in the extent buffer
5814 * @pos: bit number of the first bit
5815 * @len: number of bits to set
5816 */
5817void extent_buffer_bitmap_set(struct extent_buffer *eb, unsigned long start,
5818 unsigned long pos, unsigned long len)
5819{
Omar Sandoval2fe1d552016-09-22 17:24:20 -07005820 u8 *kaddr;
Omar Sandoval3e1e8bb2015-09-29 20:50:30 -07005821 struct page *page;
5822 unsigned long i;
5823 size_t offset;
5824 const unsigned int size = pos + len;
5825 int bits_to_set = BITS_PER_BYTE - (pos % BITS_PER_BYTE);
Omar Sandoval2fe1d552016-09-22 17:24:20 -07005826 u8 mask_to_set = BITMAP_FIRST_BYTE_MASK(pos);
Omar Sandoval3e1e8bb2015-09-29 20:50:30 -07005827
5828 eb_bitmap_offset(eb, start, pos, &i, &offset);
5829 page = eb->pages[i];
5830 WARN_ON(!PageUptodate(page));
5831 kaddr = page_address(page);
5832
5833 while (len >= bits_to_set) {
5834 kaddr[offset] |= mask_to_set;
5835 len -= bits_to_set;
5836 bits_to_set = BITS_PER_BYTE;
Dan Carpenter9c894692016-10-12 11:33:21 +03005837 mask_to_set = ~0;
Kirill A. Shutemov09cbfea2016-04-01 15:29:47 +03005838 if (++offset >= PAGE_SIZE && len > 0) {
Omar Sandoval3e1e8bb2015-09-29 20:50:30 -07005839 offset = 0;
5840 page = eb->pages[++i];
5841 WARN_ON(!PageUptodate(page));
5842 kaddr = page_address(page);
5843 }
5844 }
5845 if (len) {
5846 mask_to_set &= BITMAP_LAST_BYTE_MASK(size);
5847 kaddr[offset] |= mask_to_set;
5848 }
5849}
5850
5851
5852/**
5853 * extent_buffer_bitmap_clear - clear an area of a bitmap
5854 * @eb: the extent buffer
5855 * @start: offset of the bitmap item in the extent buffer
5856 * @pos: bit number of the first bit
5857 * @len: number of bits to clear
5858 */
5859void extent_buffer_bitmap_clear(struct extent_buffer *eb, unsigned long start,
5860 unsigned long pos, unsigned long len)
5861{
Omar Sandoval2fe1d552016-09-22 17:24:20 -07005862 u8 *kaddr;
Omar Sandoval3e1e8bb2015-09-29 20:50:30 -07005863 struct page *page;
5864 unsigned long i;
5865 size_t offset;
5866 const unsigned int size = pos + len;
5867 int bits_to_clear = BITS_PER_BYTE - (pos % BITS_PER_BYTE);
Omar Sandoval2fe1d552016-09-22 17:24:20 -07005868 u8 mask_to_clear = BITMAP_FIRST_BYTE_MASK(pos);
Omar Sandoval3e1e8bb2015-09-29 20:50:30 -07005869
5870 eb_bitmap_offset(eb, start, pos, &i, &offset);
5871 page = eb->pages[i];
5872 WARN_ON(!PageUptodate(page));
5873 kaddr = page_address(page);
5874
5875 while (len >= bits_to_clear) {
5876 kaddr[offset] &= ~mask_to_clear;
5877 len -= bits_to_clear;
5878 bits_to_clear = BITS_PER_BYTE;
Dan Carpenter9c894692016-10-12 11:33:21 +03005879 mask_to_clear = ~0;
Kirill A. Shutemov09cbfea2016-04-01 15:29:47 +03005880 if (++offset >= PAGE_SIZE && len > 0) {
Omar Sandoval3e1e8bb2015-09-29 20:50:30 -07005881 offset = 0;
5882 page = eb->pages[++i];
5883 WARN_ON(!PageUptodate(page));
5884 kaddr = page_address(page);
5885 }
5886 }
5887 if (len) {
5888 mask_to_clear &= BITMAP_LAST_BYTE_MASK(size);
5889 kaddr[offset] &= ~mask_to_clear;
5890 }
5891}
5892
/*
 * Return true if two areas of @len bytes starting at offsets @src and @dst
 * overlap, i.e. if the distance between the two offsets is less than @len.
 */
static inline bool areas_overlap(unsigned long src, unsigned long dst, unsigned long len)
{
	if (src > dst)
		return src - dst < len;
	return dst - src < len;
}
5898
/*
 * Copy @len bytes from offset @src_off of @src_page to offset @dst_off of
 * @dst_page.  memmove() is used only when both offsets are in the same page
 * and the two areas overlap; otherwise memcpy() is used.
 */
static void copy_pages(struct page *dst_page, struct page *src_page,
		       unsigned long dst_off, unsigned long src_off,
		       unsigned long len)
{
	char *dst_kaddr = page_address(dst_page);
	char *src_kaddr = dst_kaddr;
	int overlapping = 0;

	if (dst_page == src_page) {
		if (areas_overlap(src_off, dst_off, len))
			overlapping = 1;
	} else {
		src_kaddr = page_address(src_page);
	}

	if (!overlapping)
		memcpy(dst_kaddr + dst_off, src_kaddr + src_off, len);
	else
		memmove(dst_kaddr + dst_off, src_kaddr + src_off, len);
}
5920
5921void memcpy_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
5922 unsigned long src_offset, unsigned long len)
5923{
Jeff Mahoney0b246af2016-06-22 18:54:23 -04005924 struct btrfs_fs_info *fs_info = dst->fs_info;
Chris Masond1310b22008-01-24 16:13:08 -05005925 size_t cur;
5926 size_t dst_off_in_page;
5927 size_t src_off_in_page;
Johannes Thumshirn70730172018-12-05 15:23:03 +01005928 size_t start_offset = offset_in_page(dst->start);
Chris Masond1310b22008-01-24 16:13:08 -05005929 unsigned long dst_i;
5930 unsigned long src_i;
5931
5932 if (src_offset + len > dst->len) {
Jeff Mahoney0b246af2016-06-22 18:54:23 -04005933 btrfs_err(fs_info,
Jeff Mahoney5d163e02016-09-20 10:05:00 -04005934 "memmove bogus src_offset %lu move len %lu dst len %lu",
5935 src_offset, len, dst->len);
Arnd Bergmann290342f2019-03-25 14:02:25 +01005936 BUG();
Chris Masond1310b22008-01-24 16:13:08 -05005937 }
5938 if (dst_offset + len > dst->len) {
Jeff Mahoney0b246af2016-06-22 18:54:23 -04005939 btrfs_err(fs_info,
Jeff Mahoney5d163e02016-09-20 10:05:00 -04005940 "memmove bogus dst_offset %lu move len %lu dst len %lu",
5941 dst_offset, len, dst->len);
Arnd Bergmann290342f2019-03-25 14:02:25 +01005942 BUG();
Chris Masond1310b22008-01-24 16:13:08 -05005943 }
5944
Chris Masond3977122009-01-05 21:25:51 -05005945 while (len > 0) {
Johannes Thumshirn70730172018-12-05 15:23:03 +01005946 dst_off_in_page = offset_in_page(start_offset + dst_offset);
5947 src_off_in_page = offset_in_page(start_offset + src_offset);
Chris Masond1310b22008-01-24 16:13:08 -05005948
Kirill A. Shutemov09cbfea2016-04-01 15:29:47 +03005949 dst_i = (start_offset + dst_offset) >> PAGE_SHIFT;
5950 src_i = (start_offset + src_offset) >> PAGE_SHIFT;
Chris Masond1310b22008-01-24 16:13:08 -05005951
Kirill A. Shutemov09cbfea2016-04-01 15:29:47 +03005952 cur = min(len, (unsigned long)(PAGE_SIZE -
Chris Masond1310b22008-01-24 16:13:08 -05005953 src_off_in_page));
5954 cur = min_t(unsigned long, cur,
Kirill A. Shutemov09cbfea2016-04-01 15:29:47 +03005955 (unsigned long)(PAGE_SIZE - dst_off_in_page));
Chris Masond1310b22008-01-24 16:13:08 -05005956
David Sterbafb85fc92014-07-31 01:03:53 +02005957 copy_pages(dst->pages[dst_i], dst->pages[src_i],
Chris Masond1310b22008-01-24 16:13:08 -05005958 dst_off_in_page, src_off_in_page, cur);
5959
5960 src_offset += cur;
5961 dst_offset += cur;
5962 len -= cur;
5963 }
5964}
Chris Masond1310b22008-01-24 16:13:08 -05005965
5966void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
5967 unsigned long src_offset, unsigned long len)
5968{
Jeff Mahoney0b246af2016-06-22 18:54:23 -04005969 struct btrfs_fs_info *fs_info = dst->fs_info;
Chris Masond1310b22008-01-24 16:13:08 -05005970 size_t cur;
5971 size_t dst_off_in_page;
5972 size_t src_off_in_page;
5973 unsigned long dst_end = dst_offset + len - 1;
5974 unsigned long src_end = src_offset + len - 1;
Johannes Thumshirn70730172018-12-05 15:23:03 +01005975 size_t start_offset = offset_in_page(dst->start);
Chris Masond1310b22008-01-24 16:13:08 -05005976 unsigned long dst_i;
5977 unsigned long src_i;
5978
5979 if (src_offset + len > dst->len) {
Jeff Mahoney0b246af2016-06-22 18:54:23 -04005980 btrfs_err(fs_info,
Jeff Mahoney5d163e02016-09-20 10:05:00 -04005981 "memmove bogus src_offset %lu move len %lu len %lu",
5982 src_offset, len, dst->len);
Arnd Bergmann290342f2019-03-25 14:02:25 +01005983 BUG();
Chris Masond1310b22008-01-24 16:13:08 -05005984 }
5985 if (dst_offset + len > dst->len) {
Jeff Mahoney0b246af2016-06-22 18:54:23 -04005986 btrfs_err(fs_info,
Jeff Mahoney5d163e02016-09-20 10:05:00 -04005987 "memmove bogus dst_offset %lu move len %lu len %lu",
5988 dst_offset, len, dst->len);
Arnd Bergmann290342f2019-03-25 14:02:25 +01005989 BUG();
Chris Masond1310b22008-01-24 16:13:08 -05005990 }
Chris Mason727011e2010-08-06 13:21:20 -04005991 if (dst_offset < src_offset) {
Chris Masond1310b22008-01-24 16:13:08 -05005992 memcpy_extent_buffer(dst, dst_offset, src_offset, len);
5993 return;
5994 }
Chris Masond3977122009-01-05 21:25:51 -05005995 while (len > 0) {
Kirill A. Shutemov09cbfea2016-04-01 15:29:47 +03005996 dst_i = (start_offset + dst_end) >> PAGE_SHIFT;
5997 src_i = (start_offset + src_end) >> PAGE_SHIFT;
Chris Masond1310b22008-01-24 16:13:08 -05005998
Johannes Thumshirn70730172018-12-05 15:23:03 +01005999 dst_off_in_page = offset_in_page(start_offset + dst_end);
6000 src_off_in_page = offset_in_page(start_offset + src_end);
Chris Masond1310b22008-01-24 16:13:08 -05006001
6002 cur = min_t(unsigned long, len, src_off_in_page + 1);
6003 cur = min(cur, dst_off_in_page + 1);
David Sterbafb85fc92014-07-31 01:03:53 +02006004 copy_pages(dst->pages[dst_i], dst->pages[src_i],
Chris Masond1310b22008-01-24 16:13:08 -05006005 dst_off_in_page - cur + 1,
6006 src_off_in_page - cur + 1, cur);
6007
6008 dst_end -= cur;
6009 src_end -= cur;
6010 len -= cur;
6011 }
6012}
Chris Mason6af118ce2008-07-22 11:18:07 -04006013
David Sterbaf7a52a42013-04-26 14:56:29 +00006014int try_release_extent_buffer(struct page *page)
Miao Xie19fe0a82010-10-26 20:57:29 -04006015{
Chris Mason6af118ce2008-07-22 11:18:07 -04006016 struct extent_buffer *eb;
Miao Xie897ca6e92010-10-26 20:57:29 -04006017
Miao Xie19fe0a82010-10-26 20:57:29 -04006018 /*
Nicholas D Steeves01327612016-05-19 21:18:45 -04006019 * We need to make sure nobody is attaching this page to an eb right
Josef Bacik3083ee22012-03-09 16:01:49 -05006020 * now.
Miao Xie19fe0a82010-10-26 20:57:29 -04006021 */
Josef Bacik3083ee22012-03-09 16:01:49 -05006022 spin_lock(&page->mapping->private_lock);
6023 if (!PagePrivate(page)) {
6024 spin_unlock(&page->mapping->private_lock);
6025 return 1;
Miao Xie19fe0a82010-10-26 20:57:29 -04006026 }
6027
Josef Bacik3083ee22012-03-09 16:01:49 -05006028 eb = (struct extent_buffer *)page->private;
6029 BUG_ON(!eb);
Miao Xie19fe0a82010-10-26 20:57:29 -04006030
Josef Bacik0b32f4b2012-03-13 09:38:00 -04006031 /*
Josef Bacik3083ee22012-03-09 16:01:49 -05006032 * This is a little awful but should be ok, we need to make sure that
6033 * the eb doesn't disappear out from under us while we're looking at
6034 * this page.
6035 */
6036 spin_lock(&eb->refs_lock);
Josef Bacik0b32f4b2012-03-13 09:38:00 -04006037 if (atomic_read(&eb->refs) != 1 || extent_buffer_under_io(eb)) {
Josef Bacik3083ee22012-03-09 16:01:49 -05006038 spin_unlock(&eb->refs_lock);
6039 spin_unlock(&page->mapping->private_lock);
6040 return 0;
6041 }
6042 spin_unlock(&page->mapping->private_lock);
6043
Josef Bacik3083ee22012-03-09 16:01:49 -05006044 /*
6045 * If tree ref isn't set then we know the ref on this eb is a real ref,
6046 * so just return, this page will likely be freed soon anyway.
6047 */
6048 if (!test_and_clear_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags)) {
6049 spin_unlock(&eb->refs_lock);
6050 return 0;
6051 }
Josef Bacik3083ee22012-03-09 16:01:49 -05006052
David Sterbaf7a52a42013-04-26 14:56:29 +00006053 return release_extent_buffer(eb);
Chris Mason6af118ce2008-07-22 11:18:07 -04006054}