blob: 59d59d98bca141bd4665730911cd5ec95ed25863 [file] [log] [blame]
Chris Mason6cbd5572007-06-12 09:07:21 -04001/*
2 * Copyright (C) 2007 Oracle. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
16 * Boston, MA 02110-1301, USA.
17 */
Zach Brownec6b9102007-07-11 10:00:37 -040018#include <linux/sched.h>
Chris Masonedbd8d42007-12-21 16:27:24 -050019#include <linux/pagemap.h>
Chris Masonec44a352008-04-28 15:29:52 -040020#include <linux/writeback.h>
David Woodhouse21af8042008-08-12 14:13:26 +010021#include <linux/blkdev.h>
Chris Masonb7a9f292009-02-04 09:23:45 -050022#include <linux/sort.h>
Chris Mason4184ea72009-03-10 12:39:20 -040023#include <linux/rcupdate.h>
Josef Bacik817d52f2009-07-13 21:29:25 -040024#include <linux/kthread.h>
Tejun Heo5a0e3ad2010-03-24 17:04:11 +090025#include <linux/slab.h>
David Sterbadff51cd2011-06-14 12:52:17 +020026#include <linux/ratelimit.h>
Josef Bacikb150a4f2013-06-19 15:00:04 -040027#include <linux/percpu_counter.h>
Chris Mason74493f72007-12-11 09:25:06 -050028#include "hash.h"
Miao Xie995946d2014-04-02 19:51:06 +080029#include "tree-log.h"
Chris Masonfec577f2007-02-26 10:40:21 -050030#include "disk-io.h"
31#include "print-tree.h"
Chris Mason0b86a832008-03-24 15:01:56 -040032#include "volumes.h"
David Woodhouse53b381b2013-01-29 18:40:14 -050033#include "raid56.h"
Chris Mason925baed2008-06-25 16:01:30 -040034#include "locking.h"
Chris Masonfa9c0d792009-04-03 09:47:43 -040035#include "free-space-cache.h"
Miao Xie3fed40c2012-09-13 04:51:36 -060036#include "math.h"
Jeff Mahoney6ab0a202013-11-01 13:07:04 -040037#include "sysfs.h"
Josef Bacikfcebe452014-05-13 17:30:47 -070038#include "qgroup.h"
Chris Masonfec577f2007-02-26 10:40:21 -050039
Arne Jansen709c0482011-09-12 12:22:57 +020040#undef SCRAMBLE_DELAYED_REFS
41
Miao Xie9e622d62012-01-26 15:01:12 -050042/*
43 * control flags for do_chunk_alloc's force field
Chris Mason0e4f8f82011-04-15 16:05:44 -040044 * CHUNK_ALLOC_NO_FORCE means to only allocate a chunk
45 * if we really need one.
46 *
Chris Mason0e4f8f82011-04-15 16:05:44 -040047 * CHUNK_ALLOC_LIMITED means to only try and allocate one
48 * if we have very few chunks already allocated. This is
49 * used as part of the clustering code to help make sure
50 * we have a good pool of storage to cluster in, without
51 * filling the FS with empty chunks
52 *
Miao Xie9e622d62012-01-26 15:01:12 -050053 * CHUNK_ALLOC_FORCE means it must try to allocate one
54 *
Chris Mason0e4f8f82011-04-15 16:05:44 -040055 */
enum {
	CHUNK_ALLOC_NO_FORCE = 0,	/* allocate a chunk only if really needed */
	CHUNK_ALLOC_LIMITED = 1,	/* allocate only if very few chunks exist */
	CHUNK_ALLOC_FORCE = 2,		/* must try to allocate one */
};
61
Josef Bacikfb25e912011-07-26 17:00:46 -040062/*
63 * Control how reservations are dealt with.
64 *
65 * RESERVE_FREE - freeing a reservation.
66 * RESERVE_ALLOC - allocating space and we need to update bytes_may_use for
67 * ENOSPC accounting
68 * RESERVE_ALLOC_NO_ACCOUNT - allocating space and we should not update
69 * bytes_may_use as the ENOSPC accounting is done elsewhere
70 */
enum {
	RESERVE_FREE = 0,		/* freeing a reservation */
	RESERVE_ALLOC = 1,		/* allocating; update bytes_may_use */
	RESERVE_ALLOC_NO_ACCOUNT = 2,	/* allocating; ENOSPC accounted elsewhere */
};
76
Josef Bacikce93ec52014-11-17 15:45:48 -050077static int update_block_group(struct btrfs_trans_handle *trans,
78 struct btrfs_root *root, u64 bytenr,
79 u64 num_bytes, int alloc);
Yan Zheng5d4f98a2009-06-10 10:45:14 -040080static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
81 struct btrfs_root *root,
Qu Wenruoc682f9b2015-03-17 16:59:47 +080082 struct btrfs_delayed_ref_node *node, u64 parent,
Yan Zheng5d4f98a2009-06-10 10:45:14 -040083 u64 root_objectid, u64 owner_objectid,
84 u64 owner_offset, int refs_to_drop,
Qu Wenruoc682f9b2015-03-17 16:59:47 +080085 struct btrfs_delayed_extent_op *extra_op);
Yan Zheng5d4f98a2009-06-10 10:45:14 -040086static void __run_delayed_extent_op(struct btrfs_delayed_extent_op *extent_op,
87 struct extent_buffer *leaf,
88 struct btrfs_extent_item *ei);
89static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
90 struct btrfs_root *root,
91 u64 parent, u64 root_objectid,
92 u64 flags, u64 owner, u64 offset,
93 struct btrfs_key *ins, int ref_mod);
94static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
95 struct btrfs_root *root,
96 u64 parent, u64 root_objectid,
97 u64 flags, struct btrfs_disk_key *key,
Josef Bacikfcebe452014-05-13 17:30:47 -070098 int level, struct btrfs_key *ins,
99 int no_quota);
Josef Bacik6a632092009-02-20 11:00:09 -0500100static int do_chunk_alloc(struct btrfs_trans_handle *trans,
Josef Bacik698d0082012-09-12 14:08:47 -0400101 struct btrfs_root *extent_root, u64 flags,
102 int force);
Yan Zheng11833d62009-09-11 16:11:19 -0400103static int find_next_key(struct btrfs_path *path, int level,
104 struct btrfs_key *key);
Josef Bacik9ed74f22009-09-11 16:12:44 -0400105static void dump_space_info(struct btrfs_space_info *info, u64 bytes,
106 int dump_block_groups);
Josef Bacikfb25e912011-07-26 17:00:46 -0400107static int btrfs_update_reserved_bytes(struct btrfs_block_group_cache *cache,
Miao Xiee570fd22014-06-19 10:42:50 +0800108 u64 num_bytes, int reserve,
109 int delalloc);
Josef Bacik5d803662013-02-07 16:06:02 -0500110static int block_rsv_use_bytes(struct btrfs_block_rsv *block_rsv,
111 u64 num_bytes);
Eric Sandeen48a3b632013-04-25 20:41:01 +0000112int btrfs_pin_extent(struct btrfs_root *root,
113 u64 bytenr, u64 num_bytes, int reserved);
Josef Bacik6a632092009-02-20 11:00:09 -0500114
/*
 * Return non-zero once free-space caching for this block group has reached
 * a terminal state: fully cached, or failed with an error.
 */
static noinline int
block_group_cache_done(struct btrfs_block_group_cache *cache)
{
	/* order the ->cached read against prior updates by the caching thread */
	smp_mb();
	return cache->cached == BTRFS_CACHE_FINISHED ||
		cache->cached == BTRFS_CACHE_ERROR;
}
122
Josef Bacik0f9dd462008-09-23 13:14:11 -0400123static int block_group_bits(struct btrfs_block_group_cache *cache, u64 bits)
124{
125 return (cache->flags & bits) == bits;
126}
127
/* Take a reference on a block group; paired with btrfs_put_block_group(). */
static void btrfs_get_block_group(struct btrfs_block_group_cache *cache)
{
	atomic_inc(&cache->count);
}
132
/*
 * Drop a reference on a block group.  When the last reference goes away the
 * group and its free-space control structure are freed; by then nothing may
 * still be pinned or reserved in it.
 */
void btrfs_put_block_group(struct btrfs_block_group_cache *cache)
{
	if (atomic_dec_and_test(&cache->count)) {
		WARN_ON(cache->pinned > 0);
		WARN_ON(cache->reserved > 0);
		kfree(cache->free_space_ctl);
		kfree(cache);
	}
}
142
Josef Bacik0f9dd462008-09-23 13:14:11 -0400143/*
144 * this adds the block group to the fs_info rb tree for the block group
145 * cache
146 */
Christoph Hellwigb2950862008-12-02 09:54:17 -0500147static int btrfs_add_block_group_cache(struct btrfs_fs_info *info,
Josef Bacik0f9dd462008-09-23 13:14:11 -0400148 struct btrfs_block_group_cache *block_group)
149{
150 struct rb_node **p;
151 struct rb_node *parent = NULL;
152 struct btrfs_block_group_cache *cache;
153
154 spin_lock(&info->block_group_cache_lock);
155 p = &info->block_group_cache_tree.rb_node;
156
157 while (*p) {
158 parent = *p;
159 cache = rb_entry(parent, struct btrfs_block_group_cache,
160 cache_node);
161 if (block_group->key.objectid < cache->key.objectid) {
162 p = &(*p)->rb_left;
163 } else if (block_group->key.objectid > cache->key.objectid) {
164 p = &(*p)->rb_right;
165 } else {
166 spin_unlock(&info->block_group_cache_lock);
167 return -EEXIST;
168 }
169 }
170
171 rb_link_node(&block_group->cache_node, parent, p);
172 rb_insert_color(&block_group->cache_node,
173 &info->block_group_cache_tree);
Liu Boa1897fd2012-12-27 09:01:23 +0000174
175 if (info->first_logical_byte > block_group->key.objectid)
176 info->first_logical_byte = block_group->key.objectid;
177
Josef Bacik0f9dd462008-09-23 13:14:11 -0400178 spin_unlock(&info->block_group_cache_lock);
179
180 return 0;
181}
182
183/*
184 * This will return the block group at or after bytenr if contains is 0, else
185 * it will return the block group that contains the bytenr
186 */
/*
 * Walk the block group rb-tree looking for @bytenr.
 *
 * @contains == 0: return the group starting at or after @bytenr.
 * @contains != 0: return the group whose range contains @bytenr.
 *
 * A reference is taken on the returned group (caller must drop it with
 * btrfs_put_block_group()); NULL if no match.
 */
static struct btrfs_block_group_cache *
block_group_cache_tree_search(struct btrfs_fs_info *info, u64 bytenr,
			      int contains)
{
	struct btrfs_block_group_cache *cache, *ret = NULL;
	struct rb_node *n;
	u64 end, start;

	spin_lock(&info->block_group_cache_lock);
	n = info->block_group_cache_tree.rb_node;

	while (n) {
		cache = rb_entry(n, struct btrfs_block_group_cache,
				 cache_node);
		end = cache->key.objectid + cache->key.offset - 1;
		start = cache->key.objectid;

		if (bytenr < start) {
			/* remember the closest group that starts after bytenr */
			if (!contains && (!ret || start < ret->key.objectid))
				ret = cache;
			n = n->rb_left;
		} else if (bytenr > start) {
			if (contains && bytenr <= end) {
				ret = cache;
				break;
			}
			n = n->rb_right;
		} else {
			/* exact start match satisfies both modes */
			ret = cache;
			break;
		}
	}
	if (ret) {
		btrfs_get_block_group(ret);
		/* a search from 0 found the lowest group: cache its start */
		if (bytenr == 0 && info->first_logical_byte > ret->key.objectid)
			info->first_logical_byte = ret->key.objectid;
	}
	spin_unlock(&info->block_group_cache_lock);

	return ret;
}
228
Yan Zheng11833d62009-09-11 16:11:19 -0400229static int add_excluded_extent(struct btrfs_root *root,
230 u64 start, u64 num_bytes)
Josef Bacik817d52f2009-07-13 21:29:25 -0400231{
Yan Zheng11833d62009-09-11 16:11:19 -0400232 u64 end = start + num_bytes - 1;
233 set_extent_bits(&root->fs_info->freed_extents[0],
234 start, end, EXTENT_UPTODATE, GFP_NOFS);
235 set_extent_bits(&root->fs_info->freed_extents[1],
236 start, end, EXTENT_UPTODATE, GFP_NOFS);
237 return 0;
Josef Bacik817d52f2009-07-13 21:29:25 -0400238}
239
Yan Zheng11833d62009-09-11 16:11:19 -0400240static void free_excluded_extents(struct btrfs_root *root,
241 struct btrfs_block_group_cache *cache)
Josef Bacik817d52f2009-07-13 21:29:25 -0400242{
Yan Zheng11833d62009-09-11 16:11:19 -0400243 u64 start, end;
244
245 start = cache->key.objectid;
246 end = start + cache->key.offset - 1;
247
248 clear_extent_bits(&root->fs_info->freed_extents[0],
249 start, end, EXTENT_UPTODATE, GFP_NOFS);
250 clear_extent_bits(&root->fs_info->freed_extents[1],
251 start, end, EXTENT_UPTODATE, GFP_NOFS);
252}
253
/*
 * Exclude the on-disk super block mirrors that fall inside @cache from
 * free-space caching, accumulating their size in cache->bytes_super.
 * Returns 0 on success or a negative errno from the mapping/exclusion calls.
 */
static int exclude_super_stripes(struct btrfs_root *root,
				 struct btrfs_block_group_cache *cache)
{
	u64 bytenr;
	u64 *logical;
	int stripe_len;
	int i, nr, ret;

	/* the area below the first super block is never usable */
	if (cache->key.objectid < BTRFS_SUPER_INFO_OFFSET) {
		stripe_len = BTRFS_SUPER_INFO_OFFSET - cache->key.objectid;
		cache->bytes_super += stripe_len;
		ret = add_excluded_extent(root, cache->key.objectid,
					  stripe_len);
		if (ret)
			return ret;
	}

	for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
		bytenr = btrfs_sb_offset(i);
		/* map the mirror's physical location back to logical ranges;
		 * allocates @logical, which we must free below */
		ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
				       cache->key.objectid, bytenr,
				       0, &logical, &nr, &stripe_len);
		if (ret)
			return ret;

		while (nr--) {
			u64 start, len;

			/* skip stripes entirely outside this block group */
			if (logical[nr] > cache->key.objectid +
			    cache->key.offset)
				continue;

			if (logical[nr] + stripe_len <= cache->key.objectid)
				continue;

			/* clamp the stripe to the block group's boundaries */
			start = logical[nr];
			if (start < cache->key.objectid) {
				start = cache->key.objectid;
				len = (logical[nr] + stripe_len) - start;
			} else {
				len = min_t(u64, stripe_len,
					    cache->key.objectid +
					    cache->key.offset - start);
			}

			cache->bytes_super += len;
			ret = add_excluded_extent(root, start, len);
			if (ret) {
				/* don't leak the rmap array on error */
				kfree(logical);
				return ret;
			}
		}

		kfree(logical);
	}
	return 0;
}
311
Yan Zheng11833d62009-09-11 16:11:19 -0400312static struct btrfs_caching_control *
313get_caching_control(struct btrfs_block_group_cache *cache)
314{
315 struct btrfs_caching_control *ctl;
316
317 spin_lock(&cache->lock);
Josef Bacikdde5abe2010-09-16 16:17:03 -0400318 if (!cache->caching_ctl) {
319 spin_unlock(&cache->lock);
320 return NULL;
321 }
322
Yan Zheng11833d62009-09-11 16:11:19 -0400323 ctl = cache->caching_ctl;
324 atomic_inc(&ctl->count);
325 spin_unlock(&cache->lock);
326 return ctl;
327}
328
329static void put_caching_control(struct btrfs_caching_control *ctl)
330{
331 if (atomic_dec_and_test(&ctl->count))
332 kfree(ctl);
333}
334
Josef Bacik0f9dd462008-09-23 13:14:11 -0400335/*
336 * this is only called by cache_block_group, since we could have freed extents
337 * we need to check the pinned_extents for any extents that can't be used yet
338 * since their free space will be released as soon as the transaction commits.
339 */
/*
 * Add the gaps of [start, end) that are not pinned (or already excluded)
 * to the block group's free-space cache.  Returns the total number of
 * bytes added.
 */
static u64 add_new_free_space(struct btrfs_block_group_cache *block_group,
			      struct btrfs_fs_info *info, u64 start, u64 end)
{
	u64 extent_start, extent_end, size, total_added = 0;
	int ret;

	while (start < end) {
		/* find the next pinned/excluded range at or after start */
		ret = find_first_extent_bit(info->pinned_extents, start,
					    &extent_start, &extent_end,
					    EXTENT_DIRTY | EXTENT_UPTODATE,
					    NULL);
		if (ret)
			break;

		if (extent_start <= start) {
			/* start sits inside a pinned range: skip past it */
			start = extent_end + 1;
		} else if (extent_start > start && extent_start < end) {
			/* free gap before the next pinned range */
			size = extent_start - start;
			total_added += size;
			ret = btrfs_add_free_space(block_group, start,
						   size);
			BUG_ON(ret); /* -ENOMEM or logic error */
			start = extent_end + 1;
		} else {
			/* next pinned range is beyond end; tail handled below */
			break;
		}
	}

	/* the remaining tail of the range is entirely free */
	if (start < end) {
		size = end - start;
		total_added += size;
		ret = btrfs_add_free_space(block_group, start, size);
		BUG_ON(ret); /* -ENOMEM or logic error */
	}

	return total_added;
}
377
/*
 * Background worker that populates a block group's free-space cache by
 * scanning the extent tree: every gap between extent items inside the
 * group's range is free space.  On completion the group is marked
 * BTRFS_CACHE_FINISHED; on any failure it is marked BTRFS_CACHE_ERROR.
 * Drops the caching-control and block-group references taken by
 * cache_block_group() when it queued this work.
 */
static noinline void caching_thread(struct btrfs_work *work)
{
	struct btrfs_block_group_cache *block_group;
	struct btrfs_fs_info *fs_info;
	struct btrfs_caching_control *caching_ctl;
	struct btrfs_root *extent_root;
	struct btrfs_path *path;
	struct extent_buffer *leaf;
	struct btrfs_key key;
	u64 total_found = 0;
	u64 last = 0;
	u32 nritems;
	int ret = -ENOMEM;

	caching_ctl = container_of(work, struct btrfs_caching_control, work);
	block_group = caching_ctl->block_group;
	fs_info = block_group->fs_info;
	extent_root = fs_info->extent_root;

	path = btrfs_alloc_path();
	if (!path)
		goto out;

	/* never scan below the super block area */
	last = max_t(u64, block_group->key.objectid, BTRFS_SUPER_INFO_OFFSET);

	/*
	 * We don't want to deadlock with somebody trying to allocate a new
	 * extent for the extent root while also trying to search the extent
	 * root to add free space. So we skip locking and search the commit
	 * root, since its read-only
	 */
	path->skip_locking = 1;
	path->search_commit_root = 1;
	path->reada = 1;

	key.objectid = last;
	key.offset = 0;
	key.type = BTRFS_EXTENT_ITEM_KEY;
again:
	mutex_lock(&caching_ctl->mutex);
	/* need to make sure the commit_root doesn't disappear */
	down_read(&fs_info->commit_root_sem);

next:
	ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
	if (ret < 0)
		goto err;

	leaf = path->nodes[0];
	nritems = btrfs_header_nritems(leaf);

	while (1) {
		/* bail out early when the filesystem is going down */
		if (btrfs_fs_closing(fs_info) > 1) {
			last = (u64)-1;
			break;
		}

		if (path->slots[0] < nritems) {
			btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
		} else {
			ret = find_next_key(path, 0, &key);
			if (ret)
				break;

			/*
			 * Yield the commit_root_sem if someone is waiting on
			 * it (or we need to reschedule), then restart the
			 * search from the recorded progress point.
			 */
			if (need_resched() ||
			    rwsem_is_contended(&fs_info->commit_root_sem)) {
				caching_ctl->progress = last;
				btrfs_release_path(path);
				up_read(&fs_info->commit_root_sem);
				mutex_unlock(&caching_ctl->mutex);
				cond_resched();
				goto again;
			}

			ret = btrfs_next_leaf(extent_root, path);
			if (ret < 0)
				goto err;
			if (ret)
				break;
			leaf = path->nodes[0];
			nritems = btrfs_header_nritems(leaf);
			continue;
		}

		/* never walk backwards past already-processed space */
		if (key.objectid < last) {
			key.objectid = last;
			key.offset = 0;
			key.type = BTRFS_EXTENT_ITEM_KEY;

			caching_ctl->progress = last;
			btrfs_release_path(path);
			goto next;
		}

		if (key.objectid < block_group->key.objectid) {
			path->slots[0]++;
			continue;
		}

		/* past the end of this block group: done scanning */
		if (key.objectid >= block_group->key.objectid +
		    block_group->key.offset)
			break;

		if (key.type == BTRFS_EXTENT_ITEM_KEY ||
		    key.type == BTRFS_METADATA_ITEM_KEY) {
			/* the gap [last, key.objectid) is free space */
			total_found += add_new_free_space(block_group,
							  fs_info, last,
							  key.objectid);
			/* metadata items encode level in offset, not length */
			if (key.type == BTRFS_METADATA_ITEM_KEY)
				last = key.objectid +
					fs_info->tree_root->nodesize;
			else
				last = key.objectid + key.offset;

			/* wake allocators waiting for progress every 2MB */
			if (total_found > (1024 * 1024 * 2)) {
				total_found = 0;
				wake_up(&caching_ctl->wait);
			}
		}
		path->slots[0]++;
	}
	ret = 0;

	/* whatever is left after the final extent item is free space */
	total_found += add_new_free_space(block_group, fs_info, last,
					  block_group->key.objectid +
					  block_group->key.offset);
	caching_ctl->progress = (u64)-1;

	spin_lock(&block_group->lock);
	block_group->caching_ctl = NULL;
	block_group->cached = BTRFS_CACHE_FINISHED;
	spin_unlock(&block_group->lock);

err:
	btrfs_free_path(path);
	up_read(&fs_info->commit_root_sem);

	free_excluded_extents(extent_root, block_group);

	mutex_unlock(&caching_ctl->mutex);
out:
	if (ret) {
		spin_lock(&block_group->lock);
		block_group->caching_ctl = NULL;
		block_group->cached = BTRFS_CACHE_ERROR;
		spin_unlock(&block_group->lock);
	}
	wake_up(&caching_ctl->wait);

	put_caching_control(caching_ctl);
	btrfs_put_block_group(block_group);
}
530
/*
 * Start (or finish) caching the free space of @cache.  First tries the fast
 * path of loading the on-disk space cache; if that is unavailable or fails,
 * kicks off caching_thread() to rebuild it from the extent tree — unless
 * @load_cache_only is set, in which case only the fast path is attempted.
 * Returns 0 or -ENOMEM.
 */
static int cache_block_group(struct btrfs_block_group_cache *cache,
			     int load_cache_only)
{
	DEFINE_WAIT(wait);
	struct btrfs_fs_info *fs_info = cache->fs_info;
	struct btrfs_caching_control *caching_ctl;
	int ret = 0;

	caching_ctl = kzalloc(sizeof(*caching_ctl), GFP_NOFS);
	if (!caching_ctl)
		return -ENOMEM;

	INIT_LIST_HEAD(&caching_ctl->list);
	mutex_init(&caching_ctl->mutex);
	init_waitqueue_head(&caching_ctl->wait);
	caching_ctl->block_group = cache;
	caching_ctl->progress = cache->key.objectid;
	atomic_set(&caching_ctl->count, 1);
	btrfs_init_work(&caching_ctl->work, btrfs_cache_helper,
			caching_thread, NULL, NULL);

	spin_lock(&cache->lock);
	/*
	 * This should be a rare occasion, but this could happen I think in the
	 * case where one thread starts to load the space cache info, and then
	 * some other thread starts a transaction commit which tries to do an
	 * allocation while the other thread is still loading the space cache
	 * info. The previous loop should have kept us from choosing this block
	 * group, but if we've moved to the state where we will wait on caching
	 * block groups we need to first check if we're doing a fast load here,
	 * so we can wait for it to finish, otherwise we could end up allocating
	 * from a block group who's cache gets evicted for one reason or
	 * another.
	 */
	while (cache->cached == BTRFS_CACHE_FAST) {
		struct btrfs_caching_control *ctl;

		/* hold a ref so ctl survives while we sleep unlocked */
		ctl = cache->caching_ctl;
		atomic_inc(&ctl->count);
		prepare_to_wait(&ctl->wait, &wait, TASK_UNINTERRUPTIBLE);
		spin_unlock(&cache->lock);

		schedule();

		finish_wait(&ctl->wait, &wait);
		put_caching_control(ctl);
		spin_lock(&cache->lock);
	}

	/* somebody else already cached (or is caching) this group */
	if (cache->cached != BTRFS_CACHE_NO) {
		spin_unlock(&cache->lock);
		kfree(caching_ctl);
		return 0;
	}
	WARN_ON(cache->caching_ctl);
	cache->caching_ctl = caching_ctl;
	cache->cached = BTRFS_CACHE_FAST;
	spin_unlock(&cache->lock);

	if (fs_info->mount_opt & BTRFS_MOUNT_SPACE_CACHE) {
		mutex_lock(&caching_ctl->mutex);
		/* fast path: load the persisted free-space cache from disk */
		ret = load_free_space_cache(fs_info, cache);

		spin_lock(&cache->lock);
		if (ret == 1) {
			/* fully loaded: no background caching needed */
			cache->caching_ctl = NULL;
			cache->cached = BTRFS_CACHE_FINISHED;
			cache->last_byte_to_unpin = (u64)-1;
			caching_ctl->progress = (u64)-1;
		} else {
			if (load_cache_only) {
				/* caller only wanted the fast path: back off */
				cache->caching_ctl = NULL;
				cache->cached = BTRFS_CACHE_NO;
			} else {
				cache->cached = BTRFS_CACHE_STARTED;
				cache->has_caching_ctl = 1;
			}
		}
		spin_unlock(&cache->lock);
		mutex_unlock(&caching_ctl->mutex);

		wake_up(&caching_ctl->wait);
		if (ret == 1) {
			put_caching_control(caching_ctl);
			free_excluded_extents(fs_info->extent_root, cache);
			return 0;
		}
	} else {
		/*
		 * We are not going to do the fast caching, set cached to the
		 * appropriate value and wakeup any waiters.
		 */
		spin_lock(&cache->lock);
		if (load_cache_only) {
			cache->caching_ctl = NULL;
			cache->cached = BTRFS_CACHE_NO;
		} else {
			cache->cached = BTRFS_CACHE_STARTED;
			cache->has_caching_ctl = 1;
		}
		spin_unlock(&cache->lock);
		wake_up(&caching_ctl->wait);
	}

	if (load_cache_only) {
		put_caching_control(caching_ctl);
		return 0;
	}

	/* publish the control so transaction commit can find it */
	down_write(&fs_info->commit_root_sem);
	atomic_inc(&caching_ctl->count);
	list_add_tail(&caching_ctl->list, &fs_info->caching_block_groups);
	up_write(&fs_info->commit_root_sem);

	/* caching_thread drops this block group reference when done */
	btrfs_get_block_group(cache);

	btrfs_queue_work(fs_info->caching_workers, &caching_ctl->work);

	return ret;
}
651
Josef Bacik0f9dd462008-09-23 13:14:11 -0400652/*
653 * return the block group that starts at or after bytenr
654 */
Chris Masond3977122009-01-05 21:25:51 -0500655static struct btrfs_block_group_cache *
656btrfs_lookup_first_block_group(struct btrfs_fs_info *info, u64 bytenr)
Chris Mason0ef3e662008-05-24 14:04:53 -0400657{
Josef Bacik0f9dd462008-09-23 13:14:11 -0400658 struct btrfs_block_group_cache *cache;
Chris Mason0ef3e662008-05-24 14:04:53 -0400659
Josef Bacik0f9dd462008-09-23 13:14:11 -0400660 cache = block_group_cache_tree_search(info, bytenr, 0);
Chris Mason0ef3e662008-05-24 14:04:53 -0400661
Josef Bacik0f9dd462008-09-23 13:14:11 -0400662 return cache;
Chris Mason0ef3e662008-05-24 14:04:53 -0400663}
664
Josef Bacik0f9dd462008-09-23 13:14:11 -0400665/*
Sankar P9f556842009-05-14 13:52:22 -0400666 * return the block group that contains the given bytenr
Josef Bacik0f9dd462008-09-23 13:14:11 -0400667 */
Chris Masond3977122009-01-05 21:25:51 -0500668struct btrfs_block_group_cache *btrfs_lookup_block_group(
669 struct btrfs_fs_info *info,
670 u64 bytenr)
Chris Masonbe744172007-05-06 10:15:01 -0400671{
Josef Bacik0f9dd462008-09-23 13:14:11 -0400672 struct btrfs_block_group_cache *cache;
Chris Masonbe744172007-05-06 10:15:01 -0400673
Josef Bacik0f9dd462008-09-23 13:14:11 -0400674 cache = block_group_cache_tree_search(info, bytenr, 1);
Chris Mason96b51792007-10-15 16:15:19 -0400675
Josef Bacik0f9dd462008-09-23 13:14:11 -0400676 return cache;
Chris Masonbe744172007-05-06 10:15:01 -0400677}
Chris Mason0b86a832008-03-24 15:01:56 -0400678
Josef Bacik0f9dd462008-09-23 13:14:11 -0400679static struct btrfs_space_info *__find_space_info(struct btrfs_fs_info *info,
680 u64 flags)
Chris Mason6324fbf2008-03-24 15:01:59 -0400681{
Josef Bacik0f9dd462008-09-23 13:14:11 -0400682 struct list_head *head = &info->space_info;
Josef Bacik0f9dd462008-09-23 13:14:11 -0400683 struct btrfs_space_info *found;
Chris Mason4184ea72009-03-10 12:39:20 -0400684
Ilya Dryomov52ba6922012-01-16 22:04:47 +0200685 flags &= BTRFS_BLOCK_GROUP_TYPE_MASK;
Yan, Zhengb742bb82010-05-16 10:46:24 -0400686
Chris Mason4184ea72009-03-10 12:39:20 -0400687 rcu_read_lock();
688 list_for_each_entry_rcu(found, head, list) {
Josef Bacik67377732010-09-16 16:19:09 -0400689 if (found->flags & flags) {
Chris Mason4184ea72009-03-10 12:39:20 -0400690 rcu_read_unlock();
Josef Bacik0f9dd462008-09-23 13:14:11 -0400691 return found;
Chris Mason4184ea72009-03-10 12:39:20 -0400692 }
Josef Bacik0f9dd462008-09-23 13:14:11 -0400693 }
Chris Mason4184ea72009-03-10 12:39:20 -0400694 rcu_read_unlock();
Josef Bacik0f9dd462008-09-23 13:14:11 -0400695 return NULL;
Chris Mason6324fbf2008-03-24 15:01:59 -0400696}
697
Chris Mason4184ea72009-03-10 12:39:20 -0400698/*
699 * after adding space to the filesystem, we need to clear the full flags
700 * on all the space infos.
701 */
702void btrfs_clear_space_info_full(struct btrfs_fs_info *info)
703{
704 struct list_head *head = &info->space_info;
705 struct btrfs_space_info *found;
706
707 rcu_read_lock();
708 list_for_each_entry_rcu(found, head, list)
709 found->full = 0;
710 rcu_read_unlock();
711}
712
Filipe Manana1a4ed8f2014-10-27 10:44:24 +0000713/* simple helper to search for an existing data extent at a given offset */
714int btrfs_lookup_data_extent(struct btrfs_root *root, u64 start, u64 len)
Chris Masone02119d2008-09-05 16:13:11 -0400715{
716 int ret;
717 struct btrfs_key key;
Zheng Yan31840ae2008-09-23 13:14:14 -0400718 struct btrfs_path *path;
Chris Masone02119d2008-09-05 16:13:11 -0400719
Zheng Yan31840ae2008-09-23 13:14:14 -0400720 path = btrfs_alloc_path();
Mark Fashehd8926bb2011-07-13 10:38:47 -0700721 if (!path)
722 return -ENOMEM;
723
Chris Masone02119d2008-09-05 16:13:11 -0400724 key.objectid = start;
725 key.offset = len;
Josef Bacik3173a182013-03-07 14:22:04 -0500726 key.type = BTRFS_EXTENT_ITEM_KEY;
Chris Masone02119d2008-09-05 16:13:11 -0400727 ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, path,
728 0, 0);
Zheng Yan31840ae2008-09-23 13:14:14 -0400729 btrfs_free_path(path);
Chris Mason7bb86312007-12-11 09:25:06 -0500730 return ret;
731}
732
/*
 * Look up the reference count and flags of an extent.
 *
 * The head node for a delayed ref stores the sum of all the reference
 * count modifications queued up in the rbtree.  The head node may also
 * store extent flags to set.  This way you can check to see what the
 * reference count and extent flags would be once all of the queued
 * delayed refs are processed.
 *
 * @trans:    may be NULL; then the commit root is searched without
 *            locking and delayed refs are not consulted.
 * @offset:   extent length; for skinny metadata items, the block level.
 * @metadata: non-zero if bytenr refers to a tree block.
 * @refs:     optional out: reference count including queued delayed refs.
 * @flags:    optional out: extent flags including queued flag updates.
 *
 * Returns 0 on success or a negative errno from the tree search.
 */
int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans,
			     struct btrfs_root *root, u64 bytenr,
			     u64 offset, int metadata, u64 *refs, u64 *flags)
{
	struct btrfs_delayed_ref_head *head;
	struct btrfs_delayed_ref_root *delayed_refs;
	struct btrfs_path *path;
	struct btrfs_extent_item *ei;
	struct extent_buffer *leaf;
	struct btrfs_key key;
	u32 item_size;
	u64 num_refs;
	u64 extent_flags;
	int ret;

	/*
	 * If we don't have skinny metadata, don't bother doing anything
	 * different: tree blocks are keyed (bytenr, EXTENT_ITEM, nodesize)
	 * exactly like data extents.
	 */
	if (metadata && !btrfs_fs_incompat(root->fs_info, SKINNY_METADATA)) {
		offset = root->nodesize;
		metadata = 0;
	}

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	/* no transaction: read-only scan of the commit root, no locking */
	if (!trans) {
		path->skip_locking = 1;
		path->search_commit_root = 1;
	}

search_again:
	key.objectid = bytenr;
	key.offset = offset;
	if (metadata)
		key.type = BTRFS_METADATA_ITEM_KEY;
	else
		key.type = BTRFS_EXTENT_ITEM_KEY;

	ret = btrfs_search_slot(trans, root->fs_info->extent_root,
				&key, path, 0, 0);
	if (ret < 0)
		goto out_free;

	/*
	 * Skinny lookup missed: the fs may still carry an old-style fat
	 * extent item for this block, which sorts just before where the
	 * skinny key would land.  Peek at the previous slot.
	 */
	if (ret > 0 && metadata && key.type == BTRFS_METADATA_ITEM_KEY) {
		if (path->slots[0]) {
			path->slots[0]--;
			btrfs_item_key_to_cpu(path->nodes[0], &key,
					      path->slots[0]);
			if (key.objectid == bytenr &&
			    key.type == BTRFS_EXTENT_ITEM_KEY &&
			    key.offset == root->nodesize)
				ret = 0;
		}
	}

	if (ret == 0) {
		leaf = path->nodes[0];
		item_size = btrfs_item_size_nr(leaf, path->slots[0]);
		if (item_size >= sizeof(*ei)) {
			ei = btrfs_item_ptr(leaf, path->slots[0],
					    struct btrfs_extent_item);
			num_refs = btrfs_extent_refs(leaf, ei);
			extent_flags = btrfs_extent_flags(leaf, ei);
		} else {
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
			/* pre-2.6.31 disk format: smaller extent item */
			struct btrfs_extent_item_v0 *ei0;
			BUG_ON(item_size != sizeof(*ei0));
			ei0 = btrfs_item_ptr(leaf, path->slots[0],
					     struct btrfs_extent_item_v0);
			num_refs = btrfs_extent_refs_v0(leaf, ei0);
			/* FIXME: this isn't correct for data */
			extent_flags = BTRFS_BLOCK_FLAG_FULL_BACKREF;
#else
			BUG();
#endif
		}
		BUG_ON(num_refs == 0);
	} else {
		/* not on disk (yet); delayed refs below may still know it */
		num_refs = 0;
		extent_flags = 0;
		ret = 0;
	}

	if (!trans)
		goto out;

	/* fold in pending modifications from the delayed ref head */
	delayed_refs = &trans->transaction->delayed_refs;
	spin_lock(&delayed_refs->lock);
	head = btrfs_find_delayed_ref_head(trans, bytenr);
	if (head) {
		if (!mutex_trylock(&head->mutex)) {
			/* hold a ref so the head survives the unlock */
			atomic_inc(&head->node.refs);
			spin_unlock(&delayed_refs->lock);

			btrfs_release_path(path);

			/*
			 * Mutex was contended, block until it's released and try
			 * again
			 */
			mutex_lock(&head->mutex);
			mutex_unlock(&head->mutex);
			btrfs_put_delayed_ref(&head->node);
			goto search_again;
		}
		spin_lock(&head->lock);
		if (head->extent_op && head->extent_op->update_flags)
			extent_flags |= head->extent_op->flags_to_set;
		else
			BUG_ON(num_refs == 0);

		num_refs += head->node.ref_mod;
		spin_unlock(&head->lock);
		mutex_unlock(&head->mutex);
	}
	spin_unlock(&delayed_refs->lock);
out:
	WARN_ON(num_refs == 0);
	if (refs)
		*refs = num_refs;
	if (flags)
		*flags = extent_flags;
out_free:
	btrfs_free_path(path);
	return ret;
}
871
872/*
Chris Masond8d5f3e2007-12-11 12:42:00 -0500873 * Back reference rules. Back refs have three main goals:
874 *
875 * 1) differentiate between all holders of references to an extent so that
876 * when a reference is dropped we can make sure it was a valid reference
877 * before freeing the extent.
878 *
879 * 2) Provide enough information to quickly find the holders of an extent
880 * if we notice a given block is corrupted or bad.
881 *
882 * 3) Make it easy to migrate blocks for FS shrinking or storage pool
883 * maintenance. This is actually the same as #2, but with a slightly
884 * different use case.
885 *
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400886 * There are two kinds of back refs. The implicit back refs is optimized
887 * for pointers in non-shared tree blocks. For a given pointer in a block,
888 * back refs of this kind provide information about the block's owner tree
889 * and the pointer's key. These information allow us to find the block by
890 * b-tree searching. The full back refs is for pointers in tree blocks not
891 * referenced by their owner trees. The location of tree block is recorded
892 * in the back refs. Actually the full back refs is generic, and can be
893 * used in all cases the implicit back refs is used. The major shortcoming
894 * of the full back refs is its overhead. Every time a tree block gets
895 * COWed, we have to update back refs entry for all pointers in it.
896 *
897 * For a newly allocated tree block, we use implicit back refs for
898 * pointers in it. This means most tree related operations only involve
899 * implicit back refs. For a tree block created in old transaction, the
900 * only way to drop a reference to it is COW it. So we can detect the
901 * event that tree block loses its owner tree's reference and do the
902 * back refs conversion.
903 *
904 * When a tree block is COW'd through a tree, there are four cases:
905 *
906 * The reference count of the block is one and the tree is the block's
907 * owner tree. Nothing to do in this case.
908 *
909 * The reference count of the block is one and the tree is not the
910 * block's owner tree. In this case, full back refs is used for pointers
911 * in the block. Remove these full back refs, add implicit back refs for
 * every pointer in the new block.
913 *
914 * The reference count of the block is greater than one and the tree is
915 * the block's owner tree. In this case, implicit back refs is used for
 * pointers in the block. Add full back refs for every pointer in the
917 * block, increase lower level extents' reference counts. The original
918 * implicit back refs are entailed to the new block.
919 *
920 * The reference count of the block is greater than one and the tree is
921 * not the block's owner tree. Add implicit back refs for every pointer in
922 * the new block, increase lower level extents' reference count.
923 *
924 * Back Reference Key composing:
925 *
926 * The key objectid corresponds to the first byte in the extent,
927 * The key type is used to differentiate between types of back refs.
928 * There are different meanings of the key offset for different types
929 * of back refs.
930 *
Chris Masond8d5f3e2007-12-11 12:42:00 -0500931 * File extents can be referenced by:
932 *
933 * - multiple snapshots, subvolumes, or different generations in one subvol
Zheng Yan31840ae2008-09-23 13:14:14 -0400934 * - different files inside a single subvolume
Chris Masond8d5f3e2007-12-11 12:42:00 -0500935 * - different offsets inside a file (bookend extents in file.c)
936 *
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400937 * The extent ref structure for the implicit back refs has fields for:
Chris Masond8d5f3e2007-12-11 12:42:00 -0500938 *
939 * - Objectid of the subvolume root
Chris Masond8d5f3e2007-12-11 12:42:00 -0500940 * - objectid of the file holding the reference
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400941 * - original offset in the file
942 * - how many bookend extents
Zheng Yan31840ae2008-09-23 13:14:14 -0400943 *
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400944 * The key offset for the implicit back refs is hash of the first
945 * three fields.
Chris Masond8d5f3e2007-12-11 12:42:00 -0500946 *
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400947 * The extent ref structure for the full back refs has field for:
Chris Masond8d5f3e2007-12-11 12:42:00 -0500948 *
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400949 * - number of pointers in the tree leaf
Chris Masond8d5f3e2007-12-11 12:42:00 -0500950 *
 * The key offset for the full back refs is the first byte of
952 * the tree leaf
Chris Masond8d5f3e2007-12-11 12:42:00 -0500953 *
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400954 * When a file extent is allocated, The implicit back refs is used.
955 * the fields are filled in:
Chris Masond8d5f3e2007-12-11 12:42:00 -0500956 *
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400957 * (root_key.objectid, inode objectid, offset in file, 1)
958 *
 * When a file extent is removed during file truncation, we find the
960 * corresponding implicit back refs and check the following fields:
961 *
962 * (btrfs_header_owner(leaf), inode objectid, offset in file)
Chris Masond8d5f3e2007-12-11 12:42:00 -0500963 *
964 * Btree extents can be referenced by:
965 *
966 * - Different subvolumes
Chris Masond8d5f3e2007-12-11 12:42:00 -0500967 *
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400968 * Both the implicit back refs and the full back refs for tree blocks
969 * only consist of key. The key offset for the implicit back refs is
970 * objectid of block's owner tree. The key offset for the full back refs
971 * is the first byte of parent block.
Chris Masond8d5f3e2007-12-11 12:42:00 -0500972 *
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400973 * When implicit back refs is used, information about the lowest key and
974 * level of the tree block are required. These information are stored in
975 * tree block info structure.
Chris Masond8d5f3e2007-12-11 12:42:00 -0500976 */
Zheng Yan31840ae2008-09-23 13:14:14 -0400977
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Convert an old (v0) extent item at @path into the current format,
 * growing the item by @extra_size beyond the new layout so the caller
 * can append an inline ref afterwards.
 *
 * @owner: objectid of the tree/inode owning the extent, or (u64)-1 if
 *         unknown; then it is recovered from the extent's v0 ref items.
 *
 * Returns 0 on success or a negative errno from the tree searches.
 * Tree blocks (owner below FIRST_FREE_OBJECTID) additionally get a
 * btrfs_tree_block_info appended after the extent item.
 */
static int convert_extent_item_v0(struct btrfs_trans_handle *trans,
				  struct btrfs_root *root,
				  struct btrfs_path *path,
				  u64 owner, u32 extra_size)
{
	struct btrfs_extent_item *item;
	struct btrfs_extent_item_v0 *ei0;
	struct btrfs_extent_ref_v0 *ref0;
	struct btrfs_tree_block_info *bi;
	struct extent_buffer *leaf;
	struct btrfs_key key;
	struct btrfs_key found_key;
	u32 new_size = sizeof(*item);
	u64 refs;
	int ret;

	leaf = path->nodes[0];
	/* caller must position us on a v0-sized extent item */
	BUG_ON(btrfs_item_size_nr(leaf, path->slots[0]) != sizeof(*ei0));

	btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
	ei0 = btrfs_item_ptr(leaf, path->slots[0],
			     struct btrfs_extent_item_v0);
	refs = btrfs_extent_refs_v0(leaf, ei0);

	/*
	 * Owner unknown: scan forward over this extent's items until we
	 * hit a v0 ref item and take the owner recorded there.
	 */
	if (owner == (u64)-1) {
		while (1) {
			if (path->slots[0] >= btrfs_header_nritems(leaf)) {
				ret = btrfs_next_leaf(root, path);
				if (ret < 0)
					return ret;
				BUG_ON(ret > 0); /* Corruption */
				leaf = path->nodes[0];
			}
			btrfs_item_key_to_cpu(leaf, &found_key,
					      path->slots[0]);
			/* still items of the same extent? */
			BUG_ON(key.objectid != found_key.objectid);
			if (found_key.type != BTRFS_EXTENT_REF_V0_KEY) {
				path->slots[0]++;
				continue;
			}
			ref0 = btrfs_item_ptr(leaf, path->slots[0],
					      struct btrfs_extent_ref_v0);
			owner = btrfs_ref_objectid_v0(leaf, ref0);
			break;
		}
	}
	btrfs_release_path(path);

	/* tree blocks carry an extra btrfs_tree_block_info */
	if (owner < BTRFS_FIRST_FREE_OBJECTID)
		new_size += sizeof(*bi);

	/* re-search with enough insertion space for the grown item */
	new_size -= sizeof(*ei0);
	ret = btrfs_search_slot(trans, root, &key, path,
				new_size + extra_size, 1);
	if (ret < 0)
		return ret;
	BUG_ON(ret); /* Corruption */

	btrfs_extend_item(root, path, new_size);

	leaf = path->nodes[0];
	item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
	btrfs_set_extent_refs(leaf, item, refs);
	/* FIXME: get real generation */
	btrfs_set_extent_generation(leaf, item, 0);
	if (owner < BTRFS_FIRST_FREE_OBJECTID) {
		btrfs_set_extent_flags(leaf, item,
				       BTRFS_EXTENT_FLAG_TREE_BLOCK |
				       BTRFS_BLOCK_FLAG_FULL_BACKREF);
		bi = (struct btrfs_tree_block_info *)(item + 1);
		/* FIXME: get first key of the block */
		memset_extent_buffer(leaf, 0, (unsigned long)bi, sizeof(*bi));
		btrfs_set_tree_block_level(leaf, bi, (int)owner);
	} else {
		btrfs_set_extent_flags(leaf, item, BTRFS_EXTENT_FLAG_DATA);
	}
	btrfs_mark_buffer_dirty(leaf);
	return 0;
}
#endif
1059
1060static u64 hash_extent_data_ref(u64 root_objectid, u64 owner, u64 offset)
1061{
1062 u32 high_crc = ~(u32)0;
1063 u32 low_crc = ~(u32)0;
1064 __le64 lenum;
1065
1066 lenum = cpu_to_le64(root_objectid);
Filipe David Borba Manana14a958e2014-01-12 02:22:46 +00001067 high_crc = btrfs_crc32c(high_crc, &lenum, sizeof(lenum));
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001068 lenum = cpu_to_le64(owner);
Filipe David Borba Manana14a958e2014-01-12 02:22:46 +00001069 low_crc = btrfs_crc32c(low_crc, &lenum, sizeof(lenum));
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001070 lenum = cpu_to_le64(offset);
Filipe David Borba Manana14a958e2014-01-12 02:22:46 +00001071 low_crc = btrfs_crc32c(low_crc, &lenum, sizeof(lenum));
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001072
1073 return ((u64)high_crc << 31) ^ (u64)low_crc;
1074}
1075
1076static u64 hash_extent_data_ref_item(struct extent_buffer *leaf,
1077 struct btrfs_extent_data_ref *ref)
1078{
1079 return hash_extent_data_ref(btrfs_extent_data_ref_root(leaf, ref),
1080 btrfs_extent_data_ref_objectid(leaf, ref),
1081 btrfs_extent_data_ref_offset(leaf, ref));
1082}
1083
1084static int match_extent_data_ref(struct extent_buffer *leaf,
1085 struct btrfs_extent_data_ref *ref,
1086 u64 root_objectid, u64 owner, u64 offset)
1087{
1088 if (btrfs_extent_data_ref_root(leaf, ref) != root_objectid ||
1089 btrfs_extent_data_ref_objectid(leaf, ref) != owner ||
1090 btrfs_extent_data_ref_offset(leaf, ref) != offset)
1091 return 0;
1092 return 1;
1093}
1094
/*
 * Look up a keyed (non-inline) data back reference item.
 *
 * If @parent is non-zero the ref is a SHARED_DATA_REF keyed by the parent
 * block's bytenr and a single search decides the result.  Otherwise the
 * ref is an EXTENT_DATA_REF keyed by the hash of (root, owner, offset),
 * and we must scan forward over hash collisions comparing item contents.
 *
 * Returns 0 with @path positioned on the matching item, -ENOENT if no
 * matching ref exists, or a negative errno from the tree search.
 */
static noinline int lookup_extent_data_ref(struct btrfs_trans_handle *trans,
					   struct btrfs_root *root,
					   struct btrfs_path *path,
					   u64 bytenr, u64 parent,
					   u64 root_objectid,
					   u64 owner, u64 offset)
{
	struct btrfs_key key;
	struct btrfs_extent_data_ref *ref;
	struct extent_buffer *leaf;
	u32 nritems;
	int ret;
	int recow;
	int err = -ENOENT;

	key.objectid = bytenr;
	if (parent) {
		key.type = BTRFS_SHARED_DATA_REF_KEY;
		key.offset = parent;
	} else {
		key.type = BTRFS_EXTENT_DATA_REF_KEY;
		key.offset = hash_extent_data_ref(root_objectid,
						  owner, offset);
	}
again:
	recow = 0;
	/* cow=1: the caller intends to modify the ref we find */
	ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
	if (ret < 0) {
		err = ret;
		goto fail;
	}

	if (parent) {
		/* shared refs are keyed exactly; no collision scan needed */
		if (!ret)
			return 0;
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
		/* fall back to the old v0 ref key for converted filesystems */
		key.type = BTRFS_EXTENT_REF_V0_KEY;
		btrfs_release_path(path);
		ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
		if (ret < 0) {
			err = ret;
			goto fail;
		}
		if (!ret)
			return 0;
#endif
		goto fail;
	}

	/* scan forward over items with the same (colliding) hash key */
	leaf = path->nodes[0];
	nritems = btrfs_header_nritems(leaf);
	while (1) {
		if (path->slots[0] >= nritems) {
			ret = btrfs_next_leaf(root, path);
			if (ret < 0)
				err = ret;
			if (ret)
				goto fail;

			leaf = path->nodes[0];
			nritems = btrfs_header_nritems(leaf);
			/*
			 * NOTE(review): next_leaf does not cow the new leaf;
			 * a match found there triggers a re-search so the
			 * final path holds a cowed leaf — confirm.
			 */
			recow = 1;
		}

		btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
		if (key.objectid != bytenr ||
		    key.type != BTRFS_EXTENT_DATA_REF_KEY)
			goto fail;

		ref = btrfs_item_ptr(leaf, path->slots[0],
				     struct btrfs_extent_data_ref);

		if (match_extent_data_ref(leaf, ref, root_objectid,
					  owner, offset)) {
			if (recow) {
				btrfs_release_path(path);
				goto again;
			}
			err = 0;
			break;
		}
		path->slots[0]++;
	}
fail:
	return err;
}
1181
/*
 * Insert a keyed (non-inline) data back reference, or add @refs_to_add
 * to an existing one.
 *
 * Shared refs (parent != 0) are keyed uniquely by the parent bytenr.
 * Implicit refs are keyed by a hash of (root, owner, offset); on hash
 * collision we probe successive key offsets until we find the matching
 * item or an empty slot.
 *
 * Returns 0 on success or a negative errno.  The path is released on
 * return in all cases.
 */
static noinline int insert_extent_data_ref(struct btrfs_trans_handle *trans,
					   struct btrfs_root *root,
					   struct btrfs_path *path,
					   u64 bytenr, u64 parent,
					   u64 root_objectid, u64 owner,
					   u64 offset, int refs_to_add)
{
	struct btrfs_key key;
	struct extent_buffer *leaf;
	u32 size;
	u32 num_refs;
	int ret;

	key.objectid = bytenr;
	if (parent) {
		key.type = BTRFS_SHARED_DATA_REF_KEY;
		key.offset = parent;
		size = sizeof(struct btrfs_shared_data_ref);
	} else {
		key.type = BTRFS_EXTENT_DATA_REF_KEY;
		key.offset = hash_extent_data_ref(root_objectid,
						  owner, offset);
		size = sizeof(struct btrfs_extent_data_ref);
	}

	/* -EEXIST means the item is already there; we then bump its count */
	ret = btrfs_insert_empty_item(trans, root, path, &key, size);
	if (ret && ret != -EEXIST)
		goto fail;

	leaf = path->nodes[0];
	if (parent) {
		struct btrfs_shared_data_ref *ref;
		ref = btrfs_item_ptr(leaf, path->slots[0],
				     struct btrfs_shared_data_ref);
		if (ret == 0) {
			/* freshly inserted item: set the initial count */
			btrfs_set_shared_data_ref_count(leaf, ref, refs_to_add);
		} else {
			num_refs = btrfs_shared_data_ref_count(leaf, ref);
			num_refs += refs_to_add;
			btrfs_set_shared_data_ref_count(leaf, ref, num_refs);
		}
	} else {
		struct btrfs_extent_data_ref *ref;
		/*
		 * Hash collision: an item exists at this key but may belong
		 * to a different (root, owner, offset) triple.  Probe the
		 * next key offsets until we match or insert a new item.
		 */
		while (ret == -EEXIST) {
			ref = btrfs_item_ptr(leaf, path->slots[0],
					     struct btrfs_extent_data_ref);
			if (match_extent_data_ref(leaf, ref, root_objectid,
						  owner, offset))
				break;
			btrfs_release_path(path);
			key.offset++;
			ret = btrfs_insert_empty_item(trans, root, path, &key,
						      size);
			if (ret && ret != -EEXIST)
				goto fail;

			leaf = path->nodes[0];
		}
		ref = btrfs_item_ptr(leaf, path->slots[0],
				     struct btrfs_extent_data_ref);
		if (ret == 0) {
			/* new item: fill in the full triple and count */
			btrfs_set_extent_data_ref_root(leaf, ref,
						       root_objectid);
			btrfs_set_extent_data_ref_objectid(leaf, ref, owner);
			btrfs_set_extent_data_ref_offset(leaf, ref, offset);
			btrfs_set_extent_data_ref_count(leaf, ref, refs_to_add);
		} else {
			num_refs = btrfs_extent_data_ref_count(leaf, ref);
			num_refs += refs_to_add;
			btrfs_set_extent_data_ref_count(leaf, ref, num_refs);
		}
	}
	btrfs_mark_buffer_dirty(leaf);
	ret = 0;
fail:
	btrfs_release_path(path);
	return ret;
}
1260
/*
 * Drop @refs_to_drop references from the keyed data ref item that @path
 * is positioned on.  Handles EXTENT_DATA_REF, SHARED_DATA_REF and (when
 * compiled in) the old v0 ref format.
 *
 * If the count reaches zero the item is deleted and *@last_ref is set
 * to 1 so the caller knows the extent lost its last keyed reference.
 *
 * Returns 0 on success or a negative errno from btrfs_del_item().
 */
static noinline int remove_extent_data_ref(struct btrfs_trans_handle *trans,
					   struct btrfs_root *root,
					   struct btrfs_path *path,
					   int refs_to_drop, int *last_ref)
{
	struct btrfs_key key;
	struct btrfs_extent_data_ref *ref1 = NULL;
	struct btrfs_shared_data_ref *ref2 = NULL;
	struct extent_buffer *leaf;
	u32 num_refs = 0;
	int ret = 0;

	leaf = path->nodes[0];
	btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);

	/* read the current count from whichever ref format is present */
	if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
		ref1 = btrfs_item_ptr(leaf, path->slots[0],
				      struct btrfs_extent_data_ref);
		num_refs = btrfs_extent_data_ref_count(leaf, ref1);
	} else if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
		ref2 = btrfs_item_ptr(leaf, path->slots[0],
				      struct btrfs_shared_data_ref);
		num_refs = btrfs_shared_data_ref_count(leaf, ref2);
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
	} else if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
		struct btrfs_extent_ref_v0 *ref0;
		ref0 = btrfs_item_ptr(leaf, path->slots[0],
				      struct btrfs_extent_ref_v0);
		num_refs = btrfs_ref_count_v0(leaf, ref0);
#endif
	} else {
		/* caller positioned us on something that isn't a data ref */
		BUG();
	}

	BUG_ON(num_refs < refs_to_drop);
	num_refs -= refs_to_drop;

	if (num_refs == 0) {
		ret = btrfs_del_item(trans, root, path);
		*last_ref = 1;
	} else {
		/* write back the decremented count in the matching format */
		if (key.type == BTRFS_EXTENT_DATA_REF_KEY)
			btrfs_set_extent_data_ref_count(leaf, ref1, num_refs);
		else if (key.type == BTRFS_SHARED_DATA_REF_KEY)
			btrfs_set_shared_data_ref_count(leaf, ref2, num_refs);
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
		else {
			struct btrfs_extent_ref_v0 *ref0;
			ref0 = btrfs_item_ptr(leaf, path->slots[0],
					      struct btrfs_extent_ref_v0);
			btrfs_set_ref_count_v0(leaf, ref0, num_refs);
		}
#endif
		btrfs_mark_buffer_dirty(leaf);
	}
	return ret;
}
1318
/*
 * Return the reference count stored in a data back ref.
 *
 * @iref: if non-NULL, the ref lives inline inside the extent item and is
 *        read through this pointer; otherwise the ref is the standalone
 *        keyed item that @path is positioned on.
 *
 * Returns 0 (after WARN_ON) for an unrecognized item type.
 */
static noinline u32 extent_data_ref_count(struct btrfs_path *path,
					  struct btrfs_extent_inline_ref *iref)
{
	struct btrfs_key key;
	struct extent_buffer *leaf;
	struct btrfs_extent_data_ref *ref1;
	struct btrfs_shared_data_ref *ref2;
	u32 num_refs = 0;

	leaf = path->nodes[0];
	btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
	if (iref) {
		if (btrfs_extent_inline_ref_type(leaf, iref) ==
		    BTRFS_EXTENT_DATA_REF_KEY) {
			/* inline extent data ref: payload starts at ->offset */
			ref1 = (struct btrfs_extent_data_ref *)(&iref->offset);
			num_refs = btrfs_extent_data_ref_count(leaf, ref1);
		} else {
			/* inline shared data ref: payload follows the iref */
			ref2 = (struct btrfs_shared_data_ref *)(iref + 1);
			num_refs = btrfs_shared_data_ref_count(leaf, ref2);
		}
	} else if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
		ref1 = btrfs_item_ptr(leaf, path->slots[0],
				      struct btrfs_extent_data_ref);
		num_refs = btrfs_extent_data_ref_count(leaf, ref1);
	} else if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
		ref2 = btrfs_item_ptr(leaf, path->slots[0],
				      struct btrfs_shared_data_ref);
		num_refs = btrfs_shared_data_ref_count(leaf, ref2);
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
	} else if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
		struct btrfs_extent_ref_v0 *ref0;
		ref0 = btrfs_item_ptr(leaf, path->slots[0],
				      struct btrfs_extent_ref_v0);
		num_refs = btrfs_ref_count_v0(leaf, ref0);
#endif
	} else {
		WARN_ON(1);
	}
	return num_refs;
}
1359
1360static noinline int lookup_tree_block_ref(struct btrfs_trans_handle *trans,
1361 struct btrfs_root *root,
1362 struct btrfs_path *path,
1363 u64 bytenr, u64 parent,
1364 u64 root_objectid)
1365{
1366 struct btrfs_key key;
1367 int ret;
1368
1369 key.objectid = bytenr;
1370 if (parent) {
1371 key.type = BTRFS_SHARED_BLOCK_REF_KEY;
1372 key.offset = parent;
1373 } else {
1374 key.type = BTRFS_TREE_BLOCK_REF_KEY;
1375 key.offset = root_objectid;
1376 }
1377
1378 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
1379 if (ret > 0)
1380 ret = -ENOENT;
1381#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
1382 if (ret == -ENOENT && parent) {
David Sterbab3b4aa72011-04-21 01:20:15 +02001383 btrfs_release_path(path);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001384 key.type = BTRFS_EXTENT_REF_V0_KEY;
1385 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
1386 if (ret > 0)
1387 ret = -ENOENT;
1388 }
1389#endif
1390 return ret;
1391}
1392
1393static noinline int insert_tree_block_ref(struct btrfs_trans_handle *trans,
1394 struct btrfs_root *root,
1395 struct btrfs_path *path,
1396 u64 bytenr, u64 parent,
1397 u64 root_objectid)
1398{
1399 struct btrfs_key key;
1400 int ret;
1401
1402 key.objectid = bytenr;
1403 if (parent) {
1404 key.type = BTRFS_SHARED_BLOCK_REF_KEY;
1405 key.offset = parent;
1406 } else {
1407 key.type = BTRFS_TREE_BLOCK_REF_KEY;
1408 key.offset = root_objectid;
1409 }
1410
1411 ret = btrfs_insert_empty_item(trans, root, path, &key, 0);
David Sterbab3b4aa72011-04-21 01:20:15 +02001412 btrfs_release_path(path);
Zheng Yan31840ae2008-09-23 13:14:14 -04001413 return ret;
1414}
1415
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001416static inline int extent_ref_type(u64 parent, u64 owner)
1417{
1418 int type;
1419 if (owner < BTRFS_FIRST_FREE_OBJECTID) {
1420 if (parent > 0)
1421 type = BTRFS_SHARED_BLOCK_REF_KEY;
1422 else
1423 type = BTRFS_TREE_BLOCK_REF_KEY;
1424 } else {
1425 if (parent > 0)
1426 type = BTRFS_SHARED_DATA_REF_KEY;
1427 else
1428 type = BTRFS_EXTENT_DATA_REF_KEY;
1429 }
1430 return type;
1431}
1432
Yan Zheng2c47e6052009-06-27 21:07:35 -04001433static int find_next_key(struct btrfs_path *path, int level,
1434 struct btrfs_key *key)
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001435
1436{
Yan Zheng2c47e6052009-06-27 21:07:35 -04001437 for (; level < BTRFS_MAX_LEVEL; level++) {
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001438 if (!path->nodes[level])
1439 break;
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001440 if (path->slots[level] + 1 >=
1441 btrfs_header_nritems(path->nodes[level]))
1442 continue;
1443 if (level == 0)
1444 btrfs_item_key_to_cpu(path->nodes[level], key,
1445 path->slots[level] + 1);
1446 else
1447 btrfs_node_key_to_cpu(path->nodes[level], key,
1448 path->slots[level] + 1);
1449 return 0;
1450 }
1451 return 1;
1452}
1453
1454/*
1455 * look for inline back ref. if back ref is found, *ref_ret is set
1456 * to the address of inline back ref, and 0 is returned.
1457 *
1458 * if back ref isn't found, *ref_ret is set to the address where it
1459 * should be inserted, and -ENOENT is returned.
1460 *
1461 * if insert is true and there are too many inline back refs, the path
1462 * points to the extent item, and -EAGAIN is returned.
1463 *
1464 * NOTE: inline back refs are ordered in the same way that back ref
1465 * items in the tree are ordered.
1466 */
static noinline_for_stack
int lookup_inline_extent_backref(struct btrfs_trans_handle *trans,
				 struct btrfs_root *root,
				 struct btrfs_path *path,
				 struct btrfs_extent_inline_ref **ref_ret,
				 u64 bytenr, u64 num_bytes,
				 u64 parent, u64 root_objectid,
				 u64 owner, u64 offset, int insert)
{
	struct btrfs_key key;
	struct extent_buffer *leaf;
	struct btrfs_extent_item *ei;
	struct btrfs_extent_inline_ref *iref;
	u64 flags;
	u64 item_size;
	unsigned long ptr;
	unsigned long end;
	int extra_size;
	int type;
	int want;
	int ret;
	int err = 0;
	bool skinny_metadata = btrfs_fs_incompat(root->fs_info,
						 SKINNY_METADATA);

	key.objectid = bytenr;
	key.type = BTRFS_EXTENT_ITEM_KEY;
	key.offset = num_bytes;

	/* the inline ref type we expect to find for this kind of backref */
	want = extent_ref_type(parent, owner);
	if (insert) {
		/*
		 * Reserve room in the leaf for the new inline ref and keep
		 * the upper-level locks: the caller may extend the item
		 * right after this returns.
		 */
		extra_size = btrfs_extent_inline_ref_size(want);
		path->keep_locks = 1;
	} else
		extra_size = -1;

	/*
	 * Owner is our parent level, so we can just add one to get the level
	 * for the block we are interested in.
	 */
	if (skinny_metadata && owner < BTRFS_FIRST_FREE_OBJECTID) {
		key.type = BTRFS_METADATA_ITEM_KEY;
		key.offset = owner;
	}

again:
	ret = btrfs_search_slot(trans, root, &key, path, extra_size, 1);
	if (ret < 0) {
		err = ret;
		goto out;
	}

	/*
	 * We may be a newly converted file system which still has the old fat
	 * extent entries for metadata, so try and see if we have one of those.
	 */
	if (ret > 0 && skinny_metadata) {
		skinny_metadata = false;
		if (path->slots[0]) {
			/* check the item just before the insertion point */
			path->slots[0]--;
			btrfs_item_key_to_cpu(path->nodes[0], &key,
					      path->slots[0]);
			if (key.objectid == bytenr &&
			    key.type == BTRFS_EXTENT_ITEM_KEY &&
			    key.offset == num_bytes)
				ret = 0;
		}
		if (ret) {
			/* retry the search with the fat extent item key */
			key.objectid = bytenr;
			key.type = BTRFS_EXTENT_ITEM_KEY;
			key.offset = num_bytes;
			btrfs_release_path(path);
			goto again;
		}
	}

	if (ret && !insert) {
		err = -ENOENT;
		goto out;
	} else if (WARN_ON(ret)) {
		/* extent item missing on an insert: corruption */
		err = -EIO;
		goto out;
	}

	leaf = path->nodes[0];
	item_size = btrfs_item_size_nr(leaf, path->slots[0]);
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
	if (item_size < sizeof(*ei)) {
		/* old v0 extent item: convert it in place before use */
		if (!insert) {
			err = -ENOENT;
			goto out;
		}
		ret = convert_extent_item_v0(trans, root, path, owner,
					     extra_size);
		if (ret < 0) {
			err = ret;
			goto out;
		}
		leaf = path->nodes[0];
		item_size = btrfs_item_size_nr(leaf, path->slots[0]);
	}
#endif
	BUG_ON(item_size < sizeof(*ei));

	ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
	flags = btrfs_extent_flags(leaf, ei);

	ptr = (unsigned long)(ei + 1);
	end = (unsigned long)ei + item_size;

	/* non-skinny tree blocks embed a btrfs_tree_block_info first */
	if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK && !skinny_metadata) {
		ptr += sizeof(struct btrfs_tree_block_info);
		BUG_ON(ptr > end);
	}

	/*
	 * Walk the inline refs, which are sorted by type then by the
	 * type-specific ordering, looking for an exact match.  On a miss,
	 * ptr ends up at the position where the new ref would be inserted.
	 */
	err = -ENOENT;
	while (1) {
		if (ptr >= end) {
			WARN_ON(ptr > end);
			break;
		}
		iref = (struct btrfs_extent_inline_ref *)ptr;
		type = btrfs_extent_inline_ref_type(leaf, iref);
		if (want < type)
			break;
		if (want > type) {
			ptr += btrfs_extent_inline_ref_size(type);
			continue;
		}

		if (type == BTRFS_EXTENT_DATA_REF_KEY) {
			struct btrfs_extent_data_ref *dref;
			dref = (struct btrfs_extent_data_ref *)(&iref->offset);
			if (match_extent_data_ref(leaf, dref, root_objectid,
						  owner, offset)) {
				err = 0;
				break;
			}
			/* data refs are ordered by their hash */
			if (hash_extent_data_ref_item(leaf, dref) <
			    hash_extent_data_ref(root_objectid, owner, offset))
				break;
		} else {
			u64 ref_offset;
			ref_offset = btrfs_extent_inline_ref_offset(leaf, iref);
			if (parent > 0) {
				if (parent == ref_offset) {
					err = 0;
					break;
				}
				if (ref_offset < parent)
					break;
			} else {
				if (root_objectid == ref_offset) {
					err = 0;
					break;
				}
				if (ref_offset < root_objectid)
					break;
			}
		}
		ptr += btrfs_extent_inline_ref_size(type);
	}
	if (err == -ENOENT && insert) {
		/* refuse to grow the extent item past the configured max */
		if (item_size + extra_size >=
		    BTRFS_MAX_EXTENT_ITEM_SIZE(root)) {
			err = -EAGAIN;
			goto out;
		}
		/*
		 * To add new inline back ref, we have to make sure
		 * there is no corresponding back ref item.
		 * For simplicity, we just do not add new inline back
		 * ref if there is any kind of item for this block
		 */
		if (find_next_key(path, 0, &key) == 0 &&
		    key.objectid == bytenr &&
		    key.type < BTRFS_BLOCK_GROUP_ITEM_KEY) {
			err = -EAGAIN;
			goto out;
		}
	}
	/* on -ENOENT with insert, this is the insertion position */
	*ref_ret = (struct btrfs_extent_inline_ref *)ptr;
out:
	if (insert) {
		path->keep_locks = 0;
		btrfs_unlock_up_safe(path, 1);
	}
	return err;
}
1656
1657/*
1658 * helper to add new inline back ref
1659 */
1660static noinline_for_stack
Tsutomu Itohfd279fa2013-04-16 05:19:11 +00001661void setup_inline_extent_backref(struct btrfs_root *root,
Jeff Mahoney143bede2012-03-01 14:56:26 +01001662 struct btrfs_path *path,
1663 struct btrfs_extent_inline_ref *iref,
1664 u64 parent, u64 root_objectid,
1665 u64 owner, u64 offset, int refs_to_add,
1666 struct btrfs_delayed_extent_op *extent_op)
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001667{
1668 struct extent_buffer *leaf;
1669 struct btrfs_extent_item *ei;
1670 unsigned long ptr;
1671 unsigned long end;
1672 unsigned long item_offset;
1673 u64 refs;
1674 int size;
1675 int type;
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001676
1677 leaf = path->nodes[0];
1678 ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
1679 item_offset = (unsigned long)iref - (unsigned long)ei;
1680
1681 type = extent_ref_type(parent, owner);
1682 size = btrfs_extent_inline_ref_size(type);
1683
Tsutomu Itoh4b90c682013-04-16 05:18:49 +00001684 btrfs_extend_item(root, path, size);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001685
1686 ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
1687 refs = btrfs_extent_refs(leaf, ei);
1688 refs += refs_to_add;
1689 btrfs_set_extent_refs(leaf, ei, refs);
1690 if (extent_op)
1691 __run_delayed_extent_op(extent_op, leaf, ei);
1692
1693 ptr = (unsigned long)ei + item_offset;
1694 end = (unsigned long)ei + btrfs_item_size_nr(leaf, path->slots[0]);
1695 if (ptr < end - size)
1696 memmove_extent_buffer(leaf, ptr + size, ptr,
1697 end - size - ptr);
1698
1699 iref = (struct btrfs_extent_inline_ref *)ptr;
1700 btrfs_set_extent_inline_ref_type(leaf, iref, type);
1701 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
1702 struct btrfs_extent_data_ref *dref;
1703 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
1704 btrfs_set_extent_data_ref_root(leaf, dref, root_objectid);
1705 btrfs_set_extent_data_ref_objectid(leaf, dref, owner);
1706 btrfs_set_extent_data_ref_offset(leaf, dref, offset);
1707 btrfs_set_extent_data_ref_count(leaf, dref, refs_to_add);
1708 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
1709 struct btrfs_shared_data_ref *sref;
1710 sref = (struct btrfs_shared_data_ref *)(iref + 1);
1711 btrfs_set_shared_data_ref_count(leaf, sref, refs_to_add);
1712 btrfs_set_extent_inline_ref_offset(leaf, iref, parent);
1713 } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
1714 btrfs_set_extent_inline_ref_offset(leaf, iref, parent);
1715 } else {
1716 btrfs_set_extent_inline_ref_offset(leaf, iref, root_objectid);
1717 }
1718 btrfs_mark_buffer_dirty(leaf);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001719}
1720
1721static int lookup_extent_backref(struct btrfs_trans_handle *trans,
1722 struct btrfs_root *root,
1723 struct btrfs_path *path,
1724 struct btrfs_extent_inline_ref **ref_ret,
1725 u64 bytenr, u64 num_bytes, u64 parent,
1726 u64 root_objectid, u64 owner, u64 offset)
1727{
1728 int ret;
1729
1730 ret = lookup_inline_extent_backref(trans, root, path, ref_ret,
1731 bytenr, num_bytes, parent,
1732 root_objectid, owner, offset, 0);
1733 if (ret != -ENOENT)
1734 return ret;
1735
David Sterbab3b4aa72011-04-21 01:20:15 +02001736 btrfs_release_path(path);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001737 *ref_ret = NULL;
1738
1739 if (owner < BTRFS_FIRST_FREE_OBJECTID) {
1740 ret = lookup_tree_block_ref(trans, root, path, bytenr, parent,
1741 root_objectid);
1742 } else {
1743 ret = lookup_extent_data_ref(trans, root, path, bytenr, parent,
1744 root_objectid, owner, offset);
1745 }
1746 return ret;
1747}
1748
1749/*
1750 * helper to update/remove inline back ref
1751 */
1752static noinline_for_stack
Tsutomu Itohafe5fea2013-04-16 05:18:22 +00001753void update_inline_extent_backref(struct btrfs_root *root,
Jeff Mahoney143bede2012-03-01 14:56:26 +01001754 struct btrfs_path *path,
1755 struct btrfs_extent_inline_ref *iref,
1756 int refs_to_mod,
Josef Bacikfcebe452014-05-13 17:30:47 -07001757 struct btrfs_delayed_extent_op *extent_op,
1758 int *last_ref)
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001759{
1760 struct extent_buffer *leaf;
1761 struct btrfs_extent_item *ei;
1762 struct btrfs_extent_data_ref *dref = NULL;
1763 struct btrfs_shared_data_ref *sref = NULL;
1764 unsigned long ptr;
1765 unsigned long end;
1766 u32 item_size;
1767 int size;
1768 int type;
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001769 u64 refs;
1770
1771 leaf = path->nodes[0];
1772 ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
1773 refs = btrfs_extent_refs(leaf, ei);
1774 WARN_ON(refs_to_mod < 0 && refs + refs_to_mod <= 0);
1775 refs += refs_to_mod;
1776 btrfs_set_extent_refs(leaf, ei, refs);
1777 if (extent_op)
1778 __run_delayed_extent_op(extent_op, leaf, ei);
1779
1780 type = btrfs_extent_inline_ref_type(leaf, iref);
1781
1782 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
1783 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
1784 refs = btrfs_extent_data_ref_count(leaf, dref);
1785 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
1786 sref = (struct btrfs_shared_data_ref *)(iref + 1);
1787 refs = btrfs_shared_data_ref_count(leaf, sref);
1788 } else {
1789 refs = 1;
1790 BUG_ON(refs_to_mod != -1);
1791 }
1792
1793 BUG_ON(refs_to_mod < 0 && refs < -refs_to_mod);
1794 refs += refs_to_mod;
1795
1796 if (refs > 0) {
1797 if (type == BTRFS_EXTENT_DATA_REF_KEY)
1798 btrfs_set_extent_data_ref_count(leaf, dref, refs);
1799 else
1800 btrfs_set_shared_data_ref_count(leaf, sref, refs);
1801 } else {
Josef Bacikfcebe452014-05-13 17:30:47 -07001802 *last_ref = 1;
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001803 size = btrfs_extent_inline_ref_size(type);
1804 item_size = btrfs_item_size_nr(leaf, path->slots[0]);
1805 ptr = (unsigned long)iref;
1806 end = (unsigned long)ei + item_size;
1807 if (ptr + size < end)
1808 memmove_extent_buffer(leaf, ptr, ptr + size,
1809 end - ptr - size);
1810 item_size -= size;
Tsutomu Itohafe5fea2013-04-16 05:18:22 +00001811 btrfs_truncate_item(root, path, item_size, 1);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001812 }
1813 btrfs_mark_buffer_dirty(leaf);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001814}
1815
1816static noinline_for_stack
1817int insert_inline_extent_backref(struct btrfs_trans_handle *trans,
1818 struct btrfs_root *root,
1819 struct btrfs_path *path,
1820 u64 bytenr, u64 num_bytes, u64 parent,
1821 u64 root_objectid, u64 owner,
1822 u64 offset, int refs_to_add,
1823 struct btrfs_delayed_extent_op *extent_op)
1824{
1825 struct btrfs_extent_inline_ref *iref;
1826 int ret;
1827
1828 ret = lookup_inline_extent_backref(trans, root, path, &iref,
1829 bytenr, num_bytes, parent,
1830 root_objectid, owner, offset, 1);
1831 if (ret == 0) {
1832 BUG_ON(owner < BTRFS_FIRST_FREE_OBJECTID);
Tsutomu Itohafe5fea2013-04-16 05:18:22 +00001833 update_inline_extent_backref(root, path, iref,
Josef Bacikfcebe452014-05-13 17:30:47 -07001834 refs_to_add, extent_op, NULL);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001835 } else if (ret == -ENOENT) {
Tsutomu Itohfd279fa2013-04-16 05:19:11 +00001836 setup_inline_extent_backref(root, path, iref, parent,
Jeff Mahoney143bede2012-03-01 14:56:26 +01001837 root_objectid, owner, offset,
1838 refs_to_add, extent_op);
1839 ret = 0;
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001840 }
1841 return ret;
1842}
1843
1844static int insert_extent_backref(struct btrfs_trans_handle *trans,
1845 struct btrfs_root *root,
1846 struct btrfs_path *path,
1847 u64 bytenr, u64 parent, u64 root_objectid,
1848 u64 owner, u64 offset, int refs_to_add)
1849{
1850 int ret;
1851 if (owner < BTRFS_FIRST_FREE_OBJECTID) {
1852 BUG_ON(refs_to_add != 1);
1853 ret = insert_tree_block_ref(trans, root, path, bytenr,
1854 parent, root_objectid);
1855 } else {
1856 ret = insert_extent_data_ref(trans, root, path, bytenr,
1857 parent, root_objectid,
1858 owner, offset, refs_to_add);
1859 }
1860 return ret;
1861}
1862
1863static int remove_extent_backref(struct btrfs_trans_handle *trans,
1864 struct btrfs_root *root,
1865 struct btrfs_path *path,
1866 struct btrfs_extent_inline_ref *iref,
Josef Bacikfcebe452014-05-13 17:30:47 -07001867 int refs_to_drop, int is_data, int *last_ref)
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001868{
Jeff Mahoney143bede2012-03-01 14:56:26 +01001869 int ret = 0;
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001870
1871 BUG_ON(!is_data && refs_to_drop != 1);
1872 if (iref) {
Tsutomu Itohafe5fea2013-04-16 05:18:22 +00001873 update_inline_extent_backref(root, path, iref,
Josef Bacikfcebe452014-05-13 17:30:47 -07001874 -refs_to_drop, NULL, last_ref);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001875 } else if (is_data) {
Josef Bacikfcebe452014-05-13 17:30:47 -07001876 ret = remove_extent_data_ref(trans, root, path, refs_to_drop,
1877 last_ref);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001878 } else {
Josef Bacikfcebe452014-05-13 17:30:47 -07001879 *last_ref = 1;
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001880 ret = btrfs_del_item(trans, root, path);
1881 }
1882 return ret;
1883}
1884
Li Dongyang5378e602011-03-24 10:24:27 +00001885static int btrfs_issue_discard(struct block_device *bdev,
Chris Mason15916de2008-11-19 21:17:22 -05001886 u64 start, u64 len)
1887{
Li Dongyang5378e602011-03-24 10:24:27 +00001888 return blkdev_issue_discard(bdev, start >> 9, len >> 9, GFP_NOFS, 0);
Chris Mason15916de2008-11-19 21:17:22 -05001889}
Chris Mason15916de2008-11-19 21:17:22 -05001890
Filipe Manana1edb647b2014-12-08 14:01:12 +00001891int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr,
1892 u64 num_bytes, u64 *actual_bytes)
Liu Hui1f3c79a2009-01-05 15:57:51 -05001893{
Liu Hui1f3c79a2009-01-05 15:57:51 -05001894 int ret;
Li Dongyang5378e602011-03-24 10:24:27 +00001895 u64 discarded_bytes = 0;
Jan Schmidta1d3c472011-08-04 17:15:33 +02001896 struct btrfs_bio *bbio = NULL;
Liu Hui1f3c79a2009-01-05 15:57:51 -05001897
Christoph Hellwige244a0a2009-10-14 09:24:59 -04001898
Liu Hui1f3c79a2009-01-05 15:57:51 -05001899 /* Tell the block device(s) that the sectors can be discarded */
Stefan Behrens3ec706c2012-11-05 15:46:42 +01001900 ret = btrfs_map_block(root->fs_info, REQ_DISCARD,
Jan Schmidta1d3c472011-08-04 17:15:33 +02001901 bytenr, &num_bytes, &bbio, 0);
Jeff Mahoney79787ea2012-03-12 16:03:00 +01001902 /* Error condition is -ENOMEM */
Liu Hui1f3c79a2009-01-05 15:57:51 -05001903 if (!ret) {
Jan Schmidta1d3c472011-08-04 17:15:33 +02001904 struct btrfs_bio_stripe *stripe = bbio->stripes;
Liu Hui1f3c79a2009-01-05 15:57:51 -05001905 int i;
1906
Liu Hui1f3c79a2009-01-05 15:57:51 -05001907
Jan Schmidta1d3c472011-08-04 17:15:33 +02001908 for (i = 0; i < bbio->num_stripes; i++, stripe++) {
Josef Bacikd5e20032011-08-04 14:52:27 +00001909 if (!stripe->dev->can_discard)
1910 continue;
1911
Li Dongyang5378e602011-03-24 10:24:27 +00001912 ret = btrfs_issue_discard(stripe->dev->bdev,
1913 stripe->physical,
1914 stripe->length);
1915 if (!ret)
1916 discarded_bytes += stripe->length;
1917 else if (ret != -EOPNOTSUPP)
Jeff Mahoney79787ea2012-03-12 16:03:00 +01001918 break; /* Logic errors or -ENOMEM, or -EIO but I don't know how that could happen JDM */
Josef Bacikd5e20032011-08-04 14:52:27 +00001919
1920 /*
1921 * Just in case we get back EOPNOTSUPP for some reason,
1922 * just ignore the return value so we don't screw up
1923 * people calling discard_extent.
1924 */
1925 ret = 0;
Liu Hui1f3c79a2009-01-05 15:57:51 -05001926 }
Zhao Lei6e9606d2015-01-20 15:11:34 +08001927 btrfs_put_bbio(bbio);
Liu Hui1f3c79a2009-01-05 15:57:51 -05001928 }
Li Dongyang5378e602011-03-24 10:24:27 +00001929
1930 if (actual_bytes)
1931 *actual_bytes = discarded_bytes;
1932
Liu Hui1f3c79a2009-01-05 15:57:51 -05001933
David Woodhouse53b381b2013-01-29 18:40:14 -05001934 if (ret == -EOPNOTSUPP)
1935 ret = 0;
Liu Hui1f3c79a2009-01-05 15:57:51 -05001936 return ret;
Liu Hui1f3c79a2009-01-05 15:57:51 -05001937}
1938
Jeff Mahoney79787ea2012-03-12 16:03:00 +01001939/* Can return -ENOMEM */
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001940int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
1941 struct btrfs_root *root,
1942 u64 bytenr, u64 num_bytes, u64 parent,
Josef Bacikfcebe452014-05-13 17:30:47 -07001943 u64 root_objectid, u64 owner, u64 offset,
1944 int no_quota)
Zheng Yan31840ae2008-09-23 13:14:14 -04001945{
1946 int ret;
Arne Jansen66d7e7f2011-09-12 15:26:38 +02001947 struct btrfs_fs_info *fs_info = root->fs_info;
1948
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001949 BUG_ON(owner < BTRFS_FIRST_FREE_OBJECTID &&
1950 root_objectid == BTRFS_TREE_LOG_OBJECTID);
Zheng Yan31840ae2008-09-23 13:14:14 -04001951
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001952 if (owner < BTRFS_FIRST_FREE_OBJECTID) {
Arne Jansen66d7e7f2011-09-12 15:26:38 +02001953 ret = btrfs_add_delayed_tree_ref(fs_info, trans, bytenr,
1954 num_bytes,
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001955 parent, root_objectid, (int)owner,
Josef Bacikfcebe452014-05-13 17:30:47 -07001956 BTRFS_ADD_DELAYED_REF, NULL, no_quota);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001957 } else {
Arne Jansen66d7e7f2011-09-12 15:26:38 +02001958 ret = btrfs_add_delayed_data_ref(fs_info, trans, bytenr,
1959 num_bytes,
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001960 parent, root_objectid, owner, offset,
Josef Bacikfcebe452014-05-13 17:30:47 -07001961 BTRFS_ADD_DELAYED_REF, NULL, no_quota);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001962 }
Zheng Yan31840ae2008-09-23 13:14:14 -04001963 return ret;
1964}
1965
Chris Mason925baed2008-06-25 16:01:30 -04001966static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001967 struct btrfs_root *root,
Qu Wenruoc682f9b2015-03-17 16:59:47 +08001968 struct btrfs_delayed_ref_node *node,
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001969 u64 parent, u64 root_objectid,
1970 u64 owner, u64 offset, int refs_to_add,
1971 struct btrfs_delayed_extent_op *extent_op)
Chris Mason56bec292009-03-13 10:10:06 -04001972{
Josef Bacikfcebe452014-05-13 17:30:47 -07001973 struct btrfs_fs_info *fs_info = root->fs_info;
Chris Mason5caf2a02007-04-02 11:20:42 -04001974 struct btrfs_path *path;
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001975 struct extent_buffer *leaf;
Chris Mason234b63a2007-03-13 10:46:10 -04001976 struct btrfs_extent_item *item;
Josef Bacikfcebe452014-05-13 17:30:47 -07001977 struct btrfs_key key;
Qu Wenruoc682f9b2015-03-17 16:59:47 +08001978 u64 bytenr = node->bytenr;
1979 u64 num_bytes = node->num_bytes;
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001980 u64 refs;
1981 int ret;
Qu Wenruoc682f9b2015-03-17 16:59:47 +08001982 int no_quota = node->no_quota;
Chris Mason037e6392007-03-07 11:50:24 -05001983
Chris Mason5caf2a02007-04-02 11:20:42 -04001984 path = btrfs_alloc_path();
Chris Mason54aa1f42007-06-22 14:16:25 -04001985 if (!path)
1986 return -ENOMEM;
Chris Mason26b80032007-08-08 20:17:12 -04001987
Josef Bacikfcebe452014-05-13 17:30:47 -07001988 if (!is_fstree(root_objectid) || !root->fs_info->quota_enabled)
1989 no_quota = 1;
1990
Chris Mason3c12ac72008-04-21 12:01:38 -04001991 path->reada = 1;
Chris Masonb9473432009-03-13 11:00:37 -04001992 path->leave_spinning = 1;
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001993 /* this will setup the path even if it fails to insert the back ref */
Josef Bacikfcebe452014-05-13 17:30:47 -07001994 ret = insert_inline_extent_backref(trans, fs_info->extent_root, path,
1995 bytenr, num_bytes, parent,
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001996 root_objectid, owner, offset,
1997 refs_to_add, extent_op);
Qu Wenruo0ed47922015-04-16 16:55:08 +08001998 if ((ret < 0 && ret != -EAGAIN) || !ret)
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001999 goto out;
Josef Bacikfcebe452014-05-13 17:30:47 -07002000
2001 /*
2002 * Ok we had -EAGAIN which means we didn't have space to insert and
2003 * inline extent ref, so just update the reference count and add a
2004 * normal backref.
2005 */
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002006 leaf = path->nodes[0];
Josef Bacikfcebe452014-05-13 17:30:47 -07002007 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002008 item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
2009 refs = btrfs_extent_refs(leaf, item);
2010 btrfs_set_extent_refs(leaf, item, refs + refs_to_add);
2011 if (extent_op)
2012 __run_delayed_extent_op(extent_op, leaf, item);
Zheng Yan31840ae2008-09-23 13:14:14 -04002013
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002014 btrfs_mark_buffer_dirty(leaf);
David Sterbab3b4aa72011-04-21 01:20:15 +02002015 btrfs_release_path(path);
Chris Mason7bb86312007-12-11 09:25:06 -05002016
Chris Mason3c12ac72008-04-21 12:01:38 -04002017 path->reada = 1;
Chris Masonb9473432009-03-13 11:00:37 -04002018 path->leave_spinning = 1;
Chris Mason56bec292009-03-13 10:10:06 -04002019 /* now insert the actual backref */
Zheng Yan31840ae2008-09-23 13:14:14 -04002020 ret = insert_extent_backref(trans, root->fs_info->extent_root,
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002021 path, bytenr, parent, root_objectid,
2022 owner, offset, refs_to_add);
Jeff Mahoney79787ea2012-03-12 16:03:00 +01002023 if (ret)
2024 btrfs_abort_transaction(trans, root, ret);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002025out:
Chris Mason74493f72007-12-11 09:25:06 -05002026 btrfs_free_path(path);
Liu Bo30d133f2013-10-11 16:30:23 +08002027 return ret;
Chris Mason02217ed2007-03-02 16:08:05 -05002028}
2029
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002030static int run_delayed_data_ref(struct btrfs_trans_handle *trans,
2031 struct btrfs_root *root,
2032 struct btrfs_delayed_ref_node *node,
2033 struct btrfs_delayed_extent_op *extent_op,
2034 int insert_reserved)
Chris Masone9d0b132007-08-10 14:06:19 -04002035{
Chris Mason56bec292009-03-13 10:10:06 -04002036 int ret = 0;
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002037 struct btrfs_delayed_data_ref *ref;
2038 struct btrfs_key ins;
2039 u64 parent = 0;
2040 u64 ref_root = 0;
2041 u64 flags = 0;
Chris Mason56bec292009-03-13 10:10:06 -04002042
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002043 ins.objectid = node->bytenr;
2044 ins.offset = node->num_bytes;
2045 ins.type = BTRFS_EXTENT_ITEM_KEY;
Chris Mason56bec292009-03-13 10:10:06 -04002046
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002047 ref = btrfs_delayed_node_to_data_ref(node);
Liu Bo599c75e2013-07-16 19:03:36 +08002048 trace_run_delayed_data_ref(node, ref, node->action);
2049
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002050 if (node->type == BTRFS_SHARED_DATA_REF_KEY)
2051 parent = ref->parent;
Josef Bacikfcebe452014-05-13 17:30:47 -07002052 ref_root = ref->root;
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002053
2054 if (node->action == BTRFS_ADD_DELAYED_REF && insert_reserved) {
Josef Bacik3173a182013-03-07 14:22:04 -05002055 if (extent_op)
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002056 flags |= extent_op->flags_to_set;
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002057 ret = alloc_reserved_file_extent(trans, root,
2058 parent, ref_root, flags,
2059 ref->objectid, ref->offset,
2060 &ins, node->ref_mod);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002061 } else if (node->action == BTRFS_ADD_DELAYED_REF) {
Qu Wenruoc682f9b2015-03-17 16:59:47 +08002062 ret = __btrfs_inc_extent_ref(trans, root, node, parent,
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002063 ref_root, ref->objectid,
2064 ref->offset, node->ref_mod,
Qu Wenruoc682f9b2015-03-17 16:59:47 +08002065 extent_op);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002066 } else if (node->action == BTRFS_DROP_DELAYED_REF) {
Qu Wenruoc682f9b2015-03-17 16:59:47 +08002067 ret = __btrfs_free_extent(trans, root, node, parent,
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002068 ref_root, ref->objectid,
2069 ref->offset, node->ref_mod,
Qu Wenruoc682f9b2015-03-17 16:59:47 +08002070 extent_op);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002071 } else {
2072 BUG();
2073 }
Chris Mason56bec292009-03-13 10:10:06 -04002074 return ret;
2075}
2076
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002077static void __run_delayed_extent_op(struct btrfs_delayed_extent_op *extent_op,
2078 struct extent_buffer *leaf,
2079 struct btrfs_extent_item *ei)
2080{
2081 u64 flags = btrfs_extent_flags(leaf, ei);
2082 if (extent_op->update_flags) {
2083 flags |= extent_op->flags_to_set;
2084 btrfs_set_extent_flags(leaf, ei, flags);
2085 }
2086
2087 if (extent_op->update_key) {
2088 struct btrfs_tree_block_info *bi;
2089 BUG_ON(!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK));
2090 bi = (struct btrfs_tree_block_info *)(ei + 1);
2091 btrfs_set_tree_block_key(leaf, bi, &extent_op->key);
2092 }
2093}
2094
/*
 * Apply a delayed extent-item update (flags and/or tree block key) to the
 * on-disk extent item for @node.  Handles both skinny metadata items and
 * old-style fat extent items, falling back from the former to the latter.
 */
static int run_delayed_extent_op(struct btrfs_trans_handle *trans,
				 struct btrfs_root *root,
				 struct btrfs_delayed_ref_node *node,
				 struct btrfs_delayed_extent_op *extent_op)
{
	struct btrfs_key key;
	struct btrfs_path *path;
	struct btrfs_extent_item *ei;
	struct extent_buffer *leaf;
	u32 item_size;
	int ret;
	int err = 0;
	int metadata = !extent_op->is_data;

	if (trans->aborted)
		return 0;

	/* without SKINNY_METADATA, metadata uses fat extent items too */
	if (metadata && !btrfs_fs_incompat(root->fs_info, SKINNY_METADATA))
		metadata = 0;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	key.objectid = node->bytenr;

	if (metadata) {
		/* skinny items key on (bytenr, METADATA_ITEM, level) */
		key.type = BTRFS_METADATA_ITEM_KEY;
		key.offset = extent_op->level;
	} else {
		key.type = BTRFS_EXTENT_ITEM_KEY;
		key.offset = node->num_bytes;
	}

again:
	path->reada = 1;
	path->leave_spinning = 1;
	ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key,
				path, 0, 1);
	if (ret < 0) {
		err = ret;
		goto out;
	}
	if (ret > 0) {
		if (metadata) {
			/*
			 * The skinny item may not exist (e.g. converted fs);
			 * check the previous slot, then retry with the fat
			 * extent item key.
			 */
			if (path->slots[0] > 0) {
				path->slots[0]--;
				btrfs_item_key_to_cpu(path->nodes[0], &key,
						      path->slots[0]);
				if (key.objectid == node->bytenr &&
				    key.type == BTRFS_EXTENT_ITEM_KEY &&
				    key.offset == node->num_bytes)
					ret = 0;
			}
			if (ret > 0) {
				btrfs_release_path(path);
				metadata = 0;

				key.objectid = node->bytenr;
				key.offset = node->num_bytes;
				key.type = BTRFS_EXTENT_ITEM_KEY;
				goto again;
			}
		} else {
			/* extent item missing entirely: corruption */
			err = -EIO;
			goto out;
		}
	}

	leaf = path->nodes[0];
	item_size = btrfs_item_size_nr(leaf, path->slots[0]);
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
	if (item_size < sizeof(*ei)) {
		/* convert an old v0 extent item before touching it */
		ret = convert_extent_item_v0(trans, root->fs_info->extent_root,
					     path, (u64)-1, 0);
		if (ret < 0) {
			err = ret;
			goto out;
		}
		leaf = path->nodes[0];
		item_size = btrfs_item_size_nr(leaf, path->slots[0]);
	}
#endif
	BUG_ON(item_size < sizeof(*ei));
	ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
	__run_delayed_extent_op(extent_op, leaf, ei);

	btrfs_mark_buffer_dirty(leaf);
out:
	btrfs_free_path(path);
	return err;
}
2187
/*
 * Apply one delayed reference for a tree block: allocate the reserved
 * block, add a reference, or drop one, depending on the queued action.
 */
static int run_delayed_tree_ref(struct btrfs_trans_handle *trans,
				struct btrfs_root *root,
				struct btrfs_delayed_ref_node *node,
				struct btrfs_delayed_extent_op *extent_op,
				int insert_reserved)
{
	int ret = 0;
	struct btrfs_delayed_tree_ref *ref;
	struct btrfs_key ins;
	u64 parent = 0;
	u64 ref_root = 0;
	bool skinny_metadata = btrfs_fs_incompat(root->fs_info,
						 SKINNY_METADATA);

	ref = btrfs_delayed_node_to_tree_ref(node);
	trace_run_delayed_tree_ref(node, ref, node->action);

	/* shared refs record the parent tree block, keyed refs the root */
	if (node->type == BTRFS_SHARED_BLOCK_REF_KEY)
		parent = ref->parent;
	ref_root = ref->root;

	/* skinny items key on the level, fat items on the byte size */
	ins.objectid = node->bytenr;
	if (skinny_metadata) {
		ins.offset = ref->level;
		ins.type = BTRFS_METADATA_ITEM_KEY;
	} else {
		ins.offset = node->num_bytes;
		ins.type = BTRFS_EXTENT_ITEM_KEY;
	}

	/* tree block refs always change the count by exactly one */
	BUG_ON(node->ref_mod != 1);
	if (node->action == BTRFS_ADD_DELAYED_REF && insert_reserved) {
		/* first ref: turn the reservation into an extent item */
		BUG_ON(!extent_op || !extent_op->update_flags);
		ret = alloc_reserved_tree_block(trans, root,
						parent, ref_root,
						extent_op->flags_to_set,
						&extent_op->key,
						ref->level, &ins,
						node->no_quota);
	} else if (node->action == BTRFS_ADD_DELAYED_REF) {
		ret = __btrfs_inc_extent_ref(trans, root, node,
					     parent, ref_root,
					     ref->level, 0, 1,
					     extent_op);
	} else if (node->action == BTRFS_DROP_DELAYED_REF) {
		ret = __btrfs_free_extent(trans, root, node,
					  parent, ref_root,
					  ref->level, 0, 1, extent_op);
	} else {
		BUG();
	}
	return ret;
}
2241
Chris Mason56bec292009-03-13 10:10:06 -04002242/* helper function to actually process a single delayed ref entry */
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002243static int run_one_delayed_ref(struct btrfs_trans_handle *trans,
2244 struct btrfs_root *root,
2245 struct btrfs_delayed_ref_node *node,
2246 struct btrfs_delayed_extent_op *extent_op,
2247 int insert_reserved)
Chris Mason56bec292009-03-13 10:10:06 -04002248{
Jeff Mahoney79787ea2012-03-12 16:03:00 +01002249 int ret = 0;
2250
Josef Bacik857cc2f2013-10-07 15:21:08 -04002251 if (trans->aborted) {
2252 if (insert_reserved)
2253 btrfs_pin_extent(root, node->bytenr,
2254 node->num_bytes, 1);
Jeff Mahoney79787ea2012-03-12 16:03:00 +01002255 return 0;
Josef Bacik857cc2f2013-10-07 15:21:08 -04002256 }
Jeff Mahoney79787ea2012-03-12 16:03:00 +01002257
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002258 if (btrfs_delayed_ref_is_head(node)) {
Chris Mason56bec292009-03-13 10:10:06 -04002259 struct btrfs_delayed_ref_head *head;
2260 /*
2261 * we've hit the end of the chain and we were supposed
2262 * to insert this extent into the tree. But, it got
2263 * deleted before we ever needed to insert it, so all
2264 * we have to do is clean up the accounting
2265 */
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002266 BUG_ON(extent_op);
2267 head = btrfs_delayed_node_to_head(node);
Liu Bo599c75e2013-07-16 19:03:36 +08002268 trace_run_delayed_ref_head(node, head, node->action);
2269
Chris Mason56bec292009-03-13 10:10:06 -04002270 if (insert_reserved) {
Yan, Zhengf0486c62010-05-16 10:46:25 -04002271 btrfs_pin_extent(root, node->bytenr,
2272 node->num_bytes, 1);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002273 if (head->is_data) {
2274 ret = btrfs_del_csums(trans, root,
2275 node->bytenr,
2276 node->num_bytes);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002277 }
Chris Mason56bec292009-03-13 10:10:06 -04002278 }
Jeff Mahoney79787ea2012-03-12 16:03:00 +01002279 return ret;
Chris Mason56bec292009-03-13 10:10:06 -04002280 }
Josef Bacikeb099672009-02-12 09:27:38 -05002281
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002282 if (node->type == BTRFS_TREE_BLOCK_REF_KEY ||
2283 node->type == BTRFS_SHARED_BLOCK_REF_KEY)
2284 ret = run_delayed_tree_ref(trans, root, node, extent_op,
2285 insert_reserved);
2286 else if (node->type == BTRFS_EXTENT_DATA_REF_KEY ||
2287 node->type == BTRFS_SHARED_DATA_REF_KEY)
2288 ret = run_delayed_data_ref(trans, root, node, extent_op,
2289 insert_reserved);
2290 else
2291 BUG();
2292 return ret;
Chris Masone9d0b132007-08-10 14:06:19 -04002293}
2294
Qu Wenruoc6fc2452015-03-30 17:03:00 +08002295static inline struct btrfs_delayed_ref_node *
Chris Mason56bec292009-03-13 10:10:06 -04002296select_delayed_ref(struct btrfs_delayed_ref_head *head)
Chris Masona28ec192007-03-06 20:08:01 -05002297{
Filipe Mananacffc3372015-07-09 13:13:44 +01002298 struct btrfs_delayed_ref_node *ref;
2299
Qu Wenruoc6fc2452015-03-30 17:03:00 +08002300 if (list_empty(&head->ref_list))
2301 return NULL;
Josef Bacikd7df2c72014-01-23 09:21:38 -05002302
Filipe Mananacffc3372015-07-09 13:13:44 +01002303 /*
2304 * Select a delayed ref of type BTRFS_ADD_DELAYED_REF first.
2305 * This is to prevent a ref count from going down to zero, which deletes
2306 * the extent item from the extent tree, when there still are references
2307 * to add, which would fail because they would not find the extent item.
2308 */
2309 list_for_each_entry(ref, &head->ref_list, list) {
2310 if (ref->action == BTRFS_ADD_DELAYED_REF)
2311 return ref;
2312 }
2313
Qu Wenruoc6fc2452015-03-30 17:03:00 +08002314 return list_entry(head->ref_list.next, struct btrfs_delayed_ref_node,
2315 list);
Chris Mason56bec292009-03-13 10:10:06 -04002316}
2317
/*
 * Run up to @nr delayed ref entries for this transaction.
 *
 * Heads are taken one at a time with btrfs_select_ref_head() and each of
 * their queued refs is applied via run_one_delayed_ref().  Lock order is
 * delayed_refs->lock (tree level) then locked_ref->lock (per head); the
 * head mutex is held across processing via btrfs_delayed_ref_lock().
 *
 * Returns 0 on success or if called with an already aborted transaction.
 * Returns -ENOMEM or -EIO on failure and will abort the transaction.
 */
static noinline int __btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
					     struct btrfs_root *root,
					     unsigned long nr)
{
	struct btrfs_delayed_ref_root *delayed_refs;
	struct btrfs_delayed_ref_node *ref;
	struct btrfs_delayed_ref_head *locked_ref = NULL;
	struct btrfs_delayed_extent_op *extent_op;
	struct btrfs_fs_info *fs_info = root->fs_info;
	ktime_t start = ktime_get();
	int ret;
	unsigned long count = 0;
	unsigned long actual_count = 0;	/* non-head refs only, for avg runtime */
	int must_insert_reserved = 0;

	delayed_refs = &trans->transaction->delayed_refs;
	while (1) {
		if (!locked_ref) {
			if (count >= nr)
				break;

			spin_lock(&delayed_refs->lock);
			locked_ref = btrfs_select_ref_head(trans);
			if (!locked_ref) {
				spin_unlock(&delayed_refs->lock);
				break;
			}

			/* grab the lock that says we are going to process
			 * all the refs for this head */
			ret = btrfs_delayed_ref_lock(trans, locked_ref);
			spin_unlock(&delayed_refs->lock);
			/*
			 * we may have dropped the spin lock to get the head
			 * mutex lock, and that might have given someone else
			 * time to free the head.  If that's true, it has been
			 * removed from our list and we can move on.
			 */
			if (ret == -EAGAIN) {
				locked_ref = NULL;
				count++;
				continue;
			}
		}

		spin_lock(&locked_ref->lock);

		/*
		 * locked_ref is the head node, so we have to go one
		 * node back for any delayed ref updates
		 */
		ref = select_delayed_ref(locked_ref);

		/*
		 * Refs whose sequence number is still needed by a pending
		 * backref walk must not be run yet; put the head back and
		 * move on.
		 */
		if (ref && ref->seq &&
		    btrfs_check_delayed_seq(fs_info, delayed_refs, ref->seq)) {
			spin_unlock(&locked_ref->lock);
			btrfs_delayed_ref_unlock(locked_ref);
			spin_lock(&delayed_refs->lock);
			locked_ref->processing = 0;
			delayed_refs->num_heads_ready++;
			spin_unlock(&delayed_refs->lock);
			locked_ref = NULL;
			cond_resched();
			count++;
			continue;
		}

		/*
		 * record the must insert reserved flag before we
		 * drop the spin lock.
		 */
		must_insert_reserved = locked_ref->must_insert_reserved;
		locked_ref->must_insert_reserved = 0;

		extent_op = locked_ref->extent_op;
		locked_ref->extent_op = NULL;

		if (!ref) {


			/* All delayed refs have been processed, Go ahead
			 * and send the head node to run_one_delayed_ref,
			 * so that any accounting fixes can happen
			 */
			ref = &locked_ref->node;

			/* a fresh insert carries its own flags/key; the op
			 * is redundant in that case */
			if (extent_op && must_insert_reserved) {
				btrfs_free_delayed_extent_op(extent_op);
				extent_op = NULL;
			}

			if (extent_op) {
				spin_unlock(&locked_ref->lock);
				ret = run_delayed_extent_op(trans, root,
							    ref, extent_op);
				btrfs_free_delayed_extent_op(extent_op);

				if (ret) {
					/*
					 * Need to reset must_insert_reserved if
					 * there was an error so the abort stuff
					 * can cleanup the reserved space
					 * properly.
					 */
					if (must_insert_reserved)
						locked_ref->must_insert_reserved = 1;
					locked_ref->processing = 0;
					btrfs_debug(fs_info, "run_delayed_extent_op returned %d", ret);
					btrfs_delayed_ref_unlock(locked_ref);
					return ret;
				}
				continue;
			}

			/*
			 * Need to drop our head ref lock and re-acquire the
			 * delayed ref lock and then re-check to make sure
			 * nobody got added.
			 */
			spin_unlock(&locked_ref->lock);
			spin_lock(&delayed_refs->lock);
			spin_lock(&locked_ref->lock);
			if (!list_empty(&locked_ref->ref_list) ||
			    locked_ref->extent_op) {
				spin_unlock(&locked_ref->lock);
				spin_unlock(&delayed_refs->lock);
				continue;
			}
			ref->in_tree = 0;
			delayed_refs->num_heads--;
			rb_erase(&locked_ref->href_node,
				 &delayed_refs->href_root);
			spin_unlock(&delayed_refs->lock);
		} else {
			actual_count++;
			ref->in_tree = 0;
			list_del(&ref->list);
		}
		atomic_dec(&delayed_refs->num_entries);

		if (!btrfs_delayed_ref_is_head(ref)) {
			/*
			 * when we play the delayed ref, also correct the
			 * ref_mod on head
			 */
			switch (ref->action) {
			case BTRFS_ADD_DELAYED_REF:
			case BTRFS_ADD_DELAYED_EXTENT:
				locked_ref->node.ref_mod -= ref->ref_mod;
				break;
			case BTRFS_DROP_DELAYED_REF:
				locked_ref->node.ref_mod += ref->ref_mod;
				break;
			default:
				WARN_ON(1);
			}
		}
		spin_unlock(&locked_ref->lock);

		ret = run_one_delayed_ref(trans, root, ref, extent_op,
					  must_insert_reserved);

		btrfs_free_delayed_extent_op(extent_op);
		if (ret) {
			locked_ref->processing = 0;
			btrfs_delayed_ref_unlock(locked_ref);
			btrfs_put_delayed_ref(ref);
			btrfs_debug(fs_info, "run_one_delayed_ref returned %d", ret);
			return ret;
		}

		/*
		 * If this node is a head, that means all the refs in this head
		 * have been dealt with, and we will pick the next head to deal
		 * with, so we must unlock the head and drop it from the cluster
		 * list before we release it.
		 */
		if (btrfs_delayed_ref_is_head(ref)) {
			if (locked_ref->is_data &&
			    locked_ref->total_ref_mod < 0) {
				spin_lock(&delayed_refs->lock);
				delayed_refs->pending_csums -= ref->num_bytes;
				spin_unlock(&delayed_refs->lock);
			}
			btrfs_delayed_ref_unlock(locked_ref);
			locked_ref = NULL;
		}
		btrfs_put_delayed_ref(ref);
		count++;
		cond_resched();
	}

	/*
	 * We don't want to include ref heads since we can have empty ref heads
	 * and those will drastically skew our runtime down since we just do
	 * accounting, no actual extent tree updates.
	 */
	if (actual_count > 0) {
		u64 runtime = ktime_to_ns(ktime_sub(ktime_get(), start));
		u64 avg;

		/*
		 * We weigh the current average higher than our current runtime
		 * to avoid large swings in the average.
		 */
		spin_lock(&delayed_refs->lock);
		avg = fs_info->avg_delayed_ref_runtime * 3 + runtime;
		fs_info->avg_delayed_ref_runtime = avg >> 2;	/* div by 4 */
		spin_unlock(&delayed_refs->lock);
	}
	return 0;
}
2534
Arne Jansen709c0482011-09-12 12:22:57 +02002535#ifdef SCRAMBLE_DELAYED_REFS
2536/*
2537 * Normally delayed refs get processed in ascending bytenr order. This
2538 * correlates in most cases to the order added. To expose dependencies on this
2539 * order, we start to process the tree in the middle instead of the beginning
2540 */
2541static u64 find_middle(struct rb_root *root)
2542{
2543 struct rb_node *n = root->rb_node;
2544 struct btrfs_delayed_ref_node *entry;
2545 int alt = 1;
2546 u64 middle;
2547 u64 first = 0, last = 0;
2548
2549 n = rb_first(root);
2550 if (n) {
2551 entry = rb_entry(n, struct btrfs_delayed_ref_node, rb_node);
2552 first = entry->bytenr;
2553 }
2554 n = rb_last(root);
2555 if (n) {
2556 entry = rb_entry(n, struct btrfs_delayed_ref_node, rb_node);
2557 last = entry->bytenr;
2558 }
2559 n = root->rb_node;
2560
2561 while (n) {
2562 entry = rb_entry(n, struct btrfs_delayed_ref_node, rb_node);
2563 WARN_ON(!entry->in_tree);
2564
2565 middle = entry->bytenr;
2566
2567 if (alt)
2568 n = n->rb_left;
2569 else
2570 n = n->rb_right;
2571
2572 alt = 1 - alt;
2573 }
2574 return middle;
2575}
2576#endif
2577
Josef Bacik1be41b72013-06-12 13:56:06 -04002578static inline u64 heads_to_leaves(struct btrfs_root *root, u64 heads)
2579{
2580 u64 num_bytes;
2581
2582 num_bytes = heads * (sizeof(struct btrfs_extent_item) +
2583 sizeof(struct btrfs_extent_inline_ref));
2584 if (!btrfs_fs_incompat(root->fs_info, SKINNY_METADATA))
2585 num_bytes += heads * sizeof(struct btrfs_tree_block_info);
2586
2587 /*
2588 * We don't ever fill up leaves all the way so multiply by 2 just to be
2589 * closer to what we're really going to want to ouse.
2590 */
David Sterbaf8c269d2015-01-16 17:21:12 +01002591 return div_u64(num_bytes, BTRFS_LEAF_DATA_SIZE(root));
Josef Bacik1be41b72013-06-12 13:56:06 -04002592}
2593
Josef Bacik12621332015-02-03 07:50:16 -08002594/*
2595 * Takes the number of bytes to be csumm'ed and figures out how many leaves it
2596 * would require to store the csums for that many bytes.
2597 */
Chris Mason28f75a02015-02-04 06:59:29 -08002598u64 btrfs_csum_bytes_to_leaves(struct btrfs_root *root, u64 csum_bytes)
Josef Bacik12621332015-02-03 07:50:16 -08002599{
2600 u64 csum_size;
2601 u64 num_csums_per_leaf;
2602 u64 num_csums;
2603
2604 csum_size = BTRFS_LEAF_DATA_SIZE(root) - sizeof(struct btrfs_item);
2605 num_csums_per_leaf = div64_u64(csum_size,
2606 (u64)btrfs_super_csum_size(root->fs_info->super_copy));
2607 num_csums = div64_u64(csum_bytes, root->sectorsize);
2608 num_csums += num_csums_per_leaf - 1;
2609 num_csums = div64_u64(num_csums, num_csums_per_leaf);
2610 return num_csums;
2611}
2612
Josef Bacik0a2b2a82014-01-23 10:54:11 -05002613int btrfs_check_space_for_delayed_refs(struct btrfs_trans_handle *trans,
Josef Bacik1be41b72013-06-12 13:56:06 -04002614 struct btrfs_root *root)
2615{
2616 struct btrfs_block_rsv *global_rsv;
2617 u64 num_heads = trans->transaction->delayed_refs.num_heads_ready;
Josef Bacik12621332015-02-03 07:50:16 -08002618 u64 csum_bytes = trans->transaction->delayed_refs.pending_csums;
Josef Bacikcb723e42015-02-18 08:06:57 -08002619 u64 num_dirty_bgs = trans->transaction->num_dirty_bgs;
2620 u64 num_bytes, num_dirty_bgs_bytes;
Josef Bacik1be41b72013-06-12 13:56:06 -04002621 int ret = 0;
2622
2623 num_bytes = btrfs_calc_trans_metadata_size(root, 1);
2624 num_heads = heads_to_leaves(root, num_heads);
2625 if (num_heads > 1)
David Sterba707e8a02014-06-04 19:22:26 +02002626 num_bytes += (num_heads - 1) * root->nodesize;
Josef Bacik1be41b72013-06-12 13:56:06 -04002627 num_bytes <<= 1;
Chris Mason28f75a02015-02-04 06:59:29 -08002628 num_bytes += btrfs_csum_bytes_to_leaves(root, csum_bytes) * root->nodesize;
Josef Bacikcb723e42015-02-18 08:06:57 -08002629 num_dirty_bgs_bytes = btrfs_calc_trans_metadata_size(root,
2630 num_dirty_bgs);
Josef Bacik1be41b72013-06-12 13:56:06 -04002631 global_rsv = &root->fs_info->global_block_rsv;
2632
2633 /*
2634 * If we can't allocate any more chunks lets make sure we have _lots_ of
2635 * wiggle room since running delayed refs can create more delayed refs.
2636 */
Josef Bacikcb723e42015-02-18 08:06:57 -08002637 if (global_rsv->space_info->full) {
2638 num_dirty_bgs_bytes <<= 1;
Josef Bacik1be41b72013-06-12 13:56:06 -04002639 num_bytes <<= 1;
Josef Bacikcb723e42015-02-18 08:06:57 -08002640 }
Josef Bacik1be41b72013-06-12 13:56:06 -04002641
2642 spin_lock(&global_rsv->lock);
Josef Bacikcb723e42015-02-18 08:06:57 -08002643 if (global_rsv->reserved <= num_bytes + num_dirty_bgs_bytes)
Josef Bacik1be41b72013-06-12 13:56:06 -04002644 ret = 1;
2645 spin_unlock(&global_rsv->lock);
2646 return ret;
2647}
2648
Josef Bacik0a2b2a82014-01-23 10:54:11 -05002649int btrfs_should_throttle_delayed_refs(struct btrfs_trans_handle *trans,
2650 struct btrfs_root *root)
2651{
2652 struct btrfs_fs_info *fs_info = root->fs_info;
2653 u64 num_entries =
2654 atomic_read(&trans->transaction->delayed_refs.num_entries);
2655 u64 avg_runtime;
Chris Masona79b7d42014-05-22 16:18:52 -07002656 u64 val;
Josef Bacik0a2b2a82014-01-23 10:54:11 -05002657
2658 smp_mb();
2659 avg_runtime = fs_info->avg_delayed_ref_runtime;
Chris Masona79b7d42014-05-22 16:18:52 -07002660 val = num_entries * avg_runtime;
Josef Bacik0a2b2a82014-01-23 10:54:11 -05002661 if (num_entries * avg_runtime >= NSEC_PER_SEC)
2662 return 1;
Chris Masona79b7d42014-05-22 16:18:52 -07002663 if (val >= NSEC_PER_SEC / 2)
2664 return 2;
Josef Bacik0a2b2a82014-01-23 10:54:11 -05002665
2666 return btrfs_check_space_for_delayed_refs(trans, root);
2667}
2668
/* Work item for running delayed refs from the extent_workers workqueue. */
struct async_delayed_refs {
	struct btrfs_root *root;	/* root the worker joins a transaction on */
	int count;		/* how many refs to run; -1 means all (see btrfs_run_delayed_refs) */
	int error;		/* first error hit by the worker, 0 on success */
	int sync;		/* nonzero: submitter waits on @wait and frees; else worker frees */
	struct completion wait;	/* completed by the worker when sync is set */
	struct btrfs_work work;	/* workqueue linkage */
};
2677
2678static void delayed_ref_async_start(struct btrfs_work *work)
2679{
2680 struct async_delayed_refs *async;
2681 struct btrfs_trans_handle *trans;
2682 int ret;
2683
2684 async = container_of(work, struct async_delayed_refs, work);
2685
2686 trans = btrfs_join_transaction(async->root);
2687 if (IS_ERR(trans)) {
2688 async->error = PTR_ERR(trans);
2689 goto done;
2690 }
2691
2692 /*
2693 * trans->sync means that when we call end_transaciton, we won't
2694 * wait on delayed refs
2695 */
2696 trans->sync = true;
2697 ret = btrfs_run_delayed_refs(trans, async->root, async->count);
2698 if (ret)
2699 async->error = ret;
2700
2701 ret = btrfs_end_transaction(trans, async->root);
2702 if (ret && !async->error)
2703 async->error = ret;
2704done:
2705 if (async->sync)
2706 complete(&async->wait);
2707 else
2708 kfree(async);
2709}
2710
2711int btrfs_async_run_delayed_refs(struct btrfs_root *root,
2712 unsigned long count, int wait)
2713{
2714 struct async_delayed_refs *async;
2715 int ret;
2716
2717 async = kmalloc(sizeof(*async), GFP_NOFS);
2718 if (!async)
2719 return -ENOMEM;
2720
2721 async->root = root->fs_info->tree_root;
2722 async->count = count;
2723 async->error = 0;
2724 if (wait)
2725 async->sync = 1;
2726 else
2727 async->sync = 0;
2728 init_completion(&async->wait);
2729
Liu Bo9e0af232014-08-15 23:36:53 +08002730 btrfs_init_work(&async->work, btrfs_extent_refs_helper,
2731 delayed_ref_async_start, NULL, NULL);
Chris Masona79b7d42014-05-22 16:18:52 -07002732
2733 btrfs_queue_work(root->fs_info->extent_workers, &async->work);
2734
2735 if (wait) {
2736 wait_for_completion(&async->wait);
2737 ret = async->error;
2738 kfree(async);
2739 return ret;
2740 }
2741 return 0;
2742}
2743
/*
 * this starts processing the delayed reference count updates and
 * extent insertions we have queued up so far.  count can be
 * 0, which means to process everything in the tree at the start
 * of the run (but not newly added entries), or it can be some target
 * number you'd like to process.  count == (unsigned long)-1 means keep
 * retrying until the tree of ref heads is completely drained.
 *
 * Returns 0 on success or if called with an aborted transaction
 * Returns <0 on error and aborts the transaction
 */
int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
			   struct btrfs_root *root, unsigned long count)
{
	struct rb_node *node;
	struct btrfs_delayed_ref_root *delayed_refs;
	struct btrfs_delayed_ref_head *head;
	int ret;
	int run_all = count == (unsigned long)-1;

	/* We'll clean this up in btrfs_cleanup_transaction */
	if (trans->aborted)
		return 0;

	if (root == root->fs_info->extent_root)
		root = root->fs_info->tree_root;

	delayed_refs = &trans->transaction->delayed_refs;
	if (count == 0)
		count = atomic_read(&delayed_refs->num_entries) * 2;

again:
#ifdef SCRAMBLE_DELAYED_REFS
	delayed_refs->run_delayed_start = find_middle(&delayed_refs->root);
#endif
	ret = __btrfs_run_delayed_refs(trans, root, count);
	if (ret < 0) {
		btrfs_abort_transaction(trans, root, ret);
		return ret;
	}

	if (run_all) {
		if (!list_empty(&trans->new_bgs))
			btrfs_create_pending_block_groups(trans, root);

		spin_lock(&delayed_refs->lock);
		node = rb_first(&delayed_refs->href_root);
		if (!node) {
			/* tree fully drained — done */
			spin_unlock(&delayed_refs->lock);
			goto out;
		}
		count = (unsigned long)-1;

		while (node) {
			head = rb_entry(node, struct btrfs_delayed_ref_head,
					href_node);
			if (btrfs_delayed_ref_is_head(&head->node)) {
				struct btrfs_delayed_ref_node *ref;

				/* hold a ref so the head can't vanish while
				 * we block on its mutex below */
				ref = &head->node;
				atomic_inc(&ref->refs);

				spin_unlock(&delayed_refs->lock);
				/*
				 * Mutex was contended, block until it's
				 * released and try again
				 */
				mutex_lock(&head->mutex);
				mutex_unlock(&head->mutex);

				btrfs_put_delayed_ref(ref);
				cond_resched();
				goto again;
			} else {
				WARN_ON(1);
			}
			node = rb_next(node);
		}
		spin_unlock(&delayed_refs->lock);
		cond_resched();
		goto again;
	}
out:
	assert_qgroups_uptodate(trans);
	return 0;
}
2829
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002830int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans,
2831 struct btrfs_root *root,
2832 u64 bytenr, u64 num_bytes, u64 flags,
Josef Bacikb1c79e02013-05-09 13:49:30 -04002833 int level, int is_data)
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002834{
2835 struct btrfs_delayed_extent_op *extent_op;
2836 int ret;
2837
Miao Xie78a61842012-11-21 02:21:28 +00002838 extent_op = btrfs_alloc_delayed_extent_op();
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002839 if (!extent_op)
2840 return -ENOMEM;
2841
2842 extent_op->flags_to_set = flags;
2843 extent_op->update_flags = 1;
2844 extent_op->update_key = 0;
2845 extent_op->is_data = is_data ? 1 : 0;
Josef Bacikb1c79e02013-05-09 13:49:30 -04002846 extent_op->level = level;
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002847
Arne Jansen66d7e7f2011-09-12 15:26:38 +02002848 ret = btrfs_add_delayed_extent_op(root->fs_info, trans, bytenr,
2849 num_bytes, extent_op);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002850 if (ret)
Miao Xie78a61842012-11-21 02:21:28 +00002851 btrfs_free_delayed_extent_op(extent_op);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002852 return ret;
2853}
2854
/*
 * Check the delayed refs queued against @bytenr for a reference that does
 * not belong to (@root, @objectid, @offset).
 *
 * Returns 1 if such a cross reference exists, 0 if none was found (or no
 * delayed ref head exists for @bytenr), and -EAGAIN when the head mutex
 * was contended — in that case @path is released and the caller must
 * retry from scratch.
 */
static noinline int check_delayed_ref(struct btrfs_trans_handle *trans,
				      struct btrfs_root *root,
				      struct btrfs_path *path,
				      u64 objectid, u64 offset, u64 bytenr)
{
	struct btrfs_delayed_ref_head *head;
	struct btrfs_delayed_ref_node *ref;
	struct btrfs_delayed_data_ref *data_ref;
	struct btrfs_delayed_ref_root *delayed_refs;
	int ret = 0;

	delayed_refs = &trans->transaction->delayed_refs;
	spin_lock(&delayed_refs->lock);
	head = btrfs_find_delayed_ref_head(trans, bytenr);
	if (!head) {
		/* no delayed refs for this extent at all */
		spin_unlock(&delayed_refs->lock);
		return 0;
	}

	if (!mutex_trylock(&head->mutex)) {
		/* pin the head so it survives while we block on the mutex */
		atomic_inc(&head->node.refs);
		spin_unlock(&delayed_refs->lock);

		btrfs_release_path(path);

		/*
		 * Mutex was contended, block until it's released and let
		 * caller try again
		 */
		mutex_lock(&head->mutex);
		mutex_unlock(&head->mutex);
		btrfs_put_delayed_ref(&head->node);
		return -EAGAIN;
	}
	spin_unlock(&delayed_refs->lock);

	spin_lock(&head->lock);
	list_for_each_entry(ref, &head->ref_list, list) {
		/* If it's a shared ref we know a cross reference exists */
		if (ref->type != BTRFS_EXTENT_DATA_REF_KEY) {
			ret = 1;
			break;
		}

		data_ref = btrfs_delayed_node_to_data_ref(ref);

		/*
		 * If our ref doesn't match the one we're currently looking at
		 * then we have a cross reference.
		 */
		if (data_ref->root != root->root_key.objectid ||
		    data_ref->objectid != objectid ||
		    data_ref->offset != offset) {
			ret = 1;
			break;
		}
	}
	spin_unlock(&head->lock);
	mutex_unlock(&head->mutex);
	return ret;
}
2916
/*
 * Check the committed extent tree for references to @bytenr other than
 * the single inline data ref belonging to (@root, @objectid, @offset).
 *
 * Returns 0 when the extent item holds exactly our one inline data ref
 * (no cross reference), 1 when the item's shape or contents imply other
 * referencers exist (or the extent predates the last snapshot), -ENOENT
 * when no matching extent item is found, and <0 on search error.
 */
static noinline int check_committed_ref(struct btrfs_trans_handle *trans,
					struct btrfs_root *root,
					struct btrfs_path *path,
					u64 objectid, u64 offset, u64 bytenr)
{
	struct btrfs_root *extent_root = root->fs_info->extent_root;
	struct extent_buffer *leaf;
	struct btrfs_extent_data_ref *ref;
	struct btrfs_extent_inline_ref *iref;
	struct btrfs_extent_item *ei;
	struct btrfs_key key;
	u32 item_size;
	int ret;

	key.objectid = bytenr;
	key.offset = (u64)-1;
	key.type = BTRFS_EXTENT_ITEM_KEY;

	/* offset (u64)-1 never matches exactly; we land just past the item */
	ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
	if (ret < 0)
		goto out;
	BUG_ON(ret == 0); /* Corruption */

	ret = -ENOENT;
	if (path->slots[0] == 0)
		goto out;

	/* step back to the candidate extent item */
	path->slots[0]--;
	leaf = path->nodes[0];
	btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);

	if (key.objectid != bytenr || key.type != BTRFS_EXTENT_ITEM_KEY)
		goto out;

	/* from here on, any early exit means "cross reference exists" */
	ret = 1;
	item_size = btrfs_item_size_nr(leaf, path->slots[0]);
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
	if (item_size < sizeof(*ei)) {
		WARN_ON(item_size != sizeof(struct btrfs_extent_item_v0));
		goto out;
	}
#endif
	ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);

	/* anything beyond one inline data ref means other referencers */
	if (item_size != sizeof(*ei) +
	    btrfs_extent_inline_ref_size(BTRFS_EXTENT_DATA_REF_KEY))
		goto out;

	/* extents from before the last snapshot are shared with it */
	if (btrfs_extent_generation(leaf, ei) <=
	    btrfs_root_last_snapshot(&root->root_item))
		goto out;

	iref = (struct btrfs_extent_inline_ref *)(ei + 1);
	if (btrfs_extent_inline_ref_type(leaf, iref) !=
	    BTRFS_EXTENT_DATA_REF_KEY)
		goto out;

	/* the single inline ref must be exactly ours, with matching count */
	ref = (struct btrfs_extent_data_ref *)(&iref->offset);
	if (btrfs_extent_refs(leaf, ei) !=
	    btrfs_extent_data_ref_count(leaf, ref) ||
	    btrfs_extent_data_ref_root(leaf, ref) !=
	    root->root_key.objectid ||
	    btrfs_extent_data_ref_objectid(leaf, ref) != objectid ||
	    btrfs_extent_data_ref_offset(leaf, ref) != offset)
		goto out;

	ret = 0;
out:
	return ret;
}
2987
2988int btrfs_cross_ref_exist(struct btrfs_trans_handle *trans,
2989 struct btrfs_root *root,
2990 u64 objectid, u64 offset, u64 bytenr)
2991{
2992 struct btrfs_path *path;
2993 int ret;
2994 int ret2;
2995
2996 path = btrfs_alloc_path();
2997 if (!path)
2998 return -ENOENT;
2999
3000 do {
3001 ret = check_committed_ref(trans, root, path, objectid,
3002 offset, bytenr);
3003 if (ret && ret != -ENOENT)
3004 goto out;
3005
3006 ret2 = check_delayed_ref(trans, root, path, objectid,
3007 offset, bytenr);
3008 } while (ret2 == -EAGAIN);
3009
3010 if (ret2 && ret2 != -ENOENT) {
3011 ret = ret2;
3012 goto out;
3013 }
3014
3015 if (ret != -ENOENT || ret2 != -ENOENT)
3016 ret = 0;
3017out:
Yan Zhengf321e492008-07-30 09:26:11 -04003018 btrfs_free_path(path);
Yan, Zhengf0486c62010-05-16 10:46:25 -04003019 if (root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
3020 WARN_ON(ret > 0);
Yan Zhengf321e492008-07-30 09:26:11 -04003021 return ret;
3022}
3023
/*
 * Add or drop one backref for every extent referenced from @buf.
 *
 * For a leaf (level 0) this walks the file extent items and modifies data
 * extent refs; for a node it modifies refs on the child tree blocks.
 * @full_backref selects full backrefs (parent = buf->start) versus
 * shorthand refs keyed by the owning root; @inc chooses between taking
 * (btrfs_inc_extent_ref) and dropping (btrfs_free_extent) references.
 *
 * Returns 0 on success or the first error from the per-extent callback.
 */
static int __btrfs_mod_ref(struct btrfs_trans_handle *trans,
			   struct btrfs_root *root,
			   struct extent_buffer *buf,
			   int full_backref, int inc)
{
	u64 bytenr;
	u64 num_bytes;
	u64 parent;
	u64 ref_root;
	u32 nritems;
	struct btrfs_key key;
	struct btrfs_file_extent_item *fi;
	int i;
	int level;
	int ret = 0;
	/* either btrfs_inc_extent_ref or btrfs_free_extent, chosen below */
	int (*process_func)(struct btrfs_trans_handle *, struct btrfs_root *,
			    u64, u64, u64, u64, u64, u64, int);


	if (btrfs_test_is_dummy_root(root))
		return 0;

	ref_root = btrfs_header_owner(buf);
	nritems = btrfs_header_nritems(buf);
	level = btrfs_header_level(buf);

	/* non-shareable roots never track data extent refs from leaves */
	if (!test_bit(BTRFS_ROOT_REF_COWS, &root->state) && level == 0)
		return 0;

	if (inc)
		process_func = btrfs_inc_extent_ref;
	else
		process_func = btrfs_free_extent;

	if (full_backref)
		parent = buf->start;
	else
		parent = 0;

	for (i = 0; i < nritems; i++) {
		if (level == 0) {
			btrfs_item_key_to_cpu(buf, &key, i);
			if (key.type != BTRFS_EXTENT_DATA_KEY)
				continue;
			fi = btrfs_item_ptr(buf, i,
					    struct btrfs_file_extent_item);
			/* inline extents live in the leaf, no separate ref */
			if (btrfs_file_extent_type(buf, fi) ==
			    BTRFS_FILE_EXTENT_INLINE)
				continue;
			bytenr = btrfs_file_extent_disk_bytenr(buf, fi);
			/* bytenr == 0 means a hole, nothing to reference */
			if (bytenr == 0)
				continue;

			num_bytes = btrfs_file_extent_disk_num_bytes(buf, fi);
			/* key.offset becomes the logical file offset of the
			 * start of the on-disk extent, as backrefs expect */
			key.offset -= btrfs_file_extent_offset(buf, fi);
			ret = process_func(trans, root, bytenr, num_bytes,
					   parent, ref_root, key.objectid,
					   key.offset, 1);
			if (ret)
				goto fail;
		} else {
			bytenr = btrfs_node_blockptr(buf, i);
			num_bytes = root->nodesize;
			ret = process_func(trans, root, bytenr, num_bytes,
					   parent, ref_root, level - 1, 0,
					   1);
			if (ret)
				goto fail;
		}
	}
	return 0;
fail:
	return ret;
}
3098
/* Take a reference on every extent referenced from @buf. */
int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
		  struct extent_buffer *buf, int full_backref)
{
	return __btrfs_mod_ref(trans, root, buf, full_backref, 1);
}
Zheng Yan31840ae2008-09-23 13:14:14 -04003104
/* Drop a reference on every extent referenced from @buf. */
int btrfs_dec_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
		  struct extent_buffer *buf, int full_backref)
{
	return __btrfs_mod_ref(trans, root, buf, full_backref, 0);
}
3110
Chris Mason9078a3e2007-04-26 16:46:15 -04003111static int write_one_cache_group(struct btrfs_trans_handle *trans,
3112 struct btrfs_root *root,
3113 struct btrfs_path *path,
3114 struct btrfs_block_group_cache *cache)
3115{
3116 int ret;
Chris Mason9078a3e2007-04-26 16:46:15 -04003117 struct btrfs_root *extent_root = root->fs_info->extent_root;
Chris Mason5f39d392007-10-15 16:14:19 -04003118 unsigned long bi;
3119 struct extent_buffer *leaf;
Chris Mason9078a3e2007-04-26 16:46:15 -04003120
Chris Mason9078a3e2007-04-26 16:46:15 -04003121 ret = btrfs_search_slot(trans, extent_root, &cache->key, path, 0, 1);
Josef Bacikdf95e7f2014-12-12 16:02:20 -05003122 if (ret) {
3123 if (ret > 0)
3124 ret = -ENOENT;
Chris Mason54aa1f42007-06-22 14:16:25 -04003125 goto fail;
Josef Bacikdf95e7f2014-12-12 16:02:20 -05003126 }
Chris Mason5f39d392007-10-15 16:14:19 -04003127
3128 leaf = path->nodes[0];
3129 bi = btrfs_item_ptr_offset(leaf, path->slots[0]);
3130 write_extent_buffer(leaf, &cache->item, bi, sizeof(cache->item));
3131 btrfs_mark_buffer_dirty(leaf);
Chris Mason54aa1f42007-06-22 14:16:25 -04003132fail:
Filipe Manana24b89d02015-04-25 18:31:05 +01003133 btrfs_release_path(path);
Josef Bacikdf95e7f2014-12-12 16:02:20 -05003134 return ret;
Chris Mason9078a3e2007-04-26 16:46:15 -04003135
3136}
3137
Yan Zheng4a8c9a62009-07-22 10:07:05 -04003138static struct btrfs_block_group_cache *
3139next_block_group(struct btrfs_root *root,
3140 struct btrfs_block_group_cache *cache)
3141{
3142 struct rb_node *node;
Filipe Manana292cbd52014-11-26 15:28:50 +00003143
Yan Zheng4a8c9a62009-07-22 10:07:05 -04003144 spin_lock(&root->fs_info->block_group_cache_lock);
Filipe Manana292cbd52014-11-26 15:28:50 +00003145
3146 /* If our block group was removed, we need a full search. */
3147 if (RB_EMPTY_NODE(&cache->cache_node)) {
3148 const u64 next_bytenr = cache->key.objectid + cache->key.offset;
3149
3150 spin_unlock(&root->fs_info->block_group_cache_lock);
3151 btrfs_put_block_group(cache);
3152 cache = btrfs_lookup_first_block_group(root->fs_info,
3153 next_bytenr);
3154 return cache;
3155 }
Yan Zheng4a8c9a62009-07-22 10:07:05 -04003156 node = rb_next(&cache->cache_node);
3157 btrfs_put_block_group(cache);
3158 if (node) {
3159 cache = rb_entry(node, struct btrfs_block_group_cache,
3160 cache_node);
Josef Bacik11dfe352009-11-13 20:12:59 +00003161 btrfs_get_block_group(cache);
Yan Zheng4a8c9a62009-07-22 10:07:05 -04003162 } else
3163 cache = NULL;
3164 spin_unlock(&root->fs_info->block_group_cache_lock);
3165 return cache;
3166}
3167
/*
 * Prepare the free space cache inode of @block_group for this transaction:
 * look up (or create) the cache inode, truncate any stale contents, and
 * preallocate room for the new cache.  On exit block_group->disk_cache_state
 * is set to BTRFS_DC_SETUP (cache will be written), BTRFS_DC_WRITTEN
 * (nothing to write) or BTRFS_DC_ERROR.
 *
 * Returns 0 in most cases even when caching is skipped; a negative error
 * only for real failures (the transaction may have been aborted then).
 */
static int cache_save_setup(struct btrfs_block_group_cache *block_group,
			    struct btrfs_trans_handle *trans,
			    struct btrfs_path *path)
{
	struct btrfs_root *root = block_group->fs_info->tree_root;
	struct inode *inode = NULL;
	u64 alloc_hint = 0;
	int dcs = BTRFS_DC_ERROR;
	u64 num_pages = 0;
	int retries = 0;
	int ret = 0;

	/*
	 * If this block group is smaller than 100 megs don't bother caching the
	 * block group.
	 */
	if (block_group->key.offset < (100 * 1024 * 1024)) {
		spin_lock(&block_group->lock);
		block_group->disk_cache_state = BTRFS_DC_WRITTEN;
		spin_unlock(&block_group->lock);
		return 0;
	}

	/* an aborted transaction cannot create or update the cache inode */
	if (trans->aborted)
		return 0;
again:
	inode = lookup_free_space_inode(root, block_group, path);
	if (IS_ERR(inode) && PTR_ERR(inode) != -ENOENT) {
		ret = PTR_ERR(inode);
		btrfs_release_path(path);
		goto out;
	}

	if (IS_ERR(inode)) {
		/* -ENOENT: no cache inode yet, create one and retry once */
		BUG_ON(retries);
		retries++;

		if (block_group->ro)
			goto out_free;

		ret = create_free_space_inode(root, trans, block_group, path);
		if (ret)
			goto out_free;
		goto again;
	}

	/* We've already setup this transaction, go ahead and exit */
	if (block_group->cache_generation == trans->transid &&
	    i_size_read(inode)) {
		dcs = BTRFS_DC_SETUP;
		goto out_put;
	}

	/*
	 * We want to set the generation to 0, that way if anything goes wrong
	 * from here on out we know not to trust this cache when we load up next
	 * time.
	 */
	BTRFS_I(inode)->generation = 0;
	ret = btrfs_update_inode(trans, root, inode);
	if (ret) {
		/*
		 * So theoretically we could recover from this, simply set the
		 * super cache generation to 0 so we know to invalidate the
		 * cache, but then we'd have to keep track of the block groups
		 * that fail this way so we know we _have_ to reset this cache
		 * before the next commit or risk reading stale cache.  So to
		 * limit our exposure to horrible edge cases lets just abort the
		 * transaction, this only happens in really bad situations
		 * anyway.
		 */
		btrfs_abort_transaction(trans, root, ret);
		goto out_put;
	}
	WARN_ON(ret);

	/* drop the old cache contents before preallocating new space */
	if (i_size_read(inode) > 0) {
		ret = btrfs_check_trunc_cache_free_space(root,
					&root->fs_info->global_block_rsv);
		if (ret)
			goto out_put;

		ret = btrfs_truncate_free_space_cache(root, trans, NULL, inode);
		if (ret)
			goto out_put;
	}

	spin_lock(&block_group->lock);
	if (block_group->cached != BTRFS_CACHE_FINISHED ||
	    !btrfs_test_opt(root, SPACE_CACHE)) {
		/*
		 * don't bother trying to write stuff out _if_
		 * a) we're not cached,
		 * b) we're with nospace_cache mount option.
		 */
		dcs = BTRFS_DC_WRITTEN;
		spin_unlock(&block_group->lock);
		goto out_put;
	}
	spin_unlock(&block_group->lock);

	/*
	 * Try to preallocate enough space based on how big the block group is.
	 * Keep in mind this has to include any pinned space which could end up
	 * taking up quite a bit since it's not folded into the other space
	 * cache.
	 */
	num_pages = div_u64(block_group->key.offset, 256 * 1024 * 1024);
	if (!num_pages)
		num_pages = 1;

	/* 16 pages per 256M of block group, converted to bytes */
	num_pages *= 16;
	num_pages *= PAGE_CACHE_SIZE;

	ret = btrfs_check_data_free_space(inode, num_pages, num_pages);
	if (ret)
		goto out_put;

	ret = btrfs_prealloc_file_range_trans(inode, trans, 0, 0, num_pages,
					      num_pages, num_pages,
					      &alloc_hint);
	if (!ret)
		dcs = BTRFS_DC_SETUP;
	btrfs_free_reserved_data_space(inode, num_pages);

out_put:
	iput(inode);
out_free:
	btrfs_release_path(path);
out:
	spin_lock(&block_group->lock);
	/* only trust the cache generation if setup fully succeeded */
	if (!ret && dcs == BTRFS_DC_SETUP)
		block_group->cache_generation = trans->transid;
	block_group->disk_cache_state = dcs;
	spin_unlock(&block_group->lock);

	return ret;
}
3306
Josef Bacikdcdf7f62015-03-02 16:37:31 -05003307int btrfs_setup_space_cache(struct btrfs_trans_handle *trans,
3308 struct btrfs_root *root)
3309{
3310 struct btrfs_block_group_cache *cache, *tmp;
3311 struct btrfs_transaction *cur_trans = trans->transaction;
3312 struct btrfs_path *path;
3313
3314 if (list_empty(&cur_trans->dirty_bgs) ||
3315 !btrfs_test_opt(root, SPACE_CACHE))
3316 return 0;
3317
3318 path = btrfs_alloc_path();
3319 if (!path)
3320 return -ENOMEM;
3321
3322 /* Could add new block groups, use _safe just in case */
3323 list_for_each_entry_safe(cache, tmp, &cur_trans->dirty_bgs,
3324 dirty_list) {
3325 if (cache->disk_cache_state == BTRFS_DC_CLEAR)
3326 cache_save_setup(cache, trans, path);
3327 }
3328
3329 btrfs_free_path(path);
3330 return 0;
3331}
3332
Chris Mason1bbc6212015-04-06 12:46:08 -07003333/*
3334 * transaction commit does final block group cache writeback during a
3335 * critical section where nothing is allowed to change the FS. This is
3336 * required in order for the cache to actually match the block group,
3337 * but can introduce a lot of latency into the commit.
3338 *
3339 * So, btrfs_start_dirty_block_groups is here to kick off block group
3340 * cache IO. There's a chance we'll have to redo some of it if the
3341 * block group changes again during the commit, but it greatly reduces
3342 * the commit latency by getting rid of the easy block groups while
3343 * we're still allowing others to join the commit.
3344 */
/*
 * Write out dirty block group items and space caches before the commit
 * critical section (see the comment above).  Works on a private snapshot
 * of cur_trans->dirty_bgs; anything re-dirtied concurrently is picked up
 * either by the single retry loop here or later during commit.
 *
 * Returns 0 on success or a negative error (the transaction is aborted
 * for unexpected write failures).
 */
int btrfs_start_dirty_block_groups(struct btrfs_trans_handle *trans,
				   struct btrfs_root *root)
{
	struct btrfs_block_group_cache *cache;
	struct btrfs_transaction *cur_trans = trans->transaction;
	int ret = 0;
	int should_put;
	struct btrfs_path *path = NULL;
	LIST_HEAD(dirty);
	struct list_head *io = &cur_trans->io_bgs;
	int num_started = 0;
	int loops = 0;

	/* snapshot the dirty list so we can work without the lock held */
	spin_lock(&cur_trans->dirty_bgs_lock);
	if (list_empty(&cur_trans->dirty_bgs)) {
		spin_unlock(&cur_trans->dirty_bgs_lock);
		return 0;
	}
	list_splice_init(&cur_trans->dirty_bgs, &dirty);
	spin_unlock(&cur_trans->dirty_bgs_lock);

again:
	/*
	 * make sure all the block groups on our dirty list actually
	 * exist
	 */
	btrfs_create_pending_block_groups(trans, root);

	if (!path) {
		path = btrfs_alloc_path();
		if (!path)
			return -ENOMEM;
	}

	/*
	 * cache_write_mutex is here only to save us from balance or automatic
	 * removal of empty block groups deleting this block group while we are
	 * writing out the cache
	 */
	mutex_lock(&trans->transaction->cache_write_mutex);
	while (!list_empty(&dirty)) {
		cache = list_first_entry(&dirty,
					 struct btrfs_block_group_cache,
					 dirty_list);
		/*
		 * this can happen if something re-dirties a block
		 * group that is already under IO.  Just wait for it to
		 * finish and then do it all again
		 */
		if (!list_empty(&cache->io_list)) {
			list_del_init(&cache->io_list);
			btrfs_wait_cache_io(root, trans, cache,
					    &cache->io_ctl, path,
					    cache->key.objectid);
			btrfs_put_block_group(cache);
		}


		/*
		 * btrfs_wait_cache_io uses the cache->dirty_list to decide
		 * if it should update the cache_state.  Don't delete
		 * until after we wait.
		 *
		 * Since we're not running in the commit critical section
		 * we need the dirty_bgs_lock to protect from update_block_group
		 */
		spin_lock(&cur_trans->dirty_bgs_lock);
		list_del_init(&cache->dirty_list);
		spin_unlock(&cur_trans->dirty_bgs_lock);

		should_put = 1;

		cache_save_setup(cache, trans, path);

		if (cache->disk_cache_state == BTRFS_DC_SETUP) {
			cache->io_ctl.inode = NULL;
			ret = btrfs_write_out_cache(root, trans, cache, path);
			if (ret == 0 && cache->io_ctl.inode) {
				num_started++;
				should_put = 0;

				/*
				 * the cache_write_mutex is protecting
				 * the io_list
				 */
				list_add_tail(&cache->io_list, io);
			} else {
				/*
				 * if we failed to write the cache, the
				 * generation will be bad and life goes on
				 */
				ret = 0;
			}
		}
		if (!ret) {
			ret = write_one_cache_group(trans, root, path, cache);
			/*
			 * Our block group might still be attached to the list
			 * of new block groups in the transaction handle of some
			 * other task (struct btrfs_trans_handle->new_bgs). This
			 * means its block group item isn't yet in the extent
			 * tree. If this happens ignore the error, as we will
			 * try again later in the critical section of the
			 * transaction commit.
			 */
			if (ret == -ENOENT) {
				ret = 0;
				spin_lock(&cur_trans->dirty_bgs_lock);
				if (list_empty(&cache->dirty_list)) {
					list_add_tail(&cache->dirty_list,
						      &cur_trans->dirty_bgs);
					btrfs_get_block_group(cache);
				}
				spin_unlock(&cur_trans->dirty_bgs_lock);
			} else if (ret) {
				btrfs_abort_transaction(trans, root, ret);
			}
		}

		/* if its not on the io list, we need to put the block group */
		if (should_put)
			btrfs_put_block_group(cache);

		if (ret)
			break;

		/*
		 * Avoid blocking other tasks for too long. It might even save
		 * us from writing caches for block groups that are going to be
		 * removed.
		 */
		mutex_unlock(&trans->transaction->cache_write_mutex);
		mutex_lock(&trans->transaction->cache_write_mutex);
	}
	mutex_unlock(&trans->transaction->cache_write_mutex);

	/*
	 * go through delayed refs for all the stuff we've just kicked off
	 * and then loop back (just once)
	 */
	ret = btrfs_run_delayed_refs(trans, root, 0);
	if (!ret && loops == 0) {
		loops++;
		spin_lock(&cur_trans->dirty_bgs_lock);
		list_splice_init(&cur_trans->dirty_bgs, &dirty);
		/*
		 * dirty_bgs_lock protects us from concurrent block group
		 * deletes too (not just cache_write_mutex).
		 */
		if (!list_empty(&dirty)) {
			spin_unlock(&cur_trans->dirty_bgs_lock);
			goto again;
		}
		spin_unlock(&cur_trans->dirty_bgs_lock);
	}

	btrfs_free_path(path);
	return ret;
}
3504
/*
 * Final write-out of dirty block group items and space caches, run inside
 * the transaction commit critical section (no concurrent modifiers, hence
 * no dirty_bgs_lock needed).  Waits for all cache IO started here before
 * returning.
 *
 * Returns 0 on success or a negative error (the transaction is aborted
 * on write failure of a block group item).
 */
int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
				   struct btrfs_root *root)
{
	struct btrfs_block_group_cache *cache;
	struct btrfs_transaction *cur_trans = trans->transaction;
	int ret = 0;
	int should_put;
	struct btrfs_path *path;
	struct list_head *io = &cur_trans->io_bgs;
	int num_started = 0;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	/*
	 * We don't need the lock here since we are protected by the transaction
	 * commit.  We want to do the cache_save_setup first and then run the
	 * delayed refs to make sure we have the best chance at doing this all
	 * in one shot.
	 */
	while (!list_empty(&cur_trans->dirty_bgs)) {
		cache = list_first_entry(&cur_trans->dirty_bgs,
					 struct btrfs_block_group_cache,
					 dirty_list);

		/*
		 * this can happen if cache_save_setup re-dirties a block
		 * group that is already under IO.  Just wait for it to
		 * finish and then do it all again
		 */
		if (!list_empty(&cache->io_list)) {
			list_del_init(&cache->io_list);
			btrfs_wait_cache_io(root, trans, cache,
					    &cache->io_ctl, path,
					    cache->key.objectid);
			btrfs_put_block_group(cache);
		}

		/*
		 * don't remove from the dirty list until after we've waited
		 * on any pending IO
		 */
		list_del_init(&cache->dirty_list);
		should_put = 1;

		cache_save_setup(cache, trans, path);

		/* once an error is hit, skip further ref processing */
		if (!ret)
			ret = btrfs_run_delayed_refs(trans, root, (unsigned long) -1);

		if (!ret && cache->disk_cache_state == BTRFS_DC_SETUP) {
			cache->io_ctl.inode = NULL;
			ret = btrfs_write_out_cache(root, trans, cache, path);
			if (ret == 0 && cache->io_ctl.inode) {
				num_started++;
				should_put = 0;
				list_add_tail(&cache->io_list, io);
			} else {
				/*
				 * if we failed to write the cache, the
				 * generation will be bad and life goes on
				 */
				ret = 0;
			}
		}
		if (!ret) {
			ret = write_one_cache_group(trans, root, path, cache);
			if (ret)
				btrfs_abort_transaction(trans, root, ret);
		}

		/* if its not on the io list, we need to put the block group */
		if (should_put)
			btrfs_put_block_group(cache);
	}

	/* wait for all the cache IO kicked off above to finish */
	while (!list_empty(io)) {
		cache = list_first_entry(io, struct btrfs_block_group_cache,
					 io_list);
		list_del_init(&cache->io_list);
		btrfs_wait_cache_io(root, trans, cache,
				    &cache->io_ctl, path, cache->key.objectid);
		btrfs_put_block_group(cache);
	}

	btrfs_free_path(path);
	return ret;
}
3594
Yan Zhengd2fb3432008-12-11 16:30:39 -05003595int btrfs_extent_readonly(struct btrfs_root *root, u64 bytenr)
3596{
3597 struct btrfs_block_group_cache *block_group;
3598 int readonly = 0;
3599
3600 block_group = btrfs_lookup_block_group(root->fs_info, bytenr);
3601 if (!block_group || block_group->ro)
3602 readonly = 1;
3603 if (block_group)
Chris Masonfa9c0d792009-04-03 09:47:43 -04003604 btrfs_put_block_group(block_group);
Yan Zhengd2fb3432008-12-11 16:30:39 -05003605 return readonly;
3606}
3607
Jeff Mahoney6ab0a202013-11-01 13:07:04 -04003608static const char *alloc_name(u64 flags)
3609{
3610 switch (flags) {
3611 case BTRFS_BLOCK_GROUP_METADATA|BTRFS_BLOCK_GROUP_DATA:
3612 return "mixed";
3613 case BTRFS_BLOCK_GROUP_METADATA:
3614 return "metadata";
3615 case BTRFS_BLOCK_GROUP_DATA:
3616 return "data";
3617 case BTRFS_BLOCK_GROUP_SYSTEM:
3618 return "system";
3619 default:
3620 WARN_ON(1);
3621 return "invalid-combination";
3622 };
3623}
3624
Chris Mason593060d2008-03-25 16:50:33 -04003625static int update_space_info(struct btrfs_fs_info *info, u64 flags,
3626 u64 total_bytes, u64 bytes_used,
3627 struct btrfs_space_info **space_info)
3628{
3629 struct btrfs_space_info *found;
Yan, Zhengb742bb82010-05-16 10:46:24 -04003630 int i;
3631 int factor;
Josef Bacikb150a4f2013-06-19 15:00:04 -04003632 int ret;
Yan, Zhengb742bb82010-05-16 10:46:24 -04003633
3634 if (flags & (BTRFS_BLOCK_GROUP_DUP | BTRFS_BLOCK_GROUP_RAID1 |
3635 BTRFS_BLOCK_GROUP_RAID10))
3636 factor = 2;
3637 else
3638 factor = 1;
Chris Mason593060d2008-03-25 16:50:33 -04003639
3640 found = __find_space_info(info, flags);
3641 if (found) {
Josef Bacik25179202008-10-29 14:49:05 -04003642 spin_lock(&found->lock);
Chris Mason593060d2008-03-25 16:50:33 -04003643 found->total_bytes += total_bytes;
Josef Bacik89a55892010-10-14 14:52:27 -04003644 found->disk_total += total_bytes * factor;
Chris Mason593060d2008-03-25 16:50:33 -04003645 found->bytes_used += bytes_used;
Yan, Zhengb742bb82010-05-16 10:46:24 -04003646 found->disk_used += bytes_used * factor;
Filipe Manana2e6e5182015-05-12 00:28:11 +01003647 if (total_bytes > 0)
3648 found->full = 0;
Josef Bacik25179202008-10-29 14:49:05 -04003649 spin_unlock(&found->lock);
Chris Mason593060d2008-03-25 16:50:33 -04003650 *space_info = found;
3651 return 0;
3652 }
Yan Zhengc146afa2008-11-12 14:34:12 -05003653 found = kzalloc(sizeof(*found), GFP_NOFS);
Chris Mason593060d2008-03-25 16:50:33 -04003654 if (!found)
3655 return -ENOMEM;
3656
Tejun Heo908c7f12014-09-08 09:51:29 +09003657 ret = percpu_counter_init(&found->total_bytes_pinned, 0, GFP_KERNEL);
Josef Bacikb150a4f2013-06-19 15:00:04 -04003658 if (ret) {
3659 kfree(found);
3660 return ret;
3661 }
3662
Jeff Mahoneyc1895442014-05-27 12:59:57 -04003663 for (i = 0; i < BTRFS_NR_RAID_TYPES; i++)
Yan, Zhengb742bb82010-05-16 10:46:24 -04003664 INIT_LIST_HEAD(&found->block_groups[i]);
Josef Bacik80eb2342008-10-29 14:49:05 -04003665 init_rwsem(&found->groups_sem);
Josef Bacik0f9dd462008-09-23 13:14:11 -04003666 spin_lock_init(&found->lock);
Ilya Dryomov52ba6922012-01-16 22:04:47 +02003667 found->flags = flags & BTRFS_BLOCK_GROUP_TYPE_MASK;
Chris Mason593060d2008-03-25 16:50:33 -04003668 found->total_bytes = total_bytes;
Josef Bacik89a55892010-10-14 14:52:27 -04003669 found->disk_total = total_bytes * factor;
Chris Mason593060d2008-03-25 16:50:33 -04003670 found->bytes_used = bytes_used;
Yan, Zhengb742bb82010-05-16 10:46:24 -04003671 found->disk_used = bytes_used * factor;
Chris Mason593060d2008-03-25 16:50:33 -04003672 found->bytes_pinned = 0;
Zheng Yane8569812008-09-26 10:05:48 -04003673 found->bytes_reserved = 0;
Yan Zhengc146afa2008-11-12 14:34:12 -05003674 found->bytes_readonly = 0;
Yan, Zhengf0486c62010-05-16 10:46:25 -04003675 found->bytes_may_use = 0;
Filipe Manana2e6e5182015-05-12 00:28:11 +01003676 if (total_bytes > 0)
3677 found->full = 0;
3678 else
3679 found->full = 1;
Chris Mason0e4f8f82011-04-15 16:05:44 -04003680 found->force_alloc = CHUNK_ALLOC_NO_FORCE;
Josef Bacik6d741192011-04-11 20:20:11 -04003681 found->chunk_alloc = 0;
Josef Bacikfdb5eff2011-06-07 16:07:44 -04003682 found->flush = 0;
3683 init_waitqueue_head(&found->wait);
Josef Bacik633c0aa2014-10-31 09:49:34 -04003684 INIT_LIST_HEAD(&found->ro_bgs);
Jeff Mahoney6ab0a202013-11-01 13:07:04 -04003685
3686 ret = kobject_init_and_add(&found->kobj, &space_info_ktype,
3687 info->space_info_kobj, "%s",
3688 alloc_name(found->flags));
3689 if (ret) {
3690 kfree(found);
3691 return ret;
3692 }
3693
Chris Mason593060d2008-03-25 16:50:33 -04003694 *space_info = found;
Chris Mason4184ea72009-03-10 12:39:20 -04003695 list_add_rcu(&found->list, &info->space_info);
Li Zefanb4d7c3c2012-07-09 20:21:07 -06003696 if (flags & BTRFS_BLOCK_GROUP_DATA)
3697 info->data_sinfo = found;
Jeff Mahoney6ab0a202013-11-01 13:07:04 -04003698
3699 return ret;
Chris Mason593060d2008-03-25 16:50:33 -04003700}
3701
Chris Mason8790d502008-04-03 16:29:03 -04003702static void set_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags)
3703{
Ilya Dryomov899c81e2012-03-27 17:09:16 +03003704 u64 extra_flags = chunk_to_extended(flags) &
3705 BTRFS_EXTENDED_PROFILE_MASK;
Ilya Dryomova46d11a2012-01-16 22:04:47 +02003706
Miao Xiede98ced2013-01-29 10:13:12 +00003707 write_seqlock(&fs_info->profiles_lock);
Ilya Dryomova46d11a2012-01-16 22:04:47 +02003708 if (flags & BTRFS_BLOCK_GROUP_DATA)
3709 fs_info->avail_data_alloc_bits |= extra_flags;
3710 if (flags & BTRFS_BLOCK_GROUP_METADATA)
3711 fs_info->avail_metadata_alloc_bits |= extra_flags;
3712 if (flags & BTRFS_BLOCK_GROUP_SYSTEM)
3713 fs_info->avail_system_alloc_bits |= extra_flags;
Miao Xiede98ced2013-01-29 10:13:12 +00003714 write_sequnlock(&fs_info->profiles_lock);
Chris Mason8790d502008-04-03 16:29:03 -04003715}
Chris Mason593060d2008-03-25 16:50:33 -04003716
Ilya Dryomova46d11a2012-01-16 22:04:47 +02003717/*
Ilya Dryomovfc67c452012-03-27 17:09:17 +03003718 * returns target flags in extended format or 0 if restripe for this
3719 * chunk_type is not in progress
Ilya Dryomovc6664b42012-04-12 16:03:56 -04003720 *
3721 * should be called with either volume_mutex or balance_lock held
Ilya Dryomovfc67c452012-03-27 17:09:17 +03003722 */
3723static u64 get_restripe_target(struct btrfs_fs_info *fs_info, u64 flags)
3724{
3725 struct btrfs_balance_control *bctl = fs_info->balance_ctl;
3726 u64 target = 0;
3727
Ilya Dryomovfc67c452012-03-27 17:09:17 +03003728 if (!bctl)
3729 return 0;
3730
3731 if (flags & BTRFS_BLOCK_GROUP_DATA &&
3732 bctl->data.flags & BTRFS_BALANCE_ARGS_CONVERT) {
3733 target = BTRFS_BLOCK_GROUP_DATA | bctl->data.target;
3734 } else if (flags & BTRFS_BLOCK_GROUP_SYSTEM &&
3735 bctl->sys.flags & BTRFS_BALANCE_ARGS_CONVERT) {
3736 target = BTRFS_BLOCK_GROUP_SYSTEM | bctl->sys.target;
3737 } else if (flags & BTRFS_BLOCK_GROUP_METADATA &&
3738 bctl->meta.flags & BTRFS_BALANCE_ARGS_CONVERT) {
3739 target = BTRFS_BLOCK_GROUP_METADATA | bctl->meta.target;
3740 }
3741
3742 return target;
3743}
3744
3745/*
Ilya Dryomova46d11a2012-01-16 22:04:47 +02003746 * @flags: available profiles in extended format (see ctree.h)
3747 *
Ilya Dryomove4d8ec02012-01-16 22:04:48 +02003748 * Returns reduced profile in chunk format. If profile changing is in
3749 * progress (either running or paused) picks the target profile (if it's
3750 * already available), otherwise falls back to plain reducing.
Ilya Dryomova46d11a2012-01-16 22:04:47 +02003751 */
/*
 * Reduce @flags (available profiles, extended format) to a single
 * profile in chunk format: prefer an in-progress restripe target if it
 * is already available, otherwise mask out profiles the current device
 * count cannot support and pick the highest-priority remaining one
 * (RAID6 > RAID5 > RAID10 > RAID1 > RAID0 > single/DUP).
 */
static u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags)
{
	u64 num_devices = root->fs_info->fs_devices->rw_devices;
	u64 target;
	u64 tmp;

	/*
	 * see if restripe for this chunk_type is in progress, if so
	 * try to reduce to the target profile
	 */
	spin_lock(&root->fs_info->balance_lock);
	target = get_restripe_target(root->fs_info, flags);
	if (target) {
		/* pick target profile only if it's already available */
		if ((flags & target) & BTRFS_EXTENDED_PROFILE_MASK) {
			spin_unlock(&root->fs_info->balance_lock);
			return extended_to_chunk(target);
		}
	}
	spin_unlock(&root->fs_info->balance_lock);

	/* First, mask out the RAID levels which aren't possible */
	if (num_devices == 1)
		flags &= ~(BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID0 |
			   BTRFS_BLOCK_GROUP_RAID5);
	if (num_devices < 3)
		flags &= ~BTRFS_BLOCK_GROUP_RAID6;
	if (num_devices < 4)
		flags &= ~BTRFS_BLOCK_GROUP_RAID10;

	/* split off the profile bits, keep the type bits in flags */
	tmp = flags & (BTRFS_BLOCK_GROUP_DUP | BTRFS_BLOCK_GROUP_RAID0 |
		       BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID5 |
		       BTRFS_BLOCK_GROUP_RAID6 | BTRFS_BLOCK_GROUP_RAID10);
	flags &= ~tmp;

	/* collapse to the single highest-priority remaining profile */
	if (tmp & BTRFS_BLOCK_GROUP_RAID6)
		tmp = BTRFS_BLOCK_GROUP_RAID6;
	else if (tmp & BTRFS_BLOCK_GROUP_RAID5)
		tmp = BTRFS_BLOCK_GROUP_RAID5;
	else if (tmp & BTRFS_BLOCK_GROUP_RAID10)
		tmp = BTRFS_BLOCK_GROUP_RAID10;
	else if (tmp & BTRFS_BLOCK_GROUP_RAID1)
		tmp = BTRFS_BLOCK_GROUP_RAID1;
	else if (tmp & BTRFS_BLOCK_GROUP_RAID0)
		tmp = BTRFS_BLOCK_GROUP_RAID0;

	return extended_to_chunk(flags | tmp);
}
3800
Filipe Mananaf8213bd2014-04-24 15:15:29 +01003801static u64 get_alloc_profile(struct btrfs_root *root, u64 orig_flags)
Josef Bacik6a632092009-02-20 11:00:09 -05003802{
Miao Xiede98ced2013-01-29 10:13:12 +00003803 unsigned seq;
Filipe Mananaf8213bd2014-04-24 15:15:29 +01003804 u64 flags;
Miao Xiede98ced2013-01-29 10:13:12 +00003805
3806 do {
Filipe Mananaf8213bd2014-04-24 15:15:29 +01003807 flags = orig_flags;
Miao Xiede98ced2013-01-29 10:13:12 +00003808 seq = read_seqbegin(&root->fs_info->profiles_lock);
3809
3810 if (flags & BTRFS_BLOCK_GROUP_DATA)
3811 flags |= root->fs_info->avail_data_alloc_bits;
3812 else if (flags & BTRFS_BLOCK_GROUP_SYSTEM)
3813 flags |= root->fs_info->avail_system_alloc_bits;
3814 else if (flags & BTRFS_BLOCK_GROUP_METADATA)
3815 flags |= root->fs_info->avail_metadata_alloc_bits;
3816 } while (read_seqretry(&root->fs_info->profiles_lock, seq));
Ilya Dryomov6fef8df2012-01-16 22:04:47 +02003817
Yan, Zhengb742bb82010-05-16 10:46:24 -04003818 return btrfs_reduce_alloc_profile(root, flags);
3819}
Josef Bacik6a632092009-02-20 11:00:09 -05003820
Miao Xie6d07bce2011-01-05 10:07:31 +00003821u64 btrfs_get_alloc_profile(struct btrfs_root *root, int data)
Yan, Zhengb742bb82010-05-16 10:46:24 -04003822{
3823 u64 flags;
David Woodhouse53b381b2013-01-29 18:40:14 -05003824 u64 ret;
Josef Bacik6a632092009-02-20 11:00:09 -05003825
Yan, Zhengb742bb82010-05-16 10:46:24 -04003826 if (data)
3827 flags = BTRFS_BLOCK_GROUP_DATA;
3828 else if (root == root->fs_info->chunk_root)
3829 flags = BTRFS_BLOCK_GROUP_SYSTEM;
3830 else
3831 flags = BTRFS_BLOCK_GROUP_METADATA;
3832
David Woodhouse53b381b2013-01-29 18:40:14 -05003833 ret = get_alloc_profile(root, flags);
3834 return ret;
Josef Bacik6a632092009-02-20 11:00:09 -05003835}
3836
Josef Bacik6a632092009-02-20 11:00:09 -05003837/*
3838 * This will check the space that the inode allocates from to make sure we have
3839 * enough space for bytes.
3840 */
Dongsheng Yange2d1f922015-02-06 10:26:52 -05003841int btrfs_check_data_free_space(struct inode *inode, u64 bytes, u64 write_bytes)
Josef Bacik6a632092009-02-20 11:00:09 -05003842{
3843 struct btrfs_space_info *data_sinfo;
Yan, Zheng0ca1f7c2010-05-16 10:48:47 -04003844 struct btrfs_root *root = BTRFS_I(inode)->root;
Li Zefanb4d7c3c2012-07-09 20:21:07 -06003845 struct btrfs_fs_info *fs_info = root->fs_info;
Josef Bacikab6e24102010-03-19 14:38:13 +00003846 u64 used;
Zhao Lei94b947b2015-02-14 13:23:45 +08003847 int ret = 0;
Zhao Leic99f1b02015-03-02 19:32:20 +08003848 int need_commit = 2;
3849 int have_pinned_space;
Josef Bacik6a632092009-02-20 11:00:09 -05003850
3851 /* make sure bytes are sectorsize aligned */
Qu Wenruofda28322013-02-26 08:10:22 +00003852 bytes = ALIGN(bytes, root->sectorsize);
Josef Bacik6a632092009-02-20 11:00:09 -05003853
Miao Xie9dced182013-10-25 17:33:36 +08003854 if (btrfs_is_free_space_inode(inode)) {
Zhao Leic99f1b02015-03-02 19:32:20 +08003855 need_commit = 0;
Miao Xie9dced182013-10-25 17:33:36 +08003856 ASSERT(current->journal_info);
Josef Bacik0af3d002010-06-21 14:48:16 -04003857 }
3858
Li Zefanb4d7c3c2012-07-09 20:21:07 -06003859 data_sinfo = fs_info->data_sinfo;
Chris Mason33b4d472009-09-22 14:45:50 -04003860 if (!data_sinfo)
3861 goto alloc;
3862
Josef Bacik6a632092009-02-20 11:00:09 -05003863again:
3864 /* make sure we have enough space to handle the data first */
3865 spin_lock(&data_sinfo->lock);
Yan, Zheng8929ecfa2010-05-16 10:49:58 -04003866 used = data_sinfo->bytes_used + data_sinfo->bytes_reserved +
3867 data_sinfo->bytes_pinned + data_sinfo->bytes_readonly +
3868 data_sinfo->bytes_may_use;
Josef Bacikab6e24102010-03-19 14:38:13 +00003869
3870 if (used + bytes > data_sinfo->total_bytes) {
Josef Bacik4e06bdd2009-02-20 10:59:53 -05003871 struct btrfs_trans_handle *trans;
3872
Josef Bacik6a632092009-02-20 11:00:09 -05003873 /*
3874 * if we don't have enough free bytes in this space then we need
3875 * to alloc a new chunk.
3876 */
Zhao Leib9fd47c2015-02-09 14:40:20 +08003877 if (!data_sinfo->full) {
Josef Bacik6a632092009-02-20 11:00:09 -05003878 u64 alloc_target;
Josef Bacik6a632092009-02-20 11:00:09 -05003879
Chris Mason0e4f8f82011-04-15 16:05:44 -04003880 data_sinfo->force_alloc = CHUNK_ALLOC_FORCE;
Josef Bacik6a632092009-02-20 11:00:09 -05003881 spin_unlock(&data_sinfo->lock);
Chris Mason33b4d472009-09-22 14:45:50 -04003882alloc:
Josef Bacik6a632092009-02-20 11:00:09 -05003883 alloc_target = btrfs_get_alloc_profile(root, 1);
Miao Xie9dced182013-10-25 17:33:36 +08003884 /*
3885 * It is ugly that we don't call nolock join
3886 * transaction for the free space inode case here.
3887 * But it is safe because we only do the data space
3888 * reservation for the free space cache in the
3889 * transaction context, the common join transaction
3890 * just increase the counter of the current transaction
3891 * handler, doesn't try to acquire the trans_lock of
3892 * the fs.
3893 */
Josef Bacik7a7eaa42011-04-13 12:54:33 -04003894 trans = btrfs_join_transaction(root);
Yan, Zhenga22285a2010-05-16 10:48:46 -04003895 if (IS_ERR(trans))
3896 return PTR_ERR(trans);
Josef Bacik6a632092009-02-20 11:00:09 -05003897
3898 ret = do_chunk_alloc(trans, root->fs_info->extent_root,
Chris Mason0e4f8f82011-04-15 16:05:44 -04003899 alloc_target,
3900 CHUNK_ALLOC_NO_FORCE);
Josef Bacik6a632092009-02-20 11:00:09 -05003901 btrfs_end_transaction(trans, root);
Miao Xied52a5b52011-01-05 10:07:18 +00003902 if (ret < 0) {
3903 if (ret != -ENOSPC)
3904 return ret;
Zhao Leic99f1b02015-03-02 19:32:20 +08003905 else {
3906 have_pinned_space = 1;
Miao Xied52a5b52011-01-05 10:07:18 +00003907 goto commit_trans;
Zhao Leic99f1b02015-03-02 19:32:20 +08003908 }
Miao Xied52a5b52011-01-05 10:07:18 +00003909 }
Chris Mason33b4d472009-09-22 14:45:50 -04003910
Li Zefanb4d7c3c2012-07-09 20:21:07 -06003911 if (!data_sinfo)
3912 data_sinfo = fs_info->data_sinfo;
3913
Josef Bacik6a632092009-02-20 11:00:09 -05003914 goto again;
3915 }
Josef Bacikf2bb8f52011-05-25 13:10:16 -04003916
3917 /*
Josef Bacikb150a4f2013-06-19 15:00:04 -04003918 * If we don't have enough pinned space to deal with this
Zhao Lei94b947b2015-02-14 13:23:45 +08003919 * allocation, and no removed chunk in current transaction,
3920 * don't bother committing the transaction.
Josef Bacikf2bb8f52011-05-25 13:10:16 -04003921 */
Zhao Leic99f1b02015-03-02 19:32:20 +08003922 have_pinned_space = percpu_counter_compare(
3923 &data_sinfo->total_bytes_pinned,
3924 used + bytes - data_sinfo->total_bytes);
Josef Bacik6a632092009-02-20 11:00:09 -05003925 spin_unlock(&data_sinfo->lock);
Josef Bacik4e06bdd2009-02-20 10:59:53 -05003926
3927 /* commit the current transaction and try again */
Miao Xied52a5b52011-01-05 10:07:18 +00003928commit_trans:
Zhao Leic99f1b02015-03-02 19:32:20 +08003929 if (need_commit &&
Josef Bacika4abeea2011-04-11 17:25:13 -04003930 !atomic_read(&root->fs_info->open_ioctl_trans)) {
Zhao Leic99f1b02015-03-02 19:32:20 +08003931 need_commit--;
Josef Bacikb150a4f2013-06-19 15:00:04 -04003932
Zhao Lei9a4e7272015-04-09 12:34:43 +08003933 if (need_commit > 0)
3934 btrfs_wait_ordered_roots(fs_info, -1);
3935
Josef Bacik7a7eaa42011-04-13 12:54:33 -04003936 trans = btrfs_join_transaction(root);
Yan, Zhenga22285a2010-05-16 10:48:46 -04003937 if (IS_ERR(trans))
3938 return PTR_ERR(trans);
Zhao Leic99f1b02015-03-02 19:32:20 +08003939 if (have_pinned_space >= 0 ||
3940 trans->transaction->have_free_bgs ||
3941 need_commit > 0) {
Zhao Lei94b947b2015-02-14 13:23:45 +08003942 ret = btrfs_commit_transaction(trans, root);
3943 if (ret)
3944 return ret;
Zhao Leid7c15172015-02-26 10:49:20 +08003945 /*
3946 * make sure that all running delayed iput are
3947 * done
3948 */
3949 down_write(&root->fs_info->delayed_iput_sem);
3950 up_write(&root->fs_info->delayed_iput_sem);
Zhao Lei94b947b2015-02-14 13:23:45 +08003951 goto again;
3952 } else {
3953 btrfs_end_transaction(trans, root);
3954 }
Josef Bacik4e06bdd2009-02-20 10:59:53 -05003955 }
3956
Jeff Mahoneycab45e22013-10-16 16:27:01 -04003957 trace_btrfs_space_reservation(root->fs_info,
3958 "space_info:enospc",
3959 data_sinfo->flags, bytes, 1);
Josef Bacik6a632092009-02-20 11:00:09 -05003960 return -ENOSPC;
3961 }
Dongsheng Yange2d1f922015-02-06 10:26:52 -05003962 ret = btrfs_qgroup_reserve(root, write_bytes);
Dongsheng Yang237c0e92014-12-29 06:23:05 -05003963 if (ret)
3964 goto out;
Josef Bacik6a632092009-02-20 11:00:09 -05003965 data_sinfo->bytes_may_use += bytes;
Josef Bacik8c2a3ca2012-01-10 10:31:31 -05003966 trace_btrfs_space_reservation(root->fs_info, "space_info",
Liu Bo2bcc0322012-03-29 09:57:44 -04003967 data_sinfo->flags, bytes, 1);
Dongsheng Yang237c0e92014-12-29 06:23:05 -05003968out:
Josef Bacik6a632092009-02-20 11:00:09 -05003969 spin_unlock(&data_sinfo->lock);
3970
Dongsheng Yang237c0e92014-12-29 06:23:05 -05003971 return ret;
Josef Bacik6a632092009-02-20 11:00:09 -05003972}
3973
3974/*
Josef Bacikfb25e912011-07-26 17:00:46 -04003975 * Called if we need to clear a data reservation for this inode.
Josef Bacik6a632092009-02-20 11:00:09 -05003976 */
Yan, Zheng0ca1f7c2010-05-16 10:48:47 -04003977void btrfs_free_reserved_data_space(struct inode *inode, u64 bytes)
Josef Bacik6a632092009-02-20 11:00:09 -05003978{
Yan, Zheng0ca1f7c2010-05-16 10:48:47 -04003979 struct btrfs_root *root = BTRFS_I(inode)->root;
Josef Bacik6a632092009-02-20 11:00:09 -05003980 struct btrfs_space_info *data_sinfo;
3981
3982 /* make sure bytes are sectorsize aligned */
Qu Wenruofda28322013-02-26 08:10:22 +00003983 bytes = ALIGN(bytes, root->sectorsize);
Josef Bacik6a632092009-02-20 11:00:09 -05003984
Li Zefanb4d7c3c2012-07-09 20:21:07 -06003985 data_sinfo = root->fs_info->data_sinfo;
Josef Bacik6a632092009-02-20 11:00:09 -05003986 spin_lock(&data_sinfo->lock);
Josef Bacik7ee9e442013-06-21 16:37:03 -04003987 WARN_ON(data_sinfo->bytes_may_use < bytes);
Josef Bacik6a632092009-02-20 11:00:09 -05003988 data_sinfo->bytes_may_use -= bytes;
Josef Bacik8c2a3ca2012-01-10 10:31:31 -05003989 trace_btrfs_space_reservation(root->fs_info, "space_info",
Liu Bo2bcc0322012-03-29 09:57:44 -04003990 data_sinfo->flags, bytes, 0);
Josef Bacik6a632092009-02-20 11:00:09 -05003991 spin_unlock(&data_sinfo->lock);
3992}
3993
Josef Bacik97e728d2009-04-21 17:40:57 -04003994static void force_metadata_allocation(struct btrfs_fs_info *info)
3995{
3996 struct list_head *head = &info->space_info;
3997 struct btrfs_space_info *found;
3998
3999 rcu_read_lock();
4000 list_for_each_entry_rcu(found, head, list) {
4001 if (found->flags & BTRFS_BLOCK_GROUP_METADATA)
Chris Mason0e4f8f82011-04-15 16:05:44 -04004002 found->force_alloc = CHUNK_ALLOC_FORCE;
Josef Bacik97e728d2009-04-21 17:40:57 -04004003 }
4004 rcu_read_unlock();
4005}
4006
Miao Xie3c76cd82013-04-25 10:12:38 +00004007static inline u64 calc_global_rsv_need_space(struct btrfs_block_rsv *global)
4008{
4009 return (global->size << 1);
4010}
4011
Chris Masone5bc2452010-10-26 13:37:56 -04004012static int should_alloc_chunk(struct btrfs_root *root,
Josef Bacik698d0082012-09-12 14:08:47 -04004013 struct btrfs_space_info *sinfo, int force)
Yan, Zheng424499d2010-05-16 10:46:25 -04004014{
Josef Bacikfb25e912011-07-26 17:00:46 -04004015 struct btrfs_block_rsv *global_rsv = &root->fs_info->global_block_rsv;
Yan, Zheng424499d2010-05-16 10:46:25 -04004016 u64 num_bytes = sinfo->total_bytes - sinfo->bytes_readonly;
Chris Mason0e4f8f82011-04-15 16:05:44 -04004017 u64 num_allocated = sinfo->bytes_used + sinfo->bytes_reserved;
Chris Masone5bc2452010-10-26 13:37:56 -04004018 u64 thresh;
Yan, Zheng424499d2010-05-16 10:46:25 -04004019
Chris Mason0e4f8f82011-04-15 16:05:44 -04004020 if (force == CHUNK_ALLOC_FORCE)
4021 return 1;
4022
4023 /*
Josef Bacikfb25e912011-07-26 17:00:46 -04004024 * We need to take into account the global rsv because for all intents
4025 * and purposes it's used space. Don't worry about locking the
4026 * global_rsv, it doesn't change except when the transaction commits.
4027 */
Josef Bacik54338b52012-08-14 16:20:52 -04004028 if (sinfo->flags & BTRFS_BLOCK_GROUP_METADATA)
Miao Xie3c76cd82013-04-25 10:12:38 +00004029 num_allocated += calc_global_rsv_need_space(global_rsv);
Josef Bacikfb25e912011-07-26 17:00:46 -04004030
4031 /*
Chris Mason0e4f8f82011-04-15 16:05:44 -04004032 * in limited mode, we want to have some free space up to
4033 * about 1% of the FS size.
4034 */
4035 if (force == CHUNK_ALLOC_LIMITED) {
David Sterba6c417612011-04-13 15:41:04 +02004036 thresh = btrfs_super_total_bytes(root->fs_info->super_copy);
Chris Mason0e4f8f82011-04-15 16:05:44 -04004037 thresh = max_t(u64, 64 * 1024 * 1024,
4038 div_factor_fine(thresh, 1));
4039
4040 if (num_bytes - num_allocated < thresh)
4041 return 1;
4042 }
Chris Mason0e4f8f82011-04-15 16:05:44 -04004043
Josef Bacik698d0082012-09-12 14:08:47 -04004044 if (num_allocated + 2 * 1024 * 1024 < div_factor(num_bytes, 8))
Josef Bacik14ed0ca2010-10-15 15:23:48 -04004045 return 0;
Yan, Zheng424499d2010-05-16 10:46:25 -04004046 return 1;
4047}
4048
Filipe Manana39c2d7f2015-05-20 14:01:55 +01004049static u64 get_profile_num_devs(struct btrfs_root *root, u64 type)
Liu Bo15d1ff82012-03-29 09:57:44 -04004050{
4051 u64 num_dev;
4052
David Woodhouse53b381b2013-01-29 18:40:14 -05004053 if (type & (BTRFS_BLOCK_GROUP_RAID10 |
4054 BTRFS_BLOCK_GROUP_RAID0 |
4055 BTRFS_BLOCK_GROUP_RAID5 |
4056 BTRFS_BLOCK_GROUP_RAID6))
Liu Bo15d1ff82012-03-29 09:57:44 -04004057 num_dev = root->fs_info->fs_devices->rw_devices;
4058 else if (type & BTRFS_BLOCK_GROUP_RAID1)
4059 num_dev = 2;
4060 else
4061 num_dev = 1; /* DUP or single */
4062
Filipe Manana39c2d7f2015-05-20 14:01:55 +01004063 return num_dev;
Liu Bo15d1ff82012-03-29 09:57:44 -04004064}
4065
Filipe Manana39c2d7f2015-05-20 14:01:55 +01004066/*
4067 * If @is_allocation is true, reserve space in the system space info necessary
4068 * for allocating a chunk, otherwise if it's false, reserve space necessary for
4069 * removing a chunk.
4070 */
4071void check_system_chunk(struct btrfs_trans_handle *trans,
4072 struct btrfs_root *root,
Filipe Manana4617ea32015-06-09 17:48:21 +01004073 u64 type)
Liu Bo15d1ff82012-03-29 09:57:44 -04004074{
4075 struct btrfs_space_info *info;
4076 u64 left;
4077 u64 thresh;
Filipe Manana4fbcdf62015-05-20 14:01:54 +01004078 int ret = 0;
Filipe Manana39c2d7f2015-05-20 14:01:55 +01004079 u64 num_devs;
Filipe Manana4fbcdf62015-05-20 14:01:54 +01004080
4081 /*
4082 * Needed because we can end up allocating a system chunk and for an
4083 * atomic and race free space reservation in the chunk block reserve.
4084 */
4085 ASSERT(mutex_is_locked(&root->fs_info->chunk_mutex));
Liu Bo15d1ff82012-03-29 09:57:44 -04004086
4087 info = __find_space_info(root->fs_info, BTRFS_BLOCK_GROUP_SYSTEM);
4088 spin_lock(&info->lock);
4089 left = info->total_bytes - info->bytes_used - info->bytes_pinned -
Filipe Manana4fbcdf62015-05-20 14:01:54 +01004090 info->bytes_reserved - info->bytes_readonly -
4091 info->bytes_may_use;
Liu Bo15d1ff82012-03-29 09:57:44 -04004092 spin_unlock(&info->lock);
4093
Filipe Manana39c2d7f2015-05-20 14:01:55 +01004094 num_devs = get_profile_num_devs(root, type);
4095
4096 /* num_devs device items to update and 1 chunk item to add or remove */
Filipe Manana4617ea32015-06-09 17:48:21 +01004097 thresh = btrfs_calc_trunc_metadata_size(root, num_devs) +
4098 btrfs_calc_trans_metadata_size(root, 1);
Filipe Manana39c2d7f2015-05-20 14:01:55 +01004099
Liu Bo15d1ff82012-03-29 09:57:44 -04004100 if (left < thresh && btrfs_test_opt(root, ENOSPC_DEBUG)) {
Simon Kirbyc2cf52e2013-03-19 22:41:23 +00004101 btrfs_info(root->fs_info, "left=%llu, need=%llu, flags=%llu",
4102 left, thresh, type);
Liu Bo15d1ff82012-03-29 09:57:44 -04004103 dump_space_info(info, 0, 0);
4104 }
4105
4106 if (left < thresh) {
4107 u64 flags;
4108
4109 flags = btrfs_get_alloc_profile(root->fs_info->chunk_root, 0);
Filipe Manana4fbcdf62015-05-20 14:01:54 +01004110 /*
4111 * Ignore failure to create system chunk. We might end up not
4112 * needing it, as we might not need to COW all nodes/leafs from
4113 * the paths we visit in the chunk tree (they were already COWed
4114 * or created in the current transaction for example).
4115 */
4116 ret = btrfs_alloc_chunk(trans, root, flags);
4117 }
4118
4119 if (!ret) {
4120 ret = btrfs_block_rsv_add(root->fs_info->chunk_root,
4121 &root->fs_info->chunk_block_rsv,
4122 thresh, BTRFS_RESERVE_NO_FLUSH);
4123 if (!ret)
4124 trans->chunk_bytes_reserved += thresh;
Liu Bo15d1ff82012-03-29 09:57:44 -04004125 }
4126}
4127
Chris Mason6324fbf2008-03-24 15:01:59 -04004128static int do_chunk_alloc(struct btrfs_trans_handle *trans,
Josef Bacik698d0082012-09-12 14:08:47 -04004129 struct btrfs_root *extent_root, u64 flags, int force)
Chris Mason6324fbf2008-03-24 15:01:59 -04004130{
4131 struct btrfs_space_info *space_info;
Josef Bacik97e728d2009-04-21 17:40:57 -04004132 struct btrfs_fs_info *fs_info = extent_root->fs_info;
Josef Bacik6d741192011-04-11 20:20:11 -04004133 int wait_for_alloc = 0;
Yan Zhengc146afa2008-11-12 14:34:12 -05004134 int ret = 0;
4135
Josef Bacikc6b305a2012-12-18 09:16:16 -05004136 /* Don't re-enter if we're already allocating a chunk */
4137 if (trans->allocating_chunk)
4138 return -ENOSPC;
4139
Chris Mason6324fbf2008-03-24 15:01:59 -04004140 space_info = __find_space_info(extent_root->fs_info, flags);
Chris Mason593060d2008-03-25 16:50:33 -04004141 if (!space_info) {
4142 ret = update_space_info(extent_root->fs_info, flags,
4143 0, 0, &space_info);
Jeff Mahoney79787ea2012-03-12 16:03:00 +01004144 BUG_ON(ret); /* -ENOMEM */
Chris Mason593060d2008-03-25 16:50:33 -04004145 }
Jeff Mahoney79787ea2012-03-12 16:03:00 +01004146 BUG_ON(!space_info); /* Logic error */
Chris Mason6324fbf2008-03-24 15:01:59 -04004147
Josef Bacik6d741192011-04-11 20:20:11 -04004148again:
Josef Bacik25179202008-10-29 14:49:05 -04004149 spin_lock(&space_info->lock);
Miao Xie9e622d62012-01-26 15:01:12 -05004150 if (force < space_info->force_alloc)
Chris Mason0e4f8f82011-04-15 16:05:44 -04004151 force = space_info->force_alloc;
Josef Bacik25179202008-10-29 14:49:05 -04004152 if (space_info->full) {
Filipe David Borba Manana09fb99a2013-08-05 16:25:12 +01004153 if (should_alloc_chunk(extent_root, space_info, force))
4154 ret = -ENOSPC;
4155 else
4156 ret = 0;
Josef Bacik25179202008-10-29 14:49:05 -04004157 spin_unlock(&space_info->lock);
Filipe David Borba Manana09fb99a2013-08-05 16:25:12 +01004158 return ret;
Josef Bacik25179202008-10-29 14:49:05 -04004159 }
Chris Mason6324fbf2008-03-24 15:01:59 -04004160
Josef Bacik698d0082012-09-12 14:08:47 -04004161 if (!should_alloc_chunk(extent_root, space_info, force)) {
Josef Bacik25179202008-10-29 14:49:05 -04004162 spin_unlock(&space_info->lock);
Josef Bacik6d741192011-04-11 20:20:11 -04004163 return 0;
4164 } else if (space_info->chunk_alloc) {
4165 wait_for_alloc = 1;
4166 } else {
4167 space_info->chunk_alloc = 1;
Josef Bacik25179202008-10-29 14:49:05 -04004168 }
Chris Mason0e4f8f82011-04-15 16:05:44 -04004169
Josef Bacik25179202008-10-29 14:49:05 -04004170 spin_unlock(&space_info->lock);
4171
Josef Bacik6d741192011-04-11 20:20:11 -04004172 mutex_lock(&fs_info->chunk_mutex);
4173
4174 /*
4175 * The chunk_mutex is held throughout the entirety of a chunk
4176 * allocation, so once we've acquired the chunk_mutex we know that the
4177 * other guy is done and we need to recheck and see if we should
4178 * allocate.
4179 */
4180 if (wait_for_alloc) {
4181 mutex_unlock(&fs_info->chunk_mutex);
4182 wait_for_alloc = 0;
4183 goto again;
4184 }
4185
Josef Bacikc6b305a2012-12-18 09:16:16 -05004186 trans->allocating_chunk = true;
4187
Josef Bacik97e728d2009-04-21 17:40:57 -04004188 /*
Josef Bacik67377732010-09-16 16:19:09 -04004189 * If we have mixed data/metadata chunks we want to make sure we keep
4190 * allocating mixed chunks instead of individual chunks.
4191 */
4192 if (btrfs_mixed_space_info(space_info))
4193 flags |= (BTRFS_BLOCK_GROUP_DATA | BTRFS_BLOCK_GROUP_METADATA);
4194
4195 /*
Josef Bacik97e728d2009-04-21 17:40:57 -04004196 * if we're doing a data chunk, go ahead and make sure that
4197 * we keep a reasonable number of metadata chunks allocated in the
4198 * FS as well.
4199 */
Josef Bacik9ed74f22009-09-11 16:12:44 -04004200 if (flags & BTRFS_BLOCK_GROUP_DATA && fs_info->metadata_ratio) {
Josef Bacik97e728d2009-04-21 17:40:57 -04004201 fs_info->data_chunk_allocations++;
4202 if (!(fs_info->data_chunk_allocations %
4203 fs_info->metadata_ratio))
4204 force_metadata_allocation(fs_info);
4205 }
4206
Liu Bo15d1ff82012-03-29 09:57:44 -04004207 /*
4208 * Check if we have enough space in SYSTEM chunk because we may need
4209 * to update devices.
4210 */
Filipe Manana4617ea32015-06-09 17:48:21 +01004211 check_system_chunk(trans, extent_root, flags);
Liu Bo15d1ff82012-03-29 09:57:44 -04004212
Yan Zheng2b820322008-11-17 21:11:30 -05004213 ret = btrfs_alloc_chunk(trans, extent_root, flags);
Josef Bacikc6b305a2012-12-18 09:16:16 -05004214 trans->allocating_chunk = false;
Mark Fasheh92b8e8972011-07-12 10:57:59 -07004215
Josef Bacik9ed74f22009-09-11 16:12:44 -04004216 spin_lock(&space_info->lock);
Alexandre Olivaa81cb9a2013-02-21 21:15:14 +00004217 if (ret < 0 && ret != -ENOSPC)
4218 goto out;
Chris Masond3977122009-01-05 21:25:51 -05004219 if (ret)
Chris Mason6324fbf2008-03-24 15:01:59 -04004220 space_info->full = 1;
Yan, Zheng424499d2010-05-16 10:46:25 -04004221 else
4222 ret = 1;
Josef Bacik6d741192011-04-11 20:20:11 -04004223
Chris Mason0e4f8f82011-04-15 16:05:44 -04004224 space_info->force_alloc = CHUNK_ALLOC_NO_FORCE;
Alexandre Olivaa81cb9a2013-02-21 21:15:14 +00004225out:
Josef Bacik6d741192011-04-11 20:20:11 -04004226 space_info->chunk_alloc = 0;
Josef Bacik9ed74f22009-09-11 16:12:44 -04004227 spin_unlock(&space_info->lock);
Dan Carpentera25c75d2012-04-18 09:59:29 +03004228 mutex_unlock(&fs_info->chunk_mutex);
Filipe Manana00d80e32015-07-20 14:56:20 +01004229 /*
4230 * When we allocate a new chunk we reserve space in the chunk block
4231 * reserve to make sure we can COW nodes/leafs in the chunk tree or
4232 * add new nodes/leafs to it if we end up needing to do it when
4233 * inserting the chunk item and updating device items as part of the
4234 * second phase of chunk allocation, performed by
4235 * btrfs_finish_chunk_alloc(). So make sure we don't accumulate a
4236 * large number of new block groups to create in our transaction
4237 * handle's new_bgs list to avoid exhausting the chunk block reserve
4238 * in extreme cases - like having a single transaction create many new
4239 * block groups when starting to write out the free space caches of all
4240 * the block groups that were made dirty during the lifetime of the
4241 * transaction.
4242 */
4243 if (trans->chunk_bytes_reserved >= (2 * 1024 * 1024ull)) {
4244 btrfs_create_pending_block_groups(trans, trans->root);
4245 btrfs_trans_release_chunk_metadata(trans);
4246 }
Josef Bacik0f9dd462008-09-23 13:14:11 -04004247 return ret;
Chris Mason6324fbf2008-03-24 15:01:59 -04004248}
4249
Josef Bacika80c8dc2012-09-06 16:59:33 -04004250static int can_overcommit(struct btrfs_root *root,
4251 struct btrfs_space_info *space_info, u64 bytes,
Miao Xie08e007d2012-10-16 11:33:38 +00004252 enum btrfs_reserve_flush_enum flush)
Josef Bacika80c8dc2012-09-06 16:59:33 -04004253{
Josef Bacik96f1bb52013-01-30 17:02:51 -05004254 struct btrfs_block_rsv *global_rsv = &root->fs_info->global_block_rsv;
Josef Bacika80c8dc2012-09-06 16:59:33 -04004255 u64 profile = btrfs_get_alloc_profile(root, 0);
Miao Xie3c76cd82013-04-25 10:12:38 +00004256 u64 space_size;
Josef Bacika80c8dc2012-09-06 16:59:33 -04004257 u64 avail;
4258 u64 used;
4259
4260 used = space_info->bytes_used + space_info->bytes_reserved +
Josef Bacik96f1bb52013-01-30 17:02:51 -05004261 space_info->bytes_pinned + space_info->bytes_readonly;
4262
Josef Bacik96f1bb52013-01-30 17:02:51 -05004263 /*
4264 * We only want to allow over committing if we have lots of actual space
4265 * free, but if we don't have enough space to handle the global reserve
4266 * space then we could end up having a real enospc problem when trying
4267 * to allocate a chunk or some other such important allocation.
4268 */
Miao Xie3c76cd82013-04-25 10:12:38 +00004269 spin_lock(&global_rsv->lock);
4270 space_size = calc_global_rsv_need_space(global_rsv);
4271 spin_unlock(&global_rsv->lock);
4272 if (used + space_size >= space_info->total_bytes)
Josef Bacik96f1bb52013-01-30 17:02:51 -05004273 return 0;
4274
4275 used += space_info->bytes_may_use;
Josef Bacika80c8dc2012-09-06 16:59:33 -04004276
4277 spin_lock(&root->fs_info->free_chunk_lock);
4278 avail = root->fs_info->free_chunk_space;
4279 spin_unlock(&root->fs_info->free_chunk_lock);
4280
4281 /*
4282 * If we have dup, raid1 or raid10 then only half of the free
David Woodhouse53b381b2013-01-29 18:40:14 -05004283 * space is actually useable. For raid56, the space info used
4284 * doesn't include the parity drive, so we don't have to
4285 * change the math
Josef Bacika80c8dc2012-09-06 16:59:33 -04004286 */
4287 if (profile & (BTRFS_BLOCK_GROUP_DUP |
4288 BTRFS_BLOCK_GROUP_RAID1 |
4289 BTRFS_BLOCK_GROUP_RAID10))
4290 avail >>= 1;
4291
4292 /*
Miao Xie561c2942012-10-16 11:32:18 +00004293 * If we aren't flushing all things, let us overcommit up to
4294 * 1/2th of the space. If we can flush, don't let us overcommit
4295 * too much, let it overcommit up to 1/8 of the space.
Josef Bacika80c8dc2012-09-06 16:59:33 -04004296 */
Miao Xie08e007d2012-10-16 11:33:38 +00004297 if (flush == BTRFS_RESERVE_FLUSH_ALL)
Josef Bacik14575ae2013-09-17 10:48:00 -04004298 avail >>= 3;
Josef Bacika80c8dc2012-09-06 16:59:33 -04004299 else
Josef Bacik14575ae2013-09-17 10:48:00 -04004300 avail >>= 1;
Josef Bacika80c8dc2012-09-06 16:59:33 -04004301
Josef Bacik14575ae2013-09-17 10:48:00 -04004302 if (used + bytes < space_info->total_bytes + avail)
Josef Bacika80c8dc2012-09-06 16:59:33 -04004303 return 1;
4304 return 0;
4305}
4306
Eric Sandeen48a3b632013-04-25 20:41:01 +00004307static void btrfs_writeback_inodes_sb_nr(struct btrfs_root *root,
Miao Xie6c255e62014-03-06 13:55:01 +08004308 unsigned long nr_pages, int nr_items)
Miao Xieda633a42012-12-20 11:19:09 +00004309{
4310 struct super_block *sb = root->fs_info->sb;
Miao Xieda633a42012-12-20 11:19:09 +00004311
Josef Bacik925a6ef2013-06-20 12:31:27 -04004312 if (down_read_trylock(&sb->s_umount)) {
4313 writeback_inodes_sb_nr(sb, nr_pages, WB_REASON_FS_FREE_SPACE);
4314 up_read(&sb->s_umount);
4315 } else {
Miao Xieda633a42012-12-20 11:19:09 +00004316 /*
4317 * We needn't worry the filesystem going from r/w to r/o though
4318 * we don't acquire ->s_umount mutex, because the filesystem
4319 * should guarantee the delalloc inodes list be empty after
4320 * the filesystem is readonly(all dirty pages are written to
4321 * the disk).
4322 */
Miao Xie6c255e62014-03-06 13:55:01 +08004323 btrfs_start_delalloc_roots(root->fs_info, 0, nr_items);
Josef Bacik98ad69c2013-04-04 11:55:49 -04004324 if (!current->journal_info)
Miao Xie6c255e62014-03-06 13:55:01 +08004325 btrfs_wait_ordered_roots(root->fs_info, nr_items);
Miao Xieda633a42012-12-20 11:19:09 +00004326 }
4327}
4328
Miao Xie18cd8ea2013-11-04 23:13:22 +08004329static inline int calc_reclaim_items_nr(struct btrfs_root *root, u64 to_reclaim)
4330{
4331 u64 bytes;
4332 int nr;
4333
4334 bytes = btrfs_calc_trans_metadata_size(root, 1);
4335 nr = (int)div64_u64(to_reclaim, bytes);
4336 if (!nr)
4337 nr = 1;
4338 return nr;
4339}
4340
Miao Xiec61a16a2013-11-04 23:13:23 +08004341#define EXTENT_SIZE_PER_ITEM (256 * 1024)
4342
Yan, Zheng5da9d012010-05-16 10:46:25 -04004343/*
4344 * shrink metadata reservation for delalloc
4345 */
Josef Bacikf4c738c2012-07-02 17:10:51 -04004346static void shrink_delalloc(struct btrfs_root *root, u64 to_reclaim, u64 orig,
4347 bool wait_ordered)
Yan, Zheng5da9d012010-05-16 10:46:25 -04004348{
Yan, Zheng0ca1f7c2010-05-16 10:48:47 -04004349 struct btrfs_block_rsv *block_rsv;
Josef Bacik0019f102010-10-15 15:18:40 -04004350 struct btrfs_space_info *space_info;
Josef Bacik663350a2011-11-03 22:54:25 -04004351 struct btrfs_trans_handle *trans;
Josef Bacikf4c738c2012-07-02 17:10:51 -04004352 u64 delalloc_bytes;
Yan, Zheng5da9d012010-05-16 10:46:25 -04004353 u64 max_reclaim;
Josef Bacikb1953bc2011-01-21 21:10:01 +00004354 long time_left;
Miao Xied3ee29e2013-11-04 23:13:20 +08004355 unsigned long nr_pages;
4356 int loops;
Miao Xieb0244192013-11-04 23:13:25 +08004357 int items;
Miao Xie08e007d2012-10-16 11:33:38 +00004358 enum btrfs_reserve_flush_enum flush;
Yan, Zheng5da9d012010-05-16 10:46:25 -04004359
Miao Xiec61a16a2013-11-04 23:13:23 +08004360 /* Calc the number of the pages we need flush for space reservation */
Miao Xieb0244192013-11-04 23:13:25 +08004361 items = calc_reclaim_items_nr(root, to_reclaim);
4362 to_reclaim = items * EXTENT_SIZE_PER_ITEM;
Miao Xiec61a16a2013-11-04 23:13:23 +08004363
Josef Bacik663350a2011-11-03 22:54:25 -04004364 trans = (struct btrfs_trans_handle *)current->journal_info;
Yan, Zheng0ca1f7c2010-05-16 10:48:47 -04004365 block_rsv = &root->fs_info->delalloc_block_rsv;
Josef Bacik0019f102010-10-15 15:18:40 -04004366 space_info = block_rsv->space_info;
Chris Masonbf9022e2010-10-26 13:40:45 -04004367
Miao Xie963d6782013-01-29 10:10:51 +00004368 delalloc_bytes = percpu_counter_sum_positive(
4369 &root->fs_info->delalloc_bytes);
Josef Bacikf4c738c2012-07-02 17:10:51 -04004370 if (delalloc_bytes == 0) {
Josef Bacikfdb5eff2011-06-07 16:07:44 -04004371 if (trans)
Josef Bacikf4c738c2012-07-02 17:10:51 -04004372 return;
Miao Xie38c135a2013-11-04 23:13:21 +08004373 if (wait_ordered)
Miao Xieb0244192013-11-04 23:13:25 +08004374 btrfs_wait_ordered_roots(root->fs_info, items);
Josef Bacikf4c738c2012-07-02 17:10:51 -04004375 return;
Josef Bacikfdb5eff2011-06-07 16:07:44 -04004376 }
4377
Miao Xied3ee29e2013-11-04 23:13:20 +08004378 loops = 0;
Josef Bacikf4c738c2012-07-02 17:10:51 -04004379 while (delalloc_bytes && loops < 3) {
4380 max_reclaim = min(delalloc_bytes, to_reclaim);
4381 nr_pages = max_reclaim >> PAGE_CACHE_SHIFT;
Miao Xie6c255e62014-03-06 13:55:01 +08004382 btrfs_writeback_inodes_sb_nr(root, nr_pages, items);
Josef Bacikdea31f52012-09-06 16:47:00 -04004383 /*
4384 * We need to wait for the async pages to actually start before
4385 * we do anything.
4386 */
Miao Xie9f3a0742013-11-04 23:13:24 +08004387 max_reclaim = atomic_read(&root->fs_info->async_delalloc_pages);
4388 if (!max_reclaim)
4389 goto skip_async;
Josef Bacikdea31f52012-09-06 16:47:00 -04004390
Miao Xie9f3a0742013-11-04 23:13:24 +08004391 if (max_reclaim <= nr_pages)
4392 max_reclaim = 0;
4393 else
4394 max_reclaim -= nr_pages;
4395
4396 wait_event(root->fs_info->async_submit_wait,
4397 atomic_read(&root->fs_info->async_delalloc_pages) <=
4398 (int)max_reclaim);
4399skip_async:
Miao Xie08e007d2012-10-16 11:33:38 +00004400 if (!trans)
4401 flush = BTRFS_RESERVE_FLUSH_ALL;
4402 else
4403 flush = BTRFS_RESERVE_NO_FLUSH;
Josef Bacik0019f102010-10-15 15:18:40 -04004404 spin_lock(&space_info->lock);
Miao Xie08e007d2012-10-16 11:33:38 +00004405 if (can_overcommit(root, space_info, orig, flush)) {
Josef Bacikf4c738c2012-07-02 17:10:51 -04004406 spin_unlock(&space_info->lock);
4407 break;
4408 }
Josef Bacik0019f102010-10-15 15:18:40 -04004409 spin_unlock(&space_info->lock);
Yan, Zheng5da9d012010-05-16 10:46:25 -04004410
Chris Mason36e39c42011-03-12 07:08:42 -05004411 loops++;
Josef Bacikf104d042011-10-14 13:56:58 -04004412 if (wait_ordered && !trans) {
Miao Xieb0244192013-11-04 23:13:25 +08004413 btrfs_wait_ordered_roots(root->fs_info, items);
Josef Bacikf104d042011-10-14 13:56:58 -04004414 } else {
Josef Bacikf4c738c2012-07-02 17:10:51 -04004415 time_left = schedule_timeout_killable(1);
Josef Bacikf104d042011-10-14 13:56:58 -04004416 if (time_left)
4417 break;
4418 }
Miao Xie963d6782013-01-29 10:10:51 +00004419 delalloc_bytes = percpu_counter_sum_positive(
4420 &root->fs_info->delalloc_bytes);
Yan, Zheng5da9d012010-05-16 10:46:25 -04004421 }
Yan, Zheng5da9d012010-05-16 10:46:25 -04004422}
4423
Josef Bacik4a92b1b2011-08-30 12:34:28 -04004424/**
Josef Bacik663350a2011-11-03 22:54:25 -04004425 * maybe_commit_transaction - possibly commit the transaction if its ok to
4426 * @root - the root we're allocating for
4427 * @bytes - the number of bytes we want to reserve
4428 * @force - force the commit
Josef Bacik8bb8ab22010-10-15 16:52:49 -04004429 *
Josef Bacik663350a2011-11-03 22:54:25 -04004430 * This will check to make sure that committing the transaction will actually
4431 * get us somewhere and then commit the transaction if it does. Otherwise it
4432 * will return -ENOSPC.
Josef Bacik8bb8ab22010-10-15 16:52:49 -04004433 */
Josef Bacik663350a2011-11-03 22:54:25 -04004434static int may_commit_transaction(struct btrfs_root *root,
4435 struct btrfs_space_info *space_info,
4436 u64 bytes, int force)
4437{
4438 struct btrfs_block_rsv *delayed_rsv = &root->fs_info->delayed_block_rsv;
4439 struct btrfs_trans_handle *trans;
4440
4441 trans = (struct btrfs_trans_handle *)current->journal_info;
4442 if (trans)
4443 return -EAGAIN;
4444
4445 if (force)
4446 goto commit;
4447
4448 /* See if there is enough pinned space to make this reservation */
Josef Bacikb150a4f2013-06-19 15:00:04 -04004449 if (percpu_counter_compare(&space_info->total_bytes_pinned,
Miao Xie0424c542014-03-06 13:54:59 +08004450 bytes) >= 0)
Josef Bacik663350a2011-11-03 22:54:25 -04004451 goto commit;
Josef Bacik663350a2011-11-03 22:54:25 -04004452
4453 /*
4454 * See if there is some space in the delayed insertion reservation for
4455 * this reservation.
4456 */
4457 if (space_info != delayed_rsv->space_info)
4458 return -ENOSPC;
4459
4460 spin_lock(&delayed_rsv->lock);
Josef Bacikb150a4f2013-06-19 15:00:04 -04004461 if (percpu_counter_compare(&space_info->total_bytes_pinned,
4462 bytes - delayed_rsv->size) >= 0) {
Josef Bacik663350a2011-11-03 22:54:25 -04004463 spin_unlock(&delayed_rsv->lock);
4464 return -ENOSPC;
4465 }
4466 spin_unlock(&delayed_rsv->lock);
4467
4468commit:
4469 trans = btrfs_join_transaction(root);
4470 if (IS_ERR(trans))
4471 return -ENOSPC;
4472
4473 return btrfs_commit_transaction(trans, root);
4474}
4475
Josef Bacik96c3f432012-06-21 14:05:49 -04004476enum flush_state {
Josef Bacik67b0fd62012-09-24 13:42:00 -04004477 FLUSH_DELAYED_ITEMS_NR = 1,
4478 FLUSH_DELAYED_ITEMS = 2,
4479 FLUSH_DELALLOC = 3,
4480 FLUSH_DELALLOC_WAIT = 4,
Josef Bacikea658ba2012-09-11 16:57:25 -04004481 ALLOC_CHUNK = 5,
4482 COMMIT_TRANS = 6,
Josef Bacik96c3f432012-06-21 14:05:49 -04004483};
4484
/*
 * flush_space - run one space-reclaim stage for @space_info
 * @root: root used to join a transaction for the flushing work
 * @space_info: the space_info we are trying to make room in
 * @num_bytes: amount the flushing machinery computed it needs to reclaim
 * @orig_bytes: size of the reservation that originally failed
 * @state: which stage of enum flush_state to execute
 *
 * Returns 0 on success (some stages are best-effort), negative errno
 * otherwise.
 */
static int flush_space(struct btrfs_root *root,
		       struct btrfs_space_info *space_info, u64 num_bytes,
		       u64 orig_bytes, int state)
{
	struct btrfs_trans_handle *trans;
	int nr;
	int ret = 0;

	switch (state) {
	case FLUSH_DELAYED_ITEMS_NR:
	case FLUSH_DELAYED_ITEMS:
		/* nr == -1 means "run all delayed items" */
		if (state == FLUSH_DELAYED_ITEMS_NR)
			nr = calc_reclaim_items_nr(root, num_bytes) * 2;
		else
			nr = -1;

		trans = btrfs_join_transaction(root);
		if (IS_ERR(trans)) {
			ret = PTR_ERR(trans);
			break;
		}
		ret = btrfs_run_delayed_items_nr(trans, root, nr);
		btrfs_end_transaction(trans, root);
		break;
	case FLUSH_DELALLOC:
	case FLUSH_DELALLOC_WAIT:
		/* write back dirty pages; the _WAIT variant also waits on
		 * ordered extents to complete */
		shrink_delalloc(root, num_bytes * 2, orig_bytes,
				state == FLUSH_DELALLOC_WAIT);
		break;
	case ALLOC_CHUNK:
		trans = btrfs_join_transaction(root);
		if (IS_ERR(trans)) {
			ret = PTR_ERR(trans);
			break;
		}
		ret = do_chunk_alloc(trans, root->fs_info->extent_root,
				     btrfs_get_alloc_profile(root, 0),
				     CHUNK_ALLOC_NO_FORCE);
		btrfs_end_transaction(trans, root);
		/* no room for a new chunk is not an error for this stage */
		if (ret == -ENOSPC)
			ret = 0;
		break;
	case COMMIT_TRANS:
		ret = may_commit_transaction(root, space_info, orig_bytes, 0);
		break;
	default:
		ret = -ENOSPC;
		break;
	}

	return ret;
}
Miao Xie21c7e752014-05-13 17:29:04 -07004537
4538static inline u64
4539btrfs_calc_reclaim_metadata_size(struct btrfs_root *root,
4540 struct btrfs_space_info *space_info)
4541{
4542 u64 used;
4543 u64 expected;
4544 u64 to_reclaim;
4545
4546 to_reclaim = min_t(u64, num_online_cpus() * 1024 * 1024,
4547 16 * 1024 * 1024);
4548 spin_lock(&space_info->lock);
4549 if (can_overcommit(root, space_info, to_reclaim,
4550 BTRFS_RESERVE_FLUSH_ALL)) {
4551 to_reclaim = 0;
4552 goto out;
4553 }
4554
4555 used = space_info->bytes_used + space_info->bytes_reserved +
4556 space_info->bytes_pinned + space_info->bytes_readonly +
4557 space_info->bytes_may_use;
4558 if (can_overcommit(root, space_info, 1024 * 1024,
4559 BTRFS_RESERVE_FLUSH_ALL))
4560 expected = div_factor_fine(space_info->total_bytes, 95);
4561 else
4562 expected = div_factor_fine(space_info->total_bytes, 90);
4563
4564 if (used > expected)
4565 to_reclaim = used - expected;
4566 else
4567 to_reclaim = 0;
4568 to_reclaim = min(to_reclaim, space_info->bytes_may_use +
4569 space_info->bytes_reserved);
4570out:
4571 spin_unlock(&space_info->lock);
4572
4573 return to_reclaim;
4574}
4575
4576static inline int need_do_async_reclaim(struct btrfs_space_info *space_info,
4577 struct btrfs_fs_info *fs_info, u64 used)
4578{
Josef Bacik365c5312015-02-18 13:58:15 -08004579 u64 thresh = div_factor_fine(space_info->total_bytes, 98);
4580
4581 /* If we're just plain full then async reclaim just slows us down. */
4582 if (space_info->bytes_used >= thresh)
4583 return 0;
4584
4585 return (used >= thresh && !btrfs_fs_closing(fs_info) &&
Miao Xie21c7e752014-05-13 17:29:04 -07004586 !test_bit(BTRFS_FS_STATE_REMOUNTING, &fs_info->fs_state));
4587}
4588
4589static int btrfs_need_do_async_reclaim(struct btrfs_space_info *space_info,
Liu Bo25ce4592014-09-10 12:58:50 +08004590 struct btrfs_fs_info *fs_info,
4591 int flush_state)
Miao Xie21c7e752014-05-13 17:29:04 -07004592{
4593 u64 used;
4594
4595 spin_lock(&space_info->lock);
Liu Bo25ce4592014-09-10 12:58:50 +08004596 /*
4597 * We run out of space and have not got any free space via flush_space,
4598 * so don't bother doing async reclaim.
4599 */
4600 if (flush_state > COMMIT_TRANS && space_info->full) {
4601 spin_unlock(&space_info->lock);
4602 return 0;
4603 }
4604
Miao Xie21c7e752014-05-13 17:29:04 -07004605 used = space_info->bytes_used + space_info->bytes_reserved +
4606 space_info->bytes_pinned + space_info->bytes_readonly +
4607 space_info->bytes_may_use;
4608 if (need_do_async_reclaim(space_info, fs_info, used)) {
4609 spin_unlock(&space_info->lock);
4610 return 1;
4611 }
4612 spin_unlock(&space_info->lock);
4613
4614 return 0;
4615}
4616
/*
 * Background worker that reclaims metadata space for the metadata
 * space_info.  Walks the flush stages in order, re-checking after each
 * one whether reclaim is still needed, and stops after COMMIT_TRANS.
 */
static void btrfs_async_reclaim_metadata_space(struct work_struct *work)
{
	struct btrfs_fs_info *fs_info;
	struct btrfs_space_info *space_info;
	u64 to_reclaim;
	int flush_state;

	fs_info = container_of(work, struct btrfs_fs_info, async_reclaim_work);
	space_info = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA);

	to_reclaim = btrfs_calc_reclaim_metadata_size(fs_info->fs_root,
						      space_info);
	/* nothing to reclaim (overcommit still possible) */
	if (!to_reclaim)
		return;

	flush_state = FLUSH_DELAYED_ITEMS_NR;
	do {
		flush_space(fs_info->fs_root, space_info, to_reclaim,
			    to_reclaim, flush_state);
		flush_state++;
		/* bail as soon as reclaim stops being useful */
		if (!btrfs_need_do_async_reclaim(space_info, fs_info,
						 flush_state))
			return;
	} while (flush_state < COMMIT_TRANS);
}
4642
/*
 * Initialize the work item for asynchronous metadata reclaim; it is
 * queued from reserve_metadata_bytes() when the metadata space_info
 * gets close to exhaustion.
 */
void btrfs_init_async_reclaim_work(struct work_struct *work)
{
	INIT_WORK(work, btrfs_async_reclaim_metadata_space);
}
4647
Josef Bacik663350a2011-11-03 22:54:25 -04004648/**
Josef Bacik4a92b1b2011-08-30 12:34:28 -04004649 * reserve_metadata_bytes - try to reserve bytes from the block_rsv's space
4650 * @root - the root we're allocating for
4651 * @block_rsv - the block_rsv we're allocating for
4652 * @orig_bytes - the number of bytes we want
Adam Buchbinder48fc7f72012-09-19 21:48:00 -04004653 * @flush - whether or not we can flush to make our reservation
Josef Bacik8bb8ab22010-10-15 16:52:49 -04004654 *
Josef Bacik4a92b1b2011-08-30 12:34:28 -04004655 * This will reserve orgi_bytes number of bytes from the space info associated
4656 * with the block_rsv. If there is not enough space it will make an attempt to
4657 * flush out space to make room. It will do this by flushing delalloc if
4658 * possible or committing the transaction. If flush is 0 then no attempts to
4659 * regain reservations will be made and this will fail if there is not enough
4660 * space already.
Josef Bacik8bb8ab22010-10-15 16:52:49 -04004661 */
Josef Bacik4a92b1b2011-08-30 12:34:28 -04004662static int reserve_metadata_bytes(struct btrfs_root *root,
Josef Bacik8bb8ab22010-10-15 16:52:49 -04004663 struct btrfs_block_rsv *block_rsv,
Miao Xie08e007d2012-10-16 11:33:38 +00004664 u64 orig_bytes,
4665 enum btrfs_reserve_flush_enum flush)
Yan, Zhengf0486c62010-05-16 10:46:25 -04004666{
4667 struct btrfs_space_info *space_info = block_rsv->space_info;
Josef Bacik2bf64752011-09-26 17:12:22 -04004668 u64 used;
Josef Bacik8bb8ab22010-10-15 16:52:49 -04004669 u64 num_bytes = orig_bytes;
Josef Bacik67b0fd62012-09-24 13:42:00 -04004670 int flush_state = FLUSH_DELAYED_ITEMS_NR;
Josef Bacik8bb8ab22010-10-15 16:52:49 -04004671 int ret = 0;
Josef Bacikfdb5eff2011-06-07 16:07:44 -04004672 bool flushing = false;
Josef Bacik8bb8ab22010-10-15 16:52:49 -04004673
4674again:
Josef Bacikfdb5eff2011-06-07 16:07:44 -04004675 ret = 0;
Yan, Zhengf0486c62010-05-16 10:46:25 -04004676 spin_lock(&space_info->lock);
Josef Bacikfdb5eff2011-06-07 16:07:44 -04004677 /*
Miao Xie08e007d2012-10-16 11:33:38 +00004678 * We only want to wait if somebody other than us is flushing and we
4679 * are actually allowed to flush all things.
Josef Bacikfdb5eff2011-06-07 16:07:44 -04004680 */
Miao Xie08e007d2012-10-16 11:33:38 +00004681 while (flush == BTRFS_RESERVE_FLUSH_ALL && !flushing &&
4682 space_info->flush) {
Josef Bacikfdb5eff2011-06-07 16:07:44 -04004683 spin_unlock(&space_info->lock);
4684 /*
4685 * If we have a trans handle we can't wait because the flusher
4686 * may have to commit the transaction, which would mean we would
4687 * deadlock since we are waiting for the flusher to finish, but
4688 * hold the current transaction open.
4689 */
Josef Bacik663350a2011-11-03 22:54:25 -04004690 if (current->journal_info)
Josef Bacikfdb5eff2011-06-07 16:07:44 -04004691 return -EAGAIN;
Arne Jansenb9688bb2012-04-18 10:27:16 +02004692 ret = wait_event_killable(space_info->wait, !space_info->flush);
4693 /* Must have been killed, return */
4694 if (ret)
Josef Bacikfdb5eff2011-06-07 16:07:44 -04004695 return -EINTR;
4696
4697 spin_lock(&space_info->lock);
4698 }
4699
4700 ret = -ENOSPC;
Josef Bacik2bf64752011-09-26 17:12:22 -04004701 used = space_info->bytes_used + space_info->bytes_reserved +
4702 space_info->bytes_pinned + space_info->bytes_readonly +
4703 space_info->bytes_may_use;
Yan, Zhengf0486c62010-05-16 10:46:25 -04004704
Josef Bacik8bb8ab22010-10-15 16:52:49 -04004705 /*
4706 * The idea here is that we've not already over-reserved the block group
4707 * then we can go ahead and save our reservation first and then start
4708 * flushing if we need to. Otherwise if we've already overcommitted
4709 * lets start flushing stuff first and then come back and try to make
4710 * our reservation.
4711 */
Josef Bacik2bf64752011-09-26 17:12:22 -04004712 if (used <= space_info->total_bytes) {
4713 if (used + orig_bytes <= space_info->total_bytes) {
Josef Bacikfb25e912011-07-26 17:00:46 -04004714 space_info->bytes_may_use += orig_bytes;
Josef Bacik8c2a3ca2012-01-10 10:31:31 -05004715 trace_btrfs_space_reservation(root->fs_info,
Liu Bo2bcc0322012-03-29 09:57:44 -04004716 "space_info", space_info->flags, orig_bytes, 1);
Yan, Zhengf0486c62010-05-16 10:46:25 -04004717 ret = 0;
4718 } else {
Josef Bacik8bb8ab22010-10-15 16:52:49 -04004719 /*
4720 * Ok set num_bytes to orig_bytes since we aren't
4721 * overocmmitted, this way we only try and reclaim what
4722 * we need.
4723 */
4724 num_bytes = orig_bytes;
Yan, Zhengf0486c62010-05-16 10:46:25 -04004725 }
Josef Bacik8bb8ab22010-10-15 16:52:49 -04004726 } else {
4727 /*
4728 * Ok we're over committed, set num_bytes to the overcommitted
4729 * amount plus the amount of bytes that we need for this
4730 * reservation.
4731 */
Josef Bacik2bf64752011-09-26 17:12:22 -04004732 num_bytes = used - space_info->total_bytes +
Josef Bacik96c3f432012-06-21 14:05:49 -04004733 (orig_bytes * 2);
Yan, Zhengf0486c62010-05-16 10:46:25 -04004734 }
Josef Bacik8bb8ab22010-10-15 16:52:49 -04004735
Josef Bacik44734ed2012-09-28 16:04:19 -04004736 if (ret && can_overcommit(root, space_info, orig_bytes, flush)) {
4737 space_info->bytes_may_use += orig_bytes;
4738 trace_btrfs_space_reservation(root->fs_info, "space_info",
4739 space_info->flags, orig_bytes,
4740 1);
4741 ret = 0;
Josef Bacik2bf64752011-09-26 17:12:22 -04004742 }
4743
Josef Bacik8bb8ab22010-10-15 16:52:49 -04004744 /*
4745 * Couldn't make our reservation, save our place so while we're trying
4746 * to reclaim space we can actually use it instead of somebody else
4747 * stealing it from us.
Miao Xie08e007d2012-10-16 11:33:38 +00004748 *
4749 * We make the other tasks wait for the flush only when we can flush
4750 * all things.
Josef Bacik8bb8ab22010-10-15 16:52:49 -04004751 */
Josef Bacik72bcd992012-12-18 15:16:34 -05004752 if (ret && flush != BTRFS_RESERVE_NO_FLUSH) {
Josef Bacikfdb5eff2011-06-07 16:07:44 -04004753 flushing = true;
4754 space_info->flush = 1;
Miao Xie21c7e752014-05-13 17:29:04 -07004755 } else if (!ret && space_info->flags & BTRFS_BLOCK_GROUP_METADATA) {
4756 used += orig_bytes;
Josef Bacikf6acfd52014-09-18 11:27:17 -04004757 /*
4758 * We will do the space reservation dance during log replay,
4759 * which means we won't have fs_info->fs_root set, so don't do
4760 * the async reclaim as we will panic.
4761 */
4762 if (!root->fs_info->log_root_recovering &&
4763 need_do_async_reclaim(space_info, root->fs_info, used) &&
Miao Xie21c7e752014-05-13 17:29:04 -07004764 !work_busy(&root->fs_info->async_reclaim_work))
4765 queue_work(system_unbound_wq,
4766 &root->fs_info->async_reclaim_work);
Josef Bacik8bb8ab22010-10-15 16:52:49 -04004767 }
Yan, Zhengf0486c62010-05-16 10:46:25 -04004768 spin_unlock(&space_info->lock);
4769
Miao Xie08e007d2012-10-16 11:33:38 +00004770 if (!ret || flush == BTRFS_RESERVE_NO_FLUSH)
Josef Bacik8bb8ab22010-10-15 16:52:49 -04004771 goto out;
4772
Josef Bacik96c3f432012-06-21 14:05:49 -04004773 ret = flush_space(root, space_info, num_bytes, orig_bytes,
4774 flush_state);
4775 flush_state++;
Miao Xie08e007d2012-10-16 11:33:38 +00004776
4777 /*
4778 * If we are FLUSH_LIMIT, we can not flush delalloc, or the deadlock
4779 * would happen. So skip delalloc flush.
4780 */
4781 if (flush == BTRFS_RESERVE_FLUSH_LIMIT &&
4782 (flush_state == FLUSH_DELALLOC ||
4783 flush_state == FLUSH_DELALLOC_WAIT))
4784 flush_state = ALLOC_CHUNK;
4785
Josef Bacik96c3f432012-06-21 14:05:49 -04004786 if (!ret)
Josef Bacik8bb8ab22010-10-15 16:52:49 -04004787 goto again;
Miao Xie08e007d2012-10-16 11:33:38 +00004788 else if (flush == BTRFS_RESERVE_FLUSH_LIMIT &&
4789 flush_state < COMMIT_TRANS)
4790 goto again;
4791 else if (flush == BTRFS_RESERVE_FLUSH_ALL &&
4792 flush_state <= COMMIT_TRANS)
Josef Bacik8bb8ab22010-10-15 16:52:49 -04004793 goto again;
4794
4795out:
Josef Bacik5d803662013-02-07 16:06:02 -05004796 if (ret == -ENOSPC &&
4797 unlikely(root->orphan_cleanup_state == ORPHAN_CLEANUP_STARTED)) {
4798 struct btrfs_block_rsv *global_rsv =
4799 &root->fs_info->global_block_rsv;
4800
4801 if (block_rsv != global_rsv &&
4802 !block_rsv_use_bytes(global_rsv, orig_bytes))
4803 ret = 0;
4804 }
Jeff Mahoneycab45e22013-10-16 16:27:01 -04004805 if (ret == -ENOSPC)
4806 trace_btrfs_space_reservation(root->fs_info,
4807 "space_info:enospc",
4808 space_info->flags, orig_bytes, 1);
Josef Bacikfdb5eff2011-06-07 16:07:44 -04004809 if (flushing) {
Josef Bacik8bb8ab22010-10-15 16:52:49 -04004810 spin_lock(&space_info->lock);
Josef Bacikfdb5eff2011-06-07 16:07:44 -04004811 space_info->flush = 0;
4812 wake_up_all(&space_info->wait);
Josef Bacik8bb8ab22010-10-15 16:52:49 -04004813 spin_unlock(&space_info->lock);
4814 }
Yan, Zhengf0486c62010-05-16 10:46:25 -04004815 return ret;
4816}
4817
Jeff Mahoney79787ea2012-03-12 16:03:00 +01004818static struct btrfs_block_rsv *get_block_rsv(
4819 const struct btrfs_trans_handle *trans,
4820 const struct btrfs_root *root)
Yan, Zhengf0486c62010-05-16 10:46:25 -04004821{
Josef Bacik4c13d752011-08-30 11:31:29 -04004822 struct btrfs_block_rsv *block_rsv = NULL;
4823
Miao Xie27cdeb72014-04-02 19:51:05 +08004824 if (test_bit(BTRFS_ROOT_REF_COWS, &root->state))
Josef Bacik0e721102012-06-26 16:13:18 -04004825 block_rsv = trans->block_rsv;
4826
4827 if (root == root->fs_info->csum_root && trans->adding_csums)
Yan, Zhengf0486c62010-05-16 10:46:25 -04004828 block_rsv = trans->block_rsv;
Josef Bacik4c13d752011-08-30 11:31:29 -04004829
Stefan Behrensf7a81ea2013-08-15 17:11:19 +02004830 if (root == root->fs_info->uuid_root)
4831 block_rsv = trans->block_rsv;
4832
Josef Bacik4c13d752011-08-30 11:31:29 -04004833 if (!block_rsv)
Yan, Zhengf0486c62010-05-16 10:46:25 -04004834 block_rsv = root->block_rsv;
4835
4836 if (!block_rsv)
4837 block_rsv = &root->fs_info->empty_block_rsv;
4838
4839 return block_rsv;
4840}
4841
4842static int block_rsv_use_bytes(struct btrfs_block_rsv *block_rsv,
4843 u64 num_bytes)
4844{
4845 int ret = -ENOSPC;
4846 spin_lock(&block_rsv->lock);
4847 if (block_rsv->reserved >= num_bytes) {
4848 block_rsv->reserved -= num_bytes;
4849 if (block_rsv->reserved < block_rsv->size)
4850 block_rsv->full = 0;
4851 ret = 0;
4852 }
4853 spin_unlock(&block_rsv->lock);
4854 return ret;
4855}
4856
4857static void block_rsv_add_bytes(struct btrfs_block_rsv *block_rsv,
4858 u64 num_bytes, int update_size)
4859{
4860 spin_lock(&block_rsv->lock);
4861 block_rsv->reserved += num_bytes;
4862 if (update_size)
4863 block_rsv->size += num_bytes;
4864 else if (block_rsv->reserved >= block_rsv->size)
4865 block_rsv->full = 1;
4866 spin_unlock(&block_rsv->lock);
4867}
4868
Josef Bacikd52be812013-05-29 14:54:47 -04004869int btrfs_cond_migrate_bytes(struct btrfs_fs_info *fs_info,
4870 struct btrfs_block_rsv *dest, u64 num_bytes,
4871 int min_factor)
4872{
4873 struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
4874 u64 min_bytes;
4875
4876 if (global_rsv->space_info != dest->space_info)
4877 return -ENOSPC;
4878
4879 spin_lock(&global_rsv->lock);
4880 min_bytes = div_factor(global_rsv->size, min_factor);
4881 if (global_rsv->reserved < min_bytes + num_bytes) {
4882 spin_unlock(&global_rsv->lock);
4883 return -ENOSPC;
4884 }
4885 global_rsv->reserved -= num_bytes;
4886 if (global_rsv->reserved < global_rsv->size)
4887 global_rsv->full = 0;
4888 spin_unlock(&global_rsv->lock);
4889
4890 block_rsv_add_bytes(dest, num_bytes, 1);
4891 return 0;
4892}
4893
Josef Bacik8c2a3ca2012-01-10 10:31:31 -05004894static void block_rsv_release_bytes(struct btrfs_fs_info *fs_info,
4895 struct btrfs_block_rsv *block_rsv,
David Sterba62a45b62011-04-20 15:52:26 +02004896 struct btrfs_block_rsv *dest, u64 num_bytes)
Yan, Zhengf0486c62010-05-16 10:46:25 -04004897{
4898 struct btrfs_space_info *space_info = block_rsv->space_info;
4899
4900 spin_lock(&block_rsv->lock);
4901 if (num_bytes == (u64)-1)
4902 num_bytes = block_rsv->size;
4903 block_rsv->size -= num_bytes;
4904 if (block_rsv->reserved >= block_rsv->size) {
4905 num_bytes = block_rsv->reserved - block_rsv->size;
4906 block_rsv->reserved = block_rsv->size;
4907 block_rsv->full = 1;
4908 } else {
4909 num_bytes = 0;
4910 }
4911 spin_unlock(&block_rsv->lock);
4912
4913 if (num_bytes > 0) {
4914 if (dest) {
Josef Bacike9e22892011-01-24 21:43:19 +00004915 spin_lock(&dest->lock);
4916 if (!dest->full) {
4917 u64 bytes_to_add;
4918
4919 bytes_to_add = dest->size - dest->reserved;
4920 bytes_to_add = min(num_bytes, bytes_to_add);
4921 dest->reserved += bytes_to_add;
4922 if (dest->reserved >= dest->size)
4923 dest->full = 1;
4924 num_bytes -= bytes_to_add;
4925 }
4926 spin_unlock(&dest->lock);
4927 }
4928 if (num_bytes) {
Yan, Zhengf0486c62010-05-16 10:46:25 -04004929 spin_lock(&space_info->lock);
Josef Bacikfb25e912011-07-26 17:00:46 -04004930 space_info->bytes_may_use -= num_bytes;
Josef Bacik8c2a3ca2012-01-10 10:31:31 -05004931 trace_btrfs_space_reservation(fs_info, "space_info",
Liu Bo2bcc0322012-03-29 09:57:44 -04004932 space_info->flags, num_bytes, 0);
Yan, Zhengf0486c62010-05-16 10:46:25 -04004933 spin_unlock(&space_info->lock);
4934 }
4935 }
4936}
4937
4938static int block_rsv_migrate_bytes(struct btrfs_block_rsv *src,
4939 struct btrfs_block_rsv *dst, u64 num_bytes)
4940{
4941 int ret;
4942
4943 ret = block_rsv_use_bytes(src, num_bytes);
4944 if (ret)
4945 return ret;
4946
4947 block_rsv_add_bytes(dst, num_bytes, 1);
4948 return 0;
4949}
4950
Miao Xie66d8f3d2012-09-06 04:02:28 -06004951void btrfs_init_block_rsv(struct btrfs_block_rsv *rsv, unsigned short type)
Yan, Zhengf0486c62010-05-16 10:46:25 -04004952{
4953 memset(rsv, 0, sizeof(*rsv));
4954 spin_lock_init(&rsv->lock);
Miao Xie66d8f3d2012-09-06 04:02:28 -06004955 rsv->type = type;
Yan, Zhengf0486c62010-05-16 10:46:25 -04004956}
4957
Miao Xie66d8f3d2012-09-06 04:02:28 -06004958struct btrfs_block_rsv *btrfs_alloc_block_rsv(struct btrfs_root *root,
4959 unsigned short type)
Yan, Zhengf0486c62010-05-16 10:46:25 -04004960{
4961 struct btrfs_block_rsv *block_rsv;
4962 struct btrfs_fs_info *fs_info = root->fs_info;
Yan, Zhengf0486c62010-05-16 10:46:25 -04004963
4964 block_rsv = kmalloc(sizeof(*block_rsv), GFP_NOFS);
4965 if (!block_rsv)
4966 return NULL;
4967
Miao Xie66d8f3d2012-09-06 04:02:28 -06004968 btrfs_init_block_rsv(block_rsv, type);
Yan, Zhengf0486c62010-05-16 10:46:25 -04004969 block_rsv->space_info = __find_space_info(fs_info,
4970 BTRFS_BLOCK_GROUP_METADATA);
Yan, Zhengf0486c62010-05-16 10:46:25 -04004971 return block_rsv;
4972}
4973
4974void btrfs_free_block_rsv(struct btrfs_root *root,
4975 struct btrfs_block_rsv *rsv)
4976{
Josef Bacik2aaa6652012-08-29 14:27:18 -04004977 if (!rsv)
4978 return;
Josef Bacikdabdb642011-08-08 12:50:18 -04004979 btrfs_block_rsv_release(root, rsv, (u64)-1);
4980 kfree(rsv);
Yan, Zhengf0486c62010-05-16 10:46:25 -04004981}
4982
Chris Masoncdfb0802015-04-06 18:17:00 -07004983void __btrfs_free_block_rsv(struct btrfs_block_rsv *rsv)
4984{
4985 kfree(rsv);
4986}
4987
Miao Xie08e007d2012-10-16 11:33:38 +00004988int btrfs_block_rsv_add(struct btrfs_root *root,
4989 struct btrfs_block_rsv *block_rsv, u64 num_bytes,
4990 enum btrfs_reserve_flush_enum flush)
Yan, Zhengf0486c62010-05-16 10:46:25 -04004991{
4992 int ret;
4993
4994 if (num_bytes == 0)
4995 return 0;
Josef Bacik8bb8ab22010-10-15 16:52:49 -04004996
Miao Xie61b520a2011-11-10 20:45:05 -05004997 ret = reserve_metadata_bytes(root, block_rsv, num_bytes, flush);
Yan, Zhengf0486c62010-05-16 10:46:25 -04004998 if (!ret) {
4999 block_rsv_add_bytes(block_rsv, num_bytes, 1);
5000 return 0;
5001 }
5002
Yan, Zhengf0486c62010-05-16 10:46:25 -04005003 return ret;
5004}
5005
Josef Bacik4a92b1b2011-08-30 12:34:28 -04005006int btrfs_block_rsv_check(struct btrfs_root *root,
Josef Bacik36ba0222011-10-18 12:15:48 -04005007 struct btrfs_block_rsv *block_rsv, int min_factor)
Yan, Zhengf0486c62010-05-16 10:46:25 -04005008{
5009 u64 num_bytes = 0;
Yan, Zhengf0486c62010-05-16 10:46:25 -04005010 int ret = -ENOSPC;
5011
5012 if (!block_rsv)
5013 return 0;
5014
5015 spin_lock(&block_rsv->lock);
Josef Bacik36ba0222011-10-18 12:15:48 -04005016 num_bytes = div_factor(block_rsv->size, min_factor);
5017 if (block_rsv->reserved >= num_bytes)
Yan, Zhengf0486c62010-05-16 10:46:25 -04005018 ret = 0;
Yan, Zhengf0486c62010-05-16 10:46:25 -04005019 spin_unlock(&block_rsv->lock);
Yan, Zhengf0486c62010-05-16 10:46:25 -04005020
Josef Bacik36ba0222011-10-18 12:15:48 -04005021 return ret;
5022}
5023
Miao Xie08e007d2012-10-16 11:33:38 +00005024int btrfs_block_rsv_refill(struct btrfs_root *root,
5025 struct btrfs_block_rsv *block_rsv, u64 min_reserved,
5026 enum btrfs_reserve_flush_enum flush)
Josef Bacik36ba0222011-10-18 12:15:48 -04005027{
5028 u64 num_bytes = 0;
5029 int ret = -ENOSPC;
5030
5031 if (!block_rsv)
5032 return 0;
5033
5034 spin_lock(&block_rsv->lock);
5035 num_bytes = min_reserved;
Josef Bacik13553e52011-08-08 13:33:21 -04005036 if (block_rsv->reserved >= num_bytes)
Yan, Zhengf0486c62010-05-16 10:46:25 -04005037 ret = 0;
Josef Bacik13553e52011-08-08 13:33:21 -04005038 else
Yan, Zhengf0486c62010-05-16 10:46:25 -04005039 num_bytes -= block_rsv->reserved;
Yan, Zhengf0486c62010-05-16 10:46:25 -04005040 spin_unlock(&block_rsv->lock);
Josef Bacik13553e52011-08-08 13:33:21 -04005041
Yan, Zhengf0486c62010-05-16 10:46:25 -04005042 if (!ret)
5043 return 0;
5044
Miao Xieaa38a712011-11-18 17:43:00 +08005045 ret = reserve_metadata_bytes(root, block_rsv, num_bytes, flush);
Josef Bacikdabdb642011-08-08 12:50:18 -04005046 if (!ret) {
5047 block_rsv_add_bytes(block_rsv, num_bytes, 0);
Yan, Zhengf0486c62010-05-16 10:46:25 -04005048 return 0;
5049 }
5050
Josef Bacik13553e52011-08-08 13:33:21 -04005051 return ret;
Yan, Zhengf0486c62010-05-16 10:46:25 -04005052}
5053
/*
 * Move @num_bytes of reserved space from @src_rsv to @dst_rsv, growing
 * @dst_rsv's size.  Returns -ENOSPC if @src_rsv lacks the bytes.
 */
int btrfs_block_rsv_migrate(struct btrfs_block_rsv *src_rsv,
			    struct btrfs_block_rsv *dst_rsv,
			    u64 num_bytes)
{
	return block_rsv_migrate_bytes(src_rsv, dst_rsv, num_bytes);
}
5060
5061void btrfs_block_rsv_release(struct btrfs_root *root,
5062 struct btrfs_block_rsv *block_rsv,
5063 u64 num_bytes)
5064{
5065 struct btrfs_block_rsv *global_rsv = &root->fs_info->global_block_rsv;
Liu Bo17504582013-12-29 21:44:50 +08005066 if (global_rsv == block_rsv ||
Yan, Zhengf0486c62010-05-16 10:46:25 -04005067 block_rsv->space_info != global_rsv->space_info)
5068 global_rsv = NULL;
Josef Bacik8c2a3ca2012-01-10 10:31:31 -05005069 block_rsv_release_bytes(root->fs_info, block_rsv, global_rsv,
5070 num_bytes);
Yan, Zhengf0486c62010-05-16 10:46:25 -04005071}
5072
Yan, Zheng8929ecfa2010-05-16 10:49:58 -04005073/*
5074 * helper to calculate size of global block reservation.
5075 * the desired value is sum of space used by extent tree,
5076 * checksum tree and root tree
5077 */
5078static u64 calc_global_metadata_size(struct btrfs_fs_info *fs_info)
5079{
5080 struct btrfs_space_info *sinfo;
5081 u64 num_bytes;
5082 u64 meta_used;
5083 u64 data_used;
David Sterba6c417612011-04-13 15:41:04 +02005084 int csum_size = btrfs_super_csum_size(fs_info->super_copy);
Yan, Zheng8929ecfa2010-05-16 10:49:58 -04005085
Yan, Zheng8929ecfa2010-05-16 10:49:58 -04005086 sinfo = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_DATA);
5087 spin_lock(&sinfo->lock);
5088 data_used = sinfo->bytes_used;
5089 spin_unlock(&sinfo->lock);
5090
5091 sinfo = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA);
5092 spin_lock(&sinfo->lock);
Josef Bacik6d487552010-10-15 15:13:32 -04005093 if (sinfo->flags & BTRFS_BLOCK_GROUP_DATA)
5094 data_used = 0;
Yan, Zheng8929ecfa2010-05-16 10:49:58 -04005095 meta_used = sinfo->bytes_used;
5096 spin_unlock(&sinfo->lock);
5097
5098 num_bytes = (data_used >> fs_info->sb->s_blocksize_bits) *
5099 csum_size * 2;
David Sterbaf8c269d2015-01-16 17:21:12 +01005100 num_bytes += div_u64(data_used + meta_used, 50);
Yan, Zheng8929ecfa2010-05-16 10:49:58 -04005101
5102 if (num_bytes * 3 > meta_used)
David Sterbaf8c269d2015-01-16 17:21:12 +01005103 num_bytes = div_u64(meta_used, 3);
Yan, Zheng8929ecfa2010-05-16 10:49:58 -04005104
David Sterba707e8a02014-06-04 19:22:26 +02005105 return ALIGN(num_bytes, fs_info->extent_root->nodesize << 10);
Yan, Zheng8929ecfa2010-05-16 10:49:58 -04005106}
5107
/*
 * Resize the global block reservation to the target computed by
 * calc_global_metadata_size() (capped at 512M) and top up its reserved
 * bytes from whatever is still unaccounted in the metadata space_info.
 *
 * Lock order: sinfo->lock is taken outside block_rsv->lock.
 */
static void update_global_block_rsv(struct btrfs_fs_info *fs_info)
{
	struct btrfs_block_rsv *block_rsv = &fs_info->global_block_rsv;
	struct btrfs_space_info *sinfo = block_rsv->space_info;
	u64 num_bytes;

	num_bytes = calc_global_metadata_size(fs_info);

	spin_lock(&sinfo->lock);
	spin_lock(&block_rsv->lock);

	/* never let the global rsv grow beyond 512M */
	block_rsv->size = min_t(u64, num_bytes, 512 * 1024 * 1024);

	/* total space already accounted for in this space_info */
	num_bytes = sinfo->bytes_used + sinfo->bytes_pinned +
		    sinfo->bytes_reserved + sinfo->bytes_readonly +
		    sinfo->bytes_may_use;

	if (sinfo->total_bytes > num_bytes) {
		/* claim the unaccounted space for the global rsv */
		num_bytes = sinfo->total_bytes - num_bytes;
		block_rsv->reserved += num_bytes;
		sinfo->bytes_may_use += num_bytes;
		trace_btrfs_space_reservation(fs_info, "space_info",
				      sinfo->flags, num_bytes, 1);
	}

	if (block_rsv->reserved >= block_rsv->size) {
		/* return anything above the target size to the space_info */
		num_bytes = block_rsv->reserved - block_rsv->size;
		sinfo->bytes_may_use -= num_bytes;
		trace_btrfs_space_reservation(fs_info, "space_info",
				      sinfo->flags, num_bytes, 0);
		block_rsv->reserved = block_rsv->size;
		block_rsv->full = 1;
	}

	spin_unlock(&block_rsv->lock);
	spin_unlock(&sinfo->lock);
}
5145
Yan, Zhengf0486c62010-05-16 10:46:25 -04005146static void init_global_block_rsv(struct btrfs_fs_info *fs_info)
5147{
5148 struct btrfs_space_info *space_info;
5149
5150 space_info = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_SYSTEM);
5151 fs_info->chunk_block_rsv.space_info = space_info;
Yan, Zhengf0486c62010-05-16 10:46:25 -04005152
5153 space_info = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA);
Yan, Zheng8929ecfa2010-05-16 10:49:58 -04005154 fs_info->global_block_rsv.space_info = space_info;
Yan, Zheng8929ecfa2010-05-16 10:49:58 -04005155 fs_info->delalloc_block_rsv.space_info = space_info;
Yan, Zhengf0486c62010-05-16 10:46:25 -04005156 fs_info->trans_block_rsv.space_info = space_info;
5157 fs_info->empty_block_rsv.space_info = space_info;
Josef Bacik6d668dd2011-11-03 22:54:25 -04005158 fs_info->delayed_block_rsv.space_info = space_info;
Yan, Zhengf0486c62010-05-16 10:46:25 -04005159
Yan, Zheng8929ecfa2010-05-16 10:49:58 -04005160 fs_info->extent_root->block_rsv = &fs_info->global_block_rsv;
5161 fs_info->csum_root->block_rsv = &fs_info->global_block_rsv;
5162 fs_info->dev_root->block_rsv = &fs_info->global_block_rsv;
5163 fs_info->tree_root->block_rsv = &fs_info->global_block_rsv;
Stefan Behrens3a6cad92013-05-16 14:48:19 +00005164 if (fs_info->quota_root)
5165 fs_info->quota_root->block_rsv = &fs_info->global_block_rsv;
Yan, Zhengf0486c62010-05-16 10:46:25 -04005166 fs_info->chunk_root->block_rsv = &fs_info->chunk_block_rsv;
Yan, Zheng8929ecfa2010-05-16 10:49:58 -04005167
Yan, Zheng8929ecfa2010-05-16 10:49:58 -04005168 update_global_block_rsv(fs_info);
5169}
5170
/*
 * Drain the global block reservation and sanity-check that every other
 * metadata reservation has already been emptied by its owner.
 */
static void release_global_block_rsv(struct btrfs_fs_info *fs_info)
{
	/* Hand all bytes still held by the global rsv back to its space_info. */
	block_rsv_release_bytes(fs_info, &fs_info->global_block_rsv, NULL,
				      (u64)-1);
	/*
	 * Nothing should still hold delalloc, transaction, chunk or
	 * delayed-item reservations at this point; warn if something leaked.
	 */
	WARN_ON(fs_info->delalloc_block_rsv.size > 0);
	WARN_ON(fs_info->delalloc_block_rsv.reserved > 0);
	WARN_ON(fs_info->trans_block_rsv.size > 0);
	WARN_ON(fs_info->trans_block_rsv.reserved > 0);
	WARN_ON(fs_info->chunk_block_rsv.size > 0);
	WARN_ON(fs_info->chunk_block_rsv.reserved > 0);
	WARN_ON(fs_info->delayed_block_rsv.size > 0);
	WARN_ON(fs_info->delayed_block_rsv.reserved > 0);
}
5184
Yan, Zhenga22285a2010-05-16 10:48:46 -04005185void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans,
5186 struct btrfs_root *root)
5187{
Josef Bacik0e721102012-06-26 16:13:18 -04005188 if (!trans->block_rsv)
5189 return;
5190
Yan, Zhenga22285a2010-05-16 10:48:46 -04005191 if (!trans->bytes_reserved)
5192 return;
5193
Chris Masone77266e2012-02-24 10:39:05 -05005194 trace_btrfs_space_reservation(root->fs_info, "transaction",
Liu Bo2bcc0322012-03-29 09:57:44 -04005195 trans->transid, trans->bytes_reserved, 0);
Josef Bacikb24e03d2011-10-14 14:40:17 -04005196 btrfs_block_rsv_release(root, trans->block_rsv, trans->bytes_reserved);
Yan, Zhenga22285a2010-05-16 10:48:46 -04005197 trans->bytes_reserved = 0;
5198}
5199
Filipe Manana4fbcdf62015-05-20 14:01:54 +01005200/*
5201 * To be called after all the new block groups attached to the transaction
5202 * handle have been created (btrfs_create_pending_block_groups()).
5203 */
5204void btrfs_trans_release_chunk_metadata(struct btrfs_trans_handle *trans)
5205{
5206 struct btrfs_fs_info *fs_info = trans->root->fs_info;
5207
5208 if (!trans->chunk_bytes_reserved)
5209 return;
5210
5211 WARN_ON_ONCE(!list_empty(&trans->new_bgs));
5212
5213 block_rsv_release_bytes(fs_info, &fs_info->chunk_block_rsv, NULL,
5214 trans->chunk_bytes_reserved);
5215 trans->chunk_bytes_reserved = 0;
5216}
5217
Jeff Mahoney79787ea2012-03-12 16:03:00 +01005218/* Can only return 0 or -ENOSPC */
Yan, Zhengd68fc572010-05-16 10:49:58 -04005219int btrfs_orphan_reserve_metadata(struct btrfs_trans_handle *trans,
5220 struct inode *inode)
5221{
5222 struct btrfs_root *root = BTRFS_I(inode)->root;
5223 struct btrfs_block_rsv *src_rsv = get_block_rsv(trans, root);
5224 struct btrfs_block_rsv *dst_rsv = root->orphan_block_rsv;
5225
5226 /*
Josef Bacikfcb80c22011-05-03 10:40:22 -04005227 * We need to hold space in order to delete our orphan item once we've
5228 * added it, so this takes the reservation so we can release it later
5229 * when we are truly done with the orphan item.
Yan, Zhengd68fc572010-05-16 10:49:58 -04005230 */
Chris Masonff5714c2011-05-28 07:00:39 -04005231 u64 num_bytes = btrfs_calc_trans_metadata_size(root, 1);
Josef Bacik8c2a3ca2012-01-10 10:31:31 -05005232 trace_btrfs_space_reservation(root->fs_info, "orphan",
5233 btrfs_ino(inode), num_bytes, 1);
Yan, Zhengd68fc572010-05-16 10:49:58 -04005234 return block_rsv_migrate_bytes(src_rsv, dst_rsv, num_bytes);
5235}
5236
/*
 * Drop the reservation taken by btrfs_orphan_reserve_metadata() for the
 * orphan item belonging to @inode.
 */
void btrfs_orphan_release_metadata(struct inode *inode)
{
	struct btrfs_root *root = BTRFS_I(inode)->root;
	/* Must mirror the size used on the reserve side (one item). */
	u64 num_bytes = btrfs_calc_trans_metadata_size(root, 1);
	trace_btrfs_space_reservation(root->fs_info, "orphan",
				      btrfs_ino(inode), num_bytes, 0);
	btrfs_block_rsv_release(root, root->orphan_block_rsv, num_bytes);
}
5245
Miao Xied5c12072013-02-28 10:04:33 +00005246/*
5247 * btrfs_subvolume_reserve_metadata() - reserve space for subvolume operation
5248 * root: the root of the parent directory
5249 * rsv: block reservation
5250 * items: the number of items that we need do reservation
5251 * qgroup_reserved: used to return the reserved size in qgroup
5252 *
5253 * This function is used to reserve the space for snapshot/subvolume
5254 * creation and deletion. Those operations are different with the
5255 * common file/directory operations, they change two fs/file trees
5256 * and root tree, the number of items that the qgroup reserves is
5257 * different with the free space reservation. So we can not use
 * the space reservation mechanism in start_transaction().
5259 */
int btrfs_subvolume_reserve_metadata(struct btrfs_root *root,
				     struct btrfs_block_rsv *rsv,
				     int items,
				     u64 *qgroup_reserved,
				     bool use_global_rsv)
{
	u64 num_bytes;
	int ret;
	struct btrfs_block_rsv *global_rsv = &root->fs_info->global_block_rsv;

	if (root->fs_info->quota_enabled) {
		/* One for parent inode, two for dir entries */
		num_bytes = 3 * root->nodesize;
		ret = btrfs_qgroup_reserve(root, num_bytes);
		if (ret)
			return ret;
	} else {
		num_bytes = 0;
	}

	/* Report the qgroup charge to the caller (0 if quotas are off). */
	*qgroup_reserved = num_bytes;

	/* Now reserve the actual metadata space for @items tree items. */
	num_bytes = btrfs_calc_trans_metadata_size(root, items);
	rsv->space_info = __find_space_info(root->fs_info,
					    BTRFS_BLOCK_GROUP_METADATA);
	ret = btrfs_block_rsv_add(root, rsv, num_bytes,
				  BTRFS_RESERVE_FLUSH_ALL);

	/* Optionally fall back to stealing from the global reserve. */
	if (ret == -ENOSPC && use_global_rsv)
		ret = btrfs_block_rsv_migrate(global_rsv, rsv, num_bytes);

	/* On failure, undo the qgroup reservation made above. */
	if (ret) {
		if (*qgroup_reserved)
			btrfs_qgroup_free(root, *qgroup_reserved);
	}

	return ret;
}
5298
/*
 * Release the space reserved by btrfs_subvolume_reserve_metadata().
 * @qgroup_reserved is accepted for symmetry but not used by this function.
 */
void btrfs_subvolume_release_metadata(struct btrfs_root *root,
				      struct btrfs_block_rsv *rsv,
				      u64 qgroup_reserved)
{
	/* num_bytes == (u64)-1: release all remaining bytes held by @rsv. */
	btrfs_block_rsv_release(root, rsv, (u64)-1);
}
5305
Josef Bacik7709cde2011-08-04 10:25:02 -04005306/**
5307 * drop_outstanding_extent - drop an outstanding extent
5308 * @inode: the inode we're dropping the extent for
 * @num_bytes: the number of bytes we're releasing.
Josef Bacik7709cde2011-08-04 10:25:02 -04005310 *
5311 * This is called when we are freeing up an outstanding extent, either called
5312 * after an error or after an extent is written. This will return the number of
5313 * reserved extents that need to be freed. This must be called with
5314 * BTRFS_I(inode)->lock held.
5315 */
static unsigned drop_outstanding_extent(struct inode *inode, u64 num_bytes)
{
	unsigned drop_inode_space = 0;
	unsigned dropped_extents = 0;
	unsigned num_extents = 0;

	/* Worst-case number of extent items @num_bytes can span. */
	num_extents = (unsigned)div64_u64(num_bytes +
					  BTRFS_MAX_EXTENT_SIZE - 1,
					  BTRFS_MAX_EXTENT_SIZE);
	ASSERT(num_extents);
	ASSERT(BTRFS_I(inode)->outstanding_extents >= num_extents);
	BTRFS_I(inode)->outstanding_extents -= num_extents;

	/*
	 * If this was the last outstanding extent, the per-inode item
	 * reserved for the delalloc inode update can be dropped as well.
	 */
	if (BTRFS_I(inode)->outstanding_extents == 0 &&
	    test_and_clear_bit(BTRFS_INODE_DELALLOC_META_RESERVED,
			       &BTRFS_I(inode)->runtime_flags))
		drop_inode_space = 1;

	/*
	 * If we have at least as many outstanding extents as we have
	 * reserved then we need to leave the reserved extents count alone.
	 */
	if (BTRFS_I(inode)->outstanding_extents >=
	    BTRFS_I(inode)->reserved_extents)
		return drop_inode_space;

	dropped_extents = BTRFS_I(inode)->reserved_extents -
		BTRFS_I(inode)->outstanding_extents;
	BTRFS_I(inode)->reserved_extents -= dropped_extents;
	return dropped_extents + drop_inode_space;
}
5347
Josef Bacik7709cde2011-08-04 10:25:02 -04005348/**
 * calc_csum_metadata_size - return the amount of metadata space that must be
5350 * reserved/free'd for the given bytes.
5351 * @inode: the inode we're manipulating
5352 * @num_bytes: the number of bytes in question
5353 * @reserve: 1 if we are reserving space, 0 if we are freeing space
5354 *
5355 * This adjusts the number of csum_bytes in the inode and then returns the
5356 * correct amount of metadata that must either be reserved or freed. We
5357 * calculate how many checksums we can fit into one leaf and then divide the
5358 * number of bytes that will need to be checksumed by this value to figure out
5359 * how many checksums will be required. If we are adding bytes then the number
5360 * may go up and we will return the number of additional bytes that must be
5361 * reserved. If it is going down we will return the number of bytes that must
5362 * be freed.
5363 *
5364 * This must be called with BTRFS_I(inode)->lock held.
5365 */
static u64 calc_csum_metadata_size(struct inode *inode, u64 num_bytes,
				   int reserve)
{
	struct btrfs_root *root = BTRFS_I(inode)->root;
	u64 old_csums, num_csums;

	/* Nothing to account for nodatasum inodes tracking no csum bytes. */
	if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM &&
	    BTRFS_I(inode)->csum_bytes == 0)
		return 0;

	/* Leaves needed for the csum bytes we currently track... */
	old_csums = btrfs_csum_bytes_to_leaves(root, BTRFS_I(inode)->csum_bytes);
	if (reserve)
		BTRFS_I(inode)->csum_bytes += num_bytes;
	else
		BTRFS_I(inode)->csum_bytes -= num_bytes;
	/* ...and for the adjusted total after this change. */
	num_csums = btrfs_csum_bytes_to_leaves(root, BTRFS_I(inode)->csum_bytes);

	/* No change, no need to reserve more */
	if (old_csums == num_csums)
		return 0;

	if (reserve)
		return btrfs_calc_trans_metadata_size(root,
						      num_csums - old_csums);

	return btrfs_calc_trans_metadata_size(root, old_csums - num_csums);
}
5393
/*
 * Reserve metadata space for @num_bytes of delalloc on @inode: worst-case
 * extent items, the csum leaves those bytes will need, and (once per inode)
 * an item for the delalloc inode update.  Charges qgroups when enabled.
 *
 * Returns 0 on success or a negative error (e.g. -ENOSPC), with all
 * partial accounting unwound on failure.
 */
int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
{
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct btrfs_block_rsv *block_rsv = &root->fs_info->delalloc_block_rsv;
	u64 to_reserve = 0;
	u64 csum_bytes;
	unsigned nr_extents = 0;
	int extra_reserve = 0;
	enum btrfs_reserve_flush_enum flush = BTRFS_RESERVE_FLUSH_ALL;
	int ret = 0;
	bool delalloc_lock = true;
	u64 to_free = 0;
	unsigned dropped;

	/* If we are a free space inode we need to not flush since we will be in
	 * the middle of a transaction commit.  We also don't need the delalloc
	 * mutex since we won't race with anybody.  We need this mostly to make
	 * lockdep shut its filthy mouth.
	 */
	if (btrfs_is_free_space_inode(inode)) {
		flush = BTRFS_RESERVE_NO_FLUSH;
		delalloc_lock = false;
	}

	/*
	 * NOTE(review): task state is TASK_RUNNING here, so this
	 * schedule_timeout(1) acts as a yield while a commit is in
	 * progress — confirm that is the intent.
	 */
	if (flush != BTRFS_RESERVE_NO_FLUSH &&
	    btrfs_transaction_in_commit(root->fs_info))
		schedule_timeout(1);

	if (delalloc_lock)
		mutex_lock(&BTRFS_I(inode)->delalloc_mutex);

	num_bytes = ALIGN(num_bytes, root->sectorsize);

	spin_lock(&BTRFS_I(inode)->lock);
	/* Worst-case number of extent items @num_bytes can split into. */
	nr_extents = (unsigned)div64_u64(num_bytes +
					 BTRFS_MAX_EXTENT_SIZE - 1,
					 BTRFS_MAX_EXTENT_SIZE);
	BTRFS_I(inode)->outstanding_extents += nr_extents;
	nr_extents = 0;

	/* Only the extents not already covered by a prior reservation. */
	if (BTRFS_I(inode)->outstanding_extents >
	    BTRFS_I(inode)->reserved_extents)
		nr_extents = BTRFS_I(inode)->outstanding_extents -
			BTRFS_I(inode)->reserved_extents;

	/*
	 * Add an item to reserve for updating the inode when we complete the
	 * delalloc io.
	 */
	if (!test_bit(BTRFS_INODE_DELALLOC_META_RESERVED,
		      &BTRFS_I(inode)->runtime_flags)) {
		nr_extents++;
		extra_reserve = 1;
	}

	to_reserve = btrfs_calc_trans_metadata_size(root, nr_extents);
	to_reserve += calc_csum_metadata_size(inode, num_bytes, 1);
	/* Remember csum_bytes so the failure path can detect racing frees. */
	csum_bytes = BTRFS_I(inode)->csum_bytes;
	spin_unlock(&BTRFS_I(inode)->lock);

	if (root->fs_info->quota_enabled) {
		ret = btrfs_qgroup_reserve(root, nr_extents * root->nodesize);
		if (ret)
			goto out_fail;
	}

	ret = reserve_metadata_bytes(root, block_rsv, to_reserve, flush);
	if (unlikely(ret)) {
		if (root->fs_info->quota_enabled)
			btrfs_qgroup_free(root, nr_extents * root->nodesize);
		goto out_fail;
	}

	spin_lock(&BTRFS_I(inode)->lock);
	if (extra_reserve) {
		set_bit(BTRFS_INODE_DELALLOC_META_RESERVED,
			&BTRFS_I(inode)->runtime_flags);
		/* The inode-update item is tracked via the bit, not the count. */
		nr_extents--;
	}
	BTRFS_I(inode)->reserved_extents += nr_extents;
	spin_unlock(&BTRFS_I(inode)->lock);

	if (delalloc_lock)
		mutex_unlock(&BTRFS_I(inode)->delalloc_mutex);

	if (to_reserve)
		trace_btrfs_space_reservation(root->fs_info, "delalloc",
					      btrfs_ino(inode), to_reserve, 1);
	block_rsv_add_bytes(block_rsv, to_reserve, 1);

	return 0;

out_fail:
	spin_lock(&BTRFS_I(inode)->lock);
	dropped = drop_outstanding_extent(inode, num_bytes);
	/*
	 * If the inodes csum_bytes is the same as the original
	 * csum_bytes then we know we haven't raced with any free()ers
	 * so we can just reduce our inodes csum bytes and carry on.
	 */
	if (BTRFS_I(inode)->csum_bytes == csum_bytes) {
		calc_csum_metadata_size(inode, num_bytes, 0);
	} else {
		u64 orig_csum_bytes = BTRFS_I(inode)->csum_bytes;
		u64 bytes;

		/*
		 * This is tricky, but first we need to figure out how much we
		 * free'd from any free-ers that occurred during this
		 * reservation, so we reset ->csum_bytes to the csum_bytes
		 * before we dropped our lock, and then call the free for the
		 * number of bytes that were freed while we were trying our
		 * reservation.
		 */
		bytes = csum_bytes - BTRFS_I(inode)->csum_bytes;
		BTRFS_I(inode)->csum_bytes = csum_bytes;
		to_free = calc_csum_metadata_size(inode, bytes, 0);


		/*
		 * Now we need to see how much we would have freed had we not
		 * been making this reservation and our ->csum_bytes were not
		 * artificially inflated.
		 */
		BTRFS_I(inode)->csum_bytes = csum_bytes - num_bytes;
		bytes = csum_bytes - orig_csum_bytes;
		bytes = calc_csum_metadata_size(inode, bytes, 0);

		/*
		 * Now reset ->csum_bytes to what it should be.  If bytes is
		 * more than to_free then we would have free'd more space had we
		 * not had an artificially high ->csum_bytes, so we need to free
		 * the remainder.  If bytes is the same or less then we don't
		 * need to do anything, the other free-ers did the correct
		 * thing.
		 */
		BTRFS_I(inode)->csum_bytes = orig_csum_bytes - num_bytes;
		if (bytes > to_free)
			to_free = bytes - to_free;
		else
			to_free = 0;
	}
	spin_unlock(&BTRFS_I(inode)->lock);
	if (dropped)
		to_free += btrfs_calc_trans_metadata_size(root, dropped);

	if (to_free) {
		btrfs_block_rsv_release(root, block_rsv, to_free);
		trace_btrfs_space_reservation(root->fs_info, "delalloc",
					      btrfs_ino(inode), to_free, 0);
	}
	if (delalloc_lock)
		mutex_unlock(&BTRFS_I(inode)->delalloc_mutex);
	return ret;
}
5549
Josef Bacik7709cde2011-08-04 10:25:02 -04005550/**
5551 * btrfs_delalloc_release_metadata - release a metadata reservation for an inode
5552 * @inode: the inode to release the reservation for
5553 * @num_bytes: the number of bytes we're releasing
5554 *
5555 * This will release the metadata reservation for an inode. This can be called
5556 * once we complete IO for a given set of bytes to release their metadata
5557 * reservations.
5558 */
void btrfs_delalloc_release_metadata(struct inode *inode, u64 num_bytes)
{
	struct btrfs_root *root = BTRFS_I(inode)->root;
	u64 to_free = 0;
	unsigned dropped;

	/* Round up to the same granularity the reserve side used. */
	num_bytes = ALIGN(num_bytes, root->sectorsize);
	spin_lock(&BTRFS_I(inode)->lock);
	dropped = drop_outstanding_extent(inode, num_bytes);

	if (num_bytes)
		to_free = calc_csum_metadata_size(inode, num_bytes, 0);
	spin_unlock(&BTRFS_I(inode)->lock);
	if (dropped > 0)
		to_free += btrfs_calc_trans_metadata_size(root, dropped);

	/* Dummy (test-only) roots: skip the rsv release and tracepoint. */
	if (btrfs_test_is_dummy_root(root))
		return;

	trace_btrfs_space_reservation(root->fs_info, "delalloc",
				      btrfs_ino(inode), to_free, 0);

	btrfs_block_rsv_release(root, &root->fs_info->delalloc_block_rsv,
				to_free);
}
5584
Josef Bacik7709cde2011-08-04 10:25:02 -04005585/**
5586 * btrfs_delalloc_reserve_space - reserve data and metadata space for delalloc
5587 * @inode: inode we're writing to
5588 * @num_bytes: the number of bytes we want to allocate
5589 *
5590 * This will do the following things
5591 *
5592 * o reserve space in the data space info for num_bytes
5593 * o reserve space in the metadata space info based on number of outstanding
5594 * extents and how much csums will be needed
5595 * o add to the inodes ->delalloc_bytes
5596 * o add it to the fs_info's delalloc inodes list.
5597 *
5598 * This will return 0 for success and -ENOSPC if there is no space left.
5599 */
Yan, Zheng0ca1f7c2010-05-16 10:48:47 -04005600int btrfs_delalloc_reserve_space(struct inode *inode, u64 num_bytes)
5601{
5602 int ret;
5603
Dongsheng Yange2d1f922015-02-06 10:26:52 -05005604 ret = btrfs_check_data_free_space(inode, num_bytes, num_bytes);
Yan, Zheng0ca1f7c2010-05-16 10:48:47 -04005605 if (ret)
5606 return ret;
5607
5608 ret = btrfs_delalloc_reserve_metadata(inode, num_bytes);
5609 if (ret) {
5610 btrfs_free_reserved_data_space(inode, num_bytes);
5611 return ret;
5612 }
5613
5614 return 0;
5615}
5616
Josef Bacik7709cde2011-08-04 10:25:02 -04005617/**
5618 * btrfs_delalloc_release_space - release data and metadata space for delalloc
5619 * @inode: inode we're releasing space for
5620 * @num_bytes: the number of bytes we want to free up
5621 *
5622 * This must be matched with a call to btrfs_delalloc_reserve_space. This is
5623 * called in the case that we don't need the metadata AND data reservations
5624 * anymore. So if there is an error or we insert an inline extent.
5625 *
5626 * This function will release the metadata space that was not used and will
5627 * decrement ->delalloc_bytes and remove it from the fs_info delalloc_inodes
5628 * list if there are no delalloc bytes left.
5629 */
void btrfs_delalloc_release_space(struct inode *inode, u64 num_bytes)
{
	/* Undo both halves of btrfs_delalloc_reserve_space(). */
	btrfs_delalloc_release_metadata(inode, num_bytes);
	btrfs_free_reserved_data_space(inode, num_bytes);
}
5635
/*
 * Adjust space accounting after @num_bytes starting at @bytenr were
 * allocated (@alloc != 0) or released (@alloc == 0, bytes move to pinned).
 * Updates the superblock's bytes_used counter, then walks every block
 * group the range touches and updates its on-item and space_info counters,
 * marking each touched block group dirty for the transaction.
 *
 * Returns 0 on success or -ENOENT if a block group cannot be found.
 */
static int update_block_group(struct btrfs_trans_handle *trans,
			      struct btrfs_root *root, u64 bytenr,
			      u64 num_bytes, int alloc)
{
	struct btrfs_block_group_cache *cache = NULL;
	struct btrfs_fs_info *info = root->fs_info;
	u64 total = num_bytes;
	u64 old_val;
	u64 byte_in_group;
	int factor;

	/* block accounting for super block */
	spin_lock(&info->delalloc_root_lock);
	old_val = btrfs_super_bytes_used(info->super_copy);
	if (alloc)
		old_val += num_bytes;
	else
		old_val -= num_bytes;
	btrfs_set_super_bytes_used(info->super_copy, old_val);
	spin_unlock(&info->delalloc_root_lock);

	while (total) {
		cache = btrfs_lookup_block_group(info, bytenr);
		/*
		 * NOTE(review): bailing here leaves the superblock counter
		 * already updated — confirm callers treat this as fatal.
		 */
		if (!cache)
			return -ENOENT;
		/* Mirrored profiles consume twice the raw disk space. */
		if (cache->flags & (BTRFS_BLOCK_GROUP_DUP |
				    BTRFS_BLOCK_GROUP_RAID1 |
				    BTRFS_BLOCK_GROUP_RAID10))
			factor = 2;
		else
			factor = 1;
		/*
		 * If this block group has free space cache written out, we
		 * need to make sure to load it if we are removing space.  This
		 * is because we need the unpinning stage to actually add the
		 * space back to the block group, otherwise we will leak space.
		 */
		if (!alloc && cache->cached == BTRFS_CACHE_NO)
			cache_block_group(cache, 1);

		byte_in_group = bytenr - cache->key.objectid;
		WARN_ON(byte_in_group > cache->key.offset);

		/* Lock order: space_info lock before block group lock. */
		spin_lock(&cache->space_info->lock);
		spin_lock(&cache->lock);

		if (btrfs_test_opt(root, SPACE_CACHE) &&
		    cache->disk_cache_state < BTRFS_DC_CLEAR)
			cache->disk_cache_state = BTRFS_DC_CLEAR;

		old_val = btrfs_block_group_used(&cache->item);
		/* Clamp this iteration to the end of the block group. */
		num_bytes = min(total, cache->key.offset - byte_in_group);
		if (alloc) {
			old_val += num_bytes;
			btrfs_set_block_group_used(&cache->item, old_val);
			cache->reserved -= num_bytes;
			cache->space_info->bytes_reserved -= num_bytes;
			cache->space_info->bytes_used += num_bytes;
			cache->space_info->disk_used += num_bytes * factor;
			spin_unlock(&cache->lock);
			spin_unlock(&cache->space_info->lock);
		} else {
			old_val -= num_bytes;
			btrfs_set_block_group_used(&cache->item, old_val);
			/* Freed bytes go to pinned until the commit unpins them. */
			cache->pinned += num_bytes;
			cache->space_info->bytes_pinned += num_bytes;
			cache->space_info->bytes_used -= num_bytes;
			cache->space_info->disk_used -= num_bytes * factor;
			spin_unlock(&cache->lock);
			spin_unlock(&cache->space_info->lock);

			set_extent_dirty(info->pinned_extents,
					 bytenr, bytenr + num_bytes - 1,
					 GFP_NOFS | __GFP_NOFAIL);
			/*
			 * No longer have used bytes in this block group, queue
			 * it for deletion.
			 */
			if (old_val == 0) {
				spin_lock(&info->unused_bgs_lock);
				if (list_empty(&cache->bg_list)) {
					btrfs_get_block_group(cache);
					list_add_tail(&cache->bg_list,
						      &info->unused_bgs);
				}
				spin_unlock(&info->unused_bgs_lock);
			}
		}

		/* Make sure this block group gets written back this commit. */
		spin_lock(&trans->transaction->dirty_bgs_lock);
		if (list_empty(&cache->dirty_list)) {
			list_add_tail(&cache->dirty_list,
				      &trans->transaction->dirty_bgs);
			trans->transaction->num_dirty_bgs++;
			btrfs_get_block_group(cache);
		}
		spin_unlock(&trans->transaction->dirty_bgs_lock);

		btrfs_put_block_group(cache);
		total -= num_bytes;
		bytenr += num_bytes;
	}
	return 0;
}
Chris Mason6324fbf2008-03-24 15:01:59 -04005740
Chris Masona061fc82008-05-07 11:43:44 -04005741static u64 first_logical_byte(struct btrfs_root *root, u64 search_start)
5742{
Josef Bacik0f9dd462008-09-23 13:14:11 -04005743 struct btrfs_block_group_cache *cache;
Yan Zhengd2fb3432008-12-11 16:30:39 -05005744 u64 bytenr;
Josef Bacik0f9dd462008-09-23 13:14:11 -04005745
Liu Boa1897fd2012-12-27 09:01:23 +00005746 spin_lock(&root->fs_info->block_group_cache_lock);
5747 bytenr = root->fs_info->first_logical_byte;
5748 spin_unlock(&root->fs_info->block_group_cache_lock);
5749
5750 if (bytenr < (u64)-1)
5751 return bytenr;
5752
Josef Bacik0f9dd462008-09-23 13:14:11 -04005753 cache = btrfs_lookup_first_block_group(root->fs_info, search_start);
5754 if (!cache)
Chris Masona061fc82008-05-07 11:43:44 -04005755 return 0;
Josef Bacik0f9dd462008-09-23 13:14:11 -04005756
Yan Zhengd2fb3432008-12-11 16:30:39 -05005757 bytenr = cache->key.objectid;
Chris Masonfa9c0d792009-04-03 09:47:43 -04005758 btrfs_put_block_group(cache);
Yan Zhengd2fb3432008-12-11 16:30:39 -05005759
5760 return bytenr;
Chris Masona061fc82008-05-07 11:43:44 -04005761}
5762
/*
 * Move @num_bytes of @cache into the pinned state; when @reserved is set
 * the bytes come out of the reserved counters as well.  Always returns 0.
 */
static int pin_down_extent(struct btrfs_root *root,
			   struct btrfs_block_group_cache *cache,
			   u64 bytenr, u64 num_bytes, int reserved)
{
	/* Lock order: space_info lock before block group lock. */
	spin_lock(&cache->space_info->lock);
	spin_lock(&cache->lock);
	cache->pinned += num_bytes;
	cache->space_info->bytes_pinned += num_bytes;
	if (reserved) {
		cache->reserved -= num_bytes;
		cache->space_info->bytes_reserved -= num_bytes;
	}
	spin_unlock(&cache->lock);
	spin_unlock(&cache->space_info->lock);

	/* Record the range so commit-time unpinning can find it. */
	set_extent_dirty(root->fs_info->pinned_extents, bytenr,
			 bytenr + num_bytes - 1, GFP_NOFS | __GFP_NOFAIL);
	if (reserved)
		trace_btrfs_reserved_extent_free(root, bytenr, num_bytes);
	return 0;
}
Chris Mason9078a3e2007-04-26 16:46:15 -04005784
Yan, Zhengf0486c62010-05-16 10:46:25 -04005785/*
5786 * this function must be called within transaction
5787 */
5788int btrfs_pin_extent(struct btrfs_root *root,
5789 u64 bytenr, u64 num_bytes, int reserved)
Zheng Yane8569812008-09-26 10:05:48 -04005790{
Yan, Zhengf0486c62010-05-16 10:46:25 -04005791 struct btrfs_block_group_cache *cache;
5792
5793 cache = btrfs_lookup_block_group(root->fs_info, bytenr);
Jeff Mahoney79787ea2012-03-12 16:03:00 +01005794 BUG_ON(!cache); /* Logic error */
Yan, Zhengf0486c62010-05-16 10:46:25 -04005795
5796 pin_down_extent(root, cache, bytenr, num_bytes, reserved);
5797
5798 btrfs_put_block_group(cache);
Yan Zheng11833d62009-09-11 16:11:19 -04005799 return 0;
5800}
Zheng Yane8569812008-09-26 10:05:48 -04005801
/*
 * Pin an extent during tree-log replay.
 *
 * This function must be called within a transaction.  Unlike
 * btrfs_pin_extent(), the block group may legitimately be missing
 * (corrupt/unknown log), so -EINVAL is returned instead of BUG_ON.
 *
 * Returns the result of removing the range from the free space cache.
 */
int btrfs_pin_extent_for_log_replay(struct btrfs_root *root,
				    u64 bytenr, u64 num_bytes)
{
	struct btrfs_block_group_cache *cache;
	int ret;

	cache = btrfs_lookup_block_group(root->fs_info, bytenr);
	if (!cache)
		return -EINVAL;

	/*
	 * pull in the free space cache (if any) so that our pin
	 * removes the free space from the cache. We have load_only set
	 * to one because the slow code to read in the free extents does check
	 * the pinned extents.
	 */
	cache_block_group(cache, 1);

	/* reserved == 0: log-replayed extents were never reserved */
	pin_down_extent(root, cache, bytenr, num_bytes, 0);

	/* remove us from the free space cache (if we're there at all) */
	ret = btrfs_remove_free_space(cache, bytenr, num_bytes);
	btrfs_put_block_group(cache);
	return ret;
}
5830
Josef Bacik8c2a1a32013-06-06 13:19:32 -04005831static int __exclude_logged_extent(struct btrfs_root *root, u64 start, u64 num_bytes)
5832{
5833 int ret;
5834 struct btrfs_block_group_cache *block_group;
5835 struct btrfs_caching_control *caching_ctl;
5836
5837 block_group = btrfs_lookup_block_group(root->fs_info, start);
5838 if (!block_group)
5839 return -EINVAL;
5840
5841 cache_block_group(block_group, 0);
5842 caching_ctl = get_caching_control(block_group);
5843
5844 if (!caching_ctl) {
5845 /* Logic error */
5846 BUG_ON(!block_group_cache_done(block_group));
5847 ret = btrfs_remove_free_space(block_group, start, num_bytes);
5848 } else {
5849 mutex_lock(&caching_ctl->mutex);
5850
5851 if (start >= caching_ctl->progress) {
5852 ret = add_excluded_extent(root, start, num_bytes);
5853 } else if (start + num_bytes <= caching_ctl->progress) {
5854 ret = btrfs_remove_free_space(block_group,
5855 start, num_bytes);
5856 } else {
5857 num_bytes = caching_ctl->progress - start;
5858 ret = btrfs_remove_free_space(block_group,
5859 start, num_bytes);
5860 if (ret)
5861 goto out_lock;
5862
5863 num_bytes = (start + num_bytes) -
5864 caching_ctl->progress;
5865 start = caching_ctl->progress;
5866 ret = add_excluded_extent(root, start, num_bytes);
5867 }
5868out_lock:
5869 mutex_unlock(&caching_ctl->mutex);
5870 put_caching_control(caching_ctl);
5871 }
5872 btrfs_put_block_group(block_group);
5873 return ret;
5874}
5875
5876int btrfs_exclude_logged_extents(struct btrfs_root *log,
5877 struct extent_buffer *eb)
5878{
5879 struct btrfs_file_extent_item *item;
5880 struct btrfs_key key;
5881 int found_type;
5882 int i;
5883
5884 if (!btrfs_fs_incompat(log->fs_info, MIXED_GROUPS))
5885 return 0;
5886
5887 for (i = 0; i < btrfs_header_nritems(eb); i++) {
5888 btrfs_item_key_to_cpu(eb, &key, i);
5889 if (key.type != BTRFS_EXTENT_DATA_KEY)
5890 continue;
5891 item = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
5892 found_type = btrfs_file_extent_type(eb, item);
5893 if (found_type == BTRFS_FILE_EXTENT_INLINE)
5894 continue;
5895 if (btrfs_file_extent_disk_bytenr(eb, item) == 0)
5896 continue;
5897 key.objectid = btrfs_file_extent_disk_bytenr(eb, item);
5898 key.offset = btrfs_file_extent_disk_num_bytes(eb, item);
5899 __exclude_logged_extent(log, key.objectid, key.offset);
5900 }
5901
5902 return 0;
5903}
5904
/**
 * btrfs_update_reserved_bytes - update the block_group and space info counters
 * @cache:	The cache we are manipulating
 * @num_bytes:	The number of bytes in question
 * @reserve:	One of the reservation enums
 * @delalloc:   The blocks are allocated for the delalloc write
 *
 * This is called by the allocator when it reserves space, or by somebody who is
 * freeing space that was never actually used on disk.  For example if you
 * reserve some space for a new leaf in transaction A and before transaction A
 * commits you free that leaf, you call this with reserve set to 0 in order to
 * clear the reservation.
 *
 * Metadata reservations should be called with RESERVE_ALLOC so we do the proper
 * ENOSPC accounting.  For data we handle the reservation through clearing the
 * delalloc bits in the io_tree.  We have to do this since we could end up
 * allocating less disk space for the amount of data we have reserved in the
 * case of compression.
 *
 * If this is a reservation and the block group has become read only we cannot
 * make the reservation and return -EAGAIN, otherwise this function always
 * succeeds.
 */
static int btrfs_update_reserved_bytes(struct btrfs_block_group_cache *cache,
				       u64 num_bytes, int reserve, int delalloc)
{
	struct btrfs_space_info *space_info = cache->space_info;
	int ret = 0;

	/* lock order: space_info before the block group, as elsewhere */
	spin_lock(&space_info->lock);
	spin_lock(&cache->lock);
	if (reserve != RESERVE_FREE) {
		/* making a reservation */
		if (cache->ro) {
			ret = -EAGAIN;
		} else {
			cache->reserved += num_bytes;
			space_info->bytes_reserved += num_bytes;
			if (reserve == RESERVE_ALLOC) {
				/*
				 * RESERVE_ALLOC converts a prior may_use
				 * reservation into a real one.
				 */
				trace_btrfs_space_reservation(cache->fs_info,
						"space_info", space_info->flags,
						num_bytes, 0);
				space_info->bytes_may_use -= num_bytes;
			}

			if (delalloc)
				cache->delalloc_bytes += num_bytes;
		}
	} else {
		/* releasing a reservation */
		if (cache->ro)
			space_info->bytes_readonly += num_bytes;
		cache->reserved -= num_bytes;
		space_info->bytes_reserved -= num_bytes;

		if (delalloc)
			cache->delalloc_bytes -= num_bytes;
	}
	spin_unlock(&cache->lock);
	spin_unlock(&space_info->lock);
	return ret;
}
5965
/*
 * Called at transaction commit time to prepare for unpinning.
 *
 * Under commit_root_sem, record how far each caching thread has scanned
 * (last_byte_to_unpin) so unpin_extent_range() knows which ranges may be
 * returned to the free space cache, and flip fs_info->pinned_extents to
 * the other freed_extents tree so new pins go into the tree for the next
 * transaction while btrfs_finish_extent_commit() drains this one.
 */
void btrfs_prepare_extent_commit(struct btrfs_trans_handle *trans,
				 struct btrfs_root *root)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct btrfs_caching_control *next;
	struct btrfs_caching_control *caching_ctl;
	struct btrfs_block_group_cache *cache;

	down_write(&fs_info->commit_root_sem);

	list_for_each_entry_safe(caching_ctl, next,
				 &fs_info->caching_block_groups, list) {
		cache = caching_ctl->block_group;
		if (block_group_cache_done(cache)) {
			/* fully cached: everything can be unpinned */
			cache->last_byte_to_unpin = (u64)-1;
			list_del_init(&caching_ctl->list);
			put_caching_control(caching_ctl);
		} else {
			/* only unpin what the caching thread has scanned */
			cache->last_byte_to_unpin = caching_ctl->progress;
		}
	}

	/* swap the active pinned tree; the old one is drained at commit */
	if (fs_info->pinned_extents == &fs_info->freed_extents[0])
		fs_info->pinned_extents = &fs_info->freed_extents[1];
	else
		fs_info->pinned_extents = &fs_info->freed_extents[0];

	up_write(&fs_info->commit_root_sem);

	update_global_block_rsv(fs_info);
}
5997
/*
 * Unpin the byte range [@start, @end] (inclusive), walking it block group
 * by block group.  For each piece: subtract it from the pinned counters,
 * optionally hand it back to the free space cache (@return_free_space, and
 * only up to last_byte_to_unpin — see btrfs_prepare_extent_commit()), and
 * opportunistically refill the global block reserve from the freed space.
 */
static int unpin_extent_range(struct btrfs_root *root, u64 start, u64 end,
			      const bool return_free_space)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct btrfs_block_group_cache *cache = NULL;
	struct btrfs_space_info *space_info;
	struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
	u64 len;
	bool readonly;

	while (start <= end) {
		readonly = false;
		/* advance to the block group containing 'start' if needed */
		if (!cache ||
		    start >= cache->key.objectid + cache->key.offset) {
			if (cache)
				btrfs_put_block_group(cache);
			cache = btrfs_lookup_block_group(fs_info, start);
			BUG_ON(!cache); /* Logic error */
		}

		/* clamp to both the block group end and the range end */
		len = cache->key.objectid + cache->key.offset - start;
		len = min(len, end + 1 - start);

		if (start < cache->last_byte_to_unpin) {
			len = min(len, cache->last_byte_to_unpin - start);
			if (return_free_space)
				btrfs_add_free_space(cache, start, len);
		}

		start += len;
		space_info = cache->space_info;

		spin_lock(&space_info->lock);
		spin_lock(&cache->lock);
		cache->pinned -= len;
		space_info->bytes_pinned -= len;
		percpu_counter_add(&space_info->total_bytes_pinned, -len);
		if (cache->ro) {
			/* read-only groups can't be allocated from */
			space_info->bytes_readonly += len;
			readonly = true;
		}
		spin_unlock(&cache->lock);
		if (!readonly && global_rsv->space_info == space_info) {
			/* top up the global reserve before anyone else */
			spin_lock(&global_rsv->lock);
			if (!global_rsv->full) {
				len = min(len, global_rsv->size -
					  global_rsv->reserved);
				global_rsv->reserved += len;
				space_info->bytes_may_use += len;
				if (global_rsv->reserved >= global_rsv->size)
					global_rsv->full = 1;
			}
			spin_unlock(&global_rsv->lock);
		}
		spin_unlock(&space_info->lock);
	}

	if (cache)
		btrfs_put_block_group(cache);
	return 0;
}
6059
/*
 * At the end of a transaction commit, walk the pinned-extent tree that
 * belonged to the committing transaction (the one NOT currently pointed to
 * by fs_info->pinned_extents, see btrfs_prepare_extent_commit()), discard
 * the ranges if mount option DISCARD is set, and unpin them back into the
 * free space accounting.
 */
int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
			       struct btrfs_root *root)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct extent_io_tree *unpin;
	u64 start;
	u64 end;
	int ret;

	if (trans->aborted)
		return 0;

	/* drain the tree that is no longer receiving new pins */
	if (fs_info->pinned_extents == &fs_info->freed_extents[0])
		unpin = &fs_info->freed_extents[1];
	else
		unpin = &fs_info->freed_extents[0];

	while (1) {
		/*
		 * unused_bg_unpin_mutex serializes against the unused
		 * block group deletion path touching the same ranges.
		 */
		mutex_lock(&fs_info->unused_bg_unpin_mutex);
		ret = find_first_extent_bit(unpin, 0, &start, &end,
					    EXTENT_DIRTY, NULL);
		if (ret) {
			/* no more pinned ranges */
			mutex_unlock(&fs_info->unused_bg_unpin_mutex);
			break;
		}

		if (btrfs_test_opt(root, DISCARD))
			ret = btrfs_discard_extent(root, start,
						   end + 1 - start, NULL);

		clear_extent_dirty(unpin, start, end, GFP_NOFS);
		unpin_extent_range(root, start, end, true);
		mutex_unlock(&fs_info->unused_bg_unpin_mutex);
		cond_resched();
	}

	return 0;
}
6098
Josef Bacikb150a4f2013-06-19 15:00:04 -04006099static void add_pinned_bytes(struct btrfs_fs_info *fs_info, u64 num_bytes,
6100 u64 owner, u64 root_objectid)
6101{
6102 struct btrfs_space_info *space_info;
6103 u64 flags;
6104
6105 if (owner < BTRFS_FIRST_FREE_OBJECTID) {
6106 if (root_objectid == BTRFS_CHUNK_TREE_OBJECTID)
6107 flags = BTRFS_BLOCK_GROUP_SYSTEM;
6108 else
6109 flags = BTRFS_BLOCK_GROUP_METADATA;
6110 } else {
6111 flags = BTRFS_BLOCK_GROUP_DATA;
6112 }
6113
6114 space_info = __find_space_info(fs_info, flags);
6115 BUG_ON(!space_info); /* Logic bug */
6116 percpu_counter_add(&space_info->total_bytes_pinned, num_bytes);
6117}
6118
6119
/*
 * Drop @refs_to_drop references of the extent described by @node from the
 * extent tree.  First the matching backref is located (inline or keyed),
 * then the extent item's refcount is decremented; if it reaches zero the
 * extent item (and, for keyed backrefs, the adjacent backref item) is
 * deleted, csums are removed for data, and the block group accounting is
 * updated.  All failures abort the transaction.
 */
static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
				struct btrfs_root *root,
				struct btrfs_delayed_ref_node *node, u64 parent,
				u64 root_objectid, u64 owner_objectid,
				u64 owner_offset, int refs_to_drop,
				struct btrfs_delayed_extent_op *extent_op)
{
	struct btrfs_key key;
	struct btrfs_path *path;
	struct btrfs_fs_info *info = root->fs_info;
	struct btrfs_root *extent_root = info->extent_root;
	struct extent_buffer *leaf;
	struct btrfs_extent_item *ei;
	struct btrfs_extent_inline_ref *iref;
	int ret;
	int is_data;
	int extent_slot = 0;
	int found_extent = 0;
	int num_to_del = 1;
	int no_quota = node->no_quota;
	u32 item_size;
	u64 refs;
	u64 bytenr = node->bytenr;
	u64 num_bytes = node->num_bytes;
	int last_ref = 0;
	bool skinny_metadata = btrfs_fs_incompat(root->fs_info,
						 SKINNY_METADATA);

	/* qgroup accounting only applies to fs trees with quotas enabled */
	if (!info->quota_enabled || !is_fstree(root_objectid))
		no_quota = 1;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	path->reada = 1;
	path->leave_spinning = 1;

	/* owner >= FIRST_FREE_OBJECTID means a file (data) extent */
	is_data = owner_objectid >= BTRFS_FIRST_FREE_OBJECTID;
	/* tree blocks are only ever dropped one ref at a time */
	BUG_ON(!is_data && refs_to_drop != 1);

	/* skinny metadata items exist only for tree blocks */
	if (is_data)
		skinny_metadata = 0;

	ret = lookup_extent_backref(trans, extent_root, path, &iref,
				    bytenr, num_bytes, parent,
				    root_objectid, owner_objectid,
				    owner_offset);
	if (ret == 0) {
		/*
		 * The backref was found; scan back a few slots for the
		 * extent item itself, which sorts before its backrefs.
		 */
		extent_slot = path->slots[0];
		while (extent_slot >= 0) {
			btrfs_item_key_to_cpu(path->nodes[0], &key,
					      extent_slot);
			if (key.objectid != bytenr)
				break;
			if (key.type == BTRFS_EXTENT_ITEM_KEY &&
			    key.offset == num_bytes) {
				found_extent = 1;
				break;
			}
			if (key.type == BTRFS_METADATA_ITEM_KEY &&
			    key.offset == owner_objectid) {
				found_extent = 1;
				break;
			}
			/* give up after a few slots; search again below */
			if (path->slots[0] - extent_slot > 5)
				break;
			extent_slot--;
		}
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
		item_size = btrfs_item_size_nr(path->nodes[0], extent_slot);
		if (found_extent && item_size < sizeof(*ei))
			found_extent = 0;
#endif
		if (!found_extent) {
			/* inline refs always live in the extent item */
			BUG_ON(iref);
			ret = remove_extent_backref(trans, extent_root, path,
						    NULL, refs_to_drop,
						    is_data, &last_ref);
			if (ret) {
				btrfs_abort_transaction(trans, extent_root, ret);
				goto out;
			}
			btrfs_release_path(path);
			path->leave_spinning = 1;

			key.objectid = bytenr;
			key.type = BTRFS_EXTENT_ITEM_KEY;
			key.offset = num_bytes;

			if (!is_data && skinny_metadata) {
				key.type = BTRFS_METADATA_ITEM_KEY;
				key.offset = owner_objectid;
			}

			ret = btrfs_search_slot(trans, extent_root,
						&key, path, -1, 1);
			if (ret > 0 && skinny_metadata && path->slots[0]) {
				/*
				 * Couldn't find our skinny metadata item,
				 * see if we have ye olde extent item.
				 */
				path->slots[0]--;
				btrfs_item_key_to_cpu(path->nodes[0], &key,
						      path->slots[0]);
				if (key.objectid == bytenr &&
				    key.type == BTRFS_EXTENT_ITEM_KEY &&
				    key.offset == num_bytes)
					ret = 0;
			}

			if (ret > 0 && skinny_metadata) {
				/* retry with the old-style extent key */
				skinny_metadata = false;
				key.objectid = bytenr;
				key.type = BTRFS_EXTENT_ITEM_KEY;
				key.offset = num_bytes;
				btrfs_release_path(path);
				ret = btrfs_search_slot(trans, extent_root,
							&key, path, -1, 1);
			}

			if (ret) {
				btrfs_err(info, "umm, got %d back from search, was looking for %llu",
					ret, bytenr);
				if (ret > 0)
					btrfs_print_leaf(extent_root,
							 path->nodes[0]);
			}
			if (ret < 0) {
				btrfs_abort_transaction(trans, extent_root, ret);
				goto out;
			}
			extent_slot = path->slots[0];
		}
	} else if (WARN_ON(ret == -ENOENT)) {
		/* missing backref: the extent tree is inconsistent */
		btrfs_print_leaf(extent_root, path->nodes[0]);
		btrfs_err(info,
			"unable to find ref byte nr %llu parent %llu root %llu owner %llu offset %llu",
			bytenr, parent, root_objectid, owner_objectid,
			owner_offset);
		btrfs_abort_transaction(trans, extent_root, ret);
		goto out;
	} else {
		btrfs_abort_transaction(trans, extent_root, ret);
		goto out;
	}

	leaf = path->nodes[0];
	item_size = btrfs_item_size_nr(leaf, extent_slot);
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
	if (item_size < sizeof(*ei)) {
		/* convert a v0 extent item in place, then re-find it */
		BUG_ON(found_extent || extent_slot != path->slots[0]);
		ret = convert_extent_item_v0(trans, extent_root, path,
					     owner_objectid, 0);
		if (ret < 0) {
			btrfs_abort_transaction(trans, extent_root, ret);
			goto out;
		}

		btrfs_release_path(path);
		path->leave_spinning = 1;

		key.objectid = bytenr;
		key.type = BTRFS_EXTENT_ITEM_KEY;
		key.offset = num_bytes;

		ret = btrfs_search_slot(trans, extent_root, &key, path,
					-1, 1);
		if (ret) {
			btrfs_err(info, "umm, got %d back from search, was looking for %llu",
				ret, bytenr);
			btrfs_print_leaf(extent_root, path->nodes[0]);
		}
		if (ret < 0) {
			btrfs_abort_transaction(trans, extent_root, ret);
			goto out;
		}

		extent_slot = path->slots[0];
		leaf = path->nodes[0];
		item_size = btrfs_item_size_nr(leaf, extent_slot);
	}
#endif
	BUG_ON(item_size < sizeof(*ei));
	ei = btrfs_item_ptr(leaf, extent_slot,
			    struct btrfs_extent_item);
	if (owner_objectid < BTRFS_FIRST_FREE_OBJECTID &&
	    key.type == BTRFS_EXTENT_ITEM_KEY) {
		/* non-skinny tree blocks carry a tree_block_info */
		struct btrfs_tree_block_info *bi;
		BUG_ON(item_size < sizeof(*ei) + sizeof(*bi));
		bi = (struct btrfs_tree_block_info *)(ei + 1);
		WARN_ON(owner_objectid != btrfs_tree_block_level(leaf, bi));
	}

	refs = btrfs_extent_refs(leaf, ei);
	if (refs < refs_to_drop) {
		btrfs_err(info, "trying to drop %d refs but we only have %Lu "
			  "for bytenr %Lu", refs_to_drop, refs, bytenr);
		ret = -EINVAL;
		btrfs_abort_transaction(trans, extent_root, ret);
		goto out;
	}
	refs -= refs_to_drop;

	if (refs > 0) {
		/* other references remain: just decrement */
		if (extent_op)
			__run_delayed_extent_op(extent_op, leaf, ei);
		/*
		 * In the case of inline back ref, reference count will
		 * be updated by remove_extent_backref
		 */
		if (iref) {
			BUG_ON(!found_extent);
		} else {
			btrfs_set_extent_refs(leaf, ei, refs);
			btrfs_mark_buffer_dirty(leaf);
		}
		if (found_extent) {
			ret = remove_extent_backref(trans, extent_root, path,
						    iref, refs_to_drop,
						    is_data, &last_ref);
			if (ret) {
				btrfs_abort_transaction(trans, extent_root, ret);
				goto out;
			}
		}
		add_pinned_bytes(root->fs_info, -num_bytes, owner_objectid,
				 root_objectid);
	} else {
		/* last reference: delete the extent item(s) */
		if (found_extent) {
			BUG_ON(is_data && refs_to_drop !=
			       extent_data_ref_count(path, iref));
			if (iref) {
				BUG_ON(path->slots[0] != extent_slot);
			} else {
				/* extent item + keyed backref are adjacent */
				BUG_ON(path->slots[0] != extent_slot + 1);
				path->slots[0] = extent_slot;
				num_to_del = 2;
			}
		}

		last_ref = 1;
		ret = btrfs_del_items(trans, extent_root, path, path->slots[0],
				      num_to_del);
		if (ret) {
			btrfs_abort_transaction(trans, extent_root, ret);
			goto out;
		}
		btrfs_release_path(path);

		if (is_data) {
			ret = btrfs_del_csums(trans, root, bytenr, num_bytes);
			if (ret) {
				btrfs_abort_transaction(trans, extent_root, ret);
				goto out;
			}
		}

		ret = update_block_group(trans, root, bytenr, num_bytes, 0);
		if (ret) {
			btrfs_abort_transaction(trans, extent_root, ret);
			goto out;
		}
	}
	btrfs_release_path(path);

out:
	btrfs_free_path(path);
	return ret;
}
6390
/*
 * when we free an block, it is possible (and likely) that we free the last
 * delayed ref for that extent as well.  This searches the delayed ref tree for
 * a given extent, and if there are no other delayed refs to be processed, it
 * removes it from the tree.
 *
 * Returns 1 if the head was removed and the caller must free the reserved
 * extent itself (must_insert_reserved was set), 0 otherwise.
 */
static noinline int check_ref_cleanup(struct btrfs_trans_handle *trans,
				      struct btrfs_root *root, u64 bytenr)
{
	struct btrfs_delayed_ref_head *head;
	struct btrfs_delayed_ref_root *delayed_refs;
	int ret = 0;

	delayed_refs = &trans->transaction->delayed_refs;
	spin_lock(&delayed_refs->lock);
	head = btrfs_find_delayed_ref_head(trans, bytenr);
	if (!head)
		goto out_delayed_unlock;

	spin_lock(&head->lock);
	/* other refs still queued on this head; nothing to clean up */
	if (!list_empty(&head->ref_list))
		goto out;

	if (head->extent_op) {
		if (!head->must_insert_reserved)
			goto out;
		/* must_insert_reserved heads never hit the tree; drop the op */
		btrfs_free_delayed_extent_op(head->extent_op);
		head->extent_op = NULL;
	}

	/*
	 * waiting for the lock here would deadlock. If someone else has it
	 * locked they are already in the process of dropping it anyway
	 */
	if (!mutex_trylock(&head->mutex))
		goto out;

	/*
	 * at this point we have a head with no other entries. Go
	 * ahead and process it.
	 */
	head->node.in_tree = 0;
	rb_erase(&head->href_node, &delayed_refs->href_root);

	atomic_dec(&delayed_refs->num_entries);

	/*
	 * we don't take a ref on the node because we're removing it from the
	 * tree, so we just steal the ref the tree was holding.
	 */
	delayed_refs->num_heads--;
	if (head->processing == 0)
		delayed_refs->num_heads_ready--;
	head->processing = 0;
	spin_unlock(&head->lock);
	spin_unlock(&delayed_refs->lock);

	BUG_ON(head->extent_op);
	if (head->must_insert_reserved)
		ret = 1;

	mutex_unlock(&head->mutex);
	/* drop the ref stolen from the tree */
	btrfs_put_delayed_ref(&head->node);
	return ret;
out:
	spin_unlock(&head->lock);

out_delayed_unlock:
	spin_unlock(&delayed_refs->lock);
	return 0;
}
6462
/*
 * Free a tree block @buf.  For non-log trees a delayed DROP ref is queued;
 * if this was the last reference and the block was allocated in the current
 * transaction it may be returned straight to the free space cache instead
 * of being pinned, but only if it was never written (WRITTEN flag clear) —
 * otherwise it is pinned until the transaction commits.
 *
 * @parent:   parent tree block bytenr for shared backrefs, 0 otherwise
 * @last_ref: this drop removes the last reference to @buf
 */
void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
			   struct btrfs_root *root,
			   struct extent_buffer *buf,
			   u64 parent, int last_ref)
{
	int pin = 1;
	int ret;

	if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) {
		ret = btrfs_add_delayed_tree_ref(root->fs_info, trans,
					buf->start, buf->len,
					parent, root->root_key.objectid,
					btrfs_header_level(buf),
					BTRFS_DROP_DELAYED_REF, NULL, 0);
		BUG_ON(ret); /* -ENOMEM */
	}

	if (!last_ref)
		return;

	if (btrfs_header_generation(buf) == trans->transid) {
		struct btrfs_block_group_cache *cache;

		if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) {
			/* ret != 0 here means we must free it ourselves */
			ret = check_ref_cleanup(trans, root, buf->start);
			if (!ret)
				goto out;
		}

		cache = btrfs_lookup_block_group(root->fs_info, buf->start);

		if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) {
			/* block hit disk: must stay pinned until commit */
			pin_down_extent(root, cache, buf->start, buf->len, 1);
			btrfs_put_block_group(cache);
			goto out;
		}

		WARN_ON(test_bit(EXTENT_BUFFER_DIRTY, &buf->bflags));

		/* never written: safe to reuse the space immediately */
		btrfs_add_free_space(cache, buf->start, buf->len);
		btrfs_update_reserved_bytes(cache, buf->len, RESERVE_FREE, 0);
		btrfs_put_block_group(cache);
		trace_btrfs_reserved_extent_free(root, buf->start, buf->len);
		pin = 0;
	}
out:
	if (pin)
		add_pinned_bytes(root->fs_info, buf->len,
				 btrfs_header_level(buf),
				 root->root_key.objectid);

	/*
	 * Deleting the buffer, clear the corrupt flag since it doesn't matter
	 * anymore.
	 */
	clear_bit(EXTENT_BUFFER_CORRUPT, &buf->bflags);
}
6520
/*
 * Drop one reference on an extent (data or tree block).
 *
 * @bytenr/@num_bytes:	the extent being dereferenced
 * @parent:		parent block for shared backrefs, 0 otherwise
 * @root_objectid:	objectid of the root dropping the ref
 * @owner:		tree level for tree blocks (< BTRFS_FIRST_FREE_OBJECTID)
 *			or inode objectid for data extents
 * @offset:		file offset, only meaningful for data extents
 * @no_quota:		skip qgroup accounting for this operation
 *
 * Log-tree extents are pinned directly; everything else is queued as a
 * DROP delayed ref and processed later.  Can return -ENOMEM.
 */
int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root,
		      u64 bytenr, u64 num_bytes, u64 parent, u64 root_objectid,
		      u64 owner, u64 offset, int no_quota)
{
	int ret;
	struct btrfs_fs_info *fs_info = root->fs_info;

	/* tests use a stub root with no extent tree behind it */
	if (btrfs_test_is_dummy_root(root))
		return 0;

	add_pinned_bytes(root->fs_info, num_bytes, owner, root_objectid);

	/*
	 * tree log blocks never actually go into the extent allocation
	 * tree, just update pinning info and exit early.
	 */
	if (root_objectid == BTRFS_TREE_LOG_OBJECTID) {
		WARN_ON(owner >= BTRFS_FIRST_FREE_OBJECTID);
		/* unlocks the pinned mutex */
		btrfs_pin_extent(root, bytenr, num_bytes, 1);
		ret = 0;
	} else if (owner < BTRFS_FIRST_FREE_OBJECTID) {
		/* owner below FIRST_FREE means a tree block (owner == level) */
		ret = btrfs_add_delayed_tree_ref(fs_info, trans, bytenr,
					num_bytes,
					parent, root_objectid, (int)owner,
					BTRFS_DROP_DELAYED_REF, NULL, no_quota);
	} else {
		ret = btrfs_add_delayed_data_ref(fs_info, trans, bytenr,
						num_bytes,
						parent, root_objectid, owner,
						offset, BTRFS_DROP_DELAYED_REF,
						NULL, no_quota);
	}
	return ret;
}
6557
Chris Masonfec577f2007-02-26 10:40:21 -05006558/*
Josef Bacik817d52f2009-07-13 21:29:25 -04006559 * when we wait for progress in the block group caching, its because
6560 * our allocation attempt failed at least once. So, we must sleep
6561 * and let some progress happen before we try again.
6562 *
6563 * This function will sleep at least once waiting for new free space to
6564 * show up, and then it will check the block group free space numbers
6565 * for our min num_bytes. Another option is to have it go ahead
6566 * and look in the rbtree for a free extent of a given size, but this
6567 * is a good start.
Josef Bacik36cce922013-08-05 11:15:21 -04006568 *
6569 * Callers of this must check if cache->cached == BTRFS_CACHE_ERROR before using
6570 * any of the information in this block group.
Josef Bacik817d52f2009-07-13 21:29:25 -04006571 */
Josef Bacik36cce922013-08-05 11:15:21 -04006572static noinline void
Josef Bacik817d52f2009-07-13 21:29:25 -04006573wait_block_group_cache_progress(struct btrfs_block_group_cache *cache,
6574 u64 num_bytes)
6575{
Yan Zheng11833d62009-09-11 16:11:19 -04006576 struct btrfs_caching_control *caching_ctl;
Josef Bacik817d52f2009-07-13 21:29:25 -04006577
Yan Zheng11833d62009-09-11 16:11:19 -04006578 caching_ctl = get_caching_control(cache);
6579 if (!caching_ctl)
Josef Bacik36cce922013-08-05 11:15:21 -04006580 return;
Josef Bacik817d52f2009-07-13 21:29:25 -04006581
Yan Zheng11833d62009-09-11 16:11:19 -04006582 wait_event(caching_ctl->wait, block_group_cache_done(cache) ||
Li Zefan34d52cb2011-03-29 13:46:06 +08006583 (cache->free_space_ctl->free_space >= num_bytes));
Yan Zheng11833d62009-09-11 16:11:19 -04006584
6585 put_caching_control(caching_ctl);
Yan Zheng11833d62009-09-11 16:11:19 -04006586}
6587
6588static noinline int
6589wait_block_group_cache_done(struct btrfs_block_group_cache *cache)
6590{
6591 struct btrfs_caching_control *caching_ctl;
Josef Bacik36cce922013-08-05 11:15:21 -04006592 int ret = 0;
Yan Zheng11833d62009-09-11 16:11:19 -04006593
6594 caching_ctl = get_caching_control(cache);
6595 if (!caching_ctl)
Josef Bacik36cce922013-08-05 11:15:21 -04006596 return (cache->cached == BTRFS_CACHE_ERROR) ? -EIO : 0;
Yan Zheng11833d62009-09-11 16:11:19 -04006597
6598 wait_event(caching_ctl->wait, block_group_cache_done(cache));
Josef Bacik36cce922013-08-05 11:15:21 -04006599 if (cache->cached == BTRFS_CACHE_ERROR)
6600 ret = -EIO;
Yan Zheng11833d62009-09-11 16:11:19 -04006601 put_caching_control(caching_ctl);
Josef Bacik36cce922013-08-05 11:15:21 -04006602 return ret;
Josef Bacik817d52f2009-07-13 21:29:25 -04006603}
6604
Liu Bo31e50222012-11-21 14:18:10 +00006605int __get_raid_index(u64 flags)
Yan, Zhengb742bb82010-05-16 10:46:24 -04006606{
Ilya Dryomov7738a532012-03-27 17:09:17 +03006607 if (flags & BTRFS_BLOCK_GROUP_RAID10)
Miao Xiee6ec7162013-01-17 05:38:51 +00006608 return BTRFS_RAID_RAID10;
Ilya Dryomov7738a532012-03-27 17:09:17 +03006609 else if (flags & BTRFS_BLOCK_GROUP_RAID1)
Miao Xiee6ec7162013-01-17 05:38:51 +00006610 return BTRFS_RAID_RAID1;
Ilya Dryomov7738a532012-03-27 17:09:17 +03006611 else if (flags & BTRFS_BLOCK_GROUP_DUP)
Miao Xiee6ec7162013-01-17 05:38:51 +00006612 return BTRFS_RAID_DUP;
Ilya Dryomov7738a532012-03-27 17:09:17 +03006613 else if (flags & BTRFS_BLOCK_GROUP_RAID0)
Miao Xiee6ec7162013-01-17 05:38:51 +00006614 return BTRFS_RAID_RAID0;
David Woodhouse53b381b2013-01-29 18:40:14 -05006615 else if (flags & BTRFS_BLOCK_GROUP_RAID5)
Chris Masone942f882013-02-20 14:06:05 -05006616 return BTRFS_RAID_RAID5;
David Woodhouse53b381b2013-01-29 18:40:14 -05006617 else if (flags & BTRFS_BLOCK_GROUP_RAID6)
Chris Masone942f882013-02-20 14:06:05 -05006618 return BTRFS_RAID_RAID6;
Ilya Dryomov7738a532012-03-27 17:09:17 +03006619
Chris Masone942f882013-02-20 14:06:05 -05006620 return BTRFS_RAID_SINGLE; /* BTRFS_BLOCK_GROUP_SINGLE */
Yan, Zhengb742bb82010-05-16 10:46:24 -04006621}
6622
/* Return the btrfs_raid_types index for @cache's profile flags. */
int get_block_group_index(struct btrfs_block_group_cache *cache)
{
	return __get_raid_index(cache->flags);
}
6627
/* Human-readable name for each btrfs_raid_types index (see get_raid_name). */
static const char *btrfs_raid_type_names[BTRFS_NR_RAID_TYPES] = {
	[BTRFS_RAID_RAID10]	= "raid10",
	[BTRFS_RAID_RAID1]	= "raid1",
	[BTRFS_RAID_DUP]	= "dup",
	[BTRFS_RAID_RAID0]	= "raid0",
	[BTRFS_RAID_SINGLE]	= "single",
	[BTRFS_RAID_RAID5]	= "raid5",
	[BTRFS_RAID_RAID6]	= "raid6",
};
6637
Jeff Mahoney1b8e5df2013-11-20 16:50:23 -05006638static const char *get_raid_name(enum btrfs_raid_types type)
Jeff Mahoney6ab0a202013-11-01 13:07:04 -04006639{
6640 if (type >= BTRFS_NR_RAID_TYPES)
6641 return NULL;
6642
6643 return btrfs_raid_type_names[type];
6644}
6645
/*
 * Escalation stages of the find_free_extent() retry loop; each later stage
 * relaxes constraints further (see the comment above the retry logic in
 * find_free_extent()).
 */
enum btrfs_loop_type {
	LOOP_CACHING_NOWAIT = 0,	/* search partially cached groups, kick caching */
	LOOP_CACHING_WAIT = 1,		/* search everything, wait on caching groups */
	LOOP_ALLOC_CHUNK = 2,		/* force a chunk allocation, then retry */
	LOOP_NO_EMPTY_SIZE = 3,		/* zero empty_size/empty_cluster, last try */
};
6652
/*
 * For delalloc allocations, take the block group's data_rwsem for reading
 * (released by btrfs_release_block_group()).  No-op otherwise.
 */
static inline void
btrfs_lock_block_group(struct btrfs_block_group_cache *cache,
		       int delalloc)
{
	if (delalloc)
		down_read(&cache->data_rwsem);
}
6660
/*
 * Take a reference on @cache and, for delalloc allocations, its data_rwsem
 * in read mode.  Paired with btrfs_release_block_group().
 */
static inline void
btrfs_grab_block_group(struct btrfs_block_group_cache *cache,
		       int delalloc)
{
	btrfs_get_block_group(cache);
	if (delalloc)
		down_read(&cache->data_rwsem);
}
6669
/*
 * Return the block group currently backing @cluster with a reference held
 * and, for delalloc, its data_rwsem read-locked.  Returns @block_group
 * itself (no extra ref/lock) when the cluster points at it, or NULL when
 * the cluster has no block group.
 *
 * Returns with cluster->refill_lock held; it may be dropped and retaken
 * internally while sleeping on data_rwsem.
 */
static struct btrfs_block_group_cache *
btrfs_lock_cluster(struct btrfs_block_group_cache *block_group,
		   struct btrfs_free_cluster *cluster,
		   int delalloc)
{
	struct btrfs_block_group_cache *used_bg;
	bool locked = false;
again:
	spin_lock(&cluster->refill_lock);
	if (locked) {
		/*
		 * We slept on data_rwsem below with refill_lock dropped;
		 * used_bg was set on the previous pass.  Only use it if the
		 * cluster still points at the same group.
		 */
		if (used_bg == cluster->block_group)
			return used_bg;

		up_read(&used_bg->data_rwsem);
		btrfs_put_block_group(used_bg);
	}

	used_bg = cluster->block_group;
	if (!used_bg)
		return NULL;

	if (used_bg == block_group)
		return used_bg;

	btrfs_get_block_group(used_bg);

	if (!delalloc)
		return used_bg;

	if (down_read_trylock(&used_bg->data_rwsem))
		return used_bg;

	/*
	 * Lock ordering: we must not block on data_rwsem while holding
	 * refill_lock, so drop it, sleep, and revalidate from the top.
	 */
	spin_unlock(&cluster->refill_lock);
	down_read(&used_bg->data_rwsem);
	locked = true;
	goto again;
}
6707
/*
 * Undo btrfs_grab_block_group()/btrfs_lock_cluster(): drop the delalloc
 * data_rwsem (if taken) and release the block group reference.
 */
static inline void
btrfs_release_block_group(struct btrfs_block_group_cache *cache,
			  int delalloc)
{
	if (delalloc)
		up_read(&cache->data_rwsem);
	btrfs_put_block_group(cache);
}
6716
/*
 * walks the btree of allocated extents and find a hole of a given size.
 * The key ins is changed to record the hole:
 * ins->objectid == start position
 * ins->flags = BTRFS_EXTENT_ITEM_KEY
 * ins->offset == the size of the hole.
 * Any available blocks before search_start are skipped.
 *
 * If there is no suitable free space, we will record the max size of
 * the free space extent currently.
 *
 * The search escalates through the enum btrfs_loop_type stages: each
 * failed pass over all block groups of the wanted profile bumps @loop
 * and relaxes the constraints (wait on caching, force a chunk
 * allocation, finally drop empty_size/empty_cluster).
 */
static noinline int find_free_extent(struct btrfs_root *orig_root,
				     u64 num_bytes, u64 empty_size,
				     u64 hint_byte, struct btrfs_key *ins,
				     u64 flags, int delalloc)
{
	int ret = 0;
	struct btrfs_root *root = orig_root->fs_info->extent_root;
	struct btrfs_free_cluster *last_ptr = NULL;
	struct btrfs_block_group_cache *block_group = NULL;
	u64 search_start = 0;
	u64 max_extent_size = 0;
	int empty_cluster = 2 * 1024 * 1024;
	struct btrfs_space_info *space_info;
	int loop = 0;
	int index = __get_raid_index(flags);
	/* data reservations are accounted elsewhere, metadata here */
	int alloc_type = (flags & BTRFS_BLOCK_GROUP_DATA) ?
		RESERVE_ALLOC_NO_ACCOUNT : RESERVE_ALLOC;
	bool failed_cluster_refill = false;
	bool failed_alloc = false;
	bool use_cluster = true;
	bool have_caching_bg = false;

	WARN_ON(num_bytes < root->sectorsize);
	ins->type = BTRFS_EXTENT_ITEM_KEY;
	ins->objectid = 0;
	ins->offset = 0;

	trace_find_free_extent(orig_root, num_bytes, empty_size, flags);

	space_info = __find_space_info(root->fs_info, flags);
	if (!space_info) {
		btrfs_err(root->fs_info, "No space info for %llu", flags);
		return -ENOSPC;
	}

	/*
	 * If the space info is for both data and metadata it means we have a
	 * small filesystem and we can't use the clustering stuff.
	 */
	if (btrfs_mixed_space_info(space_info))
		use_cluster = false;

	if (flags & BTRFS_BLOCK_GROUP_METADATA && use_cluster) {
		last_ptr = &root->fs_info->meta_alloc_cluster;
		if (!btrfs_test_opt(root, SSD))
			empty_cluster = 64 * 1024;
	}

	/* data clustering is only worth it on SSDs */
	if ((flags & BTRFS_BLOCK_GROUP_DATA) && use_cluster &&
	    btrfs_test_opt(root, SSD)) {
		last_ptr = &root->fs_info->data_alloc_cluster;
	}

	if (last_ptr) {
		spin_lock(&last_ptr->lock);
		if (last_ptr->block_group)
			hint_byte = last_ptr->window_start;
		spin_unlock(&last_ptr->lock);
	}

	search_start = max(search_start, first_logical_byte(root, 0));
	search_start = max(search_start, hint_byte);

	if (!last_ptr)
		empty_cluster = 0;

	if (search_start == hint_byte) {
		block_group = btrfs_lookup_block_group(root->fs_info,
						       search_start);
		/*
		 * we don't want to use the block group if it doesn't match our
		 * allocation bits, or if its not cached.
		 *
		 * However if we are re-searching with an ideal block group
		 * picked out then we don't care that the block group is cached.
		 */
		if (block_group && block_group_bits(block_group, flags) &&
		    block_group->cached != BTRFS_CACHE_NO) {
			down_read(&space_info->groups_sem);
			if (list_empty(&block_group->list) ||
			    block_group->ro) {
				/*
				 * someone is removing this block group,
				 * we can't jump into the have_block_group
				 * target because our list pointers are not
				 * valid
				 */
				btrfs_put_block_group(block_group);
				up_read(&space_info->groups_sem);
			} else {
				index = get_block_group_index(block_group);
				btrfs_lock_block_group(block_group, delalloc);
				goto have_block_group;
			}
		} else if (block_group) {
			btrfs_put_block_group(block_group);
		}
	}
search:
	have_caching_bg = false;
	down_read(&space_info->groups_sem);
	list_for_each_entry(block_group, &space_info->block_groups[index],
			    list) {
		u64 offset;
		int cached;

		btrfs_grab_block_group(block_group, delalloc);
		search_start = block_group->key.objectid;

		/*
		 * this can happen if we end up cycling through all the
		 * raid types, but we want to make sure we only allocate
		 * for the proper type.
		 */
		if (!block_group_bits(block_group, flags)) {
		    u64 extra = BTRFS_BLOCK_GROUP_DUP |
				BTRFS_BLOCK_GROUP_RAID1 |
				BTRFS_BLOCK_GROUP_RAID5 |
				BTRFS_BLOCK_GROUP_RAID6 |
				BTRFS_BLOCK_GROUP_RAID10;

			/*
			 * if they asked for extra copies and this block group
			 * doesn't provide them, bail.  This does allow us to
			 * fill raid0 from raid1.
			 */
			if ((flags & extra) && !(block_group->flags & extra))
				goto loop;
		}

have_block_group:
		cached = block_group_cache_done(block_group);
		if (unlikely(!cached)) {
			/* kick off (or continue) caching this group */
			ret = cache_block_group(block_group, 0);
			BUG_ON(ret < 0);
			ret = 0;
		}

		if (unlikely(block_group->cached == BTRFS_CACHE_ERROR))
			goto loop;
		if (unlikely(block_group->ro))
			goto loop;

		/*
		 * Ok we want to try and use the cluster allocator, so
		 * lets look there
		 */
		if (last_ptr) {
			struct btrfs_block_group_cache *used_block_group;
			unsigned long aligned_cluster;
			/*
			 * the refill lock keeps out other
			 * people trying to start a new cluster
			 */
			used_block_group = btrfs_lock_cluster(block_group,
							      last_ptr,
							      delalloc);
			if (!used_block_group)
				goto refill_cluster;

			if (used_block_group != block_group &&
			    (used_block_group->ro ||
			     !block_group_bits(used_block_group, flags)))
				goto release_cluster;

			offset = btrfs_alloc_from_cluster(used_block_group,
						last_ptr,
						num_bytes,
						used_block_group->key.objectid,
						&max_extent_size);
			if (offset) {
				/* we have a block, we're done */
				spin_unlock(&last_ptr->refill_lock);
				trace_btrfs_reserve_extent_cluster(root,
						used_block_group,
						search_start, num_bytes);
				if (used_block_group != block_group) {
					btrfs_release_block_group(block_group,
								  delalloc);
					block_group = used_block_group;
				}
				goto checks;
			}

			WARN_ON(last_ptr->block_group != used_block_group);
release_cluster:
			/* If we are on LOOP_NO_EMPTY_SIZE, we can't
			 * set up a new clusters, so lets just skip it
			 * and let the allocator find whatever block
			 * it can find. If we reach this point, we
			 * will have tried the cluster allocator
			 * plenty of times and not have found
			 * anything, so we are likely way too
			 * fragmented for the clustering stuff to find
			 * anything.
			 *
			 * However, if the cluster is taken from the
			 * current block group, release the cluster
			 * first, so that we stand a better chance of
			 * succeeding in the unclustered
			 * allocation.  */
			if (loop >= LOOP_NO_EMPTY_SIZE &&
			    used_block_group != block_group) {
				spin_unlock(&last_ptr->refill_lock);
				btrfs_release_block_group(used_block_group,
							  delalloc);
				goto unclustered_alloc;
			}

			/*
			 * this cluster didn't work out, free it and
			 * start over
			 */
			btrfs_return_cluster_to_free_space(NULL, last_ptr);

			if (used_block_group != block_group)
				btrfs_release_block_group(used_block_group,
							  delalloc);
refill_cluster:
			if (loop >= LOOP_NO_EMPTY_SIZE) {
				spin_unlock(&last_ptr->refill_lock);
				goto unclustered_alloc;
			}

			aligned_cluster = max_t(unsigned long,
						empty_cluster + empty_size,
					      block_group->full_stripe_len);

			/* allocate a cluster in this block group */
			ret = btrfs_find_space_cluster(root, block_group,
						       last_ptr, search_start,
						       num_bytes,
						       aligned_cluster);
			if (ret == 0) {
				/*
				 * now pull our allocation out of this
				 * cluster
				 */
				offset = btrfs_alloc_from_cluster(block_group,
							last_ptr,
							num_bytes,
							search_start,
							&max_extent_size);
				if (offset) {
					/* we found one, proceed */
					spin_unlock(&last_ptr->refill_lock);
					trace_btrfs_reserve_extent_cluster(root,
						block_group, search_start,
						num_bytes);
					goto checks;
				}
			} else if (!cached && loop > LOOP_CACHING_NOWAIT
				   && !failed_cluster_refill) {
				spin_unlock(&last_ptr->refill_lock);

				failed_cluster_refill = true;
				wait_block_group_cache_progress(block_group,
				       num_bytes + empty_cluster + empty_size);
				goto have_block_group;
			}

			/*
			 * at this point we either didn't find a cluster
			 * or we weren't able to allocate a block from our
			 * cluster.  Free the cluster we've been trying
			 * to use, and go to the next block group
			 */
			btrfs_return_cluster_to_free_space(NULL, last_ptr);
			spin_unlock(&last_ptr->refill_lock);
			goto loop;
		}

unclustered_alloc:
		spin_lock(&block_group->free_space_ctl->tree_lock);
		if (cached &&
		    block_group->free_space_ctl->free_space <
		    num_bytes + empty_cluster + empty_size) {
			/* remember the largest group we skipped for ENOSPC */
			if (block_group->free_space_ctl->free_space >
			    max_extent_size)
				max_extent_size =
					block_group->free_space_ctl->free_space;
			spin_unlock(&block_group->free_space_ctl->tree_lock);
			goto loop;
		}
		spin_unlock(&block_group->free_space_ctl->tree_lock);

		offset = btrfs_find_space_for_alloc(block_group, search_start,
						    num_bytes, empty_size,
						    &max_extent_size);
		/*
		 * If we didn't find a chunk, and we haven't failed on this
		 * block group before, and this block group is in the middle of
		 * caching and we are ok with waiting, then go ahead and wait
		 * for progress to be made, and set failed_alloc to true.
		 *
		 * If failed_alloc is true then we've already waited on this
		 * block group once and should move on to the next block group.
		 */
		if (!offset && !failed_alloc && !cached &&
		    loop > LOOP_CACHING_NOWAIT) {
			wait_block_group_cache_progress(block_group,
						num_bytes + empty_size);
			failed_alloc = true;
			goto have_block_group;
		} else if (!offset) {
			if (!cached)
				have_caching_bg = true;
			goto loop;
		}
checks:
		search_start = ALIGN(offset, root->stripesize);

		/* move on to the next group */
		if (search_start + num_bytes >
		    block_group->key.objectid + block_group->key.offset) {
			btrfs_add_free_space(block_group, offset, num_bytes);
			goto loop;
		}

		/* give back the alignment slack before our result */
		if (offset < search_start)
			btrfs_add_free_space(block_group, offset,
					     search_start - offset);
		BUG_ON(offset > search_start);

		ret = btrfs_update_reserved_bytes(block_group, num_bytes,
						  alloc_type, delalloc);
		if (ret == -EAGAIN) {
			btrfs_add_free_space(block_group, offset, num_bytes);
			goto loop;
		}

		/* we are all good, lets return */
		ins->objectid = search_start;
		ins->offset = num_bytes;

		trace_btrfs_reserve_extent(orig_root, block_group,
					   search_start, num_bytes);
		btrfs_release_block_group(block_group, delalloc);
		break;
loop:
		failed_cluster_refill = false;
		failed_alloc = false;
		BUG_ON(index != get_block_group_index(block_group));
		btrfs_release_block_group(block_group, delalloc);
	}
	up_read(&space_info->groups_sem);

	/* re-scan if some group was still caching and may have grown */
	if (!ins->objectid && loop >= LOOP_CACHING_WAIT && have_caching_bg)
		goto search;

	/* fall back to the next (less preferred) raid index */
	if (!ins->objectid && ++index < BTRFS_NR_RAID_TYPES)
		goto search;

	/*
	 * LOOP_CACHING_NOWAIT, search partially cached block groups, kicking
	 *			caching kthreads as we move along
	 * LOOP_CACHING_WAIT, search everything, and wait if our bg is caching
	 * LOOP_ALLOC_CHUNK, force a chunk allocation and try again
	 * LOOP_NO_EMPTY_SIZE, set empty_size and empty_cluster to 0 and try
	 *			again
	 */
	if (!ins->objectid && loop < LOOP_NO_EMPTY_SIZE) {
		index = 0;
		loop++;
		if (loop == LOOP_ALLOC_CHUNK) {
			struct btrfs_trans_handle *trans;
			int exist = 0;

			/* reuse a running transaction if we are inside one */
			trans = current->journal_info;
			if (trans)
				exist = 1;
			else
				trans = btrfs_join_transaction(root);

			if (IS_ERR(trans)) {
				ret = PTR_ERR(trans);
				goto out;
			}

			ret = do_chunk_alloc(trans, root, flags,
					     CHUNK_ALLOC_FORCE);
			/*
			 * Do not bail out on ENOSPC since we
			 * can do more things.
			 */
			if (ret < 0 && ret != -ENOSPC)
				btrfs_abort_transaction(trans,
							root, ret);
			else
				ret = 0;
			if (!exist)
				btrfs_end_transaction(trans, root);
			if (ret)
				goto out;
		}

		if (loop == LOOP_NO_EMPTY_SIZE) {
			empty_size = 0;
			empty_cluster = 0;
		}

		goto search;
	} else if (!ins->objectid) {
		ret = -ENOSPC;
	} else if (ins->objectid) {
		ret = 0;
	}
out:
	/* report the largest free extent seen so callers can adapt */
	if (ret == -ENOSPC)
		ins->offset = max_extent_size;
	return ret;
}
Chris Masonec44a352008-04-28 15:29:52 -04007140
Josef Bacik9ed74f22009-09-11 16:12:44 -04007141static void dump_space_info(struct btrfs_space_info *info, u64 bytes,
7142 int dump_block_groups)
Josef Bacik0f9dd462008-09-23 13:14:11 -04007143{
7144 struct btrfs_block_group_cache *cache;
Yan, Zhengb742bb82010-05-16 10:46:24 -04007145 int index = 0;
Josef Bacik0f9dd462008-09-23 13:14:11 -04007146
Josef Bacik9ed74f22009-09-11 16:12:44 -04007147 spin_lock(&info->lock);
Frank Holtonefe120a2013-12-20 11:37:06 -05007148 printk(KERN_INFO "BTRFS: space_info %llu has %llu free, is %sfull\n",
Geert Uytterhoevenc1c9ff72013-08-20 13:20:07 +02007149 info->flags,
7150 info->total_bytes - info->bytes_used - info->bytes_pinned -
7151 info->bytes_reserved - info->bytes_readonly,
Chris Masond3977122009-01-05 21:25:51 -05007152 (info->full) ? "" : "not ");
Frank Holtonefe120a2013-12-20 11:37:06 -05007153 printk(KERN_INFO "BTRFS: space_info total=%llu, used=%llu, pinned=%llu, "
Yan, Zheng8929ecfa2010-05-16 10:49:58 -04007154 "reserved=%llu, may_use=%llu, readonly=%llu\n",
Geert Uytterhoevenc1c9ff72013-08-20 13:20:07 +02007155 info->total_bytes, info->bytes_used, info->bytes_pinned,
7156 info->bytes_reserved, info->bytes_may_use,
7157 info->bytes_readonly);
Josef Bacik9ed74f22009-09-11 16:12:44 -04007158 spin_unlock(&info->lock);
7159
7160 if (!dump_block_groups)
7161 return;
Josef Bacik0f9dd462008-09-23 13:14:11 -04007162
Josef Bacik80eb2342008-10-29 14:49:05 -04007163 down_read(&info->groups_sem);
Yan, Zhengb742bb82010-05-16 10:46:24 -04007164again:
7165 list_for_each_entry(cache, &info->block_groups[index], list) {
Josef Bacik0f9dd462008-09-23 13:14:11 -04007166 spin_lock(&cache->lock);
Frank Holtonefe120a2013-12-20 11:37:06 -05007167 printk(KERN_INFO "BTRFS: "
7168 "block group %llu has %llu bytes, "
7169 "%llu used %llu pinned %llu reserved %s\n",
Geert Uytterhoevenc1c9ff72013-08-20 13:20:07 +02007170 cache->key.objectid, cache->key.offset,
7171 btrfs_block_group_used(&cache->item), cache->pinned,
7172 cache->reserved, cache->ro ? "[readonly]" : "");
Josef Bacik0f9dd462008-09-23 13:14:11 -04007173 btrfs_dump_free_space(cache, bytes);
7174 spin_unlock(&cache->lock);
7175 }
Yan, Zhengb742bb82010-05-16 10:46:24 -04007176 if (++index < BTRFS_NR_RAID_TYPES)
7177 goto again;
Josef Bacik80eb2342008-10-29 14:49:05 -04007178 up_read(&info->groups_sem);
Josef Bacik0f9dd462008-09-23 13:14:11 -04007179}
Zheng Yane8569812008-09-26 10:05:48 -04007180
/*
 * Reserve an extent of @num_bytes (but at least @min_alloc_size) from the
 * allocator.
 *
 * On success @ins describes the reservation: objectid is the start bytenr
 * and offset is the length.  On -ENOSPC the request is progressively
 * halved (clamped to what find_free_extent reported as the largest
 * available extent, rounded down to sector size, never below
 * @min_alloc_size) and retried; the last attempt is made at exactly
 * @min_alloc_size.
 *
 * @empty_size: extra free space the cluster allocator should look for
 * @hint_byte:  preferred starting bytenr
 * @is_data:    selects the data vs metadata allocation profile
 * @delalloc:   passed through to the block group reservation accounting
 *
 * Returns 0 on success or a negative errno (-ENOSPC when even the
 * minimum size could not be found).
 */
int btrfs_reserve_extent(struct btrfs_root *root,
			 u64 num_bytes, u64 min_alloc_size,
			 u64 empty_size, u64 hint_byte,
			 struct btrfs_key *ins, int is_data, int delalloc)
{
	bool final_tried = false;
	u64 flags;
	int ret;

	flags = btrfs_get_alloc_profile(root, is_data);
again:
	WARN_ON(num_bytes < root->sectorsize);
	ret = find_free_extent(root, num_bytes, empty_size, hint_byte, ins,
			       flags, delalloc);

	if (ret == -ENOSPC) {
		/*
		 * On ENOSPC find_free_extent stores the largest extent it
		 * saw in ins->offset; shrink the request towards that (and
		 * towards min_alloc_size) and try again.
		 */
		if (!final_tried && ins->offset) {
			num_bytes = min(num_bytes >> 1, ins->offset);
			num_bytes = round_down(num_bytes, root->sectorsize);
			num_bytes = max(num_bytes, min_alloc_size);
			if (num_bytes == min_alloc_size)
				final_tried = true;
			goto again;
		} else if (btrfs_test_opt(root, ENOSPC_DEBUG)) {
			struct btrfs_space_info *sinfo;

			sinfo = __find_space_info(root->fs_info, flags);
			btrfs_err(root->fs_info, "allocation failed flags %llu, wanted %llu",
				  flags, num_bytes);
			if (sinfo)
				dump_space_info(sinfo, num_bytes, 1);
		}
	}

	return ret;
}
7217
Chris Masone688b7252011-10-31 20:52:39 -04007218static int __btrfs_free_reserved_extent(struct btrfs_root *root,
Miao Xiee570fd22014-06-19 10:42:50 +08007219 u64 start, u64 len,
7220 int pin, int delalloc)
Chris Mason65b51a02008-08-01 15:11:20 -04007221{
Josef Bacik0f9dd462008-09-23 13:14:11 -04007222 struct btrfs_block_group_cache *cache;
Liu Hui1f3c79a2009-01-05 15:57:51 -05007223 int ret = 0;
Josef Bacik0f9dd462008-09-23 13:14:11 -04007224
Josef Bacik0f9dd462008-09-23 13:14:11 -04007225 cache = btrfs_lookup_block_group(root->fs_info, start);
7226 if (!cache) {
Simon Kirbyc2cf52e2013-03-19 22:41:23 +00007227 btrfs_err(root->fs_info, "Unable to find block group for %llu",
Geert Uytterhoevenc1c9ff72013-08-20 13:20:07 +02007228 start);
Josef Bacik0f9dd462008-09-23 13:14:11 -04007229 return -ENOSPC;
7230 }
Liu Hui1f3c79a2009-01-05 15:57:51 -05007231
Chris Masone688b7252011-10-31 20:52:39 -04007232 if (pin)
7233 pin_down_extent(root, cache, start, len, 1);
7234 else {
Filipe Mananadcc82f42015-03-23 14:07:40 +00007235 if (btrfs_test_opt(root, DISCARD))
7236 ret = btrfs_discard_extent(root, start, len, NULL);
Chris Masone688b7252011-10-31 20:52:39 -04007237 btrfs_add_free_space(cache, start, len);
Miao Xiee570fd22014-06-19 10:42:50 +08007238 btrfs_update_reserved_bytes(cache, len, RESERVE_FREE, delalloc);
Chris Masone688b7252011-10-31 20:52:39 -04007239 }
Dongsheng Yang31193212014-12-12 16:44:35 +08007240
Chris Masonfa9c0d792009-04-03 09:47:43 -04007241 btrfs_put_block_group(cache);
Josef Bacik817d52f2009-07-13 21:29:25 -04007242
liubo1abe9b82011-03-24 11:18:59 +00007243 trace_btrfs_reserved_extent_free(root, start, len);
7244
Chris Masone6dcd2d2008-07-17 12:53:50 -04007245 return ret;
7246}
7247
/*
 * Give back a reserved but unused extent; the range is returned to the
 * free space cache (pin == 0).
 */
int btrfs_free_reserved_extent(struct btrfs_root *root,
			       u64 start, u64 len, int delalloc)
{
	return __btrfs_free_reserved_extent(root, start, len, 0, delalloc);
}
7253
/*
 * Give back a reserved extent, but pin it (via pin_down_extent) instead
 * of returning it straight to the free space cache.
 */
int btrfs_free_and_pin_reserved_extent(struct btrfs_root *root,
				       u64 start, u64 len)
{
	return __btrfs_free_reserved_extent(root, start, len, 1, 0);
}
7259
/*
 * Insert the extent item plus its inline backref for a newly allocated
 * data extent into the extent tree and account the bytes as used.
 *
 * @parent:	   bytenr of the parent tree block for a shared backref,
 *		   or 0 for a normal (keyed) data backref
 * @root_objectid: id of the tree that owns the extent
 * @flags:	   extent flags, ORed with BTRFS_EXTENT_FLAG_DATA
 * @owner/@offset: inode objectid and file offset recorded in the data ref
 * @ins:	   objectid = extent bytenr, offset = extent length
 * @ref_mod:	   initial reference count for the new backref
 */
static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
				      struct btrfs_root *root,
				      u64 parent, u64 root_objectid,
				      u64 flags, u64 owner, u64 offset,
				      struct btrfs_key *ins, int ref_mod)
{
	int ret;
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct btrfs_extent_item *extent_item;
	struct btrfs_extent_inline_ref *iref;
	struct btrfs_path *path;
	struct extent_buffer *leaf;
	int type;
	u32 size;

	/* shared backrefs key on the parent block, keyed refs on the root */
	if (parent > 0)
		type = BTRFS_SHARED_DATA_REF_KEY;
	else
		type = BTRFS_EXTENT_DATA_REF_KEY;

	/* item holds the extent item followed by one inline ref */
	size = sizeof(*extent_item) + btrfs_extent_inline_ref_size(type);

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	path->leave_spinning = 1;
	ret = btrfs_insert_empty_item(trans, fs_info->extent_root, path,
				      ins, size);
	if (ret) {
		btrfs_free_path(path);
		return ret;
	}

	leaf = path->nodes[0];
	extent_item = btrfs_item_ptr(leaf, path->slots[0],
				     struct btrfs_extent_item);
	btrfs_set_extent_refs(leaf, extent_item, ref_mod);
	btrfs_set_extent_generation(leaf, extent_item, trans->transid);
	btrfs_set_extent_flags(leaf, extent_item,
			       flags | BTRFS_EXTENT_FLAG_DATA);

	/* the inline ref immediately follows the extent item in the leaf */
	iref = (struct btrfs_extent_inline_ref *)(extent_item + 1);
	btrfs_set_extent_inline_ref_type(leaf, iref, type);
	if (parent > 0) {
		struct btrfs_shared_data_ref *ref;
		ref = (struct btrfs_shared_data_ref *)(iref + 1);
		btrfs_set_extent_inline_ref_offset(leaf, iref, parent);
		btrfs_set_shared_data_ref_count(leaf, ref, ref_mod);
	} else {
		struct btrfs_extent_data_ref *ref;
		ref = (struct btrfs_extent_data_ref *)(&iref->offset);
		btrfs_set_extent_data_ref_root(leaf, ref, root_objectid);
		btrfs_set_extent_data_ref_objectid(leaf, ref, owner);
		btrfs_set_extent_data_ref_offset(leaf, ref, offset);
		btrfs_set_extent_data_ref_count(leaf, ref, ref_mod);
	}

	btrfs_mark_buffer_dirty(path->nodes[0]);
	btrfs_free_path(path);

	ret = update_block_group(trans, root, ins->objectid, ins->offset, 1);
	if (ret) { /* -ENOENT, logic error */
		btrfs_err(fs_info, "update block group failed for %llu %llu",
			  ins->objectid, ins->offset);
		BUG();
	}
	trace_btrfs_reserved_extent_alloc(root, ins->objectid, ins->offset);
	return ret;
}
7330
/*
 * Insert the extent item plus inline backref for a newly allocated tree
 * block into the extent tree and account the bytes as used.
 *
 * With the SKINNY_METADATA incompat feature the item is just the extent
 * item and an inline ref; otherwise a btrfs_tree_block_info (key + level)
 * sits between them.
 *
 * On failure the reserved extent is handed back pinned via
 * btrfs_free_and_pin_reserved_extent().
 *
 * @parent:	   parent block bytenr for a full-backref (shared) ref, else 0
 * @root_objectid: id of the tree that owns the block
 * @key/@level:	   first key and level stored in the (non-skinny) block info
 * @ins:	   objectid = block bytenr, offset = block length
 * @no_quota:	   currently unused here
 */
static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
				     struct btrfs_root *root,
				     u64 parent, u64 root_objectid,
				     u64 flags, struct btrfs_disk_key *key,
				     int level, struct btrfs_key *ins,
				     int no_quota)
{
	int ret;
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct btrfs_extent_item *extent_item;
	struct btrfs_tree_block_info *block_info;
	struct btrfs_extent_inline_ref *iref;
	struct btrfs_path *path;
	struct extent_buffer *leaf;
	u32 size = sizeof(*extent_item) + sizeof(*iref);
	/* NOTE(review): set again below but not read afterwards here */
	u64 num_bytes = ins->offset;
	bool skinny_metadata = btrfs_fs_incompat(root->fs_info,
						 SKINNY_METADATA);

	if (!skinny_metadata)
		size += sizeof(*block_info);

	path = btrfs_alloc_path();
	if (!path) {
		/* can't insert the item; give the reservation back pinned */
		btrfs_free_and_pin_reserved_extent(root, ins->objectid,
						   root->nodesize);
		return -ENOMEM;
	}

	path->leave_spinning = 1;
	ret = btrfs_insert_empty_item(trans, fs_info->extent_root, path,
				      ins, size);
	if (ret) {
		btrfs_free_path(path);
		btrfs_free_and_pin_reserved_extent(root, ins->objectid,
						   root->nodesize);
		return ret;
	}

	leaf = path->nodes[0];
	extent_item = btrfs_item_ptr(leaf, path->slots[0],
				     struct btrfs_extent_item);
	btrfs_set_extent_refs(leaf, extent_item, 1);
	btrfs_set_extent_generation(leaf, extent_item, trans->transid);
	btrfs_set_extent_flags(leaf, extent_item,
			       flags | BTRFS_EXTENT_FLAG_TREE_BLOCK);

	if (skinny_metadata) {
		/* skinny items have no block_info; ref follows directly */
		iref = (struct btrfs_extent_inline_ref *)(extent_item + 1);
		num_bytes = root->nodesize;
	} else {
		block_info = (struct btrfs_tree_block_info *)(extent_item + 1);
		btrfs_set_tree_block_key(leaf, block_info, key);
		btrfs_set_tree_block_level(leaf, block_info, level);
		iref = (struct btrfs_extent_inline_ref *)(block_info + 1);
	}

	if (parent > 0) {
		BUG_ON(!(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF));
		btrfs_set_extent_inline_ref_type(leaf, iref,
						 BTRFS_SHARED_BLOCK_REF_KEY);
		btrfs_set_extent_inline_ref_offset(leaf, iref, parent);
	} else {
		btrfs_set_extent_inline_ref_type(leaf, iref,
						 BTRFS_TREE_BLOCK_REF_KEY);
		btrfs_set_extent_inline_ref_offset(leaf, iref, root_objectid);
	}

	btrfs_mark_buffer_dirty(leaf);
	btrfs_free_path(path);

	ret = update_block_group(trans, root, ins->objectid, root->nodesize,
				 1);
	if (ret) { /* -ENOENT, logic error */
		btrfs_err(fs_info, "update block group failed for %llu %llu",
			  ins->objectid, ins->offset);
		BUG();
	}

	trace_btrfs_reserved_extent_alloc(root, ins->objectid, root->nodesize);
	return ret;
}
7413
7414int btrfs_alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
7415 struct btrfs_root *root,
7416 u64 root_objectid, u64 owner,
7417 u64 offset, struct btrfs_key *ins)
Chris Masone6dcd2d2008-07-17 12:53:50 -04007418{
7419 int ret;
Chris Mason1c2308f2008-09-23 13:14:13 -04007420
Yan Zheng5d4f98a2009-06-10 10:45:14 -04007421 BUG_ON(root_objectid == BTRFS_TREE_LOG_OBJECTID);
Chris Mason56bec292009-03-13 10:10:06 -04007422
Arne Jansen66d7e7f2011-09-12 15:26:38 +02007423 ret = btrfs_add_delayed_data_ref(root->fs_info, trans, ins->objectid,
7424 ins->offset, 0,
7425 root_objectid, owner, offset,
7426 BTRFS_ADD_DELAYED_EXTENT, NULL, 0);
Chris Masone6dcd2d2008-07-17 12:53:50 -04007427 return ret;
7428}
Chris Masone02119d2008-09-05 16:13:11 -04007429
/*
 * this is used by the tree logging recovery code. It records that
 * an extent has been allocated and makes sure to clear the free
 * space cache bits as well
 *
 * @ins: objectid = extent bytenr, offset = extent length
 * Returns 0, -EINVAL if no block group covers the extent, or an error
 * from excluding/inserting the extent.
 */
int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans,
				   struct btrfs_root *root,
				   u64 root_objectid, u64 owner, u64 offset,
				   struct btrfs_key *ins)
{
	int ret;
	struct btrfs_block_group_cache *block_group;

	/*
	 * Mixed block groups will exclude before processing the log so we only
	 * need to do the exclude dance if this fs isn't mixed.
	 */
	if (!btrfs_fs_incompat(root->fs_info, MIXED_GROUPS)) {
		ret = __exclude_logged_extent(root, ins->objectid, ins->offset);
		if (ret)
			return ret;
	}

	block_group = btrfs_lookup_block_group(root->fs_info, ins->objectid);
	if (!block_group)
		return -EINVAL;

	/* no accounting: the space was already accounted when first logged */
	ret = btrfs_update_reserved_bytes(block_group, ins->offset,
					  RESERVE_ALLOC_NO_ACCOUNT, 0);
	BUG_ON(ret); /* logic error */
	ret = alloc_reserved_file_extent(trans, root, 0, root_objectid,
					 0, owner, offset, ins, 1);
	btrfs_put_block_group(block_group);
	return ret;
}
7465
/*
 * Set up the extent buffer for a freshly allocated tree block at @bytenr:
 * create (or find) the buffer, stamp the transaction generation, lock it
 * and mark it dirty in the right extent io tree.
 *
 * Returns the buffer locked for blocking, or ERR_PTR(-ENOMEM).
 */
static struct extent_buffer *
btrfs_init_new_buffer(struct btrfs_trans_handle *trans, struct btrfs_root *root,
		      u64 bytenr, int level)
{
	struct extent_buffer *buf;

	buf = btrfs_find_create_tree_block(root, bytenr);
	if (!buf)
		return ERR_PTR(-ENOMEM);
	btrfs_set_header_generation(buf, trans->transid);
	btrfs_set_buffer_lockdep_class(root->root_key.objectid, buf, level);
	btrfs_tree_lock(buf);
	clean_tree_block(trans, root->fs_info, buf);
	clear_bit(EXTENT_BUFFER_STALE, &buf->bflags);

	btrfs_set_lock_blocking(buf);
	btrfs_set_buffer_uptodate(buf);

	if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID) {
		buf->log_index = root->log_transid % 2;
		/*
		 * we allow two log transactions at a time, use different
		 * EXTENT bit to differentiate dirty pages.
		 */
		if (buf->log_index == 0)
			set_extent_dirty(&root->dirty_log_pages, buf->start,
					 buf->start + buf->len - 1, GFP_NOFS);
		else
			set_extent_new(&root->dirty_log_pages, buf->start,
				       buf->start + buf->len - 1, GFP_NOFS);
	} else {
		/* ordinary trees track dirtiness on the transaction */
		buf->log_index = -1;
		set_extent_dirty(&trans->transaction->dirty_pages, buf->start,
				 buf->start + buf->len - 1, GFP_NOFS);
	}
	trans->blocks_used++;
	/* this returns a buffer locked for blocking */
	return buf;
}
7505
/*
 * Find a block reserve that can pay for a @blocksize tree block.
 *
 * Order of attempts:
 *   1. take the bytes from the root's own block rsv;
 *   2. if that rsv is the global one and looks stale, refresh it once
 *      and retry;
 *   3. reserve fresh metadata bytes with no flushing;
 *   4. as a last resort steal from the global reserve when it shares
 *      the same space_info.
 *
 * Returns the rsv the bytes were taken from (callers must release to
 * the same one) or an ERR_PTR.
 */
static struct btrfs_block_rsv *
use_block_rsv(struct btrfs_trans_handle *trans,
	      struct btrfs_root *root, u32 blocksize)
{
	struct btrfs_block_rsv *block_rsv;
	struct btrfs_block_rsv *global_rsv = &root->fs_info->global_block_rsv;
	int ret;
	bool global_updated = false;

	block_rsv = get_block_rsv(trans, root);

	if (unlikely(block_rsv->size == 0))
		goto try_reserve;
again:
	ret = block_rsv_use_bytes(block_rsv, blocksize);
	if (!ret)
		return block_rsv;

	if (block_rsv->failfast)
		return ERR_PTR(ret);

	/* refresh the global rsv once before falling through */
	if (block_rsv->type == BTRFS_BLOCK_RSV_GLOBAL && !global_updated) {
		global_updated = true;
		update_global_block_rsv(root->fs_info);
		goto again;
	}

	if (btrfs_test_opt(root, ENOSPC_DEBUG)) {
		static DEFINE_RATELIMIT_STATE(_rs,
					      DEFAULT_RATELIMIT_INTERVAL * 10,
					      /*DEFAULT_RATELIMIT_BURST*/ 1);
		if (__ratelimit(&_rs))
			WARN(1, KERN_DEBUG
				"BTRFS: block rsv returned %d\n", ret);
	}
try_reserve:
	ret = reserve_metadata_bytes(root, block_rsv, blocksize,
				     BTRFS_RESERVE_NO_FLUSH);
	if (!ret)
		return block_rsv;
	/*
	 * If we couldn't reserve metadata bytes try and use some from
	 * the global reserve if its space type is the same as the global
	 * reservation.
	 */
	if (block_rsv->type != BTRFS_BLOCK_RSV_GLOBAL &&
	    block_rsv->space_info == global_rsv->space_info) {
		ret = block_rsv_use_bytes(global_rsv, blocksize);
		if (!ret)
			return global_rsv;
	}
	return ERR_PTR(ret);
}
7559
/*
 * Undo use_block_rsv(): put @blocksize bytes back into @block_rsv and
 * release the corresponding space_info reservation.
 */
static void unuse_block_rsv(struct btrfs_fs_info *fs_info,
			    struct btrfs_block_rsv *block_rsv, u32 blocksize)
{
	block_rsv_add_bytes(block_rsv, blocksize, 0);
	block_rsv_release_bytes(fs_info, block_rsv, NULL, 0);
}
7566
/*
 * finds a free extent and does all the dirty work required for allocation
 * returns the tree buffer or an ERR_PTR on error.
 *
 * The space comes out of a block reserve (use_block_rsv), the extent is
 * reserved with btrfs_reserve_extent, and - except for tree log blocks -
 * a delayed tree ref carrying the key/flags update is queued.  Each error
 * label on the way out unwinds exactly the steps completed so far.
 */
struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans,
					     struct btrfs_root *root,
					     u64 parent, u64 root_objectid,
					     struct btrfs_disk_key *key, int level,
					     u64 hint, u64 empty_size)
{
	struct btrfs_key ins;
	struct btrfs_block_rsv *block_rsv;
	struct extent_buffer *buf;
	struct btrfs_delayed_extent_op *extent_op;
	u64 flags = 0;
	int ret;
	u32 blocksize = root->nodesize;
	bool skinny_metadata = btrfs_fs_incompat(root->fs_info,
						 SKINNY_METADATA);

	/* test roots just bump a linear allocation cursor */
	if (btrfs_test_is_dummy_root(root)) {
		buf = btrfs_init_new_buffer(trans, root, root->alloc_bytenr,
					    level);
		if (!IS_ERR(buf))
			root->alloc_bytenr += blocksize;
		return buf;
	}

	block_rsv = use_block_rsv(trans, root, blocksize);
	if (IS_ERR(block_rsv))
		return ERR_CAST(block_rsv);

	ret = btrfs_reserve_extent(root, blocksize, blocksize,
				   empty_size, hint, &ins, 0, 0);
	if (ret)
		goto out_unuse;

	buf = btrfs_init_new_buffer(trans, root, ins.objectid, level);
	if (IS_ERR(buf)) {
		ret = PTR_ERR(buf);
		goto out_free_reserved;
	}

	if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
		if (parent == 0)
			parent = ins.objectid;
		flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
	} else
		BUG_ON(parent > 0);

	if (root_objectid != BTRFS_TREE_LOG_OBJECTID) {
		extent_op = btrfs_alloc_delayed_extent_op();
		if (!extent_op) {
			ret = -ENOMEM;
			goto out_free_buf;
		}
		if (key)
			memcpy(&extent_op->key, key, sizeof(extent_op->key));
		else
			memset(&extent_op->key, 0, sizeof(extent_op->key));
		extent_op->flags_to_set = flags;
		/* skinny metadata items carry no key to update */
		if (skinny_metadata)
			extent_op->update_key = 0;
		else
			extent_op->update_key = 1;
		extent_op->update_flags = 1;
		extent_op->is_data = 0;
		extent_op->level = level;

		ret = btrfs_add_delayed_tree_ref(root->fs_info, trans,
						 ins.objectid, ins.offset,
						 parent, root_objectid, level,
						 BTRFS_ADD_DELAYED_EXTENT,
						 extent_op, 0);
		if (ret)
			goto out_free_delayed;
	}
	return buf;

out_free_delayed:
	btrfs_free_delayed_extent_op(extent_op);
out_free_buf:
	free_extent_buffer(buf);
out_free_reserved:
	btrfs_free_reserved_extent(root, ins.objectid, ins.offset, 0);
out_unuse:
	unuse_block_rsv(root->fs_info, block_rsv, blocksize);
	return ERR_PTR(ret);
}
Chris Masona28ec192007-03-06 20:08:01 -05007656
/*
 * State carried through the tree-walk used when dropping a snapshot or
 * updating backrefs; one instance per walk.
 */
struct walk_control {
	u64 refs[BTRFS_MAX_LEVEL];	/* ref count of node at each level */
	u64 flags[BTRFS_MAX_LEVEL];	/* extent flags of node at each level */
	struct btrfs_key update_progress; /* resume point for UPDATE_BACKREF */
	int stage;		/* DROP_REFERENCE or UPDATE_BACKREF */
	int level;		/* current level being walked */
	int shared_level;	/* highest level with a shared node */
	int update_ref;		/* whether to switch to UPDATE_BACKREF */
	int keep_locks;		/* keep path locks while walking */
	int reada_slot;		/* next slot readahead starts from */
	int reada_count;	/* adaptive readahead batch size */
	int for_reloc;		/* walk done on behalf of relocation */
};

#define DROP_REFERENCE	1
#define UPDATE_BACKREF	2
7673
/*
 * Trigger readahead for the children of the node at the current walk
 * level, starting at the current slot, so walk_down can find them in
 * cache.  The batch size (wc->reada_count) grows while we keep walking
 * forward and shrinks when we have jumped backwards.  Children that the
 * walk would skip anyway (wrong generation, shared and about to be
 * dropped, behind update_progress) are filtered out before issuing IO.
 */
static noinline void reada_walk_down(struct btrfs_trans_handle *trans,
				     struct btrfs_root *root,
				     struct walk_control *wc,
				     struct btrfs_path *path)
{
	u64 bytenr;
	u64 generation;
	u64 refs;
	u64 flags;
	u32 nritems;
	u32 blocksize;
	struct btrfs_key key;
	struct extent_buffer *eb;
	int ret;
	int slot;
	int nread = 0;

	/* adapt the batch: shrink after a backwards jump, grow otherwise */
	if (path->slots[wc->level] < wc->reada_slot) {
		wc->reada_count = wc->reada_count * 2 / 3;
		wc->reada_count = max(wc->reada_count, 2);
	} else {
		wc->reada_count = wc->reada_count * 3 / 2;
		wc->reada_count = min_t(int, wc->reada_count,
					BTRFS_NODEPTRS_PER_BLOCK(root));
	}

	eb = path->nodes[wc->level];
	nritems = btrfs_header_nritems(eb);
	blocksize = root->nodesize;

	for (slot = path->slots[wc->level]; slot < nritems; slot++) {
		if (nread >= wc->reada_count)
			break;

		cond_resched();
		bytenr = btrfs_node_blockptr(eb, slot);
		generation = btrfs_node_ptr_generation(eb, slot);

		/* always read the block the walk is standing on */
		if (slot == path->slots[wc->level])
			goto reada;

		if (wc->stage == UPDATE_BACKREF &&
		    generation <= root->root_key.offset)
			continue;

		/* We don't lock the tree block, it's OK to be racy here */
		ret = btrfs_lookup_extent_info(trans, root, bytenr,
					       wc->level - 1, 1, &refs,
					       &flags);
		/* We don't care about errors in readahead. */
		if (ret < 0)
			continue;
		BUG_ON(refs == 0);

		if (wc->stage == DROP_REFERENCE) {
			if (refs == 1)
				goto reada;

			if (wc->level == 1 &&
			    (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF))
				continue;
			if (!wc->update_ref ||
			    generation <= root->root_key.offset)
				continue;
			btrfs_node_key_to_cpu(eb, &key, slot);
			ret = btrfs_comp_cpu_keys(&key,
						  &wc->update_progress);
			if (ret < 0)
				continue;
		} else {
			if (wc->level == 1 &&
			    (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF))
				continue;
		}
reada:
		readahead_tree_block(root, bytenr);
		nread++;
	}
	wc->reada_slot = slot;
}
7754
/*
 * TODO: Modify related function to add related node/leaf to dirty_extent_root,
 * for later qgroup accounting.
 *
 * Current, this function does nothing: it walks the leaf's file extent
 * items and computes the values the future accounting will need, but
 * discards them and always returns 0.
 */
static int account_leaf_items(struct btrfs_trans_handle *trans,
			      struct btrfs_root *root,
			      struct extent_buffer *eb)
{
	int nr = btrfs_header_nritems(eb);
	int i, extent_type;
	struct btrfs_key key;
	struct btrfs_file_extent_item *fi;
	u64 bytenr, num_bytes;

	for (i = 0; i < nr; i++) {
		btrfs_item_key_to_cpu(eb, &key, i);

		if (key.type != BTRFS_EXTENT_DATA_KEY)
			continue;

		fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
		/* filter out non qgroup-accountable extents  */
		extent_type = btrfs_file_extent_type(eb, fi);

		/* inline extents live inside the leaf; nothing to account */
		if (extent_type == BTRFS_FILE_EXTENT_INLINE)
			continue;

		/* bytenr 0 marks a hole */
		bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
		if (!bytenr)
			continue;

		/* computed for the future accounting; currently unused */
		num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
	}
	return 0;
}
7792
7793/*
7794 * Walk up the tree from the bottom, freeing leaves and any interior
7795 * nodes which have had all slots visited. If a node (leaf or
 * interior) is freed, the node above it will have its slot
7797 * incremented. The root node will never be freed.
7798 *
7799 * At the end of this function, we should have a path which has all
7800 * slots incremented to the next position for a search. If we need to
7801 * read a new node it will be NULL and the node above it will have the
7802 * correct slot selected for a later read.
7803 *
7804 * If we increment the root nodes slot counter past the number of
7805 * elements, 1 is returned to signal completion of the search.
7806 */
7807static int adjust_slots_upwards(struct btrfs_root *root,
7808 struct btrfs_path *path, int root_level)
7809{
7810 int level = 0;
7811 int nr, slot;
7812 struct extent_buffer *eb;
7813
7814 if (root_level == 0)
7815 return 1;
7816
7817 while (level <= root_level) {
7818 eb = path->nodes[level];
7819 nr = btrfs_header_nritems(eb);
7820 path->slots[level]++;
7821 slot = path->slots[level];
7822 if (slot >= nr || level == 0) {
7823 /*
7824 * Don't free the root - we will detect this
7825 * condition after our loop and return a
7826 * positive value for caller to stop walking the tree.
7827 */
7828 if (level != root_level) {
7829 btrfs_tree_unlock_rw(eb, path->locks[level]);
7830 path->locks[level] = 0;
7831
7832 free_extent_buffer(eb);
7833 path->nodes[level] = NULL;
7834 path->slots[level] = 0;
7835 }
7836 } else {
7837 /*
7838 * We have a valid slot to walk back down
7839 * from. Stop here so caller can process these
7840 * new nodes.
7841 */
7842 break;
7843 }
7844
7845 level++;
7846 }
7847
7848 eb = path->nodes[root_level];
7849 if (path->slots[root_level] >= btrfs_header_nritems(eb))
7850 return 1;
7851
7852 return 0;
7853}
7854
7855/*
7856 * root_eb is the subtree root and is locked before this function is called.
Qu Wenruo0ed47922015-04-16 16:55:08 +08007857 * TODO: Modify this function to mark all (including complete shared node)
 * to dirty_extent_root to allow it to get accounted in qgroup.
Mark Fasheh11526512014-07-17 12:39:01 -07007859 */
static int account_shared_subtree(struct btrfs_trans_handle *trans,
				  struct btrfs_root *root,
				  struct extent_buffer *root_eb,
				  u64 root_gen,
				  int root_level)
{
	int ret = 0;
	int level;
	struct extent_buffer *eb = root_eb;
	struct btrfs_path *path = NULL;

	BUG_ON(root_level < 0 || root_level > BTRFS_MAX_LEVEL);
	BUG_ON(root_eb == NULL);

	/* nothing to account when quotas are disabled */
	if (!root->fs_info->quota_enabled)
		return 0;

	/* make sure the subtree root itself is read in */
	if (!extent_buffer_uptodate(root_eb)) {
		ret = btrfs_read_buffer(root_eb, root_gen);
		if (ret)
			goto out;
	}

	/* subtree root is a leaf: only its own items need accounting */
	if (root_level == 0) {
		ret = account_leaf_items(trans, root, root_eb);
		goto out;
	}

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	/*
	 * Walk down the tree.  Missing extent blocks are filled in as
	 * we go. Metadata is accounted every time we read a new
	 * extent block.
	 *
	 * When we reach a leaf, we account for file extent items in it,
	 * walk back up the tree (adjusting slot pointers as we go)
	 * and restart the search process.
	 */
	extent_buffer_get(root_eb); /* For path */
	path->nodes[root_level] = root_eb;
	path->slots[root_level] = 0;
	path->locks[root_level] = 0; /* so release_path doesn't try to unlock */
walk_down:
	level = root_level;
	while (level >= 0) {
		if (path->nodes[level] == NULL) {
			int parent_slot;
			u64 child_gen;
			u64 child_bytenr;

			/* We need to get child blockptr/gen from
			 * parent before we can read it. */
			eb = path->nodes[level + 1];
			parent_slot = path->slots[level + 1];
			child_bytenr = btrfs_node_blockptr(eb, parent_slot);
			child_gen = btrfs_node_ptr_generation(eb, parent_slot);

			eb = read_tree_block(root, child_bytenr, child_gen);
			if (IS_ERR(eb)) {
				ret = PTR_ERR(eb);
				goto out;
			} else if (!extent_buffer_uptodate(eb)) {
				free_extent_buffer(eb);
				ret = -EIO;
				goto out;
			}

			path->nodes[level] = eb;
			path->slots[level] = 0;

			/* a read lock suffices: we only account, never modify */
			btrfs_tree_read_lock(eb);
			btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK);
			path->locks[level] = BTRFS_READ_LOCK_BLOCKING;
		}

		if (level == 0) {
			ret = account_leaf_items(trans, root, path->nodes[level]);
			if (ret)
				goto out;

			/* Nonzero return here means we completed our search */
			ret = adjust_slots_upwards(root, path, root_level);
			if (ret)
				break;

			/* Restart search with new slots */
			goto walk_down;
		}

		level--;
	}

	ret = 0;
out:
	/* NULL-safe; releases any buffers/locks still held in the path */
	btrfs_free_path(path);

	return ret;
}
7961
Chris Mason9aca1d52007-03-13 11:09:37 -04007962/*
Liu Bo2c016dc2012-12-26 15:32:17 +08007963 * helper to process tree block while walking down the tree.
Yan Zheng2c47e6052009-06-27 21:07:35 -04007964 *
Yan Zheng2c47e6052009-06-27 21:07:35 -04007965 * when wc->stage == UPDATE_BACKREF, this function updates
7966 * back refs for pointers in the block.
7967 *
7968 * NOTE: return value 1 means we should stop walking down.
Yan Zhengf82d02d2008-10-29 14:49:05 -04007969 */
static noinline int walk_down_proc(struct btrfs_trans_handle *trans,
				   struct btrfs_root *root,
				   struct btrfs_path *path,
				   struct walk_control *wc, int lookup_info)
{
	int level = wc->level;
	struct extent_buffer *eb = path->nodes[level];
	u64 flag = BTRFS_BLOCK_FLAG_FULL_BACKREF;
	int ret;

	/* UPDATE_BACKREF only needs to visit blocks owned by this root */
	if (wc->stage == UPDATE_BACKREF &&
	    btrfs_header_owner(eb) != root->root_key.objectid)
		return 1;

	/*
	 * when reference count of tree block is 1, it won't increase
	 * again. once full backref flag is set, we never clear it.
	 */
	if (lookup_info &&
	    ((wc->stage == DROP_REFERENCE && wc->refs[level] != 1) ||
	     (wc->stage == UPDATE_BACKREF && !(wc->flags[level] & flag)))) {
		BUG_ON(!path->locks[level]);
		/* refresh cached refcount/flags for this block */
		ret = btrfs_lookup_extent_info(trans, root,
					       eb->start, level, 1,
					       &wc->refs[level],
					       &wc->flags[level]);
		BUG_ON(ret == -ENOMEM);
		if (ret)
			return ret;
		BUG_ON(wc->refs[level] == 0);
	}

	if (wc->stage == DROP_REFERENCE) {
		/* block is shared: stop walking down, caller handles the ref */
		if (wc->refs[level] > 1)
			return 1;

		if (path->locks[level] && !wc->keep_locks) {
			btrfs_tree_unlock_rw(eb, path->locks[level]);
			path->locks[level] = 0;
		}
		return 0;
	}

	/* wc->stage == UPDATE_BACKREF */
	if (!(wc->flags[level] & flag)) {
		BUG_ON(!path->locks[level]);
		/*
		 * Convert this block to use full backrefs: add full-backref
		 * refs for the lower blocks, drop the implicit ones, then
		 * record the flag in the extent item.
		 */
		ret = btrfs_inc_ref(trans, root, eb, 1);
		BUG_ON(ret); /* -ENOMEM */
		ret = btrfs_dec_ref(trans, root, eb, 0);
		BUG_ON(ret); /* -ENOMEM */
		ret = btrfs_set_disk_extent_flags(trans, root, eb->start,
						  eb->len, flag,
						  btrfs_header_level(eb), 0);
		BUG_ON(ret); /* -ENOMEM */
		wc->flags[level] |= flag;
	}

	/*
	 * the block is shared by multiple trees, so it's not good to
	 * keep the tree lock
	 */
	if (path->locks[level] && level > 0) {
		btrfs_tree_unlock_rw(eb, path->locks[level]);
		path->locks[level] = 0;
	}
	return 0;
}
8037
8038/*
Liu Bo2c016dc2012-12-26 15:32:17 +08008039 * helper to process tree block pointer.
Yan, Zheng1c4850e2009-09-21 15:55:59 -04008040 *
8041 * when wc->stage == DROP_REFERENCE, this function checks
8042 * reference count of the block pointed to. if the block
8043 * is shared and we need update back refs for the subtree
8044 * rooted at the block, this function changes wc->stage to
8045 * UPDATE_BACKREF. if the block is shared and there is no
8046 * need to update back, this function drops the reference
8047 * to the block.
8048 *
8049 * NOTE: return value 1 means we should stop walking down.
8050 */
static noinline int do_walk_down(struct btrfs_trans_handle *trans,
				 struct btrfs_root *root,
				 struct btrfs_path *path,
				 struct walk_control *wc, int *lookup_info)
{
	u64 bytenr;
	u64 generation;
	u64 parent;
	u32 blocksize;
	struct btrfs_key key;
	struct extent_buffer *next;
	int level = wc->level;
	int reada = 0;
	int ret = 0;
	bool need_account = false;

	generation = btrfs_node_ptr_generation(path->nodes[level],
					       path->slots[level]);
	/*
	 * if the lower level block was created before the snapshot
	 * was created, we know there is no need to update back refs
	 * for the subtree
	 */
	if (wc->stage == UPDATE_BACKREF &&
	    generation <= root->root_key.offset) {
		*lookup_info = 1;
		return 1;
	}

	bytenr = btrfs_node_blockptr(path->nodes[level], path->slots[level]);
	blocksize = root->nodesize;

	/* look for a cached buffer; if absent, create one and allow reada */
	next = btrfs_find_tree_block(root->fs_info, bytenr);
	if (!next) {
		next = btrfs_find_create_tree_block(root, bytenr);
		if (!next)
			return -ENOMEM;
		btrfs_set_buffer_lockdep_class(root->root_key.objectid, next,
					       level - 1);
		reada = 1;
	}
	btrfs_tree_lock(next);
	btrfs_set_lock_blocking(next);

	/* fetch refcount/flags of the child block */
	ret = btrfs_lookup_extent_info(trans, root, bytenr, level - 1, 1,
				       &wc->refs[level - 1],
				       &wc->flags[level - 1]);
	if (ret < 0) {
		btrfs_tree_unlock(next);
		return ret;
	}

	if (unlikely(wc->refs[level - 1] == 0)) {
		btrfs_err(root->fs_info, "Missing references.");
		BUG();
	}
	*lookup_info = 0;

	if (wc->stage == DROP_REFERENCE) {
		if (wc->refs[level - 1] > 1) {
			/* child is shared with another tree */
			need_account = true;
			if (level == 1 &&
			    (wc->flags[0] & BTRFS_BLOCK_FLAG_FULL_BACKREF))
				goto skip;

			if (!wc->update_ref ||
			    generation <= root->root_key.offset)
				goto skip;

			/* only update backrefs past the relocation progress key */
			btrfs_node_key_to_cpu(path->nodes[level], &key,
					      path->slots[level]);
			ret = btrfs_comp_cpu_keys(&key, &wc->update_progress);
			if (ret < 0)
				goto skip;

			/* switch stage: backrefs below must be updated first */
			wc->stage = UPDATE_BACKREF;
			wc->shared_level = level - 1;
		}
	} else {
		if (level == 1 &&
		    (wc->flags[0] & BTRFS_BLOCK_FLAG_FULL_BACKREF))
			goto skip;
	}

	/* stale buffer: drop it and re-read from disk below */
	if (!btrfs_buffer_uptodate(next, generation, 0)) {
		btrfs_tree_unlock(next);
		free_extent_buffer(next);
		next = NULL;
		*lookup_info = 1;
	}

	if (!next) {
		if (reada && level == 1)
			reada_walk_down(trans, root, wc, path);
		next = read_tree_block(root, bytenr, generation);
		if (IS_ERR(next)) {
			return PTR_ERR(next);
		} else if (!extent_buffer_uptodate(next)) {
			free_extent_buffer(next);
			return -EIO;
		}
		btrfs_tree_lock(next);
		btrfs_set_lock_blocking(next);
	}

	/* descend one level: hook the child into the path */
	level--;
	BUG_ON(level != btrfs_header_level(next));
	path->nodes[level] = next;
	path->slots[level] = 0;
	path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
	wc->level = level;
	if (wc->level == 1)
		wc->reada_slot = 0;
	return 0;
skip:
	/* not descending: drop our reference on the child subtree */
	wc->refs[level - 1] = 0;
	wc->flags[level - 1] = 0;
	if (wc->stage == DROP_REFERENCE) {
		if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
			parent = path->nodes[level]->start;
		} else {
			BUG_ON(root->root_key.objectid !=
			       btrfs_header_owner(path->nodes[level]));
			parent = 0;
		}

		if (need_account) {
			/* best effort: accounting failure only desyncs qgroups */
			ret = account_shared_subtree(trans, root, next,
						     generation, level - 1);
			if (ret) {
				printk_ratelimited(KERN_ERR "BTRFS: %s Error "
					"%d accounting shared subtree. Quota "
					"is out of sync, rescan required.\n",
					root->fs_info->sb->s_id, ret);
			}
		}
		ret = btrfs_free_extent(trans, root, bytenr, blocksize, parent,
				root->root_key.objectid, level - 1, 0, 0);
		BUG_ON(ret); /* -ENOMEM */
	}
	btrfs_tree_unlock(next);
	free_extent_buffer(next);
	*lookup_info = 1;
	return 1;
}
8196
8197/*
Liu Bo2c016dc2012-12-26 15:32:17 +08008198 * helper to process tree block while walking up the tree.
Yan Zheng2c47e6052009-06-27 21:07:35 -04008199 *
8200 * when wc->stage == DROP_REFERENCE, this function drops
8201 * reference count on the block.
8202 *
8203 * when wc->stage == UPDATE_BACKREF, this function changes
8204 * wc->stage back to DROP_REFERENCE if we changed wc->stage
8205 * to UPDATE_BACKREF previously while processing the block.
8206 *
8207 * NOTE: return value 1 means we should stop walking up.
8208 */
static noinline int walk_up_proc(struct btrfs_trans_handle *trans,
				 struct btrfs_root *root,
				 struct btrfs_path *path,
				 struct walk_control *wc)
{
	int ret;
	int level = wc->level;
	struct extent_buffer *eb = path->nodes[level];
	u64 parent = 0;

	if (wc->stage == UPDATE_BACKREF) {
		BUG_ON(wc->shared_level < level);
		if (level < wc->shared_level)
			goto out;

		/*
		 * Done updating backrefs under the shared block.  If no
		 * further key exists, every backref is updated and
		 * update_ref can be cleared for the rest of the drop.
		 */
		ret = find_next_key(path, level + 1, &wc->update_progress);
		if (ret > 0)
			wc->update_ref = 0;

		wc->stage = DROP_REFERENCE;
		wc->shared_level = -1;
		path->slots[level] = 0;

		/*
		 * check reference count again if the block isn't locked.
		 * we should start walking down the tree again if reference
		 * count is one.
		 */
		if (!path->locks[level]) {
			BUG_ON(level == 0);
			btrfs_tree_lock(eb);
			btrfs_set_lock_blocking(eb);
			path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;

			ret = btrfs_lookup_extent_info(trans, root,
						       eb->start, level, 1,
						       &wc->refs[level],
						       &wc->flags[level]);
			if (ret < 0) {
				btrfs_tree_unlock_rw(eb, path->locks[level]);
				path->locks[level] = 0;
				return ret;
			}
			BUG_ON(wc->refs[level] == 0);
			if (wc->refs[level] == 1) {
				btrfs_tree_unlock_rw(eb, path->locks[level]);
				path->locks[level] = 0;
				return 1;
			}
		}
	}

	/* wc->stage == DROP_REFERENCE */
	BUG_ON(wc->refs[level] > 1 && !path->locks[level]);

	if (wc->refs[level] == 1) {
		if (level == 0) {
			/* drop refs held through this leaf's items */
			if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF)
				ret = btrfs_dec_ref(trans, root, eb, 1);
			else
				ret = btrfs_dec_ref(trans, root, eb, 0);
			BUG_ON(ret); /* -ENOMEM */
			/* best effort: failure only desyncs qgroup numbers */
			ret = account_leaf_items(trans, root, eb);
			if (ret) {
				printk_ratelimited(KERN_ERR "BTRFS: %s Error "
					"%d accounting leaf items. Quota "
					"is out of sync, rescan required.\n",
					root->fs_info->sb->s_id, ret);
			}
		}
		/* make block locked assertion in clean_tree_block happy */
		if (!path->locks[level] &&
		    btrfs_header_generation(eb) == trans->transid) {
			btrfs_tree_lock(eb);
			btrfs_set_lock_blocking(eb);
			path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
		}
		clean_tree_block(trans, root->fs_info, eb);
	}

	/* pick the parent bytenr needed to free a full-backref block */
	if (eb == root->node) {
		if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF)
			parent = eb->start;
		else
			BUG_ON(root->root_key.objectid !=
			       btrfs_header_owner(eb));
	} else {
		if (wc->flags[level + 1] & BTRFS_BLOCK_FLAG_FULL_BACKREF)
			parent = path->nodes[level + 1]->start;
		else
			BUG_ON(root->root_key.objectid !=
			       btrfs_header_owner(path->nodes[level + 1]));
	}

	btrfs_free_tree_block(trans, root, eb, parent, wc->refs[level] == 1);
out:
	wc->refs[level] = 0;
	wc->flags[level] = 0;
	return 0;
}
8309
Yan Zheng5d4f98a2009-06-10 10:45:14 -04008310static noinline int walk_down_tree(struct btrfs_trans_handle *trans,
8311 struct btrfs_root *root,
Yan Zheng2c47e6052009-06-27 21:07:35 -04008312 struct btrfs_path *path,
8313 struct walk_control *wc)
Yan Zhengf82d02d2008-10-29 14:49:05 -04008314{
Yan Zheng2c47e6052009-06-27 21:07:35 -04008315 int level = wc->level;
Yan, Zheng94fcca92009-10-09 09:25:16 -04008316 int lookup_info = 1;
Yan Zhengf82d02d2008-10-29 14:49:05 -04008317 int ret;
8318
Yan Zheng2c47e6052009-06-27 21:07:35 -04008319 while (level >= 0) {
Yan, Zheng94fcca92009-10-09 09:25:16 -04008320 ret = walk_down_proc(trans, root, path, wc, lookup_info);
Yan Zheng2c47e6052009-06-27 21:07:35 -04008321 if (ret > 0)
Yan Zhengf82d02d2008-10-29 14:49:05 -04008322 break;
Yan Zhengf82d02d2008-10-29 14:49:05 -04008323
Yan Zheng2c47e6052009-06-27 21:07:35 -04008324 if (level == 0)
8325 break;
8326
Yan, Zheng7a7965f2010-02-01 02:41:17 +00008327 if (path->slots[level] >=
8328 btrfs_header_nritems(path->nodes[level]))
8329 break;
8330
Yan, Zheng94fcca92009-10-09 09:25:16 -04008331 ret = do_walk_down(trans, root, path, wc, &lookup_info);
Yan, Zheng1c4850e2009-09-21 15:55:59 -04008332 if (ret > 0) {
8333 path->slots[level]++;
8334 continue;
Miao Xie90d2c51d2010-03-25 12:37:12 +00008335 } else if (ret < 0)
8336 return ret;
Yan, Zheng1c4850e2009-09-21 15:55:59 -04008337 level = wc->level;
Yan Zhengf82d02d2008-10-29 14:49:05 -04008338 }
Yan Zhengf82d02d2008-10-29 14:49:05 -04008339 return 0;
8340}
8341
Chris Masond3977122009-01-05 21:25:51 -05008342static noinline int walk_up_tree(struct btrfs_trans_handle *trans,
Chris Mason98ed5172008-01-03 10:01:48 -05008343 struct btrfs_root *root,
Yan Zhengf82d02d2008-10-29 14:49:05 -04008344 struct btrfs_path *path,
Yan Zheng2c47e6052009-06-27 21:07:35 -04008345 struct walk_control *wc, int max_level)
Chris Mason20524f02007-03-10 06:35:47 -05008346{
Yan Zheng2c47e6052009-06-27 21:07:35 -04008347 int level = wc->level;
Chris Mason20524f02007-03-10 06:35:47 -05008348 int ret;
Chris Mason9f3a7422007-08-07 15:52:19 -04008349
Yan Zheng2c47e6052009-06-27 21:07:35 -04008350 path->slots[level] = btrfs_header_nritems(path->nodes[level]);
8351 while (level < max_level && path->nodes[level]) {
8352 wc->level = level;
8353 if (path->slots[level] + 1 <
8354 btrfs_header_nritems(path->nodes[level])) {
8355 path->slots[level]++;
Chris Mason20524f02007-03-10 06:35:47 -05008356 return 0;
8357 } else {
Yan Zheng2c47e6052009-06-27 21:07:35 -04008358 ret = walk_up_proc(trans, root, path, wc);
8359 if (ret > 0)
8360 return 0;
Chris Masonbd56b302009-02-04 09:27:02 -05008361
Yan Zheng2c47e6052009-06-27 21:07:35 -04008362 if (path->locks[level]) {
Chris Masonbd681512011-07-16 15:23:14 -04008363 btrfs_tree_unlock_rw(path->nodes[level],
8364 path->locks[level]);
Yan Zheng2c47e6052009-06-27 21:07:35 -04008365 path->locks[level] = 0;
Yan Zhengf82d02d2008-10-29 14:49:05 -04008366 }
Yan Zheng2c47e6052009-06-27 21:07:35 -04008367 free_extent_buffer(path->nodes[level]);
8368 path->nodes[level] = NULL;
8369 level++;
Chris Mason20524f02007-03-10 06:35:47 -05008370 }
8371 }
8372 return 1;
8373}
8374
Chris Mason9aca1d52007-03-13 11:09:37 -04008375/*
Yan Zheng2c47e6052009-06-27 21:07:35 -04008376 * drop a subvolume tree.
8377 *
8378 * this function traverses the tree freeing any blocks that only
8379 * referenced by the tree.
8380 *
8381 * when a shared tree block is found. this function decreases its
8382 * reference count by one. if update_ref is true, this function
8383 * also make sure backrefs for the shared block and all lower level
8384 * blocks are properly updated.
David Sterba9d1a2a32013-03-12 15:13:28 +00008385 *
8386 * If called with for_reloc == 0, may exit early with -EAGAIN
Chris Mason9aca1d52007-03-13 11:09:37 -04008387 */
Jeff Mahoney2c536792011-10-03 23:22:41 -04008388int btrfs_drop_snapshot(struct btrfs_root *root,
Arne Jansen66d7e7f2011-09-12 15:26:38 +02008389 struct btrfs_block_rsv *block_rsv, int update_ref,
8390 int for_reloc)
Chris Mason20524f02007-03-10 06:35:47 -05008391{
Chris Mason5caf2a02007-04-02 11:20:42 -04008392 struct btrfs_path *path;
Yan Zheng2c47e6052009-06-27 21:07:35 -04008393 struct btrfs_trans_handle *trans;
8394 struct btrfs_root *tree_root = root->fs_info->tree_root;
Chris Mason9f3a7422007-08-07 15:52:19 -04008395 struct btrfs_root_item *root_item = &root->root_item;
Yan Zheng2c47e6052009-06-27 21:07:35 -04008396 struct walk_control *wc;
8397 struct btrfs_key key;
8398 int err = 0;
8399 int ret;
8400 int level;
Josef Bacikd29a9f62013-07-17 19:30:20 -04008401 bool root_dropped = false;
Chris Mason20524f02007-03-10 06:35:47 -05008402
Mark Fasheh11526512014-07-17 12:39:01 -07008403 btrfs_debug(root->fs_info, "Drop subvolume %llu", root->objectid);
8404
Chris Mason5caf2a02007-04-02 11:20:42 -04008405 path = btrfs_alloc_path();
Tsutomu Itohcb1b69f2011-08-09 07:11:13 +00008406 if (!path) {
8407 err = -ENOMEM;
8408 goto out;
8409 }
Chris Mason20524f02007-03-10 06:35:47 -05008410
Yan Zheng2c47e6052009-06-27 21:07:35 -04008411 wc = kzalloc(sizeof(*wc), GFP_NOFS);
Mark Fasheh38a1a912011-07-13 10:59:59 -07008412 if (!wc) {
8413 btrfs_free_path(path);
Tsutomu Itohcb1b69f2011-08-09 07:11:13 +00008414 err = -ENOMEM;
8415 goto out;
Mark Fasheh38a1a912011-07-13 10:59:59 -07008416 }
Yan Zheng2c47e6052009-06-27 21:07:35 -04008417
Yan, Zhenga22285a2010-05-16 10:48:46 -04008418 trans = btrfs_start_transaction(tree_root, 0);
Jeff Mahoney79787ea2012-03-12 16:03:00 +01008419 if (IS_ERR(trans)) {
8420 err = PTR_ERR(trans);
8421 goto out_free;
8422 }
Tsutomu Itoh98d5dc12011-01-20 06:19:37 +00008423
Yan, Zheng3fd0a552010-05-16 10:49:59 -04008424 if (block_rsv)
8425 trans->block_rsv = block_rsv;
Yan Zheng2c47e6052009-06-27 21:07:35 -04008426
Chris Mason9f3a7422007-08-07 15:52:19 -04008427 if (btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
Yan Zheng2c47e6052009-06-27 21:07:35 -04008428 level = btrfs_header_level(root->node);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04008429 path->nodes[level] = btrfs_lock_root_node(root);
8430 btrfs_set_lock_blocking(path->nodes[level]);
Chris Mason9f3a7422007-08-07 15:52:19 -04008431 path->slots[level] = 0;
Chris Masonbd681512011-07-16 15:23:14 -04008432 path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
Yan Zheng2c47e6052009-06-27 21:07:35 -04008433 memset(&wc->update_progress, 0,
8434 sizeof(wc->update_progress));
Chris Mason9f3a7422007-08-07 15:52:19 -04008435 } else {
Chris Mason9f3a7422007-08-07 15:52:19 -04008436 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
Yan Zheng2c47e6052009-06-27 21:07:35 -04008437 memcpy(&wc->update_progress, &key,
8438 sizeof(wc->update_progress));
8439
Chris Mason6702ed42007-08-07 16:15:09 -04008440 level = root_item->drop_level;
Yan Zheng2c47e6052009-06-27 21:07:35 -04008441 BUG_ON(level == 0);
Chris Mason6702ed42007-08-07 16:15:09 -04008442 path->lowest_level = level;
Yan Zheng2c47e6052009-06-27 21:07:35 -04008443 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
8444 path->lowest_level = 0;
8445 if (ret < 0) {
8446 err = ret;
Jeff Mahoney79787ea2012-03-12 16:03:00 +01008447 goto out_end_trans;
Chris Mason9f3a7422007-08-07 15:52:19 -04008448 }
Yan, Zheng1c4850e2009-09-21 15:55:59 -04008449 WARN_ON(ret > 0);
Yan Zheng2c47e6052009-06-27 21:07:35 -04008450
Chris Mason7d9eb122008-07-08 14:19:17 -04008451 /*
8452 * unlock our path, this is safe because only this
8453 * function is allowed to delete this snapshot
8454 */
Yan Zheng5d4f98a2009-06-10 10:45:14 -04008455 btrfs_unlock_up_safe(path, 0);
Chris Mason9aca1d52007-03-13 11:09:37 -04008456
Yan Zheng2c47e6052009-06-27 21:07:35 -04008457 level = btrfs_header_level(root->node);
8458 while (1) {
8459 btrfs_tree_lock(path->nodes[level]);
8460 btrfs_set_lock_blocking(path->nodes[level]);
Josef Bacikfec386a2013-07-15 12:41:42 -04008461 path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
Yan Zheng2c47e6052009-06-27 21:07:35 -04008462
8463 ret = btrfs_lookup_extent_info(trans, root,
8464 path->nodes[level]->start,
Josef Bacik3173a182013-03-07 14:22:04 -05008465 level, 1, &wc->refs[level],
Yan Zheng2c47e6052009-06-27 21:07:35 -04008466 &wc->flags[level]);
Jeff Mahoney79787ea2012-03-12 16:03:00 +01008467 if (ret < 0) {
8468 err = ret;
8469 goto out_end_trans;
8470 }
Yan Zheng2c47e6052009-06-27 21:07:35 -04008471 BUG_ON(wc->refs[level] == 0);
8472
8473 if (level == root_item->drop_level)
8474 break;
8475
8476 btrfs_tree_unlock(path->nodes[level]);
Josef Bacikfec386a2013-07-15 12:41:42 -04008477 path->locks[level] = 0;
Yan Zheng2c47e6052009-06-27 21:07:35 -04008478 WARN_ON(wc->refs[level] != 1);
8479 level--;
8480 }
8481 }
8482
8483 wc->level = level;
8484 wc->shared_level = -1;
8485 wc->stage = DROP_REFERENCE;
8486 wc->update_ref = update_ref;
8487 wc->keep_locks = 0;
Arne Jansen66d7e7f2011-09-12 15:26:38 +02008488 wc->for_reloc = for_reloc;
Yan, Zheng1c4850e2009-09-21 15:55:59 -04008489 wc->reada_count = BTRFS_NODEPTRS_PER_BLOCK(root);
Yan Zheng2c47e6052009-06-27 21:07:35 -04008490
8491 while (1) {
David Sterba9d1a2a32013-03-12 15:13:28 +00008492
Yan Zheng2c47e6052009-06-27 21:07:35 -04008493 ret = walk_down_tree(trans, root, path, wc);
8494 if (ret < 0) {
8495 err = ret;
Chris Masone7a84562008-06-25 16:01:31 -04008496 break;
8497 }
Yan Zheng2c47e6052009-06-27 21:07:35 -04008498
8499 ret = walk_up_tree(trans, root, path, wc, BTRFS_MAX_LEVEL);
8500 if (ret < 0) {
8501 err = ret;
8502 break;
8503 }
8504
8505 if (ret > 0) {
8506 BUG_ON(wc->stage != DROP_REFERENCE);
8507 break;
8508 }
8509
8510 if (wc->stage == DROP_REFERENCE) {
8511 level = wc->level;
8512 btrfs_node_key(path->nodes[level],
8513 &root_item->drop_progress,
8514 path->slots[level]);
8515 root_item->drop_level = level;
8516 }
8517
8518 BUG_ON(wc->level == 0);
Josef Bacik3c8f2422013-07-15 11:57:06 -04008519 if (btrfs_should_end_transaction(trans, tree_root) ||
8520 (!for_reloc && btrfs_need_cleaner_sleep(root))) {
Yan Zheng2c47e6052009-06-27 21:07:35 -04008521 ret = btrfs_update_root(trans, tree_root,
8522 &root->root_key,
8523 root_item);
Jeff Mahoney79787ea2012-03-12 16:03:00 +01008524 if (ret) {
8525 btrfs_abort_transaction(trans, tree_root, ret);
8526 err = ret;
8527 goto out_end_trans;
8528 }
Yan Zheng2c47e6052009-06-27 21:07:35 -04008529
Yan, Zheng3fd0a552010-05-16 10:49:59 -04008530 btrfs_end_transaction_throttle(trans, tree_root);
Josef Bacik3c8f2422013-07-15 11:57:06 -04008531 if (!for_reloc && btrfs_need_cleaner_sleep(root)) {
Frank Holtonefe120a2013-12-20 11:37:06 -05008532 pr_debug("BTRFS: drop snapshot early exit\n");
Josef Bacik3c8f2422013-07-15 11:57:06 -04008533 err = -EAGAIN;
8534 goto out_free;
8535 }
8536
Yan, Zhenga22285a2010-05-16 10:48:46 -04008537 trans = btrfs_start_transaction(tree_root, 0);
Jeff Mahoney79787ea2012-03-12 16:03:00 +01008538 if (IS_ERR(trans)) {
8539 err = PTR_ERR(trans);
8540 goto out_free;
8541 }
Yan, Zheng3fd0a552010-05-16 10:49:59 -04008542 if (block_rsv)
8543 trans->block_rsv = block_rsv;
Chris Masonc3e69d52009-03-13 10:17:05 -04008544 }
Chris Mason20524f02007-03-10 06:35:47 -05008545 }
David Sterbab3b4aa72011-04-21 01:20:15 +02008546 btrfs_release_path(path);
Jeff Mahoney79787ea2012-03-12 16:03:00 +01008547 if (err)
8548 goto out_end_trans;
Yan Zheng2c47e6052009-06-27 21:07:35 -04008549
8550 ret = btrfs_del_root(trans, tree_root, &root->root_key);
Jeff Mahoney79787ea2012-03-12 16:03:00 +01008551 if (ret) {
8552 btrfs_abort_transaction(trans, tree_root, ret);
8553 goto out_end_trans;
8554 }
Yan Zheng2c47e6052009-06-27 21:07:35 -04008555
Yan, Zheng76dda932009-09-21 16:00:26 -04008556 if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
Miao Xiecb517ea2013-05-15 07:48:19 +00008557 ret = btrfs_find_root(tree_root, &root->root_key, path,
8558 NULL, NULL);
Jeff Mahoney79787ea2012-03-12 16:03:00 +01008559 if (ret < 0) {
8560 btrfs_abort_transaction(trans, tree_root, ret);
8561 err = ret;
8562 goto out_end_trans;
8563 } else if (ret > 0) {
Josef Bacik84cd9482010-12-08 12:24:01 -05008564 /* if we fail to delete the orphan item this time
8565 * around, it'll get picked up the next time.
8566 *
8567 * The most common failure here is just -ENOENT.
8568 */
8569 btrfs_del_orphan_item(trans, tree_root,
8570 root->root_key.objectid);
Yan, Zheng76dda932009-09-21 16:00:26 -04008571 }
8572 }
8573
Miao Xie27cdeb72014-04-02 19:51:05 +08008574 if (test_bit(BTRFS_ROOT_IN_RADIX, &root->state)) {
Miao Xiecb517ea2013-05-15 07:48:19 +00008575 btrfs_drop_and_free_fs_root(tree_root->fs_info, root);
Yan, Zheng76dda932009-09-21 16:00:26 -04008576 } else {
8577 free_extent_buffer(root->node);
8578 free_extent_buffer(root->commit_root);
Miao Xieb0feb9d2013-05-15 07:48:20 +00008579 btrfs_put_fs_root(root);
Yan, Zheng76dda932009-09-21 16:00:26 -04008580 }
Josef Bacikd29a9f62013-07-17 19:30:20 -04008581 root_dropped = true;
Jeff Mahoney79787ea2012-03-12 16:03:00 +01008582out_end_trans:
Yan, Zheng3fd0a552010-05-16 10:49:59 -04008583 btrfs_end_transaction_throttle(trans, tree_root);
Jeff Mahoney79787ea2012-03-12 16:03:00 +01008584out_free:
Yan Zheng2c47e6052009-06-27 21:07:35 -04008585 kfree(wc);
Chris Mason5caf2a02007-04-02 11:20:42 -04008586 btrfs_free_path(path);
Tsutomu Itohcb1b69f2011-08-09 07:11:13 +00008587out:
Josef Bacikd29a9f62013-07-17 19:30:20 -04008588 /*
8589 * So if we need to stop dropping the snapshot for whatever reason we
8590 * need to make sure to add it back to the dead root list so that we
8591 * keep trying to do the work later. This also cleans up roots if we
8592 * don't have it in the radix (like when we recover after a power fail
8593 * or unmount) so we don't leak memory.
8594 */
Josef Bacikb37b39c2013-07-23 16:57:15 -04008595 if (!for_reloc && root_dropped == false)
Josef Bacikd29a9f62013-07-17 19:30:20 -04008596 btrfs_add_dead_root(root);
Wang Shilong90515e72014-01-07 17:26:58 +08008597 if (err && err != -EAGAIN)
Tsutomu Itohcb1b69f2011-08-09 07:11:13 +00008598 btrfs_std_error(root->fs_info, err);
Jeff Mahoney2c536792011-10-03 23:22:41 -04008599 return err;
Chris Mason20524f02007-03-10 06:35:47 -05008600}
Chris Mason9078a3e2007-04-26 16:46:15 -04008601
/*
 * drop subtree rooted at tree block 'node'.
 *
 * NOTE: this function will unlock and release tree block 'node'
 * only used by relocation code
 */
Yan Zhengf82d02d2008-10-29 14:49:05 -04008608int btrfs_drop_subtree(struct btrfs_trans_handle *trans,
8609 struct btrfs_root *root,
8610 struct extent_buffer *node,
8611 struct extent_buffer *parent)
8612{
8613 struct btrfs_path *path;
Yan Zheng2c47e6052009-06-27 21:07:35 -04008614 struct walk_control *wc;
Yan Zhengf82d02d2008-10-29 14:49:05 -04008615 int level;
8616 int parent_level;
8617 int ret = 0;
8618 int wret;
8619
Yan Zheng2c47e6052009-06-27 21:07:35 -04008620 BUG_ON(root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID);
8621
Yan Zhengf82d02d2008-10-29 14:49:05 -04008622 path = btrfs_alloc_path();
Tsutomu Itohdb5b4932011-03-23 08:14:16 +00008623 if (!path)
8624 return -ENOMEM;
Yan Zhengf82d02d2008-10-29 14:49:05 -04008625
Yan Zheng2c47e6052009-06-27 21:07:35 -04008626 wc = kzalloc(sizeof(*wc), GFP_NOFS);
Tsutomu Itohdb5b4932011-03-23 08:14:16 +00008627 if (!wc) {
8628 btrfs_free_path(path);
8629 return -ENOMEM;
8630 }
Yan Zheng2c47e6052009-06-27 21:07:35 -04008631
Chris Masonb9447ef2009-03-09 11:45:38 -04008632 btrfs_assert_tree_locked(parent);
Yan Zhengf82d02d2008-10-29 14:49:05 -04008633 parent_level = btrfs_header_level(parent);
8634 extent_buffer_get(parent);
8635 path->nodes[parent_level] = parent;
8636 path->slots[parent_level] = btrfs_header_nritems(parent);
8637
Chris Masonb9447ef2009-03-09 11:45:38 -04008638 btrfs_assert_tree_locked(node);
Yan Zhengf82d02d2008-10-29 14:49:05 -04008639 level = btrfs_header_level(node);
Yan Zhengf82d02d2008-10-29 14:49:05 -04008640 path->nodes[level] = node;
8641 path->slots[level] = 0;
Chris Masonbd681512011-07-16 15:23:14 -04008642 path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
Yan Zheng2c47e6052009-06-27 21:07:35 -04008643
8644 wc->refs[parent_level] = 1;
8645 wc->flags[parent_level] = BTRFS_BLOCK_FLAG_FULL_BACKREF;
8646 wc->level = level;
8647 wc->shared_level = -1;
8648 wc->stage = DROP_REFERENCE;
8649 wc->update_ref = 0;
8650 wc->keep_locks = 1;
Arne Jansen66d7e7f2011-09-12 15:26:38 +02008651 wc->for_reloc = 1;
Yan, Zheng1c4850e2009-09-21 15:55:59 -04008652 wc->reada_count = BTRFS_NODEPTRS_PER_BLOCK(root);
Yan Zhengf82d02d2008-10-29 14:49:05 -04008653
8654 while (1) {
Yan Zheng2c47e6052009-06-27 21:07:35 -04008655 wret = walk_down_tree(trans, root, path, wc);
8656 if (wret < 0) {
Yan Zhengf82d02d2008-10-29 14:49:05 -04008657 ret = wret;
Yan Zhengf82d02d2008-10-29 14:49:05 -04008658 break;
Yan Zheng2c47e6052009-06-27 21:07:35 -04008659 }
Yan Zhengf82d02d2008-10-29 14:49:05 -04008660
Yan Zheng2c47e6052009-06-27 21:07:35 -04008661 wret = walk_up_tree(trans, root, path, wc, parent_level);
Yan Zhengf82d02d2008-10-29 14:49:05 -04008662 if (wret < 0)
8663 ret = wret;
8664 if (wret != 0)
8665 break;
8666 }
8667
Yan Zheng2c47e6052009-06-27 21:07:35 -04008668 kfree(wc);
Yan Zhengf82d02d2008-10-29 14:49:05 -04008669 btrfs_free_path(path);
8670 return ret;
8671}
8672
Chris Masonec44a352008-04-28 15:29:52 -04008673static u64 update_block_group_flags(struct btrfs_root *root, u64 flags)
8674{
8675 u64 num_devices;
Ilya Dryomovfc67c452012-03-27 17:09:17 +03008676 u64 stripped;
Chris Masonec44a352008-04-28 15:29:52 -04008677
Ilya Dryomovfc67c452012-03-27 17:09:17 +03008678 /*
8679 * if restripe for this chunk_type is on pick target profile and
8680 * return, otherwise do the usual balance
8681 */
8682 stripped = get_restripe_target(root->fs_info, flags);
8683 if (stripped)
8684 return extended_to_chunk(stripped);
Ilya Dryomove4d8ec02012-01-16 22:04:48 +02008685
Miao Xie95669972014-07-24 11:37:14 +08008686 num_devices = root->fs_info->fs_devices->rw_devices;
Chris Masoncd02dca2010-12-13 14:56:23 -05008687
Ilya Dryomovfc67c452012-03-27 17:09:17 +03008688 stripped = BTRFS_BLOCK_GROUP_RAID0 |
David Woodhouse53b381b2013-01-29 18:40:14 -05008689 BTRFS_BLOCK_GROUP_RAID5 | BTRFS_BLOCK_GROUP_RAID6 |
Ilya Dryomovfc67c452012-03-27 17:09:17 +03008690 BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID10;
8691
Chris Masonec44a352008-04-28 15:29:52 -04008692 if (num_devices == 1) {
8693 stripped |= BTRFS_BLOCK_GROUP_DUP;
8694 stripped = flags & ~stripped;
8695
8696 /* turn raid0 into single device chunks */
8697 if (flags & BTRFS_BLOCK_GROUP_RAID0)
8698 return stripped;
8699
8700 /* turn mirroring into duplication */
8701 if (flags & (BTRFS_BLOCK_GROUP_RAID1 |
8702 BTRFS_BLOCK_GROUP_RAID10))
8703 return stripped | BTRFS_BLOCK_GROUP_DUP;
Chris Masonec44a352008-04-28 15:29:52 -04008704 } else {
8705 /* they already had raid on here, just return */
Chris Masonec44a352008-04-28 15:29:52 -04008706 if (flags & stripped)
8707 return flags;
8708
8709 stripped |= BTRFS_BLOCK_GROUP_DUP;
8710 stripped = flags & ~stripped;
8711
8712 /* switch duplicated blocks with raid1 */
8713 if (flags & BTRFS_BLOCK_GROUP_DUP)
8714 return stripped | BTRFS_BLOCK_GROUP_RAID1;
8715
Ilya Dryomove3176ca2012-03-27 17:09:16 +03008716 /* this is drive concat, leave it alone */
Chris Masonec44a352008-04-28 15:29:52 -04008717 }
Ilya Dryomove3176ca2012-03-27 17:09:16 +03008718
Chris Masonec44a352008-04-28 15:29:52 -04008719 return flags;
8720}
8721
Zhaolei868f4012015-08-05 16:43:27 +08008722static int inc_block_group_ro(struct btrfs_block_group_cache *cache, int force)
Chris Mason0ef3e662008-05-24 14:04:53 -04008723{
Yan, Zhengf0486c62010-05-16 10:46:25 -04008724 struct btrfs_space_info *sinfo = cache->space_info;
8725 u64 num_bytes;
Miao Xie199c36e2011-07-15 10:34:36 +00008726 u64 min_allocable_bytes;
Yan, Zhengf0486c62010-05-16 10:46:25 -04008727 int ret = -ENOSPC;
Chris Mason0ef3e662008-05-24 14:04:53 -04008728
Miao Xie199c36e2011-07-15 10:34:36 +00008729 /*
8730 * We need some metadata space and system metadata space for
8731 * allocating chunks in some corner cases until we force to set
8732 * it to be readonly.
8733 */
8734 if ((sinfo->flags &
8735 (BTRFS_BLOCK_GROUP_SYSTEM | BTRFS_BLOCK_GROUP_METADATA)) &&
8736 !force)
8737 min_allocable_bytes = 1 * 1024 * 1024;
8738 else
8739 min_allocable_bytes = 0;
8740
Yan, Zhengf0486c62010-05-16 10:46:25 -04008741 spin_lock(&sinfo->lock);
8742 spin_lock(&cache->lock);
WuBo61cfea92011-07-26 03:30:11 +00008743
8744 if (cache->ro) {
Zhaolei868f4012015-08-05 16:43:27 +08008745 cache->ro++;
WuBo61cfea92011-07-26 03:30:11 +00008746 ret = 0;
8747 goto out;
8748 }
8749
Yan, Zhengf0486c62010-05-16 10:46:25 -04008750 num_bytes = cache->key.offset - cache->reserved - cache->pinned -
8751 cache->bytes_super - btrfs_block_group_used(&cache->item);
Chris Mason7d9eb122008-07-08 14:19:17 -04008752
Yan, Zhengf0486c62010-05-16 10:46:25 -04008753 if (sinfo->bytes_used + sinfo->bytes_reserved + sinfo->bytes_pinned +
Josef Bacik37be25b2011-08-05 10:25:38 -04008754 sinfo->bytes_may_use + sinfo->bytes_readonly + num_bytes +
8755 min_allocable_bytes <= sinfo->total_bytes) {
Yan, Zhengf0486c62010-05-16 10:46:25 -04008756 sinfo->bytes_readonly += num_bytes;
Zhaolei868f4012015-08-05 16:43:27 +08008757 cache->ro++;
Josef Bacik633c0aa2014-10-31 09:49:34 -04008758 list_add_tail(&cache->ro_list, &sinfo->ro_bgs);
Yan, Zhengf0486c62010-05-16 10:46:25 -04008759 ret = 0;
8760 }
WuBo61cfea92011-07-26 03:30:11 +00008761out:
Yan, Zhengf0486c62010-05-16 10:46:25 -04008762 spin_unlock(&cache->lock);
8763 spin_unlock(&sinfo->lock);
8764 return ret;
Chris Mason0ef3e662008-05-24 14:04:53 -04008765}
8766
Zhaolei868f4012015-08-05 16:43:27 +08008767int btrfs_inc_block_group_ro(struct btrfs_root *root,
Yan, Zhengf0486c62010-05-16 10:46:25 -04008768 struct btrfs_block_group_cache *cache)
Yan Zheng5d4f98a2009-06-10 10:45:14 -04008769
8770{
Yan, Zhengf0486c62010-05-16 10:46:25 -04008771 struct btrfs_trans_handle *trans;
8772 u64 alloc_flags;
8773 int ret;
8774
Chris Mason1bbc6212015-04-06 12:46:08 -07008775again:
Josef Bacik7a7eaa42011-04-13 12:54:33 -04008776 trans = btrfs_join_transaction(root);
Jeff Mahoney79787ea2012-03-12 16:03:00 +01008777 if (IS_ERR(trans))
8778 return PTR_ERR(trans);
Yan, Zhengf0486c62010-05-16 10:46:25 -04008779
Chris Mason1bbc6212015-04-06 12:46:08 -07008780 /*
8781 * we're not allowed to set block groups readonly after the dirty
8782 * block groups cache has started writing. If it already started,
8783 * back off and let this transaction commit
8784 */
8785 mutex_lock(&root->fs_info->ro_block_group_mutex);
8786 if (trans->transaction->dirty_bg_run) {
8787 u64 transid = trans->transid;
8788
8789 mutex_unlock(&root->fs_info->ro_block_group_mutex);
8790 btrfs_end_transaction(trans, root);
8791
8792 ret = btrfs_wait_for_commit(root, transid);
8793 if (ret)
8794 return ret;
8795 goto again;
8796 }
8797
Chris Mason153c35b2015-05-19 18:54:41 -07008798 /*
8799 * if we are changing raid levels, try to allocate a corresponding
8800 * block group with the new raid level.
8801 */
8802 alloc_flags = update_block_group_flags(root, cache->flags);
8803 if (alloc_flags != cache->flags) {
8804 ret = do_chunk_alloc(trans, root, alloc_flags,
8805 CHUNK_ALLOC_FORCE);
8806 /*
8807 * ENOSPC is allowed here, we may have enough space
8808 * already allocated at the new raid level to
8809 * carry on
8810 */
8811 if (ret == -ENOSPC)
8812 ret = 0;
8813 if (ret < 0)
8814 goto out;
8815 }
Chris Mason1bbc6212015-04-06 12:46:08 -07008816
Zhaolei868f4012015-08-05 16:43:27 +08008817 ret = inc_block_group_ro(cache, 0);
Yan, Zhengf0486c62010-05-16 10:46:25 -04008818 if (!ret)
8819 goto out;
8820 alloc_flags = get_alloc_profile(root, cache->space_info->flags);
Josef Bacik698d0082012-09-12 14:08:47 -04008821 ret = do_chunk_alloc(trans, root, alloc_flags,
Chris Mason0e4f8f82011-04-15 16:05:44 -04008822 CHUNK_ALLOC_FORCE);
Yan, Zhengf0486c62010-05-16 10:46:25 -04008823 if (ret < 0)
8824 goto out;
Zhaolei868f4012015-08-05 16:43:27 +08008825 ret = inc_block_group_ro(cache, 0);
Yan, Zhengf0486c62010-05-16 10:46:25 -04008826out:
Shaohua Li2f081082015-01-09 10:40:15 -08008827 if (cache->flags & BTRFS_BLOCK_GROUP_SYSTEM) {
8828 alloc_flags = update_block_group_flags(root, cache->flags);
Filipe Mananaa9629592015-05-18 19:11:40 +01008829 lock_chunks(root->fs_info->chunk_root);
Filipe Manana4617ea32015-06-09 17:48:21 +01008830 check_system_chunk(trans, root, alloc_flags);
Filipe Mananaa9629592015-05-18 19:11:40 +01008831 unlock_chunks(root->fs_info->chunk_root);
Shaohua Li2f081082015-01-09 10:40:15 -08008832 }
Chris Mason1bbc6212015-04-06 12:46:08 -07008833 mutex_unlock(&root->fs_info->ro_block_group_mutex);
Shaohua Li2f081082015-01-09 10:40:15 -08008834
Yan, Zhengf0486c62010-05-16 10:46:25 -04008835 btrfs_end_transaction(trans, root);
8836 return ret;
8837}
8838
Chris Masonc87f08c2011-02-16 13:57:04 -05008839int btrfs_force_chunk_alloc(struct btrfs_trans_handle *trans,
8840 struct btrfs_root *root, u64 type)
8841{
8842 u64 alloc_flags = get_alloc_profile(root, type);
Josef Bacik698d0082012-09-12 14:08:47 -04008843 return do_chunk_alloc(trans, root, alloc_flags,
Chris Mason0e4f8f82011-04-15 16:05:44 -04008844 CHUNK_ALLOC_FORCE);
Chris Masonc87f08c2011-02-16 13:57:04 -05008845}
8846
Miao Xie6d07bce2011-01-05 10:07:31 +00008847/*
8848 * helper to account the unused space of all the readonly block group in the
Josef Bacik633c0aa2014-10-31 09:49:34 -04008849 * space_info. takes mirrors into account.
Miao Xie6d07bce2011-01-05 10:07:31 +00008850 */
Josef Bacik633c0aa2014-10-31 09:49:34 -04008851u64 btrfs_account_ro_block_groups_free_space(struct btrfs_space_info *sinfo)
Miao Xie6d07bce2011-01-05 10:07:31 +00008852{
8853 struct btrfs_block_group_cache *block_group;
8854 u64 free_bytes = 0;
8855 int factor;
8856
Josef Bacik633c0aa2014-10-31 09:49:34 -04008857 /* It's df, we don't care if it's racey */
8858 if (list_empty(&sinfo->ro_bgs))
8859 return 0;
8860
8861 spin_lock(&sinfo->lock);
8862 list_for_each_entry(block_group, &sinfo->ro_bgs, ro_list) {
Miao Xie6d07bce2011-01-05 10:07:31 +00008863 spin_lock(&block_group->lock);
8864
8865 if (!block_group->ro) {
8866 spin_unlock(&block_group->lock);
8867 continue;
8868 }
8869
8870 if (block_group->flags & (BTRFS_BLOCK_GROUP_RAID1 |
8871 BTRFS_BLOCK_GROUP_RAID10 |
8872 BTRFS_BLOCK_GROUP_DUP))
8873 factor = 2;
8874 else
8875 factor = 1;
8876
8877 free_bytes += (block_group->key.offset -
8878 btrfs_block_group_used(&block_group->item)) *
8879 factor;
8880
8881 spin_unlock(&block_group->lock);
8882 }
Miao Xie6d07bce2011-01-05 10:07:31 +00008883 spin_unlock(&sinfo->lock);
8884
8885 return free_bytes;
8886}
8887
Zhaolei868f4012015-08-05 16:43:27 +08008888void btrfs_dec_block_group_ro(struct btrfs_root *root,
Yan, Zhengf0486c62010-05-16 10:46:25 -04008889 struct btrfs_block_group_cache *cache)
8890{
8891 struct btrfs_space_info *sinfo = cache->space_info;
8892 u64 num_bytes;
8893
8894 BUG_ON(!cache->ro);
8895
8896 spin_lock(&sinfo->lock);
8897 spin_lock(&cache->lock);
Zhaolei868f4012015-08-05 16:43:27 +08008898 if (!--cache->ro) {
8899 num_bytes = cache->key.offset - cache->reserved -
8900 cache->pinned - cache->bytes_super -
8901 btrfs_block_group_used(&cache->item);
8902 sinfo->bytes_readonly -= num_bytes;
8903 list_del_init(&cache->ro_list);
8904 }
Yan, Zhengf0486c62010-05-16 10:46:25 -04008905 spin_unlock(&cache->lock);
8906 spin_unlock(&sinfo->lock);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04008907}
8908
Josef Bacikba1bf482009-09-11 16:11:19 -04008909/*
8910 * checks to see if its even possible to relocate this block group.
8911 *
8912 * @return - -1 if it's not a good idea to relocate this block group, 0 if its
8913 * ok to go ahead and try.
8914 */
8915int btrfs_can_relocate(struct btrfs_root *root, u64 bytenr)
Zheng Yan1a40e232008-09-26 10:09:34 -04008916{
Zheng Yan1a40e232008-09-26 10:09:34 -04008917 struct btrfs_block_group_cache *block_group;
Josef Bacikba1bf482009-09-11 16:11:19 -04008918 struct btrfs_space_info *space_info;
8919 struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices;
8920 struct btrfs_device *device;
Josef Bacik6df9a952013-06-27 13:22:46 -04008921 struct btrfs_trans_handle *trans;
liubocdcb7252011-08-03 10:15:25 +00008922 u64 min_free;
Josef Bacik6719db62011-08-20 08:29:51 -04008923 u64 dev_min = 1;
8924 u64 dev_nr = 0;
Ilya Dryomov4a5e98f2012-03-27 17:09:17 +03008925 u64 target;
liubocdcb7252011-08-03 10:15:25 +00008926 int index;
Josef Bacikba1bf482009-09-11 16:11:19 -04008927 int full = 0;
8928 int ret = 0;
Chris Masonedbd8d42007-12-21 16:27:24 -05008929
Josef Bacikba1bf482009-09-11 16:11:19 -04008930 block_group = btrfs_lookup_block_group(root->fs_info, bytenr);
Zheng Yan1a40e232008-09-26 10:09:34 -04008931
Josef Bacikba1bf482009-09-11 16:11:19 -04008932 /* odd, couldn't find the block group, leave it alone */
8933 if (!block_group)
8934 return -1;
Chris Masonedbd8d42007-12-21 16:27:24 -05008935
liubocdcb7252011-08-03 10:15:25 +00008936 min_free = btrfs_block_group_used(&block_group->item);
8937
Josef Bacikba1bf482009-09-11 16:11:19 -04008938 /* no bytes used, we're good */
liubocdcb7252011-08-03 10:15:25 +00008939 if (!min_free)
Josef Bacikba1bf482009-09-11 16:11:19 -04008940 goto out;
Chris Mason323da792008-05-09 11:46:48 -04008941
Josef Bacikba1bf482009-09-11 16:11:19 -04008942 space_info = block_group->space_info;
8943 spin_lock(&space_info->lock);
Chris Mason323da792008-05-09 11:46:48 -04008944
Josef Bacikba1bf482009-09-11 16:11:19 -04008945 full = space_info->full;
Zheng Yan1a40e232008-09-26 10:09:34 -04008946
Josef Bacikba1bf482009-09-11 16:11:19 -04008947 /*
8948 * if this is the last block group we have in this space, we can't
Chris Mason7ce618d2009-09-22 14:48:44 -04008949 * relocate it unless we're able to allocate a new chunk below.
8950 *
8951 * Otherwise, we need to make sure we have room in the space to handle
8952 * all of the extents from this block group. If we can, we're good
Josef Bacikba1bf482009-09-11 16:11:19 -04008953 */
Chris Mason7ce618d2009-09-22 14:48:44 -04008954 if ((space_info->total_bytes != block_group->key.offset) &&
liubocdcb7252011-08-03 10:15:25 +00008955 (space_info->bytes_used + space_info->bytes_reserved +
8956 space_info->bytes_pinned + space_info->bytes_readonly +
8957 min_free < space_info->total_bytes)) {
Josef Bacikba1bf482009-09-11 16:11:19 -04008958 spin_unlock(&space_info->lock);
8959 goto out;
8960 }
8961 spin_unlock(&space_info->lock);
Zheng Yan1a40e232008-09-26 10:09:34 -04008962
Josef Bacikba1bf482009-09-11 16:11:19 -04008963 /*
8964 * ok we don't have enough space, but maybe we have free space on our
8965 * devices to allocate new chunks for relocation, so loop through our
Ilya Dryomov4a5e98f2012-03-27 17:09:17 +03008966 * alloc devices and guess if we have enough space. if this block
8967 * group is going to be restriped, run checks against the target
8968 * profile instead of the current one.
Josef Bacikba1bf482009-09-11 16:11:19 -04008969 */
8970 ret = -1;
Chris Mason4313b392008-01-03 09:08:48 -05008971
liubocdcb7252011-08-03 10:15:25 +00008972 /*
8973 * index:
8974 * 0: raid10
8975 * 1: raid1
8976 * 2: dup
8977 * 3: raid0
8978 * 4: single
8979 */
Ilya Dryomov4a5e98f2012-03-27 17:09:17 +03008980 target = get_restripe_target(root->fs_info, block_group->flags);
8981 if (target) {
Liu Bo31e50222012-11-21 14:18:10 +00008982 index = __get_raid_index(extended_to_chunk(target));
Ilya Dryomov4a5e98f2012-03-27 17:09:17 +03008983 } else {
8984 /*
8985 * this is just a balance, so if we were marked as full
8986 * we know there is no space for a new chunk
8987 */
8988 if (full)
8989 goto out;
8990
8991 index = get_block_group_index(block_group);
8992 }
8993
Miao Xiee6ec7162013-01-17 05:38:51 +00008994 if (index == BTRFS_RAID_RAID10) {
liubocdcb7252011-08-03 10:15:25 +00008995 dev_min = 4;
Josef Bacik6719db62011-08-20 08:29:51 -04008996 /* Divide by 2 */
8997 min_free >>= 1;
Miao Xiee6ec7162013-01-17 05:38:51 +00008998 } else if (index == BTRFS_RAID_RAID1) {
liubocdcb7252011-08-03 10:15:25 +00008999 dev_min = 2;
Miao Xiee6ec7162013-01-17 05:38:51 +00009000 } else if (index == BTRFS_RAID_DUP) {
Josef Bacik6719db62011-08-20 08:29:51 -04009001 /* Multiply by 2 */
9002 min_free <<= 1;
Miao Xiee6ec7162013-01-17 05:38:51 +00009003 } else if (index == BTRFS_RAID_RAID0) {
liubocdcb7252011-08-03 10:15:25 +00009004 dev_min = fs_devices->rw_devices;
David Sterba47c57132015-02-20 18:43:47 +01009005 min_free = div64_u64(min_free, dev_min);
liubocdcb7252011-08-03 10:15:25 +00009006 }
9007
Josef Bacik6df9a952013-06-27 13:22:46 -04009008 /* We need to do this so that we can look at pending chunks */
9009 trans = btrfs_join_transaction(root);
9010 if (IS_ERR(trans)) {
9011 ret = PTR_ERR(trans);
9012 goto out;
9013 }
9014
Josef Bacikba1bf482009-09-11 16:11:19 -04009015 mutex_lock(&root->fs_info->chunk_mutex);
9016 list_for_each_entry(device, &fs_devices->alloc_list, dev_alloc_list) {
Miao Xie7bfc8372011-01-05 10:07:26 +00009017 u64 dev_offset;
Chris Masonea8c2812008-08-04 23:17:27 -04009018
Josef Bacikba1bf482009-09-11 16:11:19 -04009019 /*
9020 * check to make sure we can actually find a chunk with enough
9021 * space to fit our block group in.
9022 */
Stefan Behrens63a212a2012-11-05 18:29:28 +01009023 if (device->total_bytes > device->bytes_used + min_free &&
9024 !device->is_tgtdev_for_dev_replace) {
Josef Bacik6df9a952013-06-27 13:22:46 -04009025 ret = find_free_dev_extent(trans, device, min_free,
Miao Xie7bfc8372011-01-05 10:07:26 +00009026 &dev_offset, NULL);
Josef Bacikba1bf482009-09-11 16:11:19 -04009027 if (!ret)
liubocdcb7252011-08-03 10:15:25 +00009028 dev_nr++;
9029
9030 if (dev_nr >= dev_min)
Yan73e48b22008-01-03 14:14:39 -05009031 break;
liubocdcb7252011-08-03 10:15:25 +00009032
Josef Bacikba1bf482009-09-11 16:11:19 -04009033 ret = -1;
Yan73e48b22008-01-03 14:14:39 -05009034 }
Chris Masonedbd8d42007-12-21 16:27:24 -05009035 }
Josef Bacikba1bf482009-09-11 16:11:19 -04009036 mutex_unlock(&root->fs_info->chunk_mutex);
Josef Bacik6df9a952013-06-27 13:22:46 -04009037 btrfs_end_transaction(trans, root);
Chris Masonedbd8d42007-12-21 16:27:24 -05009038out:
Josef Bacikba1bf482009-09-11 16:11:19 -04009039 btrfs_put_block_group(block_group);
Chris Masonedbd8d42007-12-21 16:27:24 -05009040 return ret;
9041}
9042
Christoph Hellwigb2950862008-12-02 09:54:17 -05009043static int find_first_block_group(struct btrfs_root *root,
9044 struct btrfs_path *path, struct btrfs_key *key)
Chris Mason0b86a832008-03-24 15:01:56 -04009045{
Chris Mason925baed2008-06-25 16:01:30 -04009046 int ret = 0;
Chris Mason0b86a832008-03-24 15:01:56 -04009047 struct btrfs_key found_key;
9048 struct extent_buffer *leaf;
9049 int slot;
9050
9051 ret = btrfs_search_slot(NULL, root, key, path, 0, 0);
9052 if (ret < 0)
Chris Mason925baed2008-06-25 16:01:30 -04009053 goto out;
9054
Chris Masond3977122009-01-05 21:25:51 -05009055 while (1) {
Chris Mason0b86a832008-03-24 15:01:56 -04009056 slot = path->slots[0];
9057 leaf = path->nodes[0];
9058 if (slot >= btrfs_header_nritems(leaf)) {
9059 ret = btrfs_next_leaf(root, path);
9060 if (ret == 0)
9061 continue;
9062 if (ret < 0)
Chris Mason925baed2008-06-25 16:01:30 -04009063 goto out;
Chris Mason0b86a832008-03-24 15:01:56 -04009064 break;
9065 }
9066 btrfs_item_key_to_cpu(leaf, &found_key, slot);
9067
9068 if (found_key.objectid >= key->objectid &&
Chris Mason925baed2008-06-25 16:01:30 -04009069 found_key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
9070 ret = 0;
9071 goto out;
9072 }
Chris Mason0b86a832008-03-24 15:01:56 -04009073 path->slots[0]++;
9074 }
Chris Mason925baed2008-06-25 16:01:30 -04009075out:
Chris Mason0b86a832008-03-24 15:01:56 -04009076 return ret;
9077}
9078
Josef Bacik0af3d002010-06-21 14:48:16 -04009079void btrfs_put_block_group_cache(struct btrfs_fs_info *info)
9080{
9081 struct btrfs_block_group_cache *block_group;
9082 u64 last = 0;
9083
9084 while (1) {
9085 struct inode *inode;
9086
9087 block_group = btrfs_lookup_first_block_group(info, last);
9088 while (block_group) {
9089 spin_lock(&block_group->lock);
9090 if (block_group->iref)
9091 break;
9092 spin_unlock(&block_group->lock);
9093 block_group = next_block_group(info->tree_root,
9094 block_group);
9095 }
9096 if (!block_group) {
9097 if (last == 0)
9098 break;
9099 last = 0;
9100 continue;
9101 }
9102
9103 inode = block_group->inode;
9104 block_group->iref = 0;
9105 block_group->inode = NULL;
9106 spin_unlock(&block_group->lock);
9107 iput(inode);
9108 last = block_group->key.objectid + block_group->key.offset;
9109 btrfs_put_block_group(block_group);
9110 }
9111}
9112
Zheng Yan1a40e232008-09-26 10:09:34 -04009113int btrfs_free_block_groups(struct btrfs_fs_info *info)
9114{
9115 struct btrfs_block_group_cache *block_group;
Chris Mason4184ea72009-03-10 12:39:20 -04009116 struct btrfs_space_info *space_info;
Yan Zheng11833d62009-09-11 16:11:19 -04009117 struct btrfs_caching_control *caching_ctl;
Zheng Yan1a40e232008-09-26 10:09:34 -04009118 struct rb_node *n;
9119
Josef Bacik9e351cc2014-03-13 15:42:13 -04009120 down_write(&info->commit_root_sem);
Yan Zheng11833d62009-09-11 16:11:19 -04009121 while (!list_empty(&info->caching_block_groups)) {
9122 caching_ctl = list_entry(info->caching_block_groups.next,
9123 struct btrfs_caching_control, list);
9124 list_del(&caching_ctl->list);
9125 put_caching_control(caching_ctl);
9126 }
Josef Bacik9e351cc2014-03-13 15:42:13 -04009127 up_write(&info->commit_root_sem);
Yan Zheng11833d62009-09-11 16:11:19 -04009128
Josef Bacik47ab2a62014-09-18 11:20:02 -04009129 spin_lock(&info->unused_bgs_lock);
9130 while (!list_empty(&info->unused_bgs)) {
9131 block_group = list_first_entry(&info->unused_bgs,
9132 struct btrfs_block_group_cache,
9133 bg_list);
9134 list_del_init(&block_group->bg_list);
9135 btrfs_put_block_group(block_group);
9136 }
9137 spin_unlock(&info->unused_bgs_lock);
9138
Zheng Yan1a40e232008-09-26 10:09:34 -04009139 spin_lock(&info->block_group_cache_lock);
9140 while ((n = rb_last(&info->block_group_cache_tree)) != NULL) {
9141 block_group = rb_entry(n, struct btrfs_block_group_cache,
9142 cache_node);
Zheng Yan1a40e232008-09-26 10:09:34 -04009143 rb_erase(&block_group->cache_node,
9144 &info->block_group_cache_tree);
Filipe Manana01eacb22014-12-04 18:38:30 +00009145 RB_CLEAR_NODE(&block_group->cache_node);
Yan Zhengd899e052008-10-30 14:25:28 -04009146 spin_unlock(&info->block_group_cache_lock);
9147
Josef Bacik80eb2342008-10-29 14:49:05 -04009148 down_write(&block_group->space_info->groups_sem);
Zheng Yan1a40e232008-09-26 10:09:34 -04009149 list_del(&block_group->list);
Josef Bacik80eb2342008-10-29 14:49:05 -04009150 up_write(&block_group->space_info->groups_sem);
Yan Zhengd2fb3432008-12-11 16:30:39 -05009151
Josef Bacik817d52f2009-07-13 21:29:25 -04009152 if (block_group->cached == BTRFS_CACHE_STARTED)
Yan Zheng11833d62009-09-11 16:11:19 -04009153 wait_block_group_cache_done(block_group);
Josef Bacik817d52f2009-07-13 21:29:25 -04009154
Josef Bacik3c148742011-02-02 15:53:47 +00009155 /*
9156 * We haven't cached this block group, which means we could
9157 * possibly have excluded extents on this block group.
9158 */
Josef Bacik36cce922013-08-05 11:15:21 -04009159 if (block_group->cached == BTRFS_CACHE_NO ||
9160 block_group->cached == BTRFS_CACHE_ERROR)
Josef Bacik3c148742011-02-02 15:53:47 +00009161 free_excluded_extents(info->extent_root, block_group);
9162
Josef Bacik817d52f2009-07-13 21:29:25 -04009163 btrfs_remove_free_space_cache(block_group);
Josef Bacik11dfe352009-11-13 20:12:59 +00009164 btrfs_put_block_group(block_group);
Yan Zhengd899e052008-10-30 14:25:28 -04009165
9166 spin_lock(&info->block_group_cache_lock);
Zheng Yan1a40e232008-09-26 10:09:34 -04009167 }
9168 spin_unlock(&info->block_group_cache_lock);
Chris Mason4184ea72009-03-10 12:39:20 -04009169
9170 /* now that all the block groups are freed, go through and
9171 * free all the space_info structs. This is only called during
9172 * the final stages of unmount, and so we know nobody is
9173 * using them. We call synchronize_rcu() once before we start,
9174 * just to be on the safe side.
9175 */
9176 synchronize_rcu();
9177
Yan, Zheng8929ecfa2010-05-16 10:49:58 -04009178 release_global_block_rsv(info);
9179
Dulshani Gunawardhana67871252013-10-31 10:33:04 +05309180 while (!list_empty(&info->space_info)) {
Jeff Mahoney6ab0a202013-11-01 13:07:04 -04009181 int i;
9182
Chris Mason4184ea72009-03-10 12:39:20 -04009183 space_info = list_entry(info->space_info.next,
9184 struct btrfs_space_info,
9185 list);
David Sterbab069e0c2013-02-08 21:28:17 +00009186 if (btrfs_test_opt(info->tree_root, ENOSPC_DEBUG)) {
Dulshani Gunawardhanafae7f212013-10-31 10:30:08 +05309187 if (WARN_ON(space_info->bytes_pinned > 0 ||
David Sterbab069e0c2013-02-08 21:28:17 +00009188 space_info->bytes_reserved > 0 ||
Dulshani Gunawardhanafae7f212013-10-31 10:30:08 +05309189 space_info->bytes_may_use > 0)) {
David Sterbab069e0c2013-02-08 21:28:17 +00009190 dump_space_info(space_info, 0, 0);
9191 }
Yan, Zhengf0486c62010-05-16 10:46:25 -04009192 }
Chris Mason4184ea72009-03-10 12:39:20 -04009193 list_del(&space_info->list);
Jeff Mahoney6ab0a202013-11-01 13:07:04 -04009194 for (i = 0; i < BTRFS_NR_RAID_TYPES; i++) {
9195 struct kobject *kobj;
Jeff Mahoneyc1895442014-05-27 12:59:57 -04009196 kobj = space_info->block_group_kobjs[i];
9197 space_info->block_group_kobjs[i] = NULL;
9198 if (kobj) {
Jeff Mahoney6ab0a202013-11-01 13:07:04 -04009199 kobject_del(kobj);
9200 kobject_put(kobj);
9201 }
9202 }
9203 kobject_del(&space_info->kobj);
9204 kobject_put(&space_info->kobj);
Chris Mason4184ea72009-03-10 12:39:20 -04009205 }
Zheng Yan1a40e232008-09-26 10:09:34 -04009206 return 0;
9207}
9208
Yan, Zhengb742bb82010-05-16 10:46:24 -04009209static void __link_block_group(struct btrfs_space_info *space_info,
9210 struct btrfs_block_group_cache *cache)
9211{
9212 int index = get_block_group_index(cache);
Jeff Mahoneyed55b6a2014-03-26 14:11:26 -04009213 bool first = false;
Yan, Zhengb742bb82010-05-16 10:46:24 -04009214
9215 down_write(&space_info->groups_sem);
Jeff Mahoneyed55b6a2014-03-26 14:11:26 -04009216 if (list_empty(&space_info->block_groups[index]))
9217 first = true;
9218 list_add_tail(&cache->list, &space_info->block_groups[index]);
9219 up_write(&space_info->groups_sem);
9220
9221 if (first) {
Jeff Mahoneyc1895442014-05-27 12:59:57 -04009222 struct raid_kobject *rkobj;
Jeff Mahoney6ab0a202013-11-01 13:07:04 -04009223 int ret;
9224
Jeff Mahoneyc1895442014-05-27 12:59:57 -04009225 rkobj = kzalloc(sizeof(*rkobj), GFP_NOFS);
9226 if (!rkobj)
9227 goto out_err;
9228 rkobj->raid_type = index;
9229 kobject_init(&rkobj->kobj, &btrfs_raid_ktype);
9230 ret = kobject_add(&rkobj->kobj, &space_info->kobj,
9231 "%s", get_raid_name(index));
Jeff Mahoney6ab0a202013-11-01 13:07:04 -04009232 if (ret) {
Jeff Mahoneyc1895442014-05-27 12:59:57 -04009233 kobject_put(&rkobj->kobj);
9234 goto out_err;
Jeff Mahoney6ab0a202013-11-01 13:07:04 -04009235 }
Jeff Mahoneyc1895442014-05-27 12:59:57 -04009236 space_info->block_group_kobjs[index] = &rkobj->kobj;
Jeff Mahoney6ab0a202013-11-01 13:07:04 -04009237 }
Jeff Mahoneyc1895442014-05-27 12:59:57 -04009238
9239 return;
9240out_err:
9241 pr_warn("BTRFS: failed to add kobject for block cache. ignoring.\n");
Yan, Zhengb742bb82010-05-16 10:46:24 -04009242}
9243
Miao Xie920e4a52014-01-15 20:00:55 +08009244static struct btrfs_block_group_cache *
9245btrfs_create_block_group_cache(struct btrfs_root *root, u64 start, u64 size)
9246{
9247 struct btrfs_block_group_cache *cache;
9248
9249 cache = kzalloc(sizeof(*cache), GFP_NOFS);
9250 if (!cache)
9251 return NULL;
9252
9253 cache->free_space_ctl = kzalloc(sizeof(*cache->free_space_ctl),
9254 GFP_NOFS);
9255 if (!cache->free_space_ctl) {
9256 kfree(cache);
9257 return NULL;
9258 }
9259
9260 cache->key.objectid = start;
9261 cache->key.offset = size;
9262 cache->key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
9263
9264 cache->sectorsize = root->sectorsize;
9265 cache->fs_info = root->fs_info;
9266 cache->full_stripe_len = btrfs_full_stripe_len(root,
9267 &root->fs_info->mapping_tree,
9268 start);
9269 atomic_set(&cache->count, 1);
9270 spin_lock_init(&cache->lock);
Miao Xiee570fd22014-06-19 10:42:50 +08009271 init_rwsem(&cache->data_rwsem);
Miao Xie920e4a52014-01-15 20:00:55 +08009272 INIT_LIST_HEAD(&cache->list);
9273 INIT_LIST_HEAD(&cache->cluster_list);
Josef Bacik47ab2a62014-09-18 11:20:02 -04009274 INIT_LIST_HEAD(&cache->bg_list);
Josef Bacik633c0aa2014-10-31 09:49:34 -04009275 INIT_LIST_HEAD(&cache->ro_list);
Josef Bacikce93ec52014-11-17 15:45:48 -05009276 INIT_LIST_HEAD(&cache->dirty_list);
Chris Masonc9dc4c62015-04-04 17:14:42 -07009277 INIT_LIST_HEAD(&cache->io_list);
Miao Xie920e4a52014-01-15 20:00:55 +08009278 btrfs_init_free_space_ctl(cache);
Filipe Manana04216822014-11-27 21:14:15 +00009279 atomic_set(&cache->trimming, 0);
Miao Xie920e4a52014-01-15 20:00:55 +08009280
9281 return cache;
9282}
9283
Chris Mason9078a3e2007-04-26 16:46:15 -04009284int btrfs_read_block_groups(struct btrfs_root *root)
9285{
9286 struct btrfs_path *path;
9287 int ret;
Chris Mason9078a3e2007-04-26 16:46:15 -04009288 struct btrfs_block_group_cache *cache;
Chris Masonbe744172007-05-06 10:15:01 -04009289 struct btrfs_fs_info *info = root->fs_info;
Chris Mason6324fbf2008-03-24 15:01:59 -04009290 struct btrfs_space_info *space_info;
Chris Mason9078a3e2007-04-26 16:46:15 -04009291 struct btrfs_key key;
9292 struct btrfs_key found_key;
Chris Mason5f39d392007-10-15 16:14:19 -04009293 struct extent_buffer *leaf;
Josef Bacik0af3d002010-06-21 14:48:16 -04009294 int need_clear = 0;
9295 u64 cache_gen;
Chris Mason96b51792007-10-15 16:15:19 -04009296
Chris Masonbe744172007-05-06 10:15:01 -04009297 root = info->extent_root;
Chris Mason9078a3e2007-04-26 16:46:15 -04009298 key.objectid = 0;
Chris Mason0b86a832008-03-24 15:01:56 -04009299 key.offset = 0;
David Sterba962a2982014-06-04 18:41:45 +02009300 key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
Chris Mason9078a3e2007-04-26 16:46:15 -04009301 path = btrfs_alloc_path();
9302 if (!path)
9303 return -ENOMEM;
Josef Bacik026fd312011-05-13 10:32:11 -04009304 path->reada = 1;
Chris Mason9078a3e2007-04-26 16:46:15 -04009305
David Sterba6c417612011-04-13 15:41:04 +02009306 cache_gen = btrfs_super_cache_generation(root->fs_info->super_copy);
Josef Bacik73bc1872011-10-03 14:07:49 -04009307 if (btrfs_test_opt(root, SPACE_CACHE) &&
David Sterba6c417612011-04-13 15:41:04 +02009308 btrfs_super_generation(root->fs_info->super_copy) != cache_gen)
Josef Bacik0af3d002010-06-21 14:48:16 -04009309 need_clear = 1;
Josef Bacik88c2ba32010-09-21 14:21:34 -04009310 if (btrfs_test_opt(root, CLEAR_CACHE))
9311 need_clear = 1;
Josef Bacik0af3d002010-06-21 14:48:16 -04009312
Chris Masond3977122009-01-05 21:25:51 -05009313 while (1) {
Chris Mason0b86a832008-03-24 15:01:56 -04009314 ret = find_first_block_group(root, path, &key);
Yan, Zhengb742bb82010-05-16 10:46:24 -04009315 if (ret > 0)
9316 break;
Chris Mason0b86a832008-03-24 15:01:56 -04009317 if (ret != 0)
9318 goto error;
Miao Xie920e4a52014-01-15 20:00:55 +08009319
Chris Mason5f39d392007-10-15 16:14:19 -04009320 leaf = path->nodes[0];
9321 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
Miao Xie920e4a52014-01-15 20:00:55 +08009322
9323 cache = btrfs_create_block_group_cache(root, found_key.objectid,
9324 found_key.offset);
Chris Mason9078a3e2007-04-26 16:46:15 -04009325 if (!cache) {
Chris Mason0b86a832008-03-24 15:01:56 -04009326 ret = -ENOMEM;
Yan, Zhengf0486c62010-05-16 10:46:25 -04009327 goto error;
Chris Mason9078a3e2007-04-26 16:46:15 -04009328 }
Josef Bacik96303082009-07-13 21:29:25 -04009329
Liu Bocf7c1ef2012-07-06 03:31:34 -06009330 if (need_clear) {
9331 /*
9332 * When we mount with old space cache, we need to
9333 * set BTRFS_DC_CLEAR and set dirty flag.
9334 *
9335 * a) Setting 'BTRFS_DC_CLEAR' makes sure that we
9336 * truncate the old free space cache inode and
9337 * setup a new one.
9338 * b) Setting 'dirty flag' makes sure that we flush
9339 * the new space cache info onto disk.
9340 */
Liu Bocf7c1ef2012-07-06 03:31:34 -06009341 if (btrfs_test_opt(root, SPACE_CACHE))
Josef Bacikce93ec52014-11-17 15:45:48 -05009342 cache->disk_cache_state = BTRFS_DC_CLEAR;
Liu Bocf7c1ef2012-07-06 03:31:34 -06009343 }
Josef Bacik0af3d002010-06-21 14:48:16 -04009344
Chris Mason5f39d392007-10-15 16:14:19 -04009345 read_extent_buffer(leaf, &cache->item,
9346 btrfs_item_ptr_offset(leaf, path->slots[0]),
9347 sizeof(cache->item));
Miao Xie920e4a52014-01-15 20:00:55 +08009348 cache->flags = btrfs_block_group_flags(&cache->item);
Chris Mason0b86a832008-03-24 15:01:56 -04009349
Chris Mason9078a3e2007-04-26 16:46:15 -04009350 key.objectid = found_key.objectid + found_key.offset;
David Sterbab3b4aa72011-04-21 01:20:15 +02009351 btrfs_release_path(path);
Li Zefan34d52cb2011-03-29 13:46:06 +08009352
Josef Bacik817d52f2009-07-13 21:29:25 -04009353 /*
Josef Bacik3c148742011-02-02 15:53:47 +00009354 * We need to exclude the super stripes now so that the space
9355 * info has super bytes accounted for, otherwise we'll think
9356 * we have more space than we actually do.
9357 */
Josef Bacik835d9742013-03-19 12:13:25 -04009358 ret = exclude_super_stripes(root, cache);
9359 if (ret) {
9360 /*
9361 * We may have excluded something, so call this just in
9362 * case.
9363 */
9364 free_excluded_extents(root, cache);
Miao Xie920e4a52014-01-15 20:00:55 +08009365 btrfs_put_block_group(cache);
Josef Bacik835d9742013-03-19 12:13:25 -04009366 goto error;
9367 }
Josef Bacik3c148742011-02-02 15:53:47 +00009368
9369 /*
Josef Bacik817d52f2009-07-13 21:29:25 -04009370 * check for two cases, either we are full, and therefore
9371 * don't need to bother with the caching work since we won't
9372 * find any space, or we are empty, and we can just add all
9373 * the space in and be done with it. This saves us _alot_ of
9374 * time, particularly in the full case.
9375 */
9376 if (found_key.offset == btrfs_block_group_used(&cache->item)) {
Yan Zheng11833d62009-09-11 16:11:19 -04009377 cache->last_byte_to_unpin = (u64)-1;
Josef Bacik817d52f2009-07-13 21:29:25 -04009378 cache->cached = BTRFS_CACHE_FINISHED;
Josef Bacik1b2da372009-09-11 16:11:20 -04009379 free_excluded_extents(root, cache);
Josef Bacik817d52f2009-07-13 21:29:25 -04009380 } else if (btrfs_block_group_used(&cache->item) == 0) {
Yan Zheng11833d62009-09-11 16:11:19 -04009381 cache->last_byte_to_unpin = (u64)-1;
Josef Bacik817d52f2009-07-13 21:29:25 -04009382 cache->cached = BTRFS_CACHE_FINISHED;
9383 add_new_free_space(cache, root->fs_info,
9384 found_key.objectid,
9385 found_key.objectid +
9386 found_key.offset);
Yan Zheng11833d62009-09-11 16:11:19 -04009387 free_excluded_extents(root, cache);
Josef Bacik817d52f2009-07-13 21:29:25 -04009388 }
Chris Mason96b51792007-10-15 16:15:19 -04009389
Josef Bacik8c579fe2013-04-02 12:40:42 -04009390 ret = btrfs_add_block_group_cache(root->fs_info, cache);
9391 if (ret) {
9392 btrfs_remove_free_space_cache(cache);
9393 btrfs_put_block_group(cache);
9394 goto error;
9395 }
9396
Chris Mason6324fbf2008-03-24 15:01:59 -04009397 ret = update_space_info(info, cache->flags, found_key.offset,
9398 btrfs_block_group_used(&cache->item),
9399 &space_info);
Josef Bacik8c579fe2013-04-02 12:40:42 -04009400 if (ret) {
9401 btrfs_remove_free_space_cache(cache);
9402 spin_lock(&info->block_group_cache_lock);
9403 rb_erase(&cache->cache_node,
9404 &info->block_group_cache_tree);
Filipe Manana01eacb22014-12-04 18:38:30 +00009405 RB_CLEAR_NODE(&cache->cache_node);
Josef Bacik8c579fe2013-04-02 12:40:42 -04009406 spin_unlock(&info->block_group_cache_lock);
9407 btrfs_put_block_group(cache);
9408 goto error;
9409 }
9410
Chris Mason6324fbf2008-03-24 15:01:59 -04009411 cache->space_info = space_info;
Josef Bacik1b2da372009-09-11 16:11:20 -04009412 spin_lock(&cache->space_info->lock);
Yan, Zhengf0486c62010-05-16 10:46:25 -04009413 cache->space_info->bytes_readonly += cache->bytes_super;
Josef Bacik1b2da372009-09-11 16:11:20 -04009414 spin_unlock(&cache->space_info->lock);
9415
Yan, Zhengb742bb82010-05-16 10:46:24 -04009416 __link_block_group(space_info, cache);
Chris Mason6324fbf2008-03-24 15:01:59 -04009417
Chris Mason75ccf472008-09-30 19:24:06 -04009418 set_avail_alloc_bits(root->fs_info, cache->flags);
Josef Bacik47ab2a62014-09-18 11:20:02 -04009419 if (btrfs_chunk_readonly(root, cache->key.objectid)) {
Zhaolei868f4012015-08-05 16:43:27 +08009420 inc_block_group_ro(cache, 1);
Josef Bacik47ab2a62014-09-18 11:20:02 -04009421 } else if (btrfs_block_group_used(&cache->item) == 0) {
9422 spin_lock(&info->unused_bgs_lock);
9423 /* Should always be true but just in case. */
9424 if (list_empty(&cache->bg_list)) {
9425 btrfs_get_block_group(cache);
9426 list_add_tail(&cache->bg_list,
9427 &info->unused_bgs);
9428 }
9429 spin_unlock(&info->unused_bgs_lock);
9430 }
Chris Mason9078a3e2007-04-26 16:46:15 -04009431 }
Yan, Zhengb742bb82010-05-16 10:46:24 -04009432
9433 list_for_each_entry_rcu(space_info, &root->fs_info->space_info, list) {
9434 if (!(get_alloc_profile(root, space_info->flags) &
9435 (BTRFS_BLOCK_GROUP_RAID10 |
9436 BTRFS_BLOCK_GROUP_RAID1 |
David Woodhouse53b381b2013-01-29 18:40:14 -05009437 BTRFS_BLOCK_GROUP_RAID5 |
9438 BTRFS_BLOCK_GROUP_RAID6 |
Yan, Zhengb742bb82010-05-16 10:46:24 -04009439 BTRFS_BLOCK_GROUP_DUP)))
9440 continue;
9441 /*
9442 * avoid allocating from un-mirrored block group if there are
9443 * mirrored block groups.
9444 */
chandan1095cc02013-07-16 12:28:56 +05309445 list_for_each_entry(cache,
9446 &space_info->block_groups[BTRFS_RAID_RAID0],
9447 list)
Zhaolei868f4012015-08-05 16:43:27 +08009448 inc_block_group_ro(cache, 1);
chandan1095cc02013-07-16 12:28:56 +05309449 list_for_each_entry(cache,
9450 &space_info->block_groups[BTRFS_RAID_SINGLE],
9451 list)
Zhaolei868f4012015-08-05 16:43:27 +08009452 inc_block_group_ro(cache, 1);
Yan, Zhengb742bb82010-05-16 10:46:24 -04009453 }
Yan, Zhengf0486c62010-05-16 10:46:25 -04009454
9455 init_global_block_rsv(info);
Chris Mason0b86a832008-03-24 15:01:56 -04009456 ret = 0;
9457error:
Chris Mason9078a3e2007-04-26 16:46:15 -04009458 btrfs_free_path(path);
Chris Mason0b86a832008-03-24 15:01:56 -04009459 return ret;
Chris Mason9078a3e2007-04-26 16:46:15 -04009460}
Chris Mason6324fbf2008-03-24 15:01:59 -04009461
Josef Bacikea658ba2012-09-11 16:57:25 -04009462void btrfs_create_pending_block_groups(struct btrfs_trans_handle *trans,
9463 struct btrfs_root *root)
9464{
9465 struct btrfs_block_group_cache *block_group, *tmp;
9466 struct btrfs_root *extent_root = root->fs_info->extent_root;
9467 struct btrfs_block_group_item item;
9468 struct btrfs_key key;
9469 int ret = 0;
9470
Josef Bacik47ab2a62014-09-18 11:20:02 -04009471 list_for_each_entry_safe(block_group, tmp, &trans->new_bgs, bg_list) {
Josef Bacikea658ba2012-09-11 16:57:25 -04009472 if (ret)
Filipe Mananac92f6be2014-11-26 15:28:55 +00009473 goto next;
Josef Bacikea658ba2012-09-11 16:57:25 -04009474
9475 spin_lock(&block_group->lock);
9476 memcpy(&item, &block_group->item, sizeof(item));
9477 memcpy(&key, &block_group->key, sizeof(key));
9478 spin_unlock(&block_group->lock);
9479
9480 ret = btrfs_insert_item(trans, extent_root, &key, &item,
9481 sizeof(item));
9482 if (ret)
9483 btrfs_abort_transaction(trans, extent_root, ret);
Josef Bacik6df9a952013-06-27 13:22:46 -04009484 ret = btrfs_finish_chunk_alloc(trans, extent_root,
9485 key.objectid, key.offset);
9486 if (ret)
9487 btrfs_abort_transaction(trans, extent_root, ret);
Filipe Mananac92f6be2014-11-26 15:28:55 +00009488next:
9489 list_del_init(&block_group->bg_list);
Josef Bacikea658ba2012-09-11 16:57:25 -04009490 }
9491}
9492
Chris Mason6324fbf2008-03-24 15:01:59 -04009493int btrfs_make_block_group(struct btrfs_trans_handle *trans,
9494 struct btrfs_root *root, u64 bytes_used,
Chris Masone17cade2008-04-15 15:41:47 -04009495 u64 type, u64 chunk_objectid, u64 chunk_offset,
Chris Mason6324fbf2008-03-24 15:01:59 -04009496 u64 size)
9497{
9498 int ret;
Chris Mason6324fbf2008-03-24 15:01:59 -04009499 struct btrfs_root *extent_root;
9500 struct btrfs_block_group_cache *cache;
Chris Mason6324fbf2008-03-24 15:01:59 -04009501
9502 extent_root = root->fs_info->extent_root;
Chris Mason6324fbf2008-03-24 15:01:59 -04009503
Miao Xie995946d2014-04-02 19:51:06 +08009504 btrfs_set_log_full_commit(root->fs_info, trans);
Chris Masone02119d2008-09-05 16:13:11 -04009505
Miao Xie920e4a52014-01-15 20:00:55 +08009506 cache = btrfs_create_block_group_cache(root, chunk_offset, size);
Josef Bacik0f9dd462008-09-23 13:14:11 -04009507 if (!cache)
9508 return -ENOMEM;
Li Zefan34d52cb2011-03-29 13:46:06 +08009509
Chris Mason6324fbf2008-03-24 15:01:59 -04009510 btrfs_set_block_group_used(&cache->item, bytes_used);
Chris Mason6324fbf2008-03-24 15:01:59 -04009511 btrfs_set_block_group_chunk_objectid(&cache->item, chunk_objectid);
Chris Mason6324fbf2008-03-24 15:01:59 -04009512 btrfs_set_block_group_flags(&cache->item, type);
9513
Miao Xie920e4a52014-01-15 20:00:55 +08009514 cache->flags = type;
Yan Zheng11833d62009-09-11 16:11:19 -04009515 cache->last_byte_to_unpin = (u64)-1;
Josef Bacik817d52f2009-07-13 21:29:25 -04009516 cache->cached = BTRFS_CACHE_FINISHED;
Josef Bacik835d9742013-03-19 12:13:25 -04009517 ret = exclude_super_stripes(root, cache);
9518 if (ret) {
9519 /*
9520 * We may have excluded something, so call this just in
9521 * case.
9522 */
9523 free_excluded_extents(root, cache);
Miao Xie920e4a52014-01-15 20:00:55 +08009524 btrfs_put_block_group(cache);
Josef Bacik835d9742013-03-19 12:13:25 -04009525 return ret;
9526 }
Josef Bacik96303082009-07-13 21:29:25 -04009527
Josef Bacik817d52f2009-07-13 21:29:25 -04009528 add_new_free_space(cache, root->fs_info, chunk_offset,
9529 chunk_offset + size);
9530
Yan Zheng11833d62009-09-11 16:11:19 -04009531 free_excluded_extents(root, cache);
9532
Filipe Manana2e6e5182015-05-12 00:28:11 +01009533 /*
9534 * Call to ensure the corresponding space_info object is created and
9535 * assigned to our block group, but don't update its counters just yet.
9536 * We want our bg to be added to the rbtree with its ->space_info set.
9537 */
9538 ret = update_space_info(root->fs_info, cache->flags, 0, 0,
9539 &cache->space_info);
9540 if (ret) {
9541 btrfs_remove_free_space_cache(cache);
9542 btrfs_put_block_group(cache);
9543 return ret;
9544 }
9545
Josef Bacik8c579fe2013-04-02 12:40:42 -04009546 ret = btrfs_add_block_group_cache(root->fs_info, cache);
9547 if (ret) {
9548 btrfs_remove_free_space_cache(cache);
9549 btrfs_put_block_group(cache);
9550 return ret;
9551 }
9552
Filipe Manana2e6e5182015-05-12 00:28:11 +01009553 /*
9554 * Now that our block group has its ->space_info set and is inserted in
9555 * the rbtree, update the space info's counters.
9556 */
Chris Mason6324fbf2008-03-24 15:01:59 -04009557 ret = update_space_info(root->fs_info, cache->flags, size, bytes_used,
9558 &cache->space_info);
Josef Bacik8c579fe2013-04-02 12:40:42 -04009559 if (ret) {
9560 btrfs_remove_free_space_cache(cache);
9561 spin_lock(&root->fs_info->block_group_cache_lock);
9562 rb_erase(&cache->cache_node,
9563 &root->fs_info->block_group_cache_tree);
Filipe Manana01eacb22014-12-04 18:38:30 +00009564 RB_CLEAR_NODE(&cache->cache_node);
Josef Bacik8c579fe2013-04-02 12:40:42 -04009565 spin_unlock(&root->fs_info->block_group_cache_lock);
9566 btrfs_put_block_group(cache);
9567 return ret;
9568 }
Li Zefanc7c144d2011-12-07 10:39:22 +08009569 update_global_block_rsv(root->fs_info);
Josef Bacik1b2da372009-09-11 16:11:20 -04009570
9571 spin_lock(&cache->space_info->lock);
Yan, Zhengf0486c62010-05-16 10:46:25 -04009572 cache->space_info->bytes_readonly += cache->bytes_super;
Josef Bacik1b2da372009-09-11 16:11:20 -04009573 spin_unlock(&cache->space_info->lock);
9574
Yan, Zhengb742bb82010-05-16 10:46:24 -04009575 __link_block_group(cache->space_info, cache);
Chris Mason6324fbf2008-03-24 15:01:59 -04009576
Josef Bacik47ab2a62014-09-18 11:20:02 -04009577 list_add_tail(&cache->bg_list, &trans->new_bgs);
Chris Mason6324fbf2008-03-24 15:01:59 -04009578
Chris Masond18a2c42008-04-04 15:40:00 -04009579 set_avail_alloc_bits(extent_root->fs_info, type);
Chris Mason925baed2008-06-25 16:01:30 -04009580
Chris Mason6324fbf2008-03-24 15:01:59 -04009581 return 0;
9582}
Zheng Yan1a40e232008-09-26 10:09:34 -04009583
Ilya Dryomov10ea00f2012-01-16 22:04:47 +02009584static void clear_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags)
9585{
Ilya Dryomov899c81e2012-03-27 17:09:16 +03009586 u64 extra_flags = chunk_to_extended(flags) &
9587 BTRFS_EXTENDED_PROFILE_MASK;
Ilya Dryomov10ea00f2012-01-16 22:04:47 +02009588
Miao Xiede98ced2013-01-29 10:13:12 +00009589 write_seqlock(&fs_info->profiles_lock);
Ilya Dryomov10ea00f2012-01-16 22:04:47 +02009590 if (flags & BTRFS_BLOCK_GROUP_DATA)
9591 fs_info->avail_data_alloc_bits &= ~extra_flags;
9592 if (flags & BTRFS_BLOCK_GROUP_METADATA)
9593 fs_info->avail_metadata_alloc_bits &= ~extra_flags;
9594 if (flags & BTRFS_BLOCK_GROUP_SYSTEM)
9595 fs_info->avail_system_alloc_bits &= ~extra_flags;
Miao Xiede98ced2013-01-29 10:13:12 +00009596 write_sequnlock(&fs_info->profiles_lock);
Ilya Dryomov10ea00f2012-01-16 22:04:47 +02009597}
9598
Zheng Yan1a40e232008-09-26 10:09:34 -04009599int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
Filipe Manana04216822014-11-27 21:14:15 +00009600 struct btrfs_root *root, u64 group_start,
9601 struct extent_map *em)
Zheng Yan1a40e232008-09-26 10:09:34 -04009602{
9603 struct btrfs_path *path;
9604 struct btrfs_block_group_cache *block_group;
Chris Mason44fb5512009-06-04 15:34:51 -04009605 struct btrfs_free_cluster *cluster;
Josef Bacik0af3d002010-06-21 14:48:16 -04009606 struct btrfs_root *tree_root = root->fs_info->tree_root;
Zheng Yan1a40e232008-09-26 10:09:34 -04009607 struct btrfs_key key;
Josef Bacik0af3d002010-06-21 14:48:16 -04009608 struct inode *inode;
Jeff Mahoneyc1895442014-05-27 12:59:57 -04009609 struct kobject *kobj = NULL;
Zheng Yan1a40e232008-09-26 10:09:34 -04009610 int ret;
Ilya Dryomov10ea00f2012-01-16 22:04:47 +02009611 int index;
Josef Bacik89a55892010-10-14 14:52:27 -04009612 int factor;
Filipe Manana4f69cb92014-11-26 15:28:51 +00009613 struct btrfs_caching_control *caching_ctl = NULL;
Filipe Manana04216822014-11-27 21:14:15 +00009614 bool remove_em;
Zheng Yan1a40e232008-09-26 10:09:34 -04009615
Zheng Yan1a40e232008-09-26 10:09:34 -04009616 root = root->fs_info->extent_root;
9617
9618 block_group = btrfs_lookup_block_group(root->fs_info, group_start);
9619 BUG_ON(!block_group);
Yan Zhengc146afa2008-11-12 14:34:12 -05009620 BUG_ON(!block_group->ro);
Zheng Yan1a40e232008-09-26 10:09:34 -04009621
liubo9f7c43c2011-03-07 02:13:33 +00009622 /*
9623 * Free the reserved super bytes from this block group before
9624 * remove it.
9625 */
9626 free_excluded_extents(root, block_group);
9627
Zheng Yan1a40e232008-09-26 10:09:34 -04009628 memcpy(&key, &block_group->key, sizeof(key));
Ilya Dryomov10ea00f2012-01-16 22:04:47 +02009629 index = get_block_group_index(block_group);
Josef Bacik89a55892010-10-14 14:52:27 -04009630 if (block_group->flags & (BTRFS_BLOCK_GROUP_DUP |
9631 BTRFS_BLOCK_GROUP_RAID1 |
9632 BTRFS_BLOCK_GROUP_RAID10))
9633 factor = 2;
9634 else
9635 factor = 1;
Zheng Yan1a40e232008-09-26 10:09:34 -04009636
Chris Mason44fb5512009-06-04 15:34:51 -04009637 /* make sure this block group isn't part of an allocation cluster */
9638 cluster = &root->fs_info->data_alloc_cluster;
9639 spin_lock(&cluster->refill_lock);
9640 btrfs_return_cluster_to_free_space(block_group, cluster);
9641 spin_unlock(&cluster->refill_lock);
9642
9643 /*
9644 * make sure this block group isn't part of a metadata
9645 * allocation cluster
9646 */
9647 cluster = &root->fs_info->meta_alloc_cluster;
9648 spin_lock(&cluster->refill_lock);
9649 btrfs_return_cluster_to_free_space(block_group, cluster);
9650 spin_unlock(&cluster->refill_lock);
9651
Zheng Yan1a40e232008-09-26 10:09:34 -04009652 path = btrfs_alloc_path();
Mark Fashehd8926bb2011-07-13 10:38:47 -07009653 if (!path) {
9654 ret = -ENOMEM;
9655 goto out;
9656 }
Zheng Yan1a40e232008-09-26 10:09:34 -04009657
Chris Mason1bbc6212015-04-06 12:46:08 -07009658 /*
9659 * get the inode first so any iput calls done for the io_list
9660 * aren't the final iput (no unlinks allowed now)
9661 */
Ilya Dryomov10b2f342011-10-02 13:56:53 +03009662 inode = lookup_free_space_inode(tree_root, block_group, path);
Chris Mason1bbc6212015-04-06 12:46:08 -07009663
9664 mutex_lock(&trans->transaction->cache_write_mutex);
9665 /*
9666 * make sure our free spache cache IO is done before remove the
9667 * free space inode
9668 */
9669 spin_lock(&trans->transaction->dirty_bgs_lock);
9670 if (!list_empty(&block_group->io_list)) {
9671 list_del_init(&block_group->io_list);
9672
9673 WARN_ON(!IS_ERR(inode) && inode != block_group->io_ctl.inode);
9674
9675 spin_unlock(&trans->transaction->dirty_bgs_lock);
9676 btrfs_wait_cache_io(root, trans, block_group,
9677 &block_group->io_ctl, path,
9678 block_group->key.objectid);
9679 btrfs_put_block_group(block_group);
9680 spin_lock(&trans->transaction->dirty_bgs_lock);
9681 }
9682
9683 if (!list_empty(&block_group->dirty_list)) {
9684 list_del_init(&block_group->dirty_list);
9685 btrfs_put_block_group(block_group);
9686 }
9687 spin_unlock(&trans->transaction->dirty_bgs_lock);
9688 mutex_unlock(&trans->transaction->cache_write_mutex);
9689
Josef Bacik0af3d002010-06-21 14:48:16 -04009690 if (!IS_ERR(inode)) {
Tsutomu Itohb5324022011-07-19 07:27:20 +00009691 ret = btrfs_orphan_add(trans, inode);
Jeff Mahoney79787ea2012-03-12 16:03:00 +01009692 if (ret) {
9693 btrfs_add_delayed_iput(inode);
9694 goto out;
9695 }
Josef Bacik0af3d002010-06-21 14:48:16 -04009696 clear_nlink(inode);
9697 /* One for the block groups ref */
9698 spin_lock(&block_group->lock);
9699 if (block_group->iref) {
9700 block_group->iref = 0;
9701 block_group->inode = NULL;
9702 spin_unlock(&block_group->lock);
9703 iput(inode);
9704 } else {
9705 spin_unlock(&block_group->lock);
9706 }
9707 /* One for our lookup ref */
Josef Bacik455757c2011-09-19 12:26:24 -04009708 btrfs_add_delayed_iput(inode);
Josef Bacik0af3d002010-06-21 14:48:16 -04009709 }
9710
9711 key.objectid = BTRFS_FREE_SPACE_OBJECTID;
9712 key.offset = block_group->key.objectid;
9713 key.type = 0;
9714
9715 ret = btrfs_search_slot(trans, tree_root, &key, path, -1, 1);
9716 if (ret < 0)
9717 goto out;
9718 if (ret > 0)
David Sterbab3b4aa72011-04-21 01:20:15 +02009719 btrfs_release_path(path);
Josef Bacik0af3d002010-06-21 14:48:16 -04009720 if (ret == 0) {
9721 ret = btrfs_del_item(trans, tree_root, path);
9722 if (ret)
9723 goto out;
David Sterbab3b4aa72011-04-21 01:20:15 +02009724 btrfs_release_path(path);
Josef Bacik0af3d002010-06-21 14:48:16 -04009725 }
9726
Yan Zheng3dfdb932009-01-21 10:49:16 -05009727 spin_lock(&root->fs_info->block_group_cache_lock);
Zheng Yan1a40e232008-09-26 10:09:34 -04009728 rb_erase(&block_group->cache_node,
9729 &root->fs_info->block_group_cache_tree);
Filipe Manana292cbd52014-11-26 15:28:50 +00009730 RB_CLEAR_NODE(&block_group->cache_node);
Liu Boa1897fd2012-12-27 09:01:23 +00009731
9732 if (root->fs_info->first_logical_byte == block_group->key.objectid)
9733 root->fs_info->first_logical_byte = (u64)-1;
Yan Zheng3dfdb932009-01-21 10:49:16 -05009734 spin_unlock(&root->fs_info->block_group_cache_lock);
Josef Bacik817d52f2009-07-13 21:29:25 -04009735
Josef Bacik80eb2342008-10-29 14:49:05 -04009736 down_write(&block_group->space_info->groups_sem);
Chris Mason44fb5512009-06-04 15:34:51 -04009737 /*
9738 * we must use list_del_init so people can check to see if they
9739 * are still on the list after taking the semaphore
9740 */
9741 list_del_init(&block_group->list);
Jeff Mahoney6ab0a202013-11-01 13:07:04 -04009742 if (list_empty(&block_group->space_info->block_groups[index])) {
Jeff Mahoneyc1895442014-05-27 12:59:57 -04009743 kobj = block_group->space_info->block_group_kobjs[index];
9744 block_group->space_info->block_group_kobjs[index] = NULL;
Ilya Dryomov10ea00f2012-01-16 22:04:47 +02009745 clear_avail_alloc_bits(root->fs_info, block_group->flags);
Jeff Mahoney6ab0a202013-11-01 13:07:04 -04009746 }
Josef Bacik80eb2342008-10-29 14:49:05 -04009747 up_write(&block_group->space_info->groups_sem);
Jeff Mahoneyc1895442014-05-27 12:59:57 -04009748 if (kobj) {
9749 kobject_del(kobj);
9750 kobject_put(kobj);
9751 }
Zheng Yan1a40e232008-09-26 10:09:34 -04009752
Filipe Manana4f69cb92014-11-26 15:28:51 +00009753 if (block_group->has_caching_ctl)
9754 caching_ctl = get_caching_control(block_group);
Josef Bacik817d52f2009-07-13 21:29:25 -04009755 if (block_group->cached == BTRFS_CACHE_STARTED)
Yan Zheng11833d62009-09-11 16:11:19 -04009756 wait_block_group_cache_done(block_group);
Filipe Manana4f69cb92014-11-26 15:28:51 +00009757 if (block_group->has_caching_ctl) {
9758 down_write(&root->fs_info->commit_root_sem);
9759 if (!caching_ctl) {
9760 struct btrfs_caching_control *ctl;
9761
9762 list_for_each_entry(ctl,
9763 &root->fs_info->caching_block_groups, list)
9764 if (ctl->block_group == block_group) {
9765 caching_ctl = ctl;
9766 atomic_inc(&caching_ctl->count);
9767 break;
9768 }
9769 }
9770 if (caching_ctl)
9771 list_del_init(&caching_ctl->list);
9772 up_write(&root->fs_info->commit_root_sem);
9773 if (caching_ctl) {
9774 /* Once for the caching bgs list and once for us. */
9775 put_caching_control(caching_ctl);
9776 put_caching_control(caching_ctl);
9777 }
9778 }
Josef Bacik817d52f2009-07-13 21:29:25 -04009779
Josef Bacikce93ec52014-11-17 15:45:48 -05009780 spin_lock(&trans->transaction->dirty_bgs_lock);
9781 if (!list_empty(&block_group->dirty_list)) {
Chris Mason1bbc6212015-04-06 12:46:08 -07009782 WARN_ON(1);
9783 }
9784 if (!list_empty(&block_group->io_list)) {
9785 WARN_ON(1);
Josef Bacikce93ec52014-11-17 15:45:48 -05009786 }
9787 spin_unlock(&trans->transaction->dirty_bgs_lock);
Josef Bacik817d52f2009-07-13 21:29:25 -04009788 btrfs_remove_free_space_cache(block_group);
9789
Yan Zhengc146afa2008-11-12 14:34:12 -05009790 spin_lock(&block_group->space_info->lock);
Filipe Manana75c68e92015-01-16 13:24:40 +00009791 list_del_init(&block_group->ro_list);
Zhao Lei18d018a2015-02-24 20:07:44 +08009792
9793 if (btrfs_test_opt(root, ENOSPC_DEBUG)) {
9794 WARN_ON(block_group->space_info->total_bytes
9795 < block_group->key.offset);
9796 WARN_ON(block_group->space_info->bytes_readonly
9797 < block_group->key.offset);
9798 WARN_ON(block_group->space_info->disk_total
9799 < block_group->key.offset * factor);
9800 }
Yan Zhengc146afa2008-11-12 14:34:12 -05009801 block_group->space_info->total_bytes -= block_group->key.offset;
9802 block_group->space_info->bytes_readonly -= block_group->key.offset;
Josef Bacik89a55892010-10-14 14:52:27 -04009803 block_group->space_info->disk_total -= block_group->key.offset * factor;
Zhao Lei18d018a2015-02-24 20:07:44 +08009804
Yan Zhengc146afa2008-11-12 14:34:12 -05009805 spin_unlock(&block_group->space_info->lock);
Chris Mason283bb192009-07-24 16:30:55 -04009806
Josef Bacik0af3d002010-06-21 14:48:16 -04009807 memcpy(&key, &block_group->key, sizeof(key));
9808
Filipe Manana04216822014-11-27 21:14:15 +00009809 lock_chunks(root);
Filipe Manana495e64f2014-12-02 18:07:30 +00009810 if (!list_empty(&em->list)) {
9811 /* We're in the transaction->pending_chunks list. */
9812 free_extent_map(em);
9813 }
Filipe Manana04216822014-11-27 21:14:15 +00009814 spin_lock(&block_group->lock);
9815 block_group->removed = 1;
9816 /*
9817 * At this point trimming can't start on this block group, because we
9818 * removed the block group from the tree fs_info->block_group_cache_tree
9819 * so no one can't find it anymore and even if someone already got this
9820 * block group before we removed it from the rbtree, they have already
9821 * incremented block_group->trimming - if they didn't, they won't find
9822 * any free space entries because we already removed them all when we
9823 * called btrfs_remove_free_space_cache().
9824 *
9825 * And we must not remove the extent map from the fs_info->mapping_tree
9826 * to prevent the same logical address range and physical device space
9827 * ranges from being reused for a new block group. This is because our
9828 * fs trim operation (btrfs_trim_fs() / btrfs_ioctl_fitrim()) is
9829 * completely transactionless, so while it is trimming a range the
9830 * currently running transaction might finish and a new one start,
9831 * allowing for new block groups to be created that can reuse the same
9832 * physical device locations unless we take this special care.
9833 */
9834 remove_em = (atomic_read(&block_group->trimming) == 0);
9835 /*
9836 * Make sure a trimmer task always sees the em in the pinned_chunks list
9837 * if it sees block_group->removed == 1 (needs to lock block_group->lock
9838 * before checking block_group->removed).
9839 */
9840 if (!remove_em) {
9841 /*
9842 * Our em might be in trans->transaction->pending_chunks which
9843 * is protected by fs_info->chunk_mutex ([lock|unlock]_chunks),
9844 * and so is the fs_info->pinned_chunks list.
9845 *
9846 * So at this point we must be holding the chunk_mutex to avoid
9847 * any races with chunk allocation (more specifically at
9848 * volumes.c:contains_pending_extent()), to ensure it always
9849 * sees the em, either in the pending_chunks list or in the
9850 * pinned_chunks list.
9851 */
9852 list_move_tail(&em->list, &root->fs_info->pinned_chunks);
9853 }
9854 spin_unlock(&block_group->lock);
Filipe Manana04216822014-11-27 21:14:15 +00009855
9856 if (remove_em) {
9857 struct extent_map_tree *em_tree;
9858
9859 em_tree = &root->fs_info->mapping_tree.map_tree;
9860 write_lock(&em_tree->lock);
Filipe Manana8dbcd102014-12-02 18:07:49 +00009861 /*
9862 * The em might be in the pending_chunks list, so make sure the
9863 * chunk mutex is locked, since remove_extent_mapping() will
9864 * delete us from that list.
9865 */
Filipe Manana04216822014-11-27 21:14:15 +00009866 remove_extent_mapping(em_tree, em);
9867 write_unlock(&em_tree->lock);
9868 /* once for the tree */
9869 free_extent_map(em);
9870 }
9871
Filipe Manana8dbcd102014-12-02 18:07:49 +00009872 unlock_chunks(root);
9873
Chris Masonfa9c0d792009-04-03 09:47:43 -04009874 btrfs_put_block_group(block_group);
9875 btrfs_put_block_group(block_group);
Zheng Yan1a40e232008-09-26 10:09:34 -04009876
9877 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
9878 if (ret > 0)
9879 ret = -EIO;
9880 if (ret < 0)
9881 goto out;
9882
9883 ret = btrfs_del_item(trans, root, path);
9884out:
9885 btrfs_free_path(path);
9886 return ret;
9887}
liuboacce9522011-01-06 19:30:25 +08009888
/*
 * Process the unused_bgs list and remove any that don't have any allocated
 * space inside of them.
 *
 * Called without any locks held; takes unused_bgs_lock to walk the list and
 * drops it while actually deleting each block group (the group was already
 * unlinked from the list at that point, so it cannot be processed twice).
 */
void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)
{
	struct btrfs_block_group_cache *block_group;
	struct btrfs_space_info *space_info;
	struct btrfs_root *root = fs_info->extent_root;
	struct btrfs_trans_handle *trans;
	int ret = 0;

	/* Nothing to do if the filesystem is not fully set up / mounted. */
	if (!fs_info->open)
		return;

	spin_lock(&fs_info->unused_bgs_lock);
	while (!list_empty(&fs_info->unused_bgs)) {
		u64 start, end;

		block_group = list_first_entry(&fs_info->unused_bgs,
					       struct btrfs_block_group_cache,
					       bg_list);
		space_info = block_group->space_info;
		list_del_init(&block_group->bg_list);
		/*
		 * Once an error occurred in an earlier iteration (ret != 0),
		 * just drop the remaining queued groups.  Mixed block groups
		 * (data+metadata) are never auto-deleted here.
		 */
		if (ret || btrfs_mixed_space_info(space_info)) {
			btrfs_put_block_group(block_group);
			continue;
		}
		spin_unlock(&fs_info->unused_bgs_lock);

		mutex_lock(&root->fs_info->delete_unused_bgs_mutex);

		/* Don't want to race with allocators so take the groups_sem */
		down_write(&space_info->groups_sem);
		spin_lock(&block_group->lock);
		if (block_group->reserved ||
		    btrfs_block_group_used(&block_group->item) ||
		    block_group->ro) {
			/*
			 * We want to bail if we made new allocations or have
			 * outstanding allocations in this block group.  We do
			 * the ro check in case balance is currently acting on
			 * this block group.
			 */
			spin_unlock(&block_group->lock);
			up_write(&space_info->groups_sem);
			goto next;
		}
		spin_unlock(&block_group->lock);

		/* We don't want to force the issue, only flip if it's ok. */
		ret = inc_block_group_ro(block_group, 0);
		up_write(&space_info->groups_sem);
		if (ret < 0) {
			/* Not an error, the group is simply in use again. */
			ret = 0;
			goto next;
		}

		/*
		 * Want to do this before we do anything else so we can recover
		 * properly if we fail to join the transaction.
		 */
		/* 1 for btrfs_orphan_reserve_metadata() */
		trans = btrfs_start_transaction(root, 1);
		if (IS_ERR(trans)) {
			btrfs_dec_block_group_ro(root, block_group);
			ret = PTR_ERR(trans);
			goto next;
		}

		/*
		 * We could have pending pinned extents for this block group,
		 * just delete them, we don't care about them anymore.
		 */
		start = block_group->key.objectid;
		end = start + block_group->key.offset - 1;
		/*
		 * Hold the unused_bg_unpin_mutex lock to avoid racing with
		 * btrfs_finish_extent_commit(). If we are at transaction N,
		 * another task might be running finish_extent_commit() for the
		 * previous transaction N - 1, and have seen a range belonging
		 * to the block group in freed_extents[] before we were able to
		 * clear the whole block group range from freed_extents[]. This
		 * means that task can lookup for the block group after we
		 * unpinned it from freed_extents[] and removed it, leading to
		 * a BUG_ON() at btrfs_unpin_extent_range().
		 */
		mutex_lock(&fs_info->unused_bg_unpin_mutex);
		ret = clear_extent_bits(&fs_info->freed_extents[0], start, end,
				  EXTENT_DIRTY, GFP_NOFS);
		if (ret) {
			mutex_unlock(&fs_info->unused_bg_unpin_mutex);
			btrfs_dec_block_group_ro(root, block_group);
			goto end_trans;
		}
		ret = clear_extent_bits(&fs_info->freed_extents[1], start, end,
				  EXTENT_DIRTY, GFP_NOFS);
		if (ret) {
			mutex_unlock(&fs_info->unused_bg_unpin_mutex);
			btrfs_dec_block_group_ro(root, block_group);
			goto end_trans;
		}
		mutex_unlock(&fs_info->unused_bg_unpin_mutex);

		/* Reset pinned so btrfs_put_block_group doesn't complain */
		spin_lock(&space_info->lock);
		spin_lock(&block_group->lock);

		space_info->bytes_pinned -= block_group->pinned;
		space_info->bytes_readonly += block_group->pinned;
		percpu_counter_add(&space_info->total_bytes_pinned,
				   -block_group->pinned);
		block_group->pinned = 0;

		spin_unlock(&block_group->lock);
		spin_unlock(&space_info->lock);

		/*
		 * Btrfs_remove_chunk will abort the transaction if things go
		 * horribly wrong.
		 */
		ret = btrfs_remove_chunk(trans, root,
					 block_group->key.objectid);
end_trans:
		btrfs_end_transaction(trans, root);
next:
		mutex_unlock(&root->fs_info->delete_unused_bgs_mutex);
		btrfs_put_block_group(block_group);
		/* Re-take the list lock before testing the loop condition. */
		spin_lock(&fs_info->unused_bgs_lock);
	}
	spin_unlock(&fs_info->unused_bgs_lock);
}
10021
liuboc59021f2011-03-07 02:13:14 +000010022int btrfs_init_space_info(struct btrfs_fs_info *fs_info)
10023{
10024 struct btrfs_space_info *space_info;
liubo1aba86d2011-04-08 08:44:37 +000010025 struct btrfs_super_block *disk_super;
10026 u64 features;
10027 u64 flags;
10028 int mixed = 0;
liuboc59021f2011-03-07 02:13:14 +000010029 int ret;
10030
David Sterba6c417612011-04-13 15:41:04 +020010031 disk_super = fs_info->super_copy;
liubo1aba86d2011-04-08 08:44:37 +000010032 if (!btrfs_super_root(disk_super))
10033 return 1;
liuboc59021f2011-03-07 02:13:14 +000010034
liubo1aba86d2011-04-08 08:44:37 +000010035 features = btrfs_super_incompat_flags(disk_super);
10036 if (features & BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)
10037 mixed = 1;
liuboc59021f2011-03-07 02:13:14 +000010038
liubo1aba86d2011-04-08 08:44:37 +000010039 flags = BTRFS_BLOCK_GROUP_SYSTEM;
10040 ret = update_space_info(fs_info, flags, 0, 0, &space_info);
liuboc59021f2011-03-07 02:13:14 +000010041 if (ret)
liubo1aba86d2011-04-08 08:44:37 +000010042 goto out;
liuboc59021f2011-03-07 02:13:14 +000010043
liubo1aba86d2011-04-08 08:44:37 +000010044 if (mixed) {
10045 flags = BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_DATA;
10046 ret = update_space_info(fs_info, flags, 0, 0, &space_info);
10047 } else {
10048 flags = BTRFS_BLOCK_GROUP_METADATA;
10049 ret = update_space_info(fs_info, flags, 0, 0, &space_info);
10050 if (ret)
10051 goto out;
10052
10053 flags = BTRFS_BLOCK_GROUP_DATA;
10054 ret = update_space_info(fs_info, flags, 0, 0, &space_info);
10055 }
10056out:
liuboc59021f2011-03-07 02:13:14 +000010057 return ret;
10058}
10059
/*
 * Error-path helper: unpin [start, end] without updating the free space
 * caches (last argument false), used when tearing down after a failure.
 */
int btrfs_error_unpin_extent_range(struct btrfs_root *root, u64 start, u64 end)
{
	return unpin_extent_range(root, start, end, false);
}
10064
Li Dongyangf7039b12011-03-24 10:24:28 +000010065int btrfs_trim_fs(struct btrfs_root *root, struct fstrim_range *range)
10066{
10067 struct btrfs_fs_info *fs_info = root->fs_info;
10068 struct btrfs_block_group_cache *cache = NULL;
10069 u64 group_trimmed;
10070 u64 start;
10071 u64 end;
10072 u64 trimmed = 0;
Liu Bo2cac13e2012-02-09 18:17:41 +080010073 u64 total_bytes = btrfs_super_total_bytes(fs_info->super_copy);
Li Dongyangf7039b12011-03-24 10:24:28 +000010074 int ret = 0;
10075
Liu Bo2cac13e2012-02-09 18:17:41 +080010076 /*
10077 * try to trim all FS space, our block group may start from non-zero.
10078 */
10079 if (range->len == total_bytes)
10080 cache = btrfs_lookup_first_block_group(fs_info, range->start);
10081 else
10082 cache = btrfs_lookup_block_group(fs_info, range->start);
Li Dongyangf7039b12011-03-24 10:24:28 +000010083
10084 while (cache) {
10085 if (cache->key.objectid >= (range->start + range->len)) {
10086 btrfs_put_block_group(cache);
10087 break;
10088 }
10089
10090 start = max(range->start, cache->key.objectid);
10091 end = min(range->start + range->len,
10092 cache->key.objectid + cache->key.offset);
10093
10094 if (end - start >= range->minlen) {
10095 if (!block_group_cache_done(cache)) {
Liu Bof6373bf2012-12-27 09:01:18 +000010096 ret = cache_block_group(cache, 0);
Josef Bacik1be41b72013-06-12 13:56:06 -040010097 if (ret) {
10098 btrfs_put_block_group(cache);
10099 break;
10100 }
10101 ret = wait_block_group_cache_done(cache);
10102 if (ret) {
10103 btrfs_put_block_group(cache);
10104 break;
10105 }
Li Dongyangf7039b12011-03-24 10:24:28 +000010106 }
10107 ret = btrfs_trim_block_group(cache,
10108 &group_trimmed,
10109 start,
10110 end,
10111 range->minlen);
10112
10113 trimmed += group_trimmed;
10114 if (ret) {
10115 btrfs_put_block_group(cache);
10116 break;
10117 }
10118 }
10119
10120 cache = next_block_group(fs_info->tree_root, cache);
10121 }
10122
10123 range->len = trimmed;
10124 return ret;
10125}
Miao Xie8257b2d2014-03-06 13:38:19 +080010126
10127/*
Filipe Manana9ea24bb2014-10-29 11:57:59 +000010128 * btrfs_{start,end}_write_no_snapshoting() are similar to
10129 * mnt_{want,drop}_write(), they are used to prevent some tasks from writing
10130 * data into the page cache through nocow before the subvolume is snapshoted,
10131 * but flush the data into disk after the snapshot creation, or to prevent
10132 * operations while snapshoting is ongoing and that cause the snapshot to be
10133 * inconsistent (writes followed by expanding truncates for example).
Miao Xie8257b2d2014-03-06 13:38:19 +080010134 */
Filipe Manana9ea24bb2014-10-29 11:57:59 +000010135void btrfs_end_write_no_snapshoting(struct btrfs_root *root)
Miao Xie8257b2d2014-03-06 13:38:19 +080010136{
10137 percpu_counter_dec(&root->subv_writers->counter);
10138 /*
10139 * Make sure counter is updated before we wake up
10140 * waiters.
10141 */
10142 smp_mb();
10143 if (waitqueue_active(&root->subv_writers->wait))
10144 wake_up(&root->subv_writers->wait);
10145}
10146
/*
 * Try to take a writer reference on @root for a nocow write.
 *
 * Returns 1 when the reference was taken (caller must later call
 * btrfs_end_write_no_snapshoting()), or 0 when a snapshot is pending and
 * the write must take the cow path instead.  The inc / barrier / re-check
 * sequence pairs with the snapshot side so that neither can miss the other.
 */
int btrfs_start_write_no_snapshoting(struct btrfs_root *root)
{
	/* Fast path: a snapshot is already pending, bail out. */
	if (atomic_read(&root->will_be_snapshoted))
		return 0;

	percpu_counter_inc(&root->subv_writers->counter);
	/*
	 * Make sure counter is updated before we check for snapshot creation.
	 */
	smp_mb();
	if (atomic_read(&root->will_be_snapshoted)) {
		/* Lost the race with a snapshot; undo our reference. */
		btrfs_end_write_no_snapshoting(root);
		return 0;
	}
	return 1;
}