blob: a4a4f593ec71d34bb988dcfa430db208866ca2c5 [file] [log] [blame]
Chris Mason6cbd5572007-06-12 09:07:21 -04001/*
2 * Copyright (C) 2007 Oracle. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 021110-1307, USA.
17 */
Zach Brownec6b9102007-07-11 10:00:37 -040018#include <linux/sched.h>
Chris Masonedbd8d42007-12-21 16:27:24 -050019#include <linux/pagemap.h>
Chris Masonec44a352008-04-28 15:29:52 -040020#include <linux/writeback.h>
David Woodhouse21af8042008-08-12 14:13:26 +010021#include <linux/blkdev.h>
Chris Masonb7a9f292009-02-04 09:23:45 -050022#include <linux/sort.h>
Chris Mason4184ea72009-03-10 12:39:20 -040023#include <linux/rcupdate.h>
Josef Bacik817d52f2009-07-13 21:29:25 -040024#include <linux/kthread.h>
Tejun Heo5a0e3ad2010-03-24 17:04:11 +090025#include <linux/slab.h>
David Sterbadff51cd2011-06-14 12:52:17 +020026#include <linux/ratelimit.h>
Josef Bacikb150a4f2013-06-19 15:00:04 -040027#include <linux/percpu_counter.h>
Chris Mason74493f72007-12-11 09:25:06 -050028#include "hash.h"
Miao Xie995946d2014-04-02 19:51:06 +080029#include "tree-log.h"
Chris Masonfec577f2007-02-26 10:40:21 -050030#include "disk-io.h"
31#include "print-tree.h"
Chris Mason0b86a832008-03-24 15:01:56 -040032#include "volumes.h"
David Woodhouse53b381b2013-01-29 18:40:14 -050033#include "raid56.h"
Chris Mason925baed2008-06-25 16:01:30 -040034#include "locking.h"
Chris Masonfa9c0d792009-04-03 09:47:43 -040035#include "free-space-cache.h"
Omar Sandoval1e144fb2015-09-29 20:50:37 -070036#include "free-space-tree.h"
Miao Xie3fed40c2012-09-13 04:51:36 -060037#include "math.h"
Jeff Mahoney6ab0a202013-11-01 13:07:04 -040038#include "sysfs.h"
Josef Bacikfcebe452014-05-13 17:30:47 -070039#include "qgroup.h"
Chris Masonfec577f2007-02-26 10:40:21 -050040
Arne Jansen709c0482011-09-12 12:22:57 +020041#undef SCRAMBLE_DELAYED_REFS
42
Miao Xie9e622d62012-01-26 15:01:12 -050043/*
44 * control flags for do_chunk_alloc's force field
Chris Mason0e4f8f82011-04-15 16:05:44 -040045 * CHUNK_ALLOC_NO_FORCE means to only allocate a chunk
46 * if we really need one.
47 *
Chris Mason0e4f8f82011-04-15 16:05:44 -040048 * CHUNK_ALLOC_LIMITED means to only try and allocate one
49 * if we have very few chunks already allocated. This is
50 * used as part of the clustering code to help make sure
51 * we have a good pool of storage to cluster in, without
52 * filling the FS with empty chunks
53 *
Miao Xie9e622d62012-01-26 15:01:12 -050054 * CHUNK_ALLOC_FORCE means it must try to allocate one
55 *
Chris Mason0e4f8f82011-04-15 16:05:44 -040056 */
57enum {
58 CHUNK_ALLOC_NO_FORCE = 0,
Miao Xie9e622d62012-01-26 15:01:12 -050059 CHUNK_ALLOC_LIMITED = 1,
60 CHUNK_ALLOC_FORCE = 2,
Chris Mason0e4f8f82011-04-15 16:05:44 -040061};
62
Josef Bacikfb25e912011-07-26 17:00:46 -040063/*
64 * Control how reservations are dealt with.
65 *
66 * RESERVE_FREE - freeing a reservation.
67 * RESERVE_ALLOC - allocating space and we need to update bytes_may_use for
68 * ENOSPC accounting
69 * RESERVE_ALLOC_NO_ACCOUNT - allocating space and we should not update
70 * bytes_may_use as the ENOSPC accounting is done elsewhere
71 */
72enum {
73 RESERVE_FREE = 0,
74 RESERVE_ALLOC = 1,
75 RESERVE_ALLOC_NO_ACCOUNT = 2,
76};
77
Josef Bacikce93ec52014-11-17 15:45:48 -050078static int update_block_group(struct btrfs_trans_handle *trans,
79 struct btrfs_root *root, u64 bytenr,
80 u64 num_bytes, int alloc);
Yan Zheng5d4f98a2009-06-10 10:45:14 -040081static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
82 struct btrfs_root *root,
Qu Wenruoc682f9b2015-03-17 16:59:47 +080083 struct btrfs_delayed_ref_node *node, u64 parent,
Yan Zheng5d4f98a2009-06-10 10:45:14 -040084 u64 root_objectid, u64 owner_objectid,
85 u64 owner_offset, int refs_to_drop,
Qu Wenruoc682f9b2015-03-17 16:59:47 +080086 struct btrfs_delayed_extent_op *extra_op);
Yan Zheng5d4f98a2009-06-10 10:45:14 -040087static void __run_delayed_extent_op(struct btrfs_delayed_extent_op *extent_op,
88 struct extent_buffer *leaf,
89 struct btrfs_extent_item *ei);
90static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
91 struct btrfs_root *root,
92 u64 parent, u64 root_objectid,
93 u64 flags, u64 owner, u64 offset,
94 struct btrfs_key *ins, int ref_mod);
95static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
96 struct btrfs_root *root,
97 u64 parent, u64 root_objectid,
98 u64 flags, struct btrfs_disk_key *key,
Josef Bacikfcebe452014-05-13 17:30:47 -070099 int level, struct btrfs_key *ins,
100 int no_quota);
Josef Bacik6a632092009-02-20 11:00:09 -0500101static int do_chunk_alloc(struct btrfs_trans_handle *trans,
Josef Bacik698d0082012-09-12 14:08:47 -0400102 struct btrfs_root *extent_root, u64 flags,
103 int force);
Yan Zheng11833d62009-09-11 16:11:19 -0400104static int find_next_key(struct btrfs_path *path, int level,
105 struct btrfs_key *key);
Josef Bacik9ed74f22009-09-11 16:12:44 -0400106static void dump_space_info(struct btrfs_space_info *info, u64 bytes,
107 int dump_block_groups);
Josef Bacikfb25e912011-07-26 17:00:46 -0400108static int btrfs_update_reserved_bytes(struct btrfs_block_group_cache *cache,
Miao Xiee570fd22014-06-19 10:42:50 +0800109 u64 num_bytes, int reserve,
110 int delalloc);
Josef Bacik5d803662013-02-07 16:06:02 -0500111static int block_rsv_use_bytes(struct btrfs_block_rsv *block_rsv,
112 u64 num_bytes);
Eric Sandeen48a3b632013-04-25 20:41:01 +0000113int btrfs_pin_extent(struct btrfs_root *root,
114 u64 bytenr, u64 num_bytes, int reserved);
Josef Bacik6a632092009-02-20 11:00:09 -0500115
Josef Bacik817d52f2009-07-13 21:29:25 -0400116static noinline int
117block_group_cache_done(struct btrfs_block_group_cache *cache)
118{
119 smp_mb();
Josef Bacik36cce922013-08-05 11:15:21 -0400120 return cache->cached == BTRFS_CACHE_FINISHED ||
121 cache->cached == BTRFS_CACHE_ERROR;
Josef Bacik817d52f2009-07-13 21:29:25 -0400122}
123
Josef Bacik0f9dd462008-09-23 13:14:11 -0400124static int block_group_bits(struct btrfs_block_group_cache *cache, u64 bits)
125{
126 return (cache->flags & bits) == bits;
127}
128
David Sterba62a45b62011-04-20 15:52:26 +0200129static void btrfs_get_block_group(struct btrfs_block_group_cache *cache)
Josef Bacik11dfe352009-11-13 20:12:59 +0000130{
131 atomic_inc(&cache->count);
132}
133
134void btrfs_put_block_group(struct btrfs_block_group_cache *cache)
135{
Yan, Zhengf0486c62010-05-16 10:46:25 -0400136 if (atomic_dec_and_test(&cache->count)) {
137 WARN_ON(cache->pinned > 0);
138 WARN_ON(cache->reserved > 0);
Li Zefan34d52cb2011-03-29 13:46:06 +0800139 kfree(cache->free_space_ctl);
Josef Bacik11dfe352009-11-13 20:12:59 +0000140 kfree(cache);
Yan, Zhengf0486c62010-05-16 10:46:25 -0400141 }
Josef Bacik11dfe352009-11-13 20:12:59 +0000142}
143
Josef Bacik0f9dd462008-09-23 13:14:11 -0400144/*
145 * this adds the block group to the fs_info rb tree for the block group
146 * cache
147 */
Christoph Hellwigb2950862008-12-02 09:54:17 -0500148static int btrfs_add_block_group_cache(struct btrfs_fs_info *info,
Josef Bacik0f9dd462008-09-23 13:14:11 -0400149 struct btrfs_block_group_cache *block_group)
150{
151 struct rb_node **p;
152 struct rb_node *parent = NULL;
153 struct btrfs_block_group_cache *cache;
154
155 spin_lock(&info->block_group_cache_lock);
156 p = &info->block_group_cache_tree.rb_node;
157
158 while (*p) {
159 parent = *p;
160 cache = rb_entry(parent, struct btrfs_block_group_cache,
161 cache_node);
162 if (block_group->key.objectid < cache->key.objectid) {
163 p = &(*p)->rb_left;
164 } else if (block_group->key.objectid > cache->key.objectid) {
165 p = &(*p)->rb_right;
166 } else {
167 spin_unlock(&info->block_group_cache_lock);
168 return -EEXIST;
169 }
170 }
171
172 rb_link_node(&block_group->cache_node, parent, p);
173 rb_insert_color(&block_group->cache_node,
174 &info->block_group_cache_tree);
Liu Boa1897fd2012-12-27 09:01:23 +0000175
176 if (info->first_logical_byte > block_group->key.objectid)
177 info->first_logical_byte = block_group->key.objectid;
178
Josef Bacik0f9dd462008-09-23 13:14:11 -0400179 spin_unlock(&info->block_group_cache_lock);
180
181 return 0;
182}
183
184/*
185 * This will return the block group at or after bytenr if contains is 0, else
186 * it will return the block group that contains the bytenr
187 */
188static struct btrfs_block_group_cache *
189block_group_cache_tree_search(struct btrfs_fs_info *info, u64 bytenr,
190 int contains)
191{
192 struct btrfs_block_group_cache *cache, *ret = NULL;
193 struct rb_node *n;
194 u64 end, start;
195
196 spin_lock(&info->block_group_cache_lock);
197 n = info->block_group_cache_tree.rb_node;
198
199 while (n) {
200 cache = rb_entry(n, struct btrfs_block_group_cache,
201 cache_node);
202 end = cache->key.objectid + cache->key.offset - 1;
203 start = cache->key.objectid;
204
205 if (bytenr < start) {
206 if (!contains && (!ret || start < ret->key.objectid))
207 ret = cache;
208 n = n->rb_left;
209 } else if (bytenr > start) {
210 if (contains && bytenr <= end) {
211 ret = cache;
212 break;
213 }
214 n = n->rb_right;
215 } else {
216 ret = cache;
217 break;
218 }
219 }
Liu Boa1897fd2012-12-27 09:01:23 +0000220 if (ret) {
Josef Bacik11dfe352009-11-13 20:12:59 +0000221 btrfs_get_block_group(ret);
Liu Boa1897fd2012-12-27 09:01:23 +0000222 if (bytenr == 0 && info->first_logical_byte > ret->key.objectid)
223 info->first_logical_byte = ret->key.objectid;
224 }
Josef Bacik0f9dd462008-09-23 13:14:11 -0400225 spin_unlock(&info->block_group_cache_lock);
226
227 return ret;
228}
229
Yan Zheng11833d62009-09-11 16:11:19 -0400230static int add_excluded_extent(struct btrfs_root *root,
231 u64 start, u64 num_bytes)
Josef Bacik817d52f2009-07-13 21:29:25 -0400232{
Yan Zheng11833d62009-09-11 16:11:19 -0400233 u64 end = start + num_bytes - 1;
234 set_extent_bits(&root->fs_info->freed_extents[0],
235 start, end, EXTENT_UPTODATE, GFP_NOFS);
236 set_extent_bits(&root->fs_info->freed_extents[1],
237 start, end, EXTENT_UPTODATE, GFP_NOFS);
238 return 0;
Josef Bacik817d52f2009-07-13 21:29:25 -0400239}
240
Yan Zheng11833d62009-09-11 16:11:19 -0400241static void free_excluded_extents(struct btrfs_root *root,
242 struct btrfs_block_group_cache *cache)
Josef Bacik817d52f2009-07-13 21:29:25 -0400243{
Yan Zheng11833d62009-09-11 16:11:19 -0400244 u64 start, end;
245
246 start = cache->key.objectid;
247 end = start + cache->key.offset - 1;
248
249 clear_extent_bits(&root->fs_info->freed_extents[0],
250 start, end, EXTENT_UPTODATE, GFP_NOFS);
251 clear_extent_bits(&root->fs_info->freed_extents[1],
252 start, end, EXTENT_UPTODATE, GFP_NOFS);
253}
254
255static int exclude_super_stripes(struct btrfs_root *root,
256 struct btrfs_block_group_cache *cache)
257{
Josef Bacik817d52f2009-07-13 21:29:25 -0400258 u64 bytenr;
259 u64 *logical;
260 int stripe_len;
261 int i, nr, ret;
262
Yan, Zheng06b23312009-11-26 09:31:11 +0000263 if (cache->key.objectid < BTRFS_SUPER_INFO_OFFSET) {
264 stripe_len = BTRFS_SUPER_INFO_OFFSET - cache->key.objectid;
265 cache->bytes_super += stripe_len;
266 ret = add_excluded_extent(root, cache->key.objectid,
267 stripe_len);
Josef Bacik835d9742013-03-19 12:13:25 -0400268 if (ret)
269 return ret;
Yan, Zheng06b23312009-11-26 09:31:11 +0000270 }
271
Josef Bacik817d52f2009-07-13 21:29:25 -0400272 for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
273 bytenr = btrfs_sb_offset(i);
274 ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
275 cache->key.objectid, bytenr,
276 0, &logical, &nr, &stripe_len);
Josef Bacik835d9742013-03-19 12:13:25 -0400277 if (ret)
278 return ret;
Yan Zheng11833d62009-09-11 16:11:19 -0400279
Josef Bacik817d52f2009-07-13 21:29:25 -0400280 while (nr--) {
Josef Bacik51bf5f02013-04-23 12:55:21 -0400281 u64 start, len;
282
283 if (logical[nr] > cache->key.objectid +
284 cache->key.offset)
285 continue;
286
287 if (logical[nr] + stripe_len <= cache->key.objectid)
288 continue;
289
290 start = logical[nr];
291 if (start < cache->key.objectid) {
292 start = cache->key.objectid;
293 len = (logical[nr] + stripe_len) - start;
294 } else {
295 len = min_t(u64, stripe_len,
296 cache->key.objectid +
297 cache->key.offset - start);
298 }
299
300 cache->bytes_super += len;
301 ret = add_excluded_extent(root, start, len);
Josef Bacik835d9742013-03-19 12:13:25 -0400302 if (ret) {
303 kfree(logical);
304 return ret;
305 }
Josef Bacik817d52f2009-07-13 21:29:25 -0400306 }
Yan Zheng11833d62009-09-11 16:11:19 -0400307
Josef Bacik817d52f2009-07-13 21:29:25 -0400308 kfree(logical);
309 }
Josef Bacik817d52f2009-07-13 21:29:25 -0400310 return 0;
311}
312
Yan Zheng11833d62009-09-11 16:11:19 -0400313static struct btrfs_caching_control *
314get_caching_control(struct btrfs_block_group_cache *cache)
315{
316 struct btrfs_caching_control *ctl;
317
318 spin_lock(&cache->lock);
Josef Bacikdde5abe2010-09-16 16:17:03 -0400319 if (!cache->caching_ctl) {
320 spin_unlock(&cache->lock);
321 return NULL;
322 }
323
Yan Zheng11833d62009-09-11 16:11:19 -0400324 ctl = cache->caching_ctl;
325 atomic_inc(&ctl->count);
326 spin_unlock(&cache->lock);
327 return ctl;
328}
329
330static void put_caching_control(struct btrfs_caching_control *ctl)
331{
332 if (atomic_dec_and_test(&ctl->count))
333 kfree(ctl);
334}
335
Josef Bacik0f9dd462008-09-23 13:14:11 -0400336/*
337 * this is only called by cache_block_group, since we could have freed extents
338 * we need to check the pinned_extents for any extents that can't be used yet
339 * since their free space will be released as soon as the transaction commits.
340 */
Omar Sandovala5ed9182015-09-29 20:50:35 -0700341u64 add_new_free_space(struct btrfs_block_group_cache *block_group,
342 struct btrfs_fs_info *info, u64 start, u64 end)
Josef Bacik0f9dd462008-09-23 13:14:11 -0400343{
Josef Bacik817d52f2009-07-13 21:29:25 -0400344 u64 extent_start, extent_end, size, total_added = 0;
Josef Bacik0f9dd462008-09-23 13:14:11 -0400345 int ret;
346
347 while (start < end) {
Yan Zheng11833d62009-09-11 16:11:19 -0400348 ret = find_first_extent_bit(info->pinned_extents, start,
Josef Bacik0f9dd462008-09-23 13:14:11 -0400349 &extent_start, &extent_end,
Josef Bacike6138872012-09-27 17:07:30 -0400350 EXTENT_DIRTY | EXTENT_UPTODATE,
351 NULL);
Josef Bacik0f9dd462008-09-23 13:14:11 -0400352 if (ret)
353 break;
354
Yan, Zheng06b23312009-11-26 09:31:11 +0000355 if (extent_start <= start) {
Josef Bacik0f9dd462008-09-23 13:14:11 -0400356 start = extent_end + 1;
357 } else if (extent_start > start && extent_start < end) {
358 size = extent_start - start;
Josef Bacik817d52f2009-07-13 21:29:25 -0400359 total_added += size;
Josef Bacikea6a4782008-11-20 12:16:16 -0500360 ret = btrfs_add_free_space(block_group, start,
361 size);
Jeff Mahoney79787ea2012-03-12 16:03:00 +0100362 BUG_ON(ret); /* -ENOMEM or logic error */
Josef Bacik0f9dd462008-09-23 13:14:11 -0400363 start = extent_end + 1;
364 } else {
365 break;
366 }
367 }
368
369 if (start < end) {
370 size = end - start;
Josef Bacik817d52f2009-07-13 21:29:25 -0400371 total_added += size;
Josef Bacikea6a4782008-11-20 12:16:16 -0500372 ret = btrfs_add_free_space(block_group, start, size);
Jeff Mahoney79787ea2012-03-12 16:03:00 +0100373 BUG_ON(ret); /* -ENOMEM or logic error */
Josef Bacik0f9dd462008-09-23 13:14:11 -0400374 }
375
Josef Bacik817d52f2009-07-13 21:29:25 -0400376 return total_added;
Josef Bacik0f9dd462008-09-23 13:14:11 -0400377}
378
Omar Sandoval73fa48b2015-09-29 20:50:33 -0700379static int load_extent_tree_free(struct btrfs_caching_control *caching_ctl)
Chris Masone37c9e62007-05-09 20:13:14 -0400380{
Josef Bacikbab39bf2011-06-30 14:42:28 -0400381 struct btrfs_block_group_cache *block_group;
382 struct btrfs_fs_info *fs_info;
Josef Bacikbab39bf2011-06-30 14:42:28 -0400383 struct btrfs_root *extent_root;
Chris Masone37c9e62007-05-09 20:13:14 -0400384 struct btrfs_path *path;
Chris Mason5f39d392007-10-15 16:14:19 -0400385 struct extent_buffer *leaf;
Yan Zheng11833d62009-09-11 16:11:19 -0400386 struct btrfs_key key;
Josef Bacik817d52f2009-07-13 21:29:25 -0400387 u64 total_found = 0;
Yan Zheng11833d62009-09-11 16:11:19 -0400388 u64 last = 0;
389 u32 nritems;
Omar Sandoval73fa48b2015-09-29 20:50:33 -0700390 int ret;
Chris Masonf510cfe2007-10-15 16:14:48 -0400391
Josef Bacikbab39bf2011-06-30 14:42:28 -0400392 block_group = caching_ctl->block_group;
393 fs_info = block_group->fs_info;
394 extent_root = fs_info->extent_root;
395
Chris Masone37c9e62007-05-09 20:13:14 -0400396 path = btrfs_alloc_path();
397 if (!path)
Omar Sandoval73fa48b2015-09-29 20:50:33 -0700398 return -ENOMEM;
Yan7d7d6062007-09-14 16:15:28 -0400399
Josef Bacik817d52f2009-07-13 21:29:25 -0400400 last = max_t(u64, block_group->key.objectid, BTRFS_SUPER_INFO_OFFSET);
Yan Zheng11833d62009-09-11 16:11:19 -0400401
Chris Mason5cd57b22008-06-25 16:01:30 -0400402 /*
Josef Bacik817d52f2009-07-13 21:29:25 -0400403 * We don't want to deadlock with somebody trying to allocate a new
404 * extent for the extent root while also trying to search the extent
405 * root to add free space. So we skip locking and search the commit
406 * root, since its read-only
Chris Mason5cd57b22008-06-25 16:01:30 -0400407 */
408 path->skip_locking = 1;
Josef Bacik817d52f2009-07-13 21:29:25 -0400409 path->search_commit_root = 1;
Josef Bacik026fd312011-05-13 10:32:11 -0400410 path->reada = 1;
Josef Bacik817d52f2009-07-13 21:29:25 -0400411
Yan Zhenge4404d62008-12-12 10:03:26 -0500412 key.objectid = last;
Chris Masone37c9e62007-05-09 20:13:14 -0400413 key.offset = 0;
Yan Zheng11833d62009-09-11 16:11:19 -0400414 key.type = BTRFS_EXTENT_ITEM_KEY;
Chris Mason013f1b12009-07-31 14:57:55 -0400415
Liu Bo52ee28d2013-07-11 17:51:15 +0800416next:
Yan Zheng11833d62009-09-11 16:11:19 -0400417 ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
Chris Masone37c9e62007-05-09 20:13:14 -0400418 if (ret < 0)
Omar Sandoval73fa48b2015-09-29 20:50:33 -0700419 goto out;
Yan Zhenga512bbf2008-12-08 16:46:26 -0500420
Yan Zheng11833d62009-09-11 16:11:19 -0400421 leaf = path->nodes[0];
422 nritems = btrfs_header_nritems(leaf);
423
Chris Masond3977122009-01-05 21:25:51 -0500424 while (1) {
David Sterba7841cb22011-05-31 18:07:27 +0200425 if (btrfs_fs_closing(fs_info) > 1) {
Yan Zhengf25784b2009-07-28 08:41:57 -0400426 last = (u64)-1;
Josef Bacik817d52f2009-07-13 21:29:25 -0400427 break;
Yan Zhengf25784b2009-07-28 08:41:57 -0400428 }
Josef Bacik817d52f2009-07-13 21:29:25 -0400429
Yan Zheng11833d62009-09-11 16:11:19 -0400430 if (path->slots[0] < nritems) {
431 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
432 } else {
433 ret = find_next_key(path, 0, &key);
434 if (ret)
Chris Masone37c9e62007-05-09 20:13:14 -0400435 break;
Josef Bacik817d52f2009-07-13 21:29:25 -0400436
Josef Bacikc9ea7b22013-09-19 10:02:11 -0400437 if (need_resched() ||
Josef Bacik9e351cc2014-03-13 15:42:13 -0400438 rwsem_is_contended(&fs_info->commit_root_sem)) {
Josef Bacik589d8ad2011-05-11 17:30:53 -0400439 caching_ctl->progress = last;
Chris Masonff5714c2011-05-28 07:00:39 -0400440 btrfs_release_path(path);
Josef Bacik9e351cc2014-03-13 15:42:13 -0400441 up_read(&fs_info->commit_root_sem);
Josef Bacik589d8ad2011-05-11 17:30:53 -0400442 mutex_unlock(&caching_ctl->mutex);
Yan Zheng11833d62009-09-11 16:11:19 -0400443 cond_resched();
Omar Sandoval73fa48b2015-09-29 20:50:33 -0700444 mutex_lock(&caching_ctl->mutex);
445 down_read(&fs_info->commit_root_sem);
446 goto next;
Josef Bacik589d8ad2011-05-11 17:30:53 -0400447 }
Josef Bacik0a3896d2013-04-19 14:37:26 -0400448
449 ret = btrfs_next_leaf(extent_root, path);
450 if (ret < 0)
Omar Sandoval73fa48b2015-09-29 20:50:33 -0700451 goto out;
Josef Bacik0a3896d2013-04-19 14:37:26 -0400452 if (ret)
453 break;
Josef Bacik589d8ad2011-05-11 17:30:53 -0400454 leaf = path->nodes[0];
455 nritems = btrfs_header_nritems(leaf);
456 continue;
Yan Zheng11833d62009-09-11 16:11:19 -0400457 }
Josef Bacik817d52f2009-07-13 21:29:25 -0400458
Liu Bo52ee28d2013-07-11 17:51:15 +0800459 if (key.objectid < last) {
460 key.objectid = last;
461 key.offset = 0;
462 key.type = BTRFS_EXTENT_ITEM_KEY;
463
464 caching_ctl->progress = last;
465 btrfs_release_path(path);
466 goto next;
467 }
468
Yan Zheng11833d62009-09-11 16:11:19 -0400469 if (key.objectid < block_group->key.objectid) {
470 path->slots[0]++;
Josef Bacik817d52f2009-07-13 21:29:25 -0400471 continue;
Chris Masone37c9e62007-05-09 20:13:14 -0400472 }
Josef Bacik0f9dd462008-09-23 13:14:11 -0400473
Chris Masone37c9e62007-05-09 20:13:14 -0400474 if (key.objectid >= block_group->key.objectid +
Josef Bacik0f9dd462008-09-23 13:14:11 -0400475 block_group->key.offset)
Yan7d7d6062007-09-14 16:15:28 -0400476 break;
Yan7d7d6062007-09-14 16:15:28 -0400477
Josef Bacik3173a182013-03-07 14:22:04 -0500478 if (key.type == BTRFS_EXTENT_ITEM_KEY ||
479 key.type == BTRFS_METADATA_ITEM_KEY) {
Josef Bacik817d52f2009-07-13 21:29:25 -0400480 total_found += add_new_free_space(block_group,
481 fs_info, last,
482 key.objectid);
Josef Bacik3173a182013-03-07 14:22:04 -0500483 if (key.type == BTRFS_METADATA_ITEM_KEY)
484 last = key.objectid +
David Sterba707e8a02014-06-04 19:22:26 +0200485 fs_info->tree_root->nodesize;
Josef Bacik3173a182013-03-07 14:22:04 -0500486 else
487 last = key.objectid + key.offset;
Josef Bacik817d52f2009-07-13 21:29:25 -0400488
Omar Sandoval73fa48b2015-09-29 20:50:33 -0700489 if (total_found > CACHING_CTL_WAKE_UP) {
Yan Zheng11833d62009-09-11 16:11:19 -0400490 total_found = 0;
491 wake_up(&caching_ctl->wait);
492 }
Josef Bacik817d52f2009-07-13 21:29:25 -0400493 }
Chris Masone37c9e62007-05-09 20:13:14 -0400494 path->slots[0]++;
495 }
Josef Bacikef8bbdf2008-09-23 13:14:11 -0400496 ret = 0;
Josef Bacik817d52f2009-07-13 21:29:25 -0400497
498 total_found += add_new_free_space(block_group, fs_info, last,
499 block_group->key.objectid +
500 block_group->key.offset);
Yan Zheng11833d62009-09-11 16:11:19 -0400501 caching_ctl->progress = (u64)-1;
Josef Bacik817d52f2009-07-13 21:29:25 -0400502
Omar Sandoval73fa48b2015-09-29 20:50:33 -0700503out:
504 btrfs_free_path(path);
505 return ret;
506}
507
508static noinline void caching_thread(struct btrfs_work *work)
509{
510 struct btrfs_block_group_cache *block_group;
511 struct btrfs_fs_info *fs_info;
512 struct btrfs_caching_control *caching_ctl;
513 int ret;
514
515 caching_ctl = container_of(work, struct btrfs_caching_control, work);
516 block_group = caching_ctl->block_group;
517 fs_info = block_group->fs_info;
518
519 mutex_lock(&caching_ctl->mutex);
520 down_read(&fs_info->commit_root_sem);
521
Omar Sandoval1e144fb2015-09-29 20:50:37 -0700522 if (btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE))
523 ret = load_free_space_tree(caching_ctl);
524 else
525 ret = load_extent_tree_free(caching_ctl);
Omar Sandoval73fa48b2015-09-29 20:50:33 -0700526
Josef Bacik817d52f2009-07-13 21:29:25 -0400527 spin_lock(&block_group->lock);
Yan Zheng11833d62009-09-11 16:11:19 -0400528 block_group->caching_ctl = NULL;
Omar Sandoval73fa48b2015-09-29 20:50:33 -0700529 block_group->cached = ret ? BTRFS_CACHE_ERROR : BTRFS_CACHE_FINISHED;
Josef Bacik817d52f2009-07-13 21:29:25 -0400530 spin_unlock(&block_group->lock);
531
Josef Bacik9e351cc2014-03-13 15:42:13 -0400532 up_read(&fs_info->commit_root_sem);
Omar Sandoval73fa48b2015-09-29 20:50:33 -0700533 free_excluded_extents(fs_info->extent_root, block_group);
Yan Zheng11833d62009-09-11 16:11:19 -0400534 mutex_unlock(&caching_ctl->mutex);
Omar Sandoval73fa48b2015-09-29 20:50:33 -0700535
Yan Zheng11833d62009-09-11 16:11:19 -0400536 wake_up(&caching_ctl->wait);
537
538 put_caching_control(caching_ctl);
Josef Bacik11dfe352009-11-13 20:12:59 +0000539 btrfs_put_block_group(block_group);
Josef Bacik817d52f2009-07-13 21:29:25 -0400540}
541
Josef Bacik9d66e232010-08-25 16:54:15 -0400542static int cache_block_group(struct btrfs_block_group_cache *cache,
Josef Bacik9d66e232010-08-25 16:54:15 -0400543 int load_cache_only)
Josef Bacik817d52f2009-07-13 21:29:25 -0400544{
Josef Bacik291c7d22011-11-14 13:52:14 -0500545 DEFINE_WAIT(wait);
Yan Zheng11833d62009-09-11 16:11:19 -0400546 struct btrfs_fs_info *fs_info = cache->fs_info;
547 struct btrfs_caching_control *caching_ctl;
Josef Bacik817d52f2009-07-13 21:29:25 -0400548 int ret = 0;
549
Josef Bacik291c7d22011-11-14 13:52:14 -0500550 caching_ctl = kzalloc(sizeof(*caching_ctl), GFP_NOFS);
Jeff Mahoney79787ea2012-03-12 16:03:00 +0100551 if (!caching_ctl)
552 return -ENOMEM;
Josef Bacik291c7d22011-11-14 13:52:14 -0500553
554 INIT_LIST_HEAD(&caching_ctl->list);
555 mutex_init(&caching_ctl->mutex);
556 init_waitqueue_head(&caching_ctl->wait);
557 caching_ctl->block_group = cache;
558 caching_ctl->progress = cache->key.objectid;
559 atomic_set(&caching_ctl->count, 1);
Liu Bo9e0af232014-08-15 23:36:53 +0800560 btrfs_init_work(&caching_ctl->work, btrfs_cache_helper,
561 caching_thread, NULL, NULL);
Josef Bacik291c7d22011-11-14 13:52:14 -0500562
563 spin_lock(&cache->lock);
564 /*
565 * This should be a rare occasion, but this could happen I think in the
566 * case where one thread starts to load the space cache info, and then
567 * some other thread starts a transaction commit which tries to do an
568 * allocation while the other thread is still loading the space cache
569 * info. The previous loop should have kept us from choosing this block
570 * group, but if we've moved to the state where we will wait on caching
571 * block groups we need to first check if we're doing a fast load here,
572 * so we can wait for it to finish, otherwise we could end up allocating
573 * from a block group who's cache gets evicted for one reason or
574 * another.
575 */
576 while (cache->cached == BTRFS_CACHE_FAST) {
577 struct btrfs_caching_control *ctl;
578
579 ctl = cache->caching_ctl;
580 atomic_inc(&ctl->count);
581 prepare_to_wait(&ctl->wait, &wait, TASK_UNINTERRUPTIBLE);
582 spin_unlock(&cache->lock);
583
584 schedule();
585
586 finish_wait(&ctl->wait, &wait);
587 put_caching_control(ctl);
588 spin_lock(&cache->lock);
589 }
590
591 if (cache->cached != BTRFS_CACHE_NO) {
592 spin_unlock(&cache->lock);
593 kfree(caching_ctl);
Yan Zheng11833d62009-09-11 16:11:19 -0400594 return 0;
Josef Bacik291c7d22011-11-14 13:52:14 -0500595 }
596 WARN_ON(cache->caching_ctl);
597 cache->caching_ctl = caching_ctl;
598 cache->cached = BTRFS_CACHE_FAST;
599 spin_unlock(&cache->lock);
Yan Zheng11833d62009-09-11 16:11:19 -0400600
Josef Bacikd53ba472012-04-12 16:03:57 -0400601 if (fs_info->mount_opt & BTRFS_MOUNT_SPACE_CACHE) {
Josef Bacikcb83b7b2014-11-26 11:52:54 -0500602 mutex_lock(&caching_ctl->mutex);
Josef Bacik9d66e232010-08-25 16:54:15 -0400603 ret = load_free_space_cache(fs_info, cache);
604
605 spin_lock(&cache->lock);
606 if (ret == 1) {
Josef Bacik291c7d22011-11-14 13:52:14 -0500607 cache->caching_ctl = NULL;
Josef Bacik9d66e232010-08-25 16:54:15 -0400608 cache->cached = BTRFS_CACHE_FINISHED;
609 cache->last_byte_to_unpin = (u64)-1;
Josef Bacikcb83b7b2014-11-26 11:52:54 -0500610 caching_ctl->progress = (u64)-1;
Josef Bacik9d66e232010-08-25 16:54:15 -0400611 } else {
Josef Bacik291c7d22011-11-14 13:52:14 -0500612 if (load_cache_only) {
613 cache->caching_ctl = NULL;
614 cache->cached = BTRFS_CACHE_NO;
615 } else {
616 cache->cached = BTRFS_CACHE_STARTED;
Filipe Manana4f69cb92014-11-26 15:28:51 +0000617 cache->has_caching_ctl = 1;
Josef Bacik291c7d22011-11-14 13:52:14 -0500618 }
Josef Bacik9d66e232010-08-25 16:54:15 -0400619 }
620 spin_unlock(&cache->lock);
Josef Bacikcb83b7b2014-11-26 11:52:54 -0500621 mutex_unlock(&caching_ctl->mutex);
622
Josef Bacik291c7d22011-11-14 13:52:14 -0500623 wake_up(&caching_ctl->wait);
Josef Bacik3c148742011-02-02 15:53:47 +0000624 if (ret == 1) {
Josef Bacik291c7d22011-11-14 13:52:14 -0500625 put_caching_control(caching_ctl);
Josef Bacik3c148742011-02-02 15:53:47 +0000626 free_excluded_extents(fs_info->extent_root, cache);
Josef Bacik9d66e232010-08-25 16:54:15 -0400627 return 0;
Josef Bacik3c148742011-02-02 15:53:47 +0000628 }
Josef Bacik291c7d22011-11-14 13:52:14 -0500629 } else {
630 /*
Omar Sandoval1e144fb2015-09-29 20:50:37 -0700631 * We're either using the free space tree or no caching at all.
632 * Set cached to the appropriate value and wakeup any waiters.
Josef Bacik291c7d22011-11-14 13:52:14 -0500633 */
634 spin_lock(&cache->lock);
635 if (load_cache_only) {
636 cache->caching_ctl = NULL;
637 cache->cached = BTRFS_CACHE_NO;
638 } else {
639 cache->cached = BTRFS_CACHE_STARTED;
Filipe Manana4f69cb92014-11-26 15:28:51 +0000640 cache->has_caching_ctl = 1;
Josef Bacik291c7d22011-11-14 13:52:14 -0500641 }
Josef Bacik817d52f2009-07-13 21:29:25 -0400642 spin_unlock(&cache->lock);
Josef Bacik291c7d22011-11-14 13:52:14 -0500643 wake_up(&caching_ctl->wait);
644 }
645
646 if (load_cache_only) {
647 put_caching_control(caching_ctl);
Yan Zheng11833d62009-09-11 16:11:19 -0400648 return 0;
Josef Bacik817d52f2009-07-13 21:29:25 -0400649 }
Josef Bacik817d52f2009-07-13 21:29:25 -0400650
Josef Bacik9e351cc2014-03-13 15:42:13 -0400651 down_write(&fs_info->commit_root_sem);
Josef Bacik291c7d22011-11-14 13:52:14 -0500652 atomic_inc(&caching_ctl->count);
Yan Zheng11833d62009-09-11 16:11:19 -0400653 list_add_tail(&caching_ctl->list, &fs_info->caching_block_groups);
Josef Bacik9e351cc2014-03-13 15:42:13 -0400654 up_write(&fs_info->commit_root_sem);
Yan Zheng11833d62009-09-11 16:11:19 -0400655
Josef Bacik11dfe352009-11-13 20:12:59 +0000656 btrfs_get_block_group(cache);
Yan Zheng11833d62009-09-11 16:11:19 -0400657
Qu Wenruoe66f0bb2014-02-28 10:46:12 +0800658 btrfs_queue_work(fs_info->caching_workers, &caching_ctl->work);
Josef Bacik817d52f2009-07-13 21:29:25 -0400659
Josef Bacikef8bbdf2008-09-23 13:14:11 -0400660 return ret;
Chris Masone37c9e62007-05-09 20:13:14 -0400661}
662
Josef Bacik0f9dd462008-09-23 13:14:11 -0400663/*
664 * return the block group that starts at or after bytenr
665 */
Chris Masond3977122009-01-05 21:25:51 -0500666static struct btrfs_block_group_cache *
667btrfs_lookup_first_block_group(struct btrfs_fs_info *info, u64 bytenr)
Chris Mason0ef3e662008-05-24 14:04:53 -0400668{
Josef Bacik0f9dd462008-09-23 13:14:11 -0400669 struct btrfs_block_group_cache *cache;
Chris Mason0ef3e662008-05-24 14:04:53 -0400670
Josef Bacik0f9dd462008-09-23 13:14:11 -0400671 cache = block_group_cache_tree_search(info, bytenr, 0);
Chris Mason0ef3e662008-05-24 14:04:53 -0400672
Josef Bacik0f9dd462008-09-23 13:14:11 -0400673 return cache;
Chris Mason0ef3e662008-05-24 14:04:53 -0400674}
675
Josef Bacik0f9dd462008-09-23 13:14:11 -0400676/*
Sankar P9f556842009-05-14 13:52:22 -0400677 * return the block group that contains the given bytenr
Josef Bacik0f9dd462008-09-23 13:14:11 -0400678 */
Chris Masond3977122009-01-05 21:25:51 -0500679struct btrfs_block_group_cache *btrfs_lookup_block_group(
680 struct btrfs_fs_info *info,
681 u64 bytenr)
Chris Masonbe744172007-05-06 10:15:01 -0400682{
Josef Bacik0f9dd462008-09-23 13:14:11 -0400683 struct btrfs_block_group_cache *cache;
Chris Masonbe744172007-05-06 10:15:01 -0400684
Josef Bacik0f9dd462008-09-23 13:14:11 -0400685 cache = block_group_cache_tree_search(info, bytenr, 1);
Chris Mason96b51792007-10-15 16:15:19 -0400686
Josef Bacik0f9dd462008-09-23 13:14:11 -0400687 return cache;
Chris Masonbe744172007-05-06 10:15:01 -0400688}
Chris Mason0b86a832008-03-24 15:01:56 -0400689
Josef Bacik0f9dd462008-09-23 13:14:11 -0400690static struct btrfs_space_info *__find_space_info(struct btrfs_fs_info *info,
691 u64 flags)
Chris Mason6324fbf2008-03-24 15:01:59 -0400692{
Josef Bacik0f9dd462008-09-23 13:14:11 -0400693 struct list_head *head = &info->space_info;
Josef Bacik0f9dd462008-09-23 13:14:11 -0400694 struct btrfs_space_info *found;
Chris Mason4184ea72009-03-10 12:39:20 -0400695
Ilya Dryomov52ba6922012-01-16 22:04:47 +0200696 flags &= BTRFS_BLOCK_GROUP_TYPE_MASK;
Yan, Zhengb742bb822010-05-16 10:46:24 -0400697
Chris Mason4184ea72009-03-10 12:39:20 -0400698 rcu_read_lock();
699 list_for_each_entry_rcu(found, head, list) {
Josef Bacik67377732010-09-16 16:19:09 -0400700 if (found->flags & flags) {
Chris Mason4184ea72009-03-10 12:39:20 -0400701 rcu_read_unlock();
Josef Bacik0f9dd462008-09-23 13:14:11 -0400702 return found;
Chris Mason4184ea72009-03-10 12:39:20 -0400703 }
Josef Bacik0f9dd462008-09-23 13:14:11 -0400704 }
Chris Mason4184ea72009-03-10 12:39:20 -0400705 rcu_read_unlock();
Josef Bacik0f9dd462008-09-23 13:14:11 -0400706 return NULL;
Chris Mason6324fbf2008-03-24 15:01:59 -0400707}
708
Chris Mason4184ea72009-03-10 12:39:20 -0400709/*
710 * after adding space to the filesystem, we need to clear the full flags
711 * on all the space infos.
712 */
713void btrfs_clear_space_info_full(struct btrfs_fs_info *info)
714{
715 struct list_head *head = &info->space_info;
716 struct btrfs_space_info *found;
717
718 rcu_read_lock();
719 list_for_each_entry_rcu(found, head, list)
720 found->full = 0;
721 rcu_read_unlock();
722}
723
Filipe Manana1a4ed8f2014-10-27 10:44:24 +0000724/* simple helper to search for an existing data extent at a given offset */
725int btrfs_lookup_data_extent(struct btrfs_root *root, u64 start, u64 len)
Chris Masone02119d2008-09-05 16:13:11 -0400726{
727 int ret;
728 struct btrfs_key key;
Zheng Yan31840ae2008-09-23 13:14:14 -0400729 struct btrfs_path *path;
Chris Masone02119d2008-09-05 16:13:11 -0400730
Zheng Yan31840ae2008-09-23 13:14:14 -0400731 path = btrfs_alloc_path();
Mark Fashehd8926bb2011-07-13 10:38:47 -0700732 if (!path)
733 return -ENOMEM;
734
Chris Masone02119d2008-09-05 16:13:11 -0400735 key.objectid = start;
736 key.offset = len;
Josef Bacik3173a182013-03-07 14:22:04 -0500737 key.type = BTRFS_EXTENT_ITEM_KEY;
Chris Masone02119d2008-09-05 16:13:11 -0400738 ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, path,
739 0, 0);
Zheng Yan31840ae2008-09-23 13:14:14 -0400740 btrfs_free_path(path);
Chris Mason7bb86312007-12-11 09:25:06 -0500741 return ret;
742}
743
/*
 * helper function to lookup reference count and flags of a tree block.
 *
 * the head node for delayed ref is used to store the sum of all the
 * reference count modifications queued up in the rbtree. the head
 * node may also store the extent flags to set. This way you can check
 * to see what the reference count and extent flags would be if all of
 * the delayed refs are not processed.
 *
 * @trans:    may be NULL, in which case we search the commit root without
 *            locking and skip the delayed-ref merge below
 * @bytenr:   start of the extent to look up
 * @offset:   extent length (or, with skinny metadata, the value used as the
 *            METADATA_ITEM key offset)
 * @metadata: nonzero when looking up a tree block rather than a data extent
 * @refs:     out, may be NULL: resulting reference count
 * @flags:    out, may be NULL: resulting extent flags
 */
int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans,
			     struct btrfs_root *root, u64 bytenr,
			     u64 offset, int metadata, u64 *refs, u64 *flags)
{
	struct btrfs_delayed_ref_head *head;
	struct btrfs_delayed_ref_root *delayed_refs;
	struct btrfs_path *path;
	struct btrfs_extent_item *ei;
	struct extent_buffer *leaf;
	struct btrfs_key key;
	u32 item_size;
	u64 num_refs;
	u64 extent_flags;
	int ret;

	/*
	 * If we don't have skinny metadata, don't bother doing anything
	 * different
	 */
	if (metadata && !btrfs_fs_incompat(root->fs_info, SKINNY_METADATA)) {
		offset = root->nodesize;
		metadata = 0;
	}

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	/* no transaction: read-only search of the commit root, no locks */
	if (!trans) {
		path->skip_locking = 1;
		path->search_commit_root = 1;
	}

search_again:
	key.objectid = bytenr;
	key.offset = offset;
	if (metadata)
		key.type = BTRFS_METADATA_ITEM_KEY;
	else
		key.type = BTRFS_EXTENT_ITEM_KEY;

	ret = btrfs_search_slot(trans, root->fs_info->extent_root,
				&key, path, 0, 0);
	if (ret < 0)
		goto out_free;

	/*
	 * No skinny METADATA_ITEM found: the tree block may still be
	 * recorded as an old-style EXTENT_ITEM of nodesize length at the
	 * previous slot; accept that as a hit.
	 */
	if (ret > 0 && metadata && key.type == BTRFS_METADATA_ITEM_KEY) {
		if (path->slots[0]) {
			path->slots[0]--;
			btrfs_item_key_to_cpu(path->nodes[0], &key,
					      path->slots[0]);
			if (key.objectid == bytenr &&
			    key.type == BTRFS_EXTENT_ITEM_KEY &&
			    key.offset == root->nodesize)
				ret = 0;
		}
	}

	if (ret == 0) {
		leaf = path->nodes[0];
		item_size = btrfs_item_size_nr(leaf, path->slots[0]);
		if (item_size >= sizeof(*ei)) {
			ei = btrfs_item_ptr(leaf, path->slots[0],
					    struct btrfs_extent_item);
			num_refs = btrfs_extent_refs(leaf, ei);
			extent_flags = btrfs_extent_flags(leaf, ei);
		} else {
			/* smaller item: must be the pre-v1 extent format */
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
			struct btrfs_extent_item_v0 *ei0;
			BUG_ON(item_size != sizeof(*ei0));
			ei0 = btrfs_item_ptr(leaf, path->slots[0],
					     struct btrfs_extent_item_v0);
			num_refs = btrfs_extent_refs_v0(leaf, ei0);
			/* FIXME: this isn't correct for data */
			extent_flags = BTRFS_BLOCK_FLAG_FULL_BACKREF;
#else
			BUG();
#endif
		}
		BUG_ON(num_refs == 0);
	} else {
		/* not in the tree yet; may exist only as delayed refs */
		num_refs = 0;
		extent_flags = 0;
		ret = 0;
	}

	if (!trans)
		goto out;

	/*
	 * Fold in any pending reference count / flag updates queued on the
	 * delayed ref head for this bytenr.
	 */
	delayed_refs = &trans->transaction->delayed_refs;
	spin_lock(&delayed_refs->lock);
	head = btrfs_find_delayed_ref_head(trans, bytenr);
	if (head) {
		if (!mutex_trylock(&head->mutex)) {
			/* pin the head so it can't go away once unlocked */
			atomic_inc(&head->node.refs);
			spin_unlock(&delayed_refs->lock);

			btrfs_release_path(path);

			/*
			 * Mutex was contended, block until it's released and try
			 * again
			 */
			mutex_lock(&head->mutex);
			mutex_unlock(&head->mutex);
			btrfs_put_delayed_ref(&head->node);
			goto search_again;
		}
		spin_lock(&head->lock);
		if (head->extent_op && head->extent_op->update_flags)
			extent_flags |= head->extent_op->flags_to_set;
		else
			BUG_ON(num_refs == 0);

		num_refs += head->node.ref_mod;
		spin_unlock(&head->lock);
		mutex_unlock(&head->mutex);
	}
	spin_unlock(&delayed_refs->lock);
out:
	WARN_ON(num_refs == 0);
	if (refs)
		*refs = num_refs;
	if (flags)
		*flags = extent_flags;
out_free:
	btrfs_free_path(path);
	return ret;
}
882
883/*
Chris Masond8d5f3e2007-12-11 12:42:00 -0500884 * Back reference rules. Back refs have three main goals:
885 *
886 * 1) differentiate between all holders of references to an extent so that
887 * when a reference is dropped we can make sure it was a valid reference
888 * before freeing the extent.
889 *
890 * 2) Provide enough information to quickly find the holders of an extent
891 * if we notice a given block is corrupted or bad.
892 *
893 * 3) Make it easy to migrate blocks for FS shrinking or storage pool
894 * maintenance. This is actually the same as #2, but with a slightly
895 * different use case.
896 *
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400897 * There are two kinds of back refs. The implicit back refs is optimized
898 * for pointers in non-shared tree blocks. For a given pointer in a block,
899 * back refs of this kind provide information about the block's owner tree
900 * and the pointer's key. These information allow us to find the block by
901 * b-tree searching. The full back refs is for pointers in tree blocks not
902 * referenced by their owner trees. The location of tree block is recorded
903 * in the back refs. Actually the full back refs is generic, and can be
904 * used in all cases the implicit back refs is used. The major shortcoming
905 * of the full back refs is its overhead. Every time a tree block gets
906 * COWed, we have to update back refs entry for all pointers in it.
907 *
908 * For a newly allocated tree block, we use implicit back refs for
909 * pointers in it. This means most tree related operations only involve
910 * implicit back refs. For a tree block created in old transaction, the
911 * only way to drop a reference to it is COW it. So we can detect the
912 * event that tree block loses its owner tree's reference and do the
913 * back refs conversion.
914 *
915 * When a tree block is COW'd through a tree, there are four cases:
916 *
917 * The reference count of the block is one and the tree is the block's
918 * owner tree. Nothing to do in this case.
919 *
920 * The reference count of the block is one and the tree is not the
921 * block's owner tree. In this case, full back refs is used for pointers
922 * in the block. Remove these full back refs, add implicit back refs for
923 * every pointers in the new block.
924 *
925 * The reference count of the block is greater than one and the tree is
926 * the block's owner tree. In this case, implicit back refs is used for
927 * pointers in the block. Add full back refs for every pointers in the
928 * block, increase lower level extents' reference counts. The original
929 * implicit back refs are entailed to the new block.
930 *
931 * The reference count of the block is greater than one and the tree is
932 * not the block's owner tree. Add implicit back refs for every pointer in
933 * the new block, increase lower level extents' reference count.
934 *
935 * Back Reference Key composing:
936 *
937 * The key objectid corresponds to the first byte in the extent,
938 * The key type is used to differentiate between types of back refs.
939 * There are different meanings of the key offset for different types
940 * of back refs.
941 *
Chris Masond8d5f3e2007-12-11 12:42:00 -0500942 * File extents can be referenced by:
943 *
944 * - multiple snapshots, subvolumes, or different generations in one subvol
Zheng Yan31840ae2008-09-23 13:14:14 -0400945 * - different files inside a single subvolume
Chris Masond8d5f3e2007-12-11 12:42:00 -0500946 * - different offsets inside a file (bookend extents in file.c)
947 *
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400948 * The extent ref structure for the implicit back refs has fields for:
Chris Masond8d5f3e2007-12-11 12:42:00 -0500949 *
950 * - Objectid of the subvolume root
Chris Masond8d5f3e2007-12-11 12:42:00 -0500951 * - objectid of the file holding the reference
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400952 * - original offset in the file
953 * - how many bookend extents
Zheng Yan31840ae2008-09-23 13:14:14 -0400954 *
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400955 * The key offset for the implicit back refs is hash of the first
956 * three fields.
Chris Masond8d5f3e2007-12-11 12:42:00 -0500957 *
 * The extent ref structure for the full back refs has a field for:
Chris Masond8d5f3e2007-12-11 12:42:00 -0500959 *
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400960 * - number of pointers in the tree leaf
Chris Masond8d5f3e2007-12-11 12:42:00 -0500961 *
 * The key offset for the full back refs is the first byte of
 * the tree leaf
Chris Masond8d5f3e2007-12-11 12:42:00 -0500964 *
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400965 * When a file extent is allocated, The implicit back refs is used.
966 * the fields are filled in:
Chris Masond8d5f3e2007-12-11 12:42:00 -0500967 *
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400968 * (root_key.objectid, inode objectid, offset in file, 1)
969 *
 * When a file extent is removed by file truncation, we find the
971 * corresponding implicit back refs and check the following fields:
972 *
973 * (btrfs_header_owner(leaf), inode objectid, offset in file)
Chris Masond8d5f3e2007-12-11 12:42:00 -0500974 *
975 * Btree extents can be referenced by:
976 *
977 * - Different subvolumes
Chris Masond8d5f3e2007-12-11 12:42:00 -0500978 *
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400979 * Both the implicit back refs and the full back refs for tree blocks
980 * only consist of key. The key offset for the implicit back refs is
981 * objectid of block's owner tree. The key offset for the full back refs
982 * is the first byte of parent block.
Chris Masond8d5f3e2007-12-11 12:42:00 -0500983 *
 * When implicit back refs are used, information about the lowest key and
 * the level of the tree block is required. This information is stored in
 * the tree block info structure.
Chris Masond8d5f3e2007-12-11 12:42:00 -0500987 */
Zheng Yan31840ae2008-09-23 13:14:14 -0400988
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Convert an old (v0) extent item at @path into the current extent item
 * format, leaving @extra_size bytes of extra room in the item for the
 * caller.  When @owner is (u64)-1 the owner is recovered by scanning
 * forward for a BTRFS_EXTENT_REF_V0_KEY item of the same extent.
 * Returns 0 on success or a negative errno from the tree searches.
 */
static int convert_extent_item_v0(struct btrfs_trans_handle *trans,
				  struct btrfs_root *root,
				  struct btrfs_path *path,
				  u64 owner, u32 extra_size)
{
	struct btrfs_extent_item *item;
	struct btrfs_extent_item_v0 *ei0;
	struct btrfs_extent_ref_v0 *ref0;
	struct btrfs_tree_block_info *bi;
	struct extent_buffer *leaf;
	struct btrfs_key key;
	struct btrfs_key found_key;
	u32 new_size = sizeof(*item);
	u64 refs;
	int ret;

	leaf = path->nodes[0];
	/* caller must point us at a v0 extent item */
	BUG_ON(btrfs_item_size_nr(leaf, path->slots[0]) != sizeof(*ei0));

	btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
	ei0 = btrfs_item_ptr(leaf, path->slots[0],
			     struct btrfs_extent_item_v0);
	refs = btrfs_extent_refs_v0(leaf, ei0);

	if (owner == (u64)-1) {
		/* scan forward for a v0 ref item to learn the owner */
		while (1) {
			if (path->slots[0] >= btrfs_header_nritems(leaf)) {
				ret = btrfs_next_leaf(root, path);
				if (ret < 0)
					return ret;
				BUG_ON(ret > 0); /* Corruption */
				leaf = path->nodes[0];
			}
			btrfs_item_key_to_cpu(leaf, &found_key,
					      path->slots[0]);
			BUG_ON(key.objectid != found_key.objectid);
			if (found_key.type != BTRFS_EXTENT_REF_V0_KEY) {
				path->slots[0]++;
				continue;
			}
			ref0 = btrfs_item_ptr(leaf, path->slots[0],
					      struct btrfs_extent_ref_v0);
			owner = btrfs_ref_objectid_v0(leaf, ref0);
			break;
		}
	}
	btrfs_release_path(path);

	/* tree blocks additionally carry a btrfs_tree_block_info */
	if (owner < BTRFS_FIRST_FREE_OBJECTID)
		new_size += sizeof(*bi);

	new_size -= sizeof(*ei0);
	/* re-search, asking for enough free leaf space for the growth */
	ret = btrfs_search_slot(trans, root, &key, path,
				new_size + extra_size, 1);
	if (ret < 0)
		return ret;
	BUG_ON(ret); /* Corruption */

	btrfs_extend_item(root, path, new_size);

	leaf = path->nodes[0];
	item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
	btrfs_set_extent_refs(leaf, item, refs);
	/* FIXME: get real generation */
	btrfs_set_extent_generation(leaf, item, 0);
	if (owner < BTRFS_FIRST_FREE_OBJECTID) {
		btrfs_set_extent_flags(leaf, item,
				       BTRFS_EXTENT_FLAG_TREE_BLOCK |
				       BTRFS_BLOCK_FLAG_FULL_BACKREF);
		bi = (struct btrfs_tree_block_info *)(item + 1);
		/* FIXME: get first key of the block */
		memset_extent_buffer(leaf, 0, (unsigned long)bi, sizeof(*bi));
		btrfs_set_tree_block_level(leaf, bi, (int)owner);
	} else {
		btrfs_set_extent_flags(leaf, item, BTRFS_EXTENT_FLAG_DATA);
	}
	btrfs_mark_buffer_dirty(leaf);
	return 0;
}
#endif
1070
/*
 * Hash the (root objectid, inode objectid, file offset) triple that
 * identifies an implicit data backref.  The result is used as the key
 * offset for BTRFS_EXTENT_DATA_REF_KEY items (see lookup/insert below).
 */
static u64 hash_extent_data_ref(u64 root_objectid, u64 owner, u64 offset)
{
	u32 high_crc = ~(u32)0;
	u32 low_crc = ~(u32)0;
	__le64 lenum;

	/* crc each field in its little-endian on-disk representation */
	lenum = cpu_to_le64(root_objectid);
	high_crc = btrfs_crc32c(high_crc, &lenum, sizeof(lenum));
	lenum = cpu_to_le64(owner);
	low_crc = btrfs_crc32c(low_crc, &lenum, sizeof(lenum));
	lenum = cpu_to_le64(offset);
	low_crc = btrfs_crc32c(low_crc, &lenum, sizeof(lenum));

	/*
	 * NOTE(review): shift is 31, not 32, so bit 31 of high_crc overlaps
	 * low_crc.  Presumably this must stay as-is because the value lands
	 * in on-disk keys -- do not "fix" the shift without confirming.
	 */
	return ((u64)high_crc << 31) ^ (u64)low_crc;
}
1086
1087static u64 hash_extent_data_ref_item(struct extent_buffer *leaf,
1088 struct btrfs_extent_data_ref *ref)
1089{
1090 return hash_extent_data_ref(btrfs_extent_data_ref_root(leaf, ref),
1091 btrfs_extent_data_ref_objectid(leaf, ref),
1092 btrfs_extent_data_ref_offset(leaf, ref));
1093}
1094
1095static int match_extent_data_ref(struct extent_buffer *leaf,
1096 struct btrfs_extent_data_ref *ref,
1097 u64 root_objectid, u64 owner, u64 offset)
1098{
1099 if (btrfs_extent_data_ref_root(leaf, ref) != root_objectid ||
1100 btrfs_extent_data_ref_objectid(leaf, ref) != owner ||
1101 btrfs_extent_data_ref_offset(leaf, ref) != offset)
1102 return 0;
1103 return 1;
1104}
1105
/*
 * Look up the data backref item for an extent.  With a non-zero @parent we
 * search for a shared data ref keyed on the parent block; otherwise we
 * search for an extent data ref keyed on the hash of (root_objectid, owner,
 * offset) and scan forward over hash collisions until the fields match.
 * Returns 0 with @path at the item, -ENOENT if absent, or a negative errno.
 */
static noinline int lookup_extent_data_ref(struct btrfs_trans_handle *trans,
					   struct btrfs_root *root,
					   struct btrfs_path *path,
					   u64 bytenr, u64 parent,
					   u64 root_objectid,
					   u64 owner, u64 offset)
{
	struct btrfs_key key;
	struct btrfs_extent_data_ref *ref;
	struct extent_buffer *leaf;
	u32 nritems;
	int ret;
	int recow;
	int err = -ENOENT;

	key.objectid = bytenr;
	if (parent) {
		key.type = BTRFS_SHARED_DATA_REF_KEY;
		key.offset = parent;
	} else {
		key.type = BTRFS_EXTENT_DATA_REF_KEY;
		key.offset = hash_extent_data_ref(root_objectid,
						  owner, offset);
	}
again:
	recow = 0;
	ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
	if (ret < 0) {
		err = ret;
		goto fail;
	}

	if (parent) {
		/* shared refs are an exact key match, no collision scan */
		if (!ret)
			return 0;
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
		/* fall back to the old v0 ref key for converted filesystems */
		key.type = BTRFS_EXTENT_REF_V0_KEY;
		btrfs_release_path(path);
		ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
		if (ret < 0) {
			err = ret;
			goto fail;
		}
		if (!ret)
			return 0;
#endif
		goto fail;
	}

	leaf = path->nodes[0];
	nritems = btrfs_header_nritems(leaf);
	while (1) {
		if (path->slots[0] >= nritems) {
			ret = btrfs_next_leaf(root, path);
			if (ret < 0)
				err = ret;
			if (ret)
				goto fail;

			leaf = path->nodes[0];
			nritems = btrfs_header_nritems(leaf);
			/* crossed a leaf boundary; remember to re-search */
			recow = 1;
		}

		/* stop once we leave this extent's data ref key range */
		btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
		if (key.objectid != bytenr ||
		    key.type != BTRFS_EXTENT_DATA_REF_KEY)
			goto fail;

		ref = btrfs_item_ptr(leaf, path->slots[0],
				     struct btrfs_extent_data_ref);

		if (match_extent_data_ref(leaf, ref, root_objectid,
					  owner, offset)) {
			if (recow) {
				/*
				 * Match sits past a leaf boundary; re-search
				 * from scratch so the path is positioned by
				 * btrfs_search_slot (with cow) -- presumably
				 * required by callers that modify the item.
				 */
				btrfs_release_path(path);
				goto again;
			}
			err = 0;
			break;
		}
		path->slots[0]++;
	}
fail:
	return err;
}
1192
/*
 * Insert a new data backref item for an extent, or add @refs_to_add to an
 * existing one.  Shared refs (non-zero @parent) are keyed on the parent
 * block; non-shared refs are keyed on a hash, so on -EEXIST we probe
 * successive key offsets until we find either a matching ref or free key
 * space.  Returns 0 on success (path released) or a negative errno.
 */
static noinline int insert_extent_data_ref(struct btrfs_trans_handle *trans,
					   struct btrfs_root *root,
					   struct btrfs_path *path,
					   u64 bytenr, u64 parent,
					   u64 root_objectid, u64 owner,
					   u64 offset, int refs_to_add)
{
	struct btrfs_key key;
	struct extent_buffer *leaf;
	u32 size;
	u32 num_refs;
	int ret;

	key.objectid = bytenr;
	if (parent) {
		key.type = BTRFS_SHARED_DATA_REF_KEY;
		key.offset = parent;
		size = sizeof(struct btrfs_shared_data_ref);
	} else {
		key.type = BTRFS_EXTENT_DATA_REF_KEY;
		key.offset = hash_extent_data_ref(root_objectid,
						  owner, offset);
		size = sizeof(struct btrfs_extent_data_ref);
	}

	/* -EEXIST leaves the path at the already-present item */
	ret = btrfs_insert_empty_item(trans, root, path, &key, size);
	if (ret && ret != -EEXIST)
		goto fail;

	leaf = path->nodes[0];
	if (parent) {
		struct btrfs_shared_data_ref *ref;
		ref = btrfs_item_ptr(leaf, path->slots[0],
				     struct btrfs_shared_data_ref);
		if (ret == 0) {
			/* freshly inserted item: initialize the count */
			btrfs_set_shared_data_ref_count(leaf, ref, refs_to_add);
		} else {
			/* existing item: bump the count */
			num_refs = btrfs_shared_data_ref_count(leaf, ref);
			num_refs += refs_to_add;
			btrfs_set_shared_data_ref_count(leaf, ref, num_refs);
		}
	} else {
		struct btrfs_extent_data_ref *ref;
		/* linear-probe over hash collisions at successive offsets */
		while (ret == -EEXIST) {
			ref = btrfs_item_ptr(leaf, path->slots[0],
					     struct btrfs_extent_data_ref);
			if (match_extent_data_ref(leaf, ref, root_objectid,
						  owner, offset))
				break;
			btrfs_release_path(path);
			key.offset++;
			ret = btrfs_insert_empty_item(trans, root, path, &key,
						      size);
			if (ret && ret != -EEXIST)
				goto fail;

			leaf = path->nodes[0];
		}
		ref = btrfs_item_ptr(leaf, path->slots[0],
				     struct btrfs_extent_data_ref);
		if (ret == 0) {
			/* new item: fill in all backref fields */
			btrfs_set_extent_data_ref_root(leaf, ref,
						       root_objectid);
			btrfs_set_extent_data_ref_objectid(leaf, ref, owner);
			btrfs_set_extent_data_ref_offset(leaf, ref, offset);
			btrfs_set_extent_data_ref_count(leaf, ref, refs_to_add);
		} else {
			/* matched existing ref: bump the count */
			num_refs = btrfs_extent_data_ref_count(leaf, ref);
			num_refs += refs_to_add;
			btrfs_set_extent_data_ref_count(leaf, ref, num_refs);
		}
	}
	btrfs_mark_buffer_dirty(leaf);
	ret = 0;
fail:
	btrfs_release_path(path);
	return ret;
}
1271
/*
 * Drop @refs_to_drop references from the data ref item that @path points
 * at (an extent data ref, a shared data ref, or a v0 compat ref).  When the
 * count reaches zero the item is deleted and *last_ref is set to 1.
 * Returns 0 on success or the error from btrfs_del_item.
 */
static noinline int remove_extent_data_ref(struct btrfs_trans_handle *trans,
					   struct btrfs_root *root,
					   struct btrfs_path *path,
					   int refs_to_drop, int *last_ref)
{
	struct btrfs_key key;
	struct btrfs_extent_data_ref *ref1 = NULL;
	struct btrfs_shared_data_ref *ref2 = NULL;
	struct extent_buffer *leaf;
	u32 num_refs = 0;
	int ret = 0;

	leaf = path->nodes[0];
	btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);

	/* read the current count from whichever ref flavor this is */
	if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
		ref1 = btrfs_item_ptr(leaf, path->slots[0],
				      struct btrfs_extent_data_ref);
		num_refs = btrfs_extent_data_ref_count(leaf, ref1);
	} else if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
		ref2 = btrfs_item_ptr(leaf, path->slots[0],
				      struct btrfs_shared_data_ref);
		num_refs = btrfs_shared_data_ref_count(leaf, ref2);
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
	} else if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
		struct btrfs_extent_ref_v0 *ref0;
		ref0 = btrfs_item_ptr(leaf, path->slots[0],
				      struct btrfs_extent_ref_v0);
		num_refs = btrfs_ref_count_v0(leaf, ref0);
#endif
	} else {
		BUG();
	}

	BUG_ON(num_refs < refs_to_drop);
	num_refs -= refs_to_drop;

	if (num_refs == 0) {
		/* last reference: remove the item entirely */
		ret = btrfs_del_item(trans, root, path);
		*last_ref = 1;
	} else {
		/* write back the decremented count */
		if (key.type == BTRFS_EXTENT_DATA_REF_KEY)
			btrfs_set_extent_data_ref_count(leaf, ref1, num_refs);
		else if (key.type == BTRFS_SHARED_DATA_REF_KEY)
			btrfs_set_shared_data_ref_count(leaf, ref2, num_refs);
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
		else {
			struct btrfs_extent_ref_v0 *ref0;
			ref0 = btrfs_item_ptr(leaf, path->slots[0],
					struct btrfs_extent_ref_v0);
			btrfs_set_ref_count_v0(leaf, ref0, num_refs);
		}
#endif
		btrfs_mark_buffer_dirty(leaf);
	}
	return ret;
}
1329
/*
 * Return the reference count stored in a data ref.  When @iref is non-NULL
 * the ref lives inline inside an extent item and @iref points at it;
 * otherwise the item at path->slots[0] is itself a data ref item (extent,
 * shared, or v0 compat).  Unknown item types yield 0 with a WARN.
 */
static noinline u32 extent_data_ref_count(struct btrfs_path *path,
					  struct btrfs_extent_inline_ref *iref)
{
	struct btrfs_key key;
	struct extent_buffer *leaf;
	struct btrfs_extent_data_ref *ref1;
	struct btrfs_shared_data_ref *ref2;
	u32 num_refs = 0;

	leaf = path->nodes[0];
	btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
	if (iref) {
		/* inline ref: the count layout depends on the inline type */
		if (btrfs_extent_inline_ref_type(leaf, iref) ==
		    BTRFS_EXTENT_DATA_REF_KEY) {
			ref1 = (struct btrfs_extent_data_ref *)(&iref->offset);
			num_refs = btrfs_extent_data_ref_count(leaf, ref1);
		} else {
			ref2 = (struct btrfs_shared_data_ref *)(iref + 1);
			num_refs = btrfs_shared_data_ref_count(leaf, ref2);
		}
	} else if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
		ref1 = btrfs_item_ptr(leaf, path->slots[0],
				      struct btrfs_extent_data_ref);
		num_refs = btrfs_extent_data_ref_count(leaf, ref1);
	} else if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
		ref2 = btrfs_item_ptr(leaf, path->slots[0],
				      struct btrfs_shared_data_ref);
		num_refs = btrfs_shared_data_ref_count(leaf, ref2);
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
	} else if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
		struct btrfs_extent_ref_v0 *ref0;
		ref0 = btrfs_item_ptr(leaf, path->slots[0],
				      struct btrfs_extent_ref_v0);
		num_refs = btrfs_ref_count_v0(leaf, ref0);
#endif
	} else {
		WARN_ON(1);
	}
	return num_refs;
}
1370
1371static noinline int lookup_tree_block_ref(struct btrfs_trans_handle *trans,
1372 struct btrfs_root *root,
1373 struct btrfs_path *path,
1374 u64 bytenr, u64 parent,
1375 u64 root_objectid)
1376{
1377 struct btrfs_key key;
1378 int ret;
1379
1380 key.objectid = bytenr;
1381 if (parent) {
1382 key.type = BTRFS_SHARED_BLOCK_REF_KEY;
1383 key.offset = parent;
1384 } else {
1385 key.type = BTRFS_TREE_BLOCK_REF_KEY;
1386 key.offset = root_objectid;
1387 }
1388
1389 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
1390 if (ret > 0)
1391 ret = -ENOENT;
1392#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
1393 if (ret == -ENOENT && parent) {
David Sterbab3b4aa72011-04-21 01:20:15 +02001394 btrfs_release_path(path);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001395 key.type = BTRFS_EXTENT_REF_V0_KEY;
1396 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
1397 if (ret > 0)
1398 ret = -ENOENT;
1399 }
1400#endif
1401 return ret;
1402}
1403
1404static noinline int insert_tree_block_ref(struct btrfs_trans_handle *trans,
1405 struct btrfs_root *root,
1406 struct btrfs_path *path,
1407 u64 bytenr, u64 parent,
1408 u64 root_objectid)
1409{
1410 struct btrfs_key key;
1411 int ret;
1412
1413 key.objectid = bytenr;
1414 if (parent) {
1415 key.type = BTRFS_SHARED_BLOCK_REF_KEY;
1416 key.offset = parent;
1417 } else {
1418 key.type = BTRFS_TREE_BLOCK_REF_KEY;
1419 key.offset = root_objectid;
1420 }
1421
1422 ret = btrfs_insert_empty_item(trans, root, path, &key, 0);
David Sterbab3b4aa72011-04-21 01:20:15 +02001423 btrfs_release_path(path);
Zheng Yan31840ae2008-09-23 13:14:14 -04001424 return ret;
1425}
1426
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001427static inline int extent_ref_type(u64 parent, u64 owner)
1428{
1429 int type;
1430 if (owner < BTRFS_FIRST_FREE_OBJECTID) {
1431 if (parent > 0)
1432 type = BTRFS_SHARED_BLOCK_REF_KEY;
1433 else
1434 type = BTRFS_TREE_BLOCK_REF_KEY;
1435 } else {
1436 if (parent > 0)
1437 type = BTRFS_SHARED_DATA_REF_KEY;
1438 else
1439 type = BTRFS_EXTENT_DATA_REF_KEY;
1440 }
1441 return type;
1442}
1443
Yan Zheng2c47e6052009-06-27 21:07:35 -04001444static int find_next_key(struct btrfs_path *path, int level,
1445 struct btrfs_key *key)
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001446
1447{
Yan Zheng2c47e6052009-06-27 21:07:35 -04001448 for (; level < BTRFS_MAX_LEVEL; level++) {
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001449 if (!path->nodes[level])
1450 break;
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001451 if (path->slots[level] + 1 >=
1452 btrfs_header_nritems(path->nodes[level]))
1453 continue;
1454 if (level == 0)
1455 btrfs_item_key_to_cpu(path->nodes[level], key,
1456 path->slots[level] + 1);
1457 else
1458 btrfs_node_key_to_cpu(path->nodes[level], key,
1459 path->slots[level] + 1);
1460 return 0;
1461 }
1462 return 1;
1463}
1464
/*
 * look for inline back ref. if back ref is found, *ref_ret is set
 * to the address of inline back ref, and 0 is returned.
 *
 * if back ref isn't found, *ref_ret is set to the address where it
 * should be inserted, and -ENOENT is returned.
 *
 * if insert is true and there are too many inline back refs, the path
 * points to the extent item, and -EAGAIN is returned.
 *
 * NOTE: inline back refs are ordered in the same way that back ref
 * items in the tree are ordered.
 */
static noinline_for_stack
int lookup_inline_extent_backref(struct btrfs_trans_handle *trans,
				 struct btrfs_root *root,
				 struct btrfs_path *path,
				 struct btrfs_extent_inline_ref **ref_ret,
				 u64 bytenr, u64 num_bytes,
				 u64 parent, u64 root_objectid,
				 u64 owner, u64 offset, int insert)
{
	struct btrfs_key key;
	struct extent_buffer *leaf;
	struct btrfs_extent_item *ei;
	struct btrfs_extent_inline_ref *iref;
	u64 flags;
	u64 item_size;
	unsigned long ptr;	/* cursor into the inline ref area */
	unsigned long end;	/* one past the end of the extent item */
	int extra_size;
	int type;
	int want;		/* ref type we are searching for */
	int ret;
	int err = 0;
	bool skinny_metadata = btrfs_fs_incompat(root->fs_info,
						 SKINNY_METADATA);

	key.objectid = bytenr;
	key.type = BTRFS_EXTENT_ITEM_KEY;
	key.offset = num_bytes;

	want = extent_ref_type(parent, owner);
	if (insert) {
		/* ask the search to keep room for one more inline ref */
		extra_size = btrfs_extent_inline_ref_size(want);
		path->keep_locks = 1;
	} else
		extra_size = -1;

	/*
	 * Owner is our parent level, so we can just add one to get the level
	 * for the block we are interested in.
	 */
	if (skinny_metadata && owner < BTRFS_FIRST_FREE_OBJECTID) {
		key.type = BTRFS_METADATA_ITEM_KEY;
		key.offset = owner;
	}

again:
	ret = btrfs_search_slot(trans, root, &key, path, extra_size, 1);
	if (ret < 0) {
		err = ret;
		goto out;
	}

	/*
	 * We may be a newly converted file system which still has the old fat
	 * extent entries for metadata, so try and see if we have one of those.
	 */
	if (ret > 0 && skinny_metadata) {
		skinny_metadata = false;
		if (path->slots[0]) {
			/* check the item just before the insertion point */
			path->slots[0]--;
			btrfs_item_key_to_cpu(path->nodes[0], &key,
					      path->slots[0]);
			if (key.objectid == bytenr &&
			    key.type == BTRFS_EXTENT_ITEM_KEY &&
			    key.offset == num_bytes)
				ret = 0;
		}
		if (ret) {
			/* not found; redo the search with the fat key */
			key.objectid = bytenr;
			key.type = BTRFS_EXTENT_ITEM_KEY;
			key.offset = num_bytes;
			btrfs_release_path(path);
			goto again;
		}
	}

	if (ret && !insert) {
		err = -ENOENT;
		goto out;
	} else if (WARN_ON(ret)) {
		/* insert expects the extent item to already exist */
		err = -EIO;
		goto out;
	}

	leaf = path->nodes[0];
	item_size = btrfs_item_size_nr(leaf, path->slots[0]);
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
	if (item_size < sizeof(*ei)) {
		/* v0 extent item: convert it in place before going on */
		if (!insert) {
			err = -ENOENT;
			goto out;
		}
		ret = convert_extent_item_v0(trans, root, path, owner,
					     extra_size);
		if (ret < 0) {
			err = ret;
			goto out;
		}
		leaf = path->nodes[0];
		item_size = btrfs_item_size_nr(leaf, path->slots[0]);
	}
#endif
	BUG_ON(item_size < sizeof(*ei));

	ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
	flags = btrfs_extent_flags(leaf, ei);

	ptr = (unsigned long)(ei + 1);
	end = (unsigned long)ei + item_size;

	/* non-skinny tree blocks carry a btrfs_tree_block_info header */
	if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK && !skinny_metadata) {
		ptr += sizeof(struct btrfs_tree_block_info);
		BUG_ON(ptr > end);
	}

	/*
	 * Walk the inline refs in sort order.  Either we find a matching
	 * ref (err becomes 0, ptr points at it) or we stop at the first
	 * position that sorts after the wanted ref (err stays -ENOENT
	 * and ptr is the insertion point).
	 */
	err = -ENOENT;
	while (1) {
		if (ptr >= end) {
			WARN_ON(ptr > end);
			break;
		}
		iref = (struct btrfs_extent_inline_ref *)ptr;
		type = btrfs_extent_inline_ref_type(leaf, iref);
		if (want < type)
			break;
		if (want > type) {
			ptr += btrfs_extent_inline_ref_size(type);
			continue;
		}

		if (type == BTRFS_EXTENT_DATA_REF_KEY) {
			struct btrfs_extent_data_ref *dref;
			dref = (struct btrfs_extent_data_ref *)(&iref->offset);
			if (match_extent_data_ref(leaf, dref, root_objectid,
						  owner, offset)) {
				err = 0;
				break;
			}
			/* data refs of equal type are ordered by hash */
			if (hash_extent_data_ref_item(leaf, dref) <
			    hash_extent_data_ref(root_objectid, owner, offset))
				break;
		} else {
			u64 ref_offset;
			ref_offset = btrfs_extent_inline_ref_offset(leaf, iref);
			if (parent > 0) {
				/* shared refs are ordered by parent bytenr */
				if (parent == ref_offset) {
					err = 0;
					break;
				}
				if (ref_offset < parent)
					break;
			} else {
				/* tree block refs are ordered by root id */
				if (root_objectid == ref_offset) {
					err = 0;
					break;
				}
				if (ref_offset < root_objectid)
					break;
			}
		}
		ptr += btrfs_extent_inline_ref_size(type);
	}
	if (err == -ENOENT && insert) {
		if (item_size + extra_size >=
		    BTRFS_MAX_EXTENT_ITEM_SIZE(root)) {
			/* item would grow too large; caller adds keyed ref */
			err = -EAGAIN;
			goto out;
		}
		/*
		 * To add new inline back ref, we have to make sure
		 * there is no corresponding back ref item.
		 * For simplicity, we just do not add new inline back
		 * ref if there is any kind of item for this block
		 */
		if (find_next_key(path, 0, &key) == 0 &&
		    key.objectid == bytenr &&
		    key.type < BTRFS_BLOCK_GROUP_ITEM_KEY) {
			err = -EAGAIN;
			goto out;
		}
	}
	*ref_ret = (struct btrfs_extent_inline_ref *)ptr;
out:
	if (insert) {
		path->keep_locks = 0;
		btrfs_unlock_up_safe(path, 1);
	}
	return err;
}
1667
/*
 * helper to add new inline back ref
 *
 * Expects path to point at the extent item and iref at the insertion
 * offset inside it (as produced by lookup_inline_extent_backref()
 * returning -ENOENT with insert set).  The item is extended by the size
 * of one inline ref, later refs are shifted up, and the new ref is
 * written in place.  The extent item's total ref count is also bumped
 * by refs_to_add and any pending extent_op is applied.
 */
static noinline_for_stack
void setup_inline_extent_backref(struct btrfs_root *root,
				 struct btrfs_path *path,
				 struct btrfs_extent_inline_ref *iref,
				 u64 parent, u64 root_objectid,
				 u64 owner, u64 offset, int refs_to_add,
				 struct btrfs_delayed_extent_op *extent_op)
{
	struct extent_buffer *leaf;
	struct btrfs_extent_item *ei;
	unsigned long ptr;
	unsigned long end;
	unsigned long item_offset;	/* iref's offset within the item */
	u64 refs;
	int size;
	int type;

	leaf = path->nodes[0];
	ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
	/* remember the insertion offset before the item is extended */
	item_offset = (unsigned long)iref - (unsigned long)ei;

	type = extent_ref_type(parent, owner);
	size = btrfs_extent_inline_ref_size(type);

	btrfs_extend_item(root, path, size);

	/* re-read the item pointer after extending the item */
	ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
	refs = btrfs_extent_refs(leaf, ei);
	refs += refs_to_add;
	btrfs_set_extent_refs(leaf, ei, refs);
	if (extent_op)
		__run_delayed_extent_op(extent_op, leaf, ei);

	ptr = (unsigned long)ei + item_offset;
	end = (unsigned long)ei + btrfs_item_size_nr(leaf, path->slots[0]);
	/* shift refs that sort after the new one out of the way */
	if (ptr < end - size)
		memmove_extent_buffer(leaf, ptr + size, ptr,
				      end - size - ptr);

	iref = (struct btrfs_extent_inline_ref *)ptr;
	btrfs_set_extent_inline_ref_type(leaf, iref, type);
	if (type == BTRFS_EXTENT_DATA_REF_KEY) {
		struct btrfs_extent_data_ref *dref;
		dref = (struct btrfs_extent_data_ref *)(&iref->offset);
		btrfs_set_extent_data_ref_root(leaf, dref, root_objectid);
		btrfs_set_extent_data_ref_objectid(leaf, dref, owner);
		btrfs_set_extent_data_ref_offset(leaf, dref, offset);
		btrfs_set_extent_data_ref_count(leaf, dref, refs_to_add);
	} else if (type == BTRFS_SHARED_DATA_REF_KEY) {
		struct btrfs_shared_data_ref *sref;
		sref = (struct btrfs_shared_data_ref *)(iref + 1);
		btrfs_set_shared_data_ref_count(leaf, sref, refs_to_add);
		btrfs_set_extent_inline_ref_offset(leaf, iref, parent);
	} else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
		btrfs_set_extent_inline_ref_offset(leaf, iref, parent);
	} else {
		/* remaining type keys the owning root in the offset field */
		btrfs_set_extent_inline_ref_offset(leaf, iref, root_objectid);
	}
	btrfs_mark_buffer_dirty(leaf);
}
1731
1732static int lookup_extent_backref(struct btrfs_trans_handle *trans,
1733 struct btrfs_root *root,
1734 struct btrfs_path *path,
1735 struct btrfs_extent_inline_ref **ref_ret,
1736 u64 bytenr, u64 num_bytes, u64 parent,
1737 u64 root_objectid, u64 owner, u64 offset)
1738{
1739 int ret;
1740
1741 ret = lookup_inline_extent_backref(trans, root, path, ref_ret,
1742 bytenr, num_bytes, parent,
1743 root_objectid, owner, offset, 0);
1744 if (ret != -ENOENT)
1745 return ret;
1746
David Sterbab3b4aa72011-04-21 01:20:15 +02001747 btrfs_release_path(path);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001748 *ref_ret = NULL;
1749
1750 if (owner < BTRFS_FIRST_FREE_OBJECTID) {
1751 ret = lookup_tree_block_ref(trans, root, path, bytenr, parent,
1752 root_objectid);
1753 } else {
1754 ret = lookup_extent_data_ref(trans, root, path, bytenr, parent,
1755 root_objectid, owner, offset);
1756 }
1757 return ret;
1758}
1759
/*
 * helper to update/remove inline back ref
 *
 * Adds refs_to_mod (which may be negative) to both the extent item's
 * total ref count and the per-ref count held in the inline ref at iref.
 * When the per-ref count drops to zero the inline ref is spliced out of
 * the item and *last_ref is set to 1.
 */
static noinline_for_stack
void update_inline_extent_backref(struct btrfs_root *root,
				  struct btrfs_path *path,
				  struct btrfs_extent_inline_ref *iref,
				  int refs_to_mod,
				  struct btrfs_delayed_extent_op *extent_op,
				  int *last_ref)
{
	struct extent_buffer *leaf;
	struct btrfs_extent_item *ei;
	struct btrfs_extent_data_ref *dref = NULL;
	struct btrfs_shared_data_ref *sref = NULL;
	unsigned long ptr;
	unsigned long end;
	u32 item_size;
	int size;
	int type;
	u64 refs;

	leaf = path->nodes[0];
	ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
	/* first adjust the extent item's overall ref count */
	refs = btrfs_extent_refs(leaf, ei);
	WARN_ON(refs_to_mod < 0 && refs + refs_to_mod <= 0);
	refs += refs_to_mod;
	btrfs_set_extent_refs(leaf, ei, refs);
	if (extent_op)
		__run_delayed_extent_op(extent_op, leaf, ei);

	type = btrfs_extent_inline_ref_type(leaf, iref);

	/* then fetch the count stored inside this particular inline ref */
	if (type == BTRFS_EXTENT_DATA_REF_KEY) {
		dref = (struct btrfs_extent_data_ref *)(&iref->offset);
		refs = btrfs_extent_data_ref_count(leaf, dref);
	} else if (type == BTRFS_SHARED_DATA_REF_KEY) {
		sref = (struct btrfs_shared_data_ref *)(iref + 1);
		refs = btrfs_shared_data_ref_count(leaf, sref);
	} else {
		/* tree block refs carry an implicit count of 1 */
		refs = 1;
		BUG_ON(refs_to_mod != -1);
	}

	BUG_ON(refs_to_mod < 0 && refs < -refs_to_mod);
	refs += refs_to_mod;

	if (refs > 0) {
		if (type == BTRFS_EXTENT_DATA_REF_KEY)
			btrfs_set_extent_data_ref_count(leaf, dref, refs);
		else
			btrfs_set_shared_data_ref_count(leaf, sref, refs);
	} else {
		/* last use of this ref: splice it out and shrink the item */
		*last_ref = 1;
		size =  btrfs_extent_inline_ref_size(type);
		item_size = btrfs_item_size_nr(leaf, path->slots[0]);
		ptr = (unsigned long)iref;
		end = (unsigned long)ei + item_size;
		if (ptr + size < end)
			memmove_extent_buffer(leaf, ptr, ptr + size,
					      end - ptr - size);
		item_size -= size;
		btrfs_truncate_item(root, path, item_size, 1);
	}
	btrfs_mark_buffer_dirty(leaf);
}
1826
1827static noinline_for_stack
1828int insert_inline_extent_backref(struct btrfs_trans_handle *trans,
1829 struct btrfs_root *root,
1830 struct btrfs_path *path,
1831 u64 bytenr, u64 num_bytes, u64 parent,
1832 u64 root_objectid, u64 owner,
1833 u64 offset, int refs_to_add,
1834 struct btrfs_delayed_extent_op *extent_op)
1835{
1836 struct btrfs_extent_inline_ref *iref;
1837 int ret;
1838
1839 ret = lookup_inline_extent_backref(trans, root, path, &iref,
1840 bytenr, num_bytes, parent,
1841 root_objectid, owner, offset, 1);
1842 if (ret == 0) {
1843 BUG_ON(owner < BTRFS_FIRST_FREE_OBJECTID);
Tsutomu Itohafe5fea2013-04-16 05:18:22 +00001844 update_inline_extent_backref(root, path, iref,
Josef Bacikfcebe452014-05-13 17:30:47 -07001845 refs_to_add, extent_op, NULL);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001846 } else if (ret == -ENOENT) {
Tsutomu Itohfd279fa2013-04-16 05:19:11 +00001847 setup_inline_extent_backref(root, path, iref, parent,
Jeff Mahoney143bede2012-03-01 14:56:26 +01001848 root_objectid, owner, offset,
1849 refs_to_add, extent_op);
1850 ret = 0;
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001851 }
1852 return ret;
1853}
1854
1855static int insert_extent_backref(struct btrfs_trans_handle *trans,
1856 struct btrfs_root *root,
1857 struct btrfs_path *path,
1858 u64 bytenr, u64 parent, u64 root_objectid,
1859 u64 owner, u64 offset, int refs_to_add)
1860{
1861 int ret;
1862 if (owner < BTRFS_FIRST_FREE_OBJECTID) {
1863 BUG_ON(refs_to_add != 1);
1864 ret = insert_tree_block_ref(trans, root, path, bytenr,
1865 parent, root_objectid);
1866 } else {
1867 ret = insert_extent_data_ref(trans, root, path, bytenr,
1868 parent, root_objectid,
1869 owner, offset, refs_to_add);
1870 }
1871 return ret;
1872}
1873
1874static int remove_extent_backref(struct btrfs_trans_handle *trans,
1875 struct btrfs_root *root,
1876 struct btrfs_path *path,
1877 struct btrfs_extent_inline_ref *iref,
Josef Bacikfcebe452014-05-13 17:30:47 -07001878 int refs_to_drop, int is_data, int *last_ref)
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001879{
Jeff Mahoney143bede2012-03-01 14:56:26 +01001880 int ret = 0;
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001881
1882 BUG_ON(!is_data && refs_to_drop != 1);
1883 if (iref) {
Tsutomu Itohafe5fea2013-04-16 05:18:22 +00001884 update_inline_extent_backref(root, path, iref,
Josef Bacikfcebe452014-05-13 17:30:47 -07001885 -refs_to_drop, NULL, last_ref);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001886 } else if (is_data) {
Josef Bacikfcebe452014-05-13 17:30:47 -07001887 ret = remove_extent_data_ref(trans, root, path, refs_to_drop,
1888 last_ref);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001889 } else {
Josef Bacikfcebe452014-05-13 17:30:47 -07001890 *last_ref = 1;
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001891 ret = btrfs_del_item(trans, root, path);
1892 }
1893 return ret;
1894}
1895
/* true if b falls within the half-open range [first, first + len) */
#define in_range(b, first, len)	((b) >= (first) && (b) < (first) + (len))
/*
 * Issue discard(s) covering [start, start + len) on bdev, stepping
 * around any btrfs superblock mirrors that fall inside the range so
 * they are never discarded.  Bytes actually discarded are accumulated
 * into *discarded_bytes.  Returns 0, or the first error from
 * blkdev_issue_discard() other than -EOPNOTSUPP for the intermediate
 * chunks (the final chunk's -EOPNOTSUPP is returned to the caller).
 */
static int btrfs_issue_discard(struct block_device *bdev, u64 start, u64 len,
			       u64 *discarded_bytes)
{
	int j, ret = 0;
	u64 bytes_left, end;
	u64 aligned_start = ALIGN(start, 1 << 9);	/* 512-byte sectors */

	/* shrink an unaligned range inwards to sector boundaries */
	if (WARN_ON(start != aligned_start)) {
		len -= aligned_start - start;
		len = round_down(len, 1 << 9);
		start = aligned_start;
	}

	*discarded_bytes = 0;

	if (!len)
		return 0;

	end = start + len;
	bytes_left = len;

	/* Skip any superblocks on this device. */
	for (j = 0; j < BTRFS_SUPER_MIRROR_MAX; j++) {
		u64 sb_start = btrfs_sb_offset(j);
		u64 sb_end = sb_start + BTRFS_SUPER_INFO_SIZE;
		u64 size = sb_start - start;	/* chunk before this sb */

		/* ignore mirrors that do not overlap the range at all */
		if (!in_range(sb_start, start, bytes_left) &&
		    !in_range(sb_end, start, bytes_left) &&
		    !in_range(start, sb_start, BTRFS_SUPER_INFO_SIZE))
			continue;

		/*
		 * Superblock spans beginning of range.  Adjust start and
		 * try again.
		 */
		if (sb_start <= start) {
			start += sb_end - start;
			if (start > end) {
				bytes_left = 0;
				break;
			}
			bytes_left = end - start;
			continue;
		}

		/* discard the chunk that precedes this superblock */
		if (size) {
			ret = blkdev_issue_discard(bdev, start >> 9, size >> 9,
						   GFP_NOFS, 0);
			if (!ret)
				*discarded_bytes += size;
			else if (ret != -EOPNOTSUPP)
				return ret;
		}

		/* resume just past the superblock */
		start = sb_end;
		if (start > end) {
			bytes_left = 0;
			break;
		}
		bytes_left = end - start;
	}

	/* discard whatever is left after the last overlapping superblock */
	if (bytes_left) {
		ret = blkdev_issue_discard(bdev, start >> 9, bytes_left >> 9,
					   GFP_NOFS, 0);
		if (!ret)
			*discarded_bytes += bytes_left;
	}
	return ret;
}
Chris Mason15916de2008-11-19 21:17:22 -05001968
/*
 * Discard the physical sectors backing the logical extent
 * [bytenr, bytenr + num_bytes) on every stripe whose device supports
 * discard.  Discard is best-effort: -EOPNOTSUPP from a device is
 * ignored.  If actual_bytes is non-NULL it receives the total number
 * of bytes the devices reported as discarded.
 */
int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr,
			 u64 num_bytes, u64 *actual_bytes)
{
	int ret;
	u64 discarded_bytes = 0;
	struct btrfs_bio *bbio = NULL;


	/* Tell the block device(s) that the sectors can be discarded */
	ret = btrfs_map_block(root->fs_info, REQ_DISCARD,
			      bytenr, &num_bytes, &bbio, 0);
	/* Error condition is -ENOMEM */
	if (!ret) {
		struct btrfs_bio_stripe *stripe = bbio->stripes;
		int i;


		for (i = 0; i < bbio->num_stripes; i++, stripe++) {
			u64 bytes;
			if (!stripe->dev->can_discard)
				continue;

			ret = btrfs_issue_discard(stripe->dev->bdev,
						  stripe->physical,
						  stripe->length,
						  &bytes);
			if (!ret)
				discarded_bytes += bytes;
			else if (ret != -EOPNOTSUPP)
				break; /* Logic errors or -ENOMEM, or -EIO but I don't know how that could happen JDM */

			/*
			 * Just in case we get back EOPNOTSUPP for some reason,
			 * just ignore the return value so we don't screw up
			 * people calling discard_extent.
			 */
			ret = 0;
		}
		btrfs_put_bbio(bbio);
	}

	if (actual_bytes)
		*actual_bytes = discarded_bytes;


	if (ret == -EOPNOTSUPP)
		ret = 0;
	return ret;
}
2018
Jeff Mahoney79787ea2012-03-12 16:03:00 +01002019/* Can return -ENOMEM */
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002020int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
2021 struct btrfs_root *root,
2022 u64 bytenr, u64 num_bytes, u64 parent,
Josef Bacikfcebe452014-05-13 17:30:47 -07002023 u64 root_objectid, u64 owner, u64 offset,
2024 int no_quota)
Zheng Yan31840ae2008-09-23 13:14:14 -04002025{
2026 int ret;
Arne Jansen66d7e7f2011-09-12 15:26:38 +02002027 struct btrfs_fs_info *fs_info = root->fs_info;
2028
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002029 BUG_ON(owner < BTRFS_FIRST_FREE_OBJECTID &&
2030 root_objectid == BTRFS_TREE_LOG_OBJECTID);
Zheng Yan31840ae2008-09-23 13:14:14 -04002031
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002032 if (owner < BTRFS_FIRST_FREE_OBJECTID) {
Arne Jansen66d7e7f2011-09-12 15:26:38 +02002033 ret = btrfs_add_delayed_tree_ref(fs_info, trans, bytenr,
2034 num_bytes,
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002035 parent, root_objectid, (int)owner,
Josef Bacikfcebe452014-05-13 17:30:47 -07002036 BTRFS_ADD_DELAYED_REF, NULL, no_quota);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002037 } else {
Arne Jansen66d7e7f2011-09-12 15:26:38 +02002038 ret = btrfs_add_delayed_data_ref(fs_info, trans, bytenr,
2039 num_bytes,
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002040 parent, root_objectid, owner, offset,
Josef Bacikfcebe452014-05-13 17:30:47 -07002041 BTRFS_ADD_DELAYED_REF, NULL, no_quota);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002042 }
Zheng Yan31840ae2008-09-23 13:14:14 -04002043 return ret;
2044}
2045
/*
 * Add refs_to_add references to the extent recorded in node.  An inline
 * backref is tried first; if the extent item has no room (-EAGAIN) the
 * extent item's ref count is bumped and a separate keyed backref item
 * is inserted instead.  Failure to insert the keyed backref aborts the
 * transaction.  Returns 0 on success or a negative errno.
 */
static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
				  struct btrfs_root *root,
				  struct btrfs_delayed_ref_node *node,
				  u64 parent, u64 root_objectid,
				  u64 owner, u64 offset, int refs_to_add,
				  struct btrfs_delayed_extent_op *extent_op)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct btrfs_path *path;
	struct extent_buffer *leaf;
	struct btrfs_extent_item *item;
	struct btrfs_key key;
	u64 bytenr = node->bytenr;
	u64 num_bytes = node->num_bytes;
	u64 refs;
	int ret;
	int no_quota = node->no_quota;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	/* NOTE(review): no_quota is written here but never read below */
	if (!is_fstree(root_objectid) || !root->fs_info->quota_enabled)
		no_quota = 1;

	path->reada = 1;
	path->leave_spinning = 1;
	/* this will setup the path even if it fails to insert the back ref */
	ret = insert_inline_extent_backref(trans, fs_info->extent_root, path,
					   bytenr, num_bytes, parent,
					   root_objectid, owner, offset,
					   refs_to_add, extent_op);
	if ((ret < 0 && ret != -EAGAIN) || !ret)
		goto out;

	/*
	 * Ok we had -EAGAIN which means we didn't have space to insert and
	 * inline extent ref, so just update the reference count and add a
	 * normal backref.
	 */
	leaf = path->nodes[0];
	btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
	item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
	refs = btrfs_extent_refs(leaf, item);
	btrfs_set_extent_refs(leaf, item, refs + refs_to_add);
	if (extent_op)
		__run_delayed_extent_op(extent_op, leaf, item);

	btrfs_mark_buffer_dirty(leaf);
	btrfs_release_path(path);

	path->reada = 1;
	path->leave_spinning = 1;
	/* now insert the actual backref */
	ret = insert_extent_backref(trans, root->fs_info->extent_root,
				    path, bytenr, parent, root_objectid,
				    owner, offset, refs_to_add);
	if (ret)
		btrfs_abort_transaction(trans, root, ret);
out:
	btrfs_free_path(path);
	return ret;
}
2109
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002110static int run_delayed_data_ref(struct btrfs_trans_handle *trans,
2111 struct btrfs_root *root,
2112 struct btrfs_delayed_ref_node *node,
2113 struct btrfs_delayed_extent_op *extent_op,
2114 int insert_reserved)
Chris Masone9d0b132007-08-10 14:06:19 -04002115{
Chris Mason56bec292009-03-13 10:10:06 -04002116 int ret = 0;
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002117 struct btrfs_delayed_data_ref *ref;
2118 struct btrfs_key ins;
2119 u64 parent = 0;
2120 u64 ref_root = 0;
2121 u64 flags = 0;
Chris Mason56bec292009-03-13 10:10:06 -04002122
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002123 ins.objectid = node->bytenr;
2124 ins.offset = node->num_bytes;
2125 ins.type = BTRFS_EXTENT_ITEM_KEY;
Chris Mason56bec292009-03-13 10:10:06 -04002126
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002127 ref = btrfs_delayed_node_to_data_ref(node);
Liu Bo599c75e2013-07-16 19:03:36 +08002128 trace_run_delayed_data_ref(node, ref, node->action);
2129
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002130 if (node->type == BTRFS_SHARED_DATA_REF_KEY)
2131 parent = ref->parent;
Josef Bacikfcebe452014-05-13 17:30:47 -07002132 ref_root = ref->root;
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002133
2134 if (node->action == BTRFS_ADD_DELAYED_REF && insert_reserved) {
Josef Bacik3173a182013-03-07 14:22:04 -05002135 if (extent_op)
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002136 flags |= extent_op->flags_to_set;
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002137 ret = alloc_reserved_file_extent(trans, root,
2138 parent, ref_root, flags,
2139 ref->objectid, ref->offset,
2140 &ins, node->ref_mod);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002141 } else if (node->action == BTRFS_ADD_DELAYED_REF) {
Qu Wenruoc682f9b2015-03-17 16:59:47 +08002142 ret = __btrfs_inc_extent_ref(trans, root, node, parent,
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002143 ref_root, ref->objectid,
2144 ref->offset, node->ref_mod,
Qu Wenruoc682f9b2015-03-17 16:59:47 +08002145 extent_op);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002146 } else if (node->action == BTRFS_DROP_DELAYED_REF) {
Qu Wenruoc682f9b2015-03-17 16:59:47 +08002147 ret = __btrfs_free_extent(trans, root, node, parent,
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002148 ref_root, ref->objectid,
2149 ref->offset, node->ref_mod,
Qu Wenruoc682f9b2015-03-17 16:59:47 +08002150 extent_op);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002151 } else {
2152 BUG();
2153 }
Chris Mason56bec292009-03-13 10:10:06 -04002154 return ret;
2155}
2156
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002157static void __run_delayed_extent_op(struct btrfs_delayed_extent_op *extent_op,
2158 struct extent_buffer *leaf,
2159 struct btrfs_extent_item *ei)
2160{
2161 u64 flags = btrfs_extent_flags(leaf, ei);
2162 if (extent_op->update_flags) {
2163 flags |= extent_op->flags_to_set;
2164 btrfs_set_extent_flags(leaf, ei, flags);
2165 }
2166
2167 if (extent_op->update_key) {
2168 struct btrfs_tree_block_info *bi;
2169 BUG_ON(!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK));
2170 bi = (struct btrfs_tree_block_info *)(ei + 1);
2171 btrfs_set_tree_block_key(leaf, bi, &extent_op->key);
2172 }
2173}
2174
/*
 * Apply the pending flag/key update in extent_op to the extent item for
 * node->bytenr in the extent tree.  For metadata on skinny-metadata
 * filesystems the skinny item is tried first, falling back to the old
 * fat extent item key if it is not found.
 */
static int run_delayed_extent_op(struct btrfs_trans_handle *trans,
				 struct btrfs_root *root,
				 struct btrfs_delayed_ref_node *node,
				 struct btrfs_delayed_extent_op *extent_op)
{
	struct btrfs_key key;
	struct btrfs_path *path;
	struct btrfs_extent_item *ei;
	struct extent_buffer *leaf;
	u32 item_size;
	int ret;
	int err = 0;
	int metadata = !extent_op->is_data;

	/* nothing to do once the transaction has been aborted */
	if (trans->aborted)
		return 0;

	/* skinny metadata items only exist with the incompat feature */
	if (metadata && !btrfs_fs_incompat(root->fs_info, SKINNY_METADATA))
		metadata = 0;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	key.objectid = node->bytenr;

	if (metadata) {
		key.type = BTRFS_METADATA_ITEM_KEY;
		key.offset = extent_op->level;
	} else {
		key.type = BTRFS_EXTENT_ITEM_KEY;
		key.offset = node->num_bytes;
	}

again:
	path->reada = 1;
	path->leave_spinning = 1;
	ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key,
				path, 0, 1);
	if (ret < 0) {
		err = ret;
		goto out;
	}
	if (ret > 0) {
		if (metadata) {
			/*
			 * No skinny item; the extent may still be stored
			 * as an old fat item, so check the previous slot
			 * for one before retrying with the fat key.
			 */
			if (path->slots[0] > 0) {
				path->slots[0]--;
				btrfs_item_key_to_cpu(path->nodes[0], &key,
						      path->slots[0]);
				if (key.objectid == node->bytenr &&
				    key.type == BTRFS_EXTENT_ITEM_KEY &&
				    key.offset == node->num_bytes)
					ret = 0;
			}
			if (ret > 0) {
				btrfs_release_path(path);
				metadata = 0;

				key.objectid = node->bytenr;
				key.offset = node->num_bytes;
				key.type = BTRFS_EXTENT_ITEM_KEY;
				goto again;
			}
		} else {
			/* the extent item must exist by now */
			err = -EIO;
			goto out;
		}
	}

	leaf = path->nodes[0];
	item_size = btrfs_item_size_nr(leaf, path->slots[0]);
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
	if (item_size < sizeof(*ei)) {
		/* v0 extent item: convert before touching flags/key */
		ret = convert_extent_item_v0(trans, root->fs_info->extent_root,
					     path, (u64)-1, 0);
		if (ret < 0) {
			err = ret;
			goto out;
		}
		leaf = path->nodes[0];
		item_size = btrfs_item_size_nr(leaf, path->slots[0]);
	}
#endif
	BUG_ON(item_size < sizeof(*ei));
	ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
	__run_delayed_extent_op(extent_op, leaf, ei);

	btrfs_mark_buffer_dirty(leaf);
out:
	btrfs_free_path(path);
	return err;
}
2267
/*
 * Run one delayed reference for a tree block: allocate the reserved
 * block's extent item, add a backref, or drop a backref, depending on
 * node->action.
 */
static int run_delayed_tree_ref(struct btrfs_trans_handle *trans,
				struct btrfs_root *root,
				struct btrfs_delayed_ref_node *node,
				struct btrfs_delayed_extent_op *extent_op,
				int insert_reserved)
{
	int ret = 0;
	struct btrfs_delayed_tree_ref *ref;
	struct btrfs_key ins;
	u64 parent = 0;
	u64 ref_root = 0;
	bool skinny_metadata = btrfs_fs_incompat(root->fs_info,
						 SKINNY_METADATA);

	ref = btrfs_delayed_node_to_tree_ref(node);
	trace_run_delayed_tree_ref(node, ref, node->action);

	/* shared refs record their parent block; others only the root */
	if (node->type == BTRFS_SHARED_BLOCK_REF_KEY)
		parent = ref->parent;
	ref_root = ref->root;

	ins.objectid = node->bytenr;
	if (skinny_metadata) {
		/* skinny items key on the level instead of the byte size */
		ins.offset = ref->level;
		ins.type = BTRFS_METADATA_ITEM_KEY;
	} else {
		ins.offset = node->num_bytes;
		ins.type = BTRFS_EXTENT_ITEM_KEY;
	}

	/* delayed tree refs always carry a ref_mod of exactly 1 */
	BUG_ON(node->ref_mod != 1);
	if (node->action == BTRFS_ADD_DELAYED_REF && insert_reserved) {
		/* first ref on a freshly allocated block: insert the item */
		BUG_ON(!extent_op || !extent_op->update_flags);
		ret = alloc_reserved_tree_block(trans, root,
						parent, ref_root,
						extent_op->flags_to_set,
						&extent_op->key,
						ref->level, &ins,
						node->no_quota);
	} else if (node->action == BTRFS_ADD_DELAYED_REF) {
		ret = __btrfs_inc_extent_ref(trans, root, node,
					     parent, ref_root,
					     ref->level, 0, 1,
					     extent_op);
	} else if (node->action == BTRFS_DROP_DELAYED_REF) {
		ret = __btrfs_free_extent(trans, root, node,
					  parent, ref_root,
					  ref->level, 0, 1, extent_op);
	} else {
		BUG();
	}
	return ret;
}
2321
Chris Mason56bec292009-03-13 10:10:06 -04002322/* helper function to actually process a single delayed ref entry */
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002323static int run_one_delayed_ref(struct btrfs_trans_handle *trans,
2324 struct btrfs_root *root,
2325 struct btrfs_delayed_ref_node *node,
2326 struct btrfs_delayed_extent_op *extent_op,
2327 int insert_reserved)
Chris Mason56bec292009-03-13 10:10:06 -04002328{
Jeff Mahoney79787ea2012-03-12 16:03:00 +01002329 int ret = 0;
2330
Josef Bacik857cc2f2013-10-07 15:21:08 -04002331 if (trans->aborted) {
2332 if (insert_reserved)
2333 btrfs_pin_extent(root, node->bytenr,
2334 node->num_bytes, 1);
Jeff Mahoney79787ea2012-03-12 16:03:00 +01002335 return 0;
Josef Bacik857cc2f2013-10-07 15:21:08 -04002336 }
Jeff Mahoney79787ea2012-03-12 16:03:00 +01002337
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002338 if (btrfs_delayed_ref_is_head(node)) {
Chris Mason56bec292009-03-13 10:10:06 -04002339 struct btrfs_delayed_ref_head *head;
2340 /*
2341 * we've hit the end of the chain and we were supposed
2342 * to insert this extent into the tree. But, it got
2343 * deleted before we ever needed to insert it, so all
2344 * we have to do is clean up the accounting
2345 */
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002346 BUG_ON(extent_op);
2347 head = btrfs_delayed_node_to_head(node);
Liu Bo599c75e2013-07-16 19:03:36 +08002348 trace_run_delayed_ref_head(node, head, node->action);
2349
Chris Mason56bec292009-03-13 10:10:06 -04002350 if (insert_reserved) {
Yan, Zhengf0486c62010-05-16 10:46:25 -04002351 btrfs_pin_extent(root, node->bytenr,
2352 node->num_bytes, 1);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002353 if (head->is_data) {
2354 ret = btrfs_del_csums(trans, root,
2355 node->bytenr,
2356 node->num_bytes);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002357 }
Chris Mason56bec292009-03-13 10:10:06 -04002358 }
Jeff Mahoney79787ea2012-03-12 16:03:00 +01002359 return ret;
Chris Mason56bec292009-03-13 10:10:06 -04002360 }
Josef Bacikeb099672009-02-12 09:27:38 -05002361
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002362 if (node->type == BTRFS_TREE_BLOCK_REF_KEY ||
2363 node->type == BTRFS_SHARED_BLOCK_REF_KEY)
2364 ret = run_delayed_tree_ref(trans, root, node, extent_op,
2365 insert_reserved);
2366 else if (node->type == BTRFS_EXTENT_DATA_REF_KEY ||
2367 node->type == BTRFS_SHARED_DATA_REF_KEY)
2368 ret = run_delayed_data_ref(trans, root, node, extent_op,
2369 insert_reserved);
2370 else
2371 BUG();
2372 return ret;
Chris Masone9d0b132007-08-10 14:06:19 -04002373}
2374
Qu Wenruoc6fc2452015-03-30 17:03:00 +08002375static inline struct btrfs_delayed_ref_node *
Chris Mason56bec292009-03-13 10:10:06 -04002376select_delayed_ref(struct btrfs_delayed_ref_head *head)
Chris Masona28ec192007-03-06 20:08:01 -05002377{
Filipe Mananacffc3372015-07-09 13:13:44 +01002378 struct btrfs_delayed_ref_node *ref;
2379
Qu Wenruoc6fc2452015-03-30 17:03:00 +08002380 if (list_empty(&head->ref_list))
2381 return NULL;
Josef Bacikd7df2c72014-01-23 09:21:38 -05002382
Filipe Mananacffc3372015-07-09 13:13:44 +01002383 /*
2384 * Select a delayed ref of type BTRFS_ADD_DELAYED_REF first.
2385 * This is to prevent a ref count from going down to zero, which deletes
2386 * the extent item from the extent tree, when there still are references
2387 * to add, which would fail because they would not find the extent item.
2388 */
2389 list_for_each_entry(ref, &head->ref_list, list) {
2390 if (ref->action == BTRFS_ADD_DELAYED_REF)
2391 return ref;
2392 }
2393
Qu Wenruoc6fc2452015-03-30 17:03:00 +08002394 return list_entry(head->ref_list.next, struct btrfs_delayed_ref_node,
2395 list);
Chris Mason56bec292009-03-13 10:10:06 -04002396}
2397
/*
 * Run at most @nr delayed ref updates from the current transaction.
 *
 * Heads are taken one at a time via btrfs_select_ref_head(); for each head
 * individual refs are picked (adds before drops, see select_delayed_ref())
 * and applied with run_one_delayed_ref().  Once a head has no refs left the
 * head itself is run so reservation/csum accounting gets settled.
 *
 * Returns 0 on success or if called with an already aborted transaction.
 * Returns -ENOMEM or -EIO on failure and will abort the transaction.
 */
static noinline int __btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
					     struct btrfs_root *root,
					     unsigned long nr)
{
	struct btrfs_delayed_ref_root *delayed_refs;
	struct btrfs_delayed_ref_node *ref;
	struct btrfs_delayed_ref_head *locked_ref = NULL;
	struct btrfs_delayed_extent_op *extent_op;
	struct btrfs_fs_info *fs_info = root->fs_info;
	/* used to maintain fs_info->avg_delayed_ref_runtime below */
	ktime_t start = ktime_get();
	int ret;
	/* everything processed, including head-only iterations */
	unsigned long count = 0;
	/* only real ref updates; excludes empty heads (see comment at end) */
	unsigned long actual_count = 0;
	int must_insert_reserved = 0;

	delayed_refs = &trans->transaction->delayed_refs;
	while (1) {
		if (!locked_ref) {
			if (count >= nr)
				break;

			spin_lock(&delayed_refs->lock);
			locked_ref = btrfs_select_ref_head(trans);
			if (!locked_ref) {
				spin_unlock(&delayed_refs->lock);
				break;
			}

			/* grab the lock that says we are going to process
			 * all the refs for this head */
			ret = btrfs_delayed_ref_lock(trans, locked_ref);
			spin_unlock(&delayed_refs->lock);
			/*
			 * we may have dropped the spin lock to get the head
			 * mutex lock, and that might have given someone else
			 * time to free the head.  If that's true, it has been
			 * removed from our list and we can move on.
			 */
			if (ret == -EAGAIN) {
				locked_ref = NULL;
				count++;
				continue;
			}
		}

		spin_lock(&locked_ref->lock);

		/*
		 * locked_ref is the head node, so we have to go one
		 * node back for any delayed ref updates
		 */
		ref = select_delayed_ref(locked_ref);

		/*
		 * Refs still blocked by a pending tree-mod-log/backref
		 * sequence cannot be run yet; requeue the head and move on.
		 */
		if (ref && ref->seq &&
		    btrfs_check_delayed_seq(fs_info, delayed_refs, ref->seq)) {
			spin_unlock(&locked_ref->lock);
			btrfs_delayed_ref_unlock(locked_ref);
			spin_lock(&delayed_refs->lock);
			locked_ref->processing = 0;
			delayed_refs->num_heads_ready++;
			spin_unlock(&delayed_refs->lock);
			locked_ref = NULL;
			cond_resched();
			count++;
			continue;
		}

		/*
		 * record the must insert reserved flag before we
		 * drop the spin lock.
		 */
		must_insert_reserved = locked_ref->must_insert_reserved;
		locked_ref->must_insert_reserved = 0;

		extent_op = locked_ref->extent_op;
		locked_ref->extent_op = NULL;

		if (!ref) {


			/* All delayed refs have been processed, Go ahead
			 * and send the head node to run_one_delayed_ref,
			 * so that any accounting fixes can happen
			 */
			ref = &locked_ref->node;

			/*
			 * A pending extent op is pointless for an extent that
			 * is being freed anyway; just drop it.
			 */
			if (extent_op && must_insert_reserved) {
				btrfs_free_delayed_extent_op(extent_op);
				extent_op = NULL;
			}

			if (extent_op) {
				spin_unlock(&locked_ref->lock);
				ret = run_delayed_extent_op(trans, root,
							    ref, extent_op);
				btrfs_free_delayed_extent_op(extent_op);

				if (ret) {
					/*
					 * Need to reset must_insert_reserved if
					 * there was an error so the abort stuff
					 * can cleanup the reserved space
					 * properly.
					 */
					if (must_insert_reserved)
						locked_ref->must_insert_reserved = 1;
					locked_ref->processing = 0;
					btrfs_debug(fs_info, "run_delayed_extent_op returned %d", ret);
					btrfs_delayed_ref_unlock(locked_ref);
					return ret;
				}
				continue;
			}

			/*
			 * Need to drop our head ref lock and re-acquire the
			 * delayed ref lock and then re-check to make sure
			 * nobody got added.
			 */
			spin_unlock(&locked_ref->lock);
			spin_lock(&delayed_refs->lock);
			spin_lock(&locked_ref->lock);
			if (!list_empty(&locked_ref->ref_list) ||
			    locked_ref->extent_op) {
				spin_unlock(&locked_ref->lock);
				spin_unlock(&delayed_refs->lock);
				continue;
			}
			/* head is truly empty: unlink it from the ref tree */
			ref->in_tree = 0;
			delayed_refs->num_heads--;
			rb_erase(&locked_ref->href_node,
				 &delayed_refs->href_root);
			spin_unlock(&delayed_refs->lock);
		} else {
			actual_count++;
			ref->in_tree = 0;
			list_del(&ref->list);
		}
		atomic_dec(&delayed_refs->num_entries);

		if (!btrfs_delayed_ref_is_head(ref)) {
			/*
			 * when we play the delayed ref, also correct the
			 * ref_mod on head
			 */
			switch (ref->action) {
			case BTRFS_ADD_DELAYED_REF:
			case BTRFS_ADD_DELAYED_EXTENT:
				locked_ref->node.ref_mod -= ref->ref_mod;
				break;
			case BTRFS_DROP_DELAYED_REF:
				locked_ref->node.ref_mod += ref->ref_mod;
				break;
			default:
				WARN_ON(1);
			}
		}
		spin_unlock(&locked_ref->lock);

		ret = run_one_delayed_ref(trans, root, ref, extent_op,
					  must_insert_reserved);

		btrfs_free_delayed_extent_op(extent_op);
		if (ret) {
			locked_ref->processing = 0;
			btrfs_delayed_ref_unlock(locked_ref);
			btrfs_put_delayed_ref(ref);
			btrfs_debug(fs_info, "run_one_delayed_ref returned %d", ret);
			return ret;
		}

		/*
		 * If this node is a head, that means all the refs in this head
		 * have been dealt with, and we will pick the next head to deal
		 * with, so we must unlock the head and drop it from the cluster
		 * list before we release it.
		 */
		if (btrfs_delayed_ref_is_head(ref)) {
			if (locked_ref->is_data &&
			    locked_ref->total_ref_mod < 0) {
				spin_lock(&delayed_refs->lock);
				delayed_refs->pending_csums -= ref->num_bytes;
				spin_unlock(&delayed_refs->lock);
			}
			btrfs_delayed_ref_unlock(locked_ref);
			locked_ref = NULL;
		}
		btrfs_put_delayed_ref(ref);
		count++;
		cond_resched();
	}

	/*
	 * We don't want to include ref heads since we can have empty ref heads
	 * and those will drastically skew our runtime down since we just do
	 * accounting, no actual extent tree updates.
	 */
	if (actual_count > 0) {
		u64 runtime = ktime_to_ns(ktime_sub(ktime_get(), start));
		u64 avg;

		/*
		 * We weigh the current average higher than our current runtime
		 * to avoid large swings in the average.
		 */
		spin_lock(&delayed_refs->lock);
		avg = fs_info->avg_delayed_ref_runtime * 3 + runtime;
		fs_info->avg_delayed_ref_runtime = avg >> 2;	/* div by 4 */
		spin_unlock(&delayed_refs->lock);
	}
	return 0;
}
2614
Arne Jansen709c0482011-09-12 12:22:57 +02002615#ifdef SCRAMBLE_DELAYED_REFS
2616/*
2617 * Normally delayed refs get processed in ascending bytenr order. This
2618 * correlates in most cases to the order added. To expose dependencies on this
2619 * order, we start to process the tree in the middle instead of the beginning
2620 */
2621static u64 find_middle(struct rb_root *root)
2622{
2623 struct rb_node *n = root->rb_node;
2624 struct btrfs_delayed_ref_node *entry;
2625 int alt = 1;
2626 u64 middle;
2627 u64 first = 0, last = 0;
2628
2629 n = rb_first(root);
2630 if (n) {
2631 entry = rb_entry(n, struct btrfs_delayed_ref_node, rb_node);
2632 first = entry->bytenr;
2633 }
2634 n = rb_last(root);
2635 if (n) {
2636 entry = rb_entry(n, struct btrfs_delayed_ref_node, rb_node);
2637 last = entry->bytenr;
2638 }
2639 n = root->rb_node;
2640
2641 while (n) {
2642 entry = rb_entry(n, struct btrfs_delayed_ref_node, rb_node);
2643 WARN_ON(!entry->in_tree);
2644
2645 middle = entry->bytenr;
2646
2647 if (alt)
2648 n = n->rb_left;
2649 else
2650 n = n->rb_right;
2651
2652 alt = 1 - alt;
2653 }
2654 return middle;
2655}
2656#endif
2657
/*
 * Estimate how many extent-tree leaves are needed to hold the extent items
 * (plus one inline backref each) for @heads delayed ref heads.  Without the
 * skinny-metadata feature each item also carries a btrfs_tree_block_info.
 */
static inline u64 heads_to_leaves(struct btrfs_root *root, u64 heads)
{
	u64 num_bytes;

	num_bytes = heads * (sizeof(struct btrfs_extent_item) +
			     sizeof(struct btrfs_extent_inline_ref));
	if (!btrfs_fs_incompat(root->fs_info, SKINNY_METADATA))
		num_bytes += heads * sizeof(struct btrfs_tree_block_info);

	/*
	 * We don't ever fill up leaves all the way so multiply by 2 just to be
	 * closer to what we're really going to want to use.
	 */
	return div_u64(num_bytes, BTRFS_LEAF_DATA_SIZE(root));
}
2673
Josef Bacik12621332015-02-03 07:50:16 -08002674/*
2675 * Takes the number of bytes to be csumm'ed and figures out how many leaves it
2676 * would require to store the csums for that many bytes.
2677 */
Chris Mason28f75a02015-02-04 06:59:29 -08002678u64 btrfs_csum_bytes_to_leaves(struct btrfs_root *root, u64 csum_bytes)
Josef Bacik12621332015-02-03 07:50:16 -08002679{
2680 u64 csum_size;
2681 u64 num_csums_per_leaf;
2682 u64 num_csums;
2683
2684 csum_size = BTRFS_LEAF_DATA_SIZE(root) - sizeof(struct btrfs_item);
2685 num_csums_per_leaf = div64_u64(csum_size,
2686 (u64)btrfs_super_csum_size(root->fs_info->super_copy));
2687 num_csums = div64_u64(csum_bytes, root->sectorsize);
2688 num_csums += num_csums_per_leaf - 1;
2689 num_csums = div64_u64(num_csums, num_csums_per_leaf);
2690 return num_csums;
2691}
2692
Josef Bacik0a2b2a82014-01-23 10:54:11 -05002693int btrfs_check_space_for_delayed_refs(struct btrfs_trans_handle *trans,
Josef Bacik1be41b72013-06-12 13:56:06 -04002694 struct btrfs_root *root)
2695{
2696 struct btrfs_block_rsv *global_rsv;
2697 u64 num_heads = trans->transaction->delayed_refs.num_heads_ready;
Josef Bacik12621332015-02-03 07:50:16 -08002698 u64 csum_bytes = trans->transaction->delayed_refs.pending_csums;
Josef Bacikcb723e42015-02-18 08:06:57 -08002699 u64 num_dirty_bgs = trans->transaction->num_dirty_bgs;
2700 u64 num_bytes, num_dirty_bgs_bytes;
Josef Bacik1be41b72013-06-12 13:56:06 -04002701 int ret = 0;
2702
2703 num_bytes = btrfs_calc_trans_metadata_size(root, 1);
2704 num_heads = heads_to_leaves(root, num_heads);
2705 if (num_heads > 1)
David Sterba707e8a02014-06-04 19:22:26 +02002706 num_bytes += (num_heads - 1) * root->nodesize;
Josef Bacik1be41b72013-06-12 13:56:06 -04002707 num_bytes <<= 1;
Chris Mason28f75a02015-02-04 06:59:29 -08002708 num_bytes += btrfs_csum_bytes_to_leaves(root, csum_bytes) * root->nodesize;
Josef Bacikcb723e42015-02-18 08:06:57 -08002709 num_dirty_bgs_bytes = btrfs_calc_trans_metadata_size(root,
2710 num_dirty_bgs);
Josef Bacik1be41b72013-06-12 13:56:06 -04002711 global_rsv = &root->fs_info->global_block_rsv;
2712
2713 /*
2714 * If we can't allocate any more chunks lets make sure we have _lots_ of
2715 * wiggle room since running delayed refs can create more delayed refs.
2716 */
Josef Bacikcb723e42015-02-18 08:06:57 -08002717 if (global_rsv->space_info->full) {
2718 num_dirty_bgs_bytes <<= 1;
Josef Bacik1be41b72013-06-12 13:56:06 -04002719 num_bytes <<= 1;
Josef Bacikcb723e42015-02-18 08:06:57 -08002720 }
Josef Bacik1be41b72013-06-12 13:56:06 -04002721
2722 spin_lock(&global_rsv->lock);
Josef Bacikcb723e42015-02-18 08:06:57 -08002723 if (global_rsv->reserved <= num_bytes + num_dirty_bgs_bytes)
Josef Bacik1be41b72013-06-12 13:56:06 -04002724 ret = 1;
2725 spin_unlock(&global_rsv->lock);
2726 return ret;
2727}
2728
Josef Bacik0a2b2a82014-01-23 10:54:11 -05002729int btrfs_should_throttle_delayed_refs(struct btrfs_trans_handle *trans,
2730 struct btrfs_root *root)
2731{
2732 struct btrfs_fs_info *fs_info = root->fs_info;
2733 u64 num_entries =
2734 atomic_read(&trans->transaction->delayed_refs.num_entries);
2735 u64 avg_runtime;
Chris Masona79b7d42014-05-22 16:18:52 -07002736 u64 val;
Josef Bacik0a2b2a82014-01-23 10:54:11 -05002737
2738 smp_mb();
2739 avg_runtime = fs_info->avg_delayed_ref_runtime;
Chris Masona79b7d42014-05-22 16:18:52 -07002740 val = num_entries * avg_runtime;
Josef Bacik0a2b2a82014-01-23 10:54:11 -05002741 if (num_entries * avg_runtime >= NSEC_PER_SEC)
2742 return 1;
Chris Masona79b7d42014-05-22 16:18:52 -07002743 if (val >= NSEC_PER_SEC / 2)
2744 return 2;
Josef Bacik0a2b2a82014-01-23 10:54:11 -05002745
2746 return btrfs_check_space_for_delayed_refs(trans, root);
2747}
2748
/* context passed to the async delayed-ref worker (delayed_ref_async_start) */
struct async_delayed_refs {
	struct btrfs_root *root;	/* root used to join the transaction */
	int count;		/* number of refs to run */
	int error;		/* first error seen by the worker */
	int sync;		/* non-zero: caller waits on @wait and frees us;
				 * zero: worker kfree()s this struct itself */
	struct completion wait;	/* signalled when a sync run finishes */
	struct btrfs_work work;	/* work item queued on extent_workers */
};
2757
/*
 * Worker for btrfs_async_run_delayed_refs(): join a transaction, run up to
 * async->count delayed refs and end the transaction again.  The first error
 * encountered is stored in async->error.  Synchronous callers are woken via
 * the completion; otherwise the context frees itself.
 */
static void delayed_ref_async_start(struct btrfs_work *work)
{
	struct async_delayed_refs *async;
	struct btrfs_trans_handle *trans;
	int ret;

	async = container_of(work, struct async_delayed_refs, work);

	trans = btrfs_join_transaction(async->root);
	if (IS_ERR(trans)) {
		async->error = PTR_ERR(trans);
		goto done;
	}

	/*
	 * trans->sync means that when we call end_transaction, we won't
	 * wait on delayed refs
	 */
	trans->sync = true;
	ret = btrfs_run_delayed_refs(trans, async->root, async->count);
	if (ret)
		async->error = ret;

	ret = btrfs_end_transaction(trans, async->root);
	if (ret && !async->error)
		async->error = ret;
done:
	if (async->sync)
		complete(&async->wait);
	else
		kfree(async);
}
2790
2791int btrfs_async_run_delayed_refs(struct btrfs_root *root,
2792 unsigned long count, int wait)
2793{
2794 struct async_delayed_refs *async;
2795 int ret;
2796
2797 async = kmalloc(sizeof(*async), GFP_NOFS);
2798 if (!async)
2799 return -ENOMEM;
2800
2801 async->root = root->fs_info->tree_root;
2802 async->count = count;
2803 async->error = 0;
2804 if (wait)
2805 async->sync = 1;
2806 else
2807 async->sync = 0;
2808 init_completion(&async->wait);
2809
Liu Bo9e0af232014-08-15 23:36:53 +08002810 btrfs_init_work(&async->work, btrfs_extent_refs_helper,
2811 delayed_ref_async_start, NULL, NULL);
Chris Masona79b7d42014-05-22 16:18:52 -07002812
2813 btrfs_queue_work(root->fs_info->extent_workers, &async->work);
2814
2815 if (wait) {
2816 wait_for_completion(&async->wait);
2817 ret = async->error;
2818 kfree(async);
2819 return ret;
2820 }
2821 return 0;
2822}
2823
/*
 * this starts processing the delayed reference count updates and
 * extent insertions we have queued up so far.  count can be
 * 0, which means to process everything in the tree at the start
 * of the run (but not newly added entries), or it can be some target
 * number you'd like to process.  A count of (unsigned long)-1 means
 * "run everything, including refs added while we were running".
 *
 * Returns 0 on success or if called with an aborted transaction
 * Returns <0 on error and aborts the transaction
 */
int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
			   struct btrfs_root *root, unsigned long count)
{
	struct rb_node *node;
	struct btrfs_delayed_ref_root *delayed_refs;
	struct btrfs_delayed_ref_head *head;
	int ret;
	int run_all = count == (unsigned long)-1;
	/* saved so it can be restored on all return paths below */
	bool can_flush_pending_bgs = trans->can_flush_pending_bgs;

	/* We'll clean this up in btrfs_cleanup_transaction */
	if (trans->aborted)
		return 0;

	if (root == root->fs_info->extent_root)
		root = root->fs_info->tree_root;

	delayed_refs = &trans->transaction->delayed_refs;
	if (count == 0)
		count = atomic_read(&delayed_refs->num_entries) * 2;

again:
#ifdef SCRAMBLE_DELAYED_REFS
	delayed_refs->run_delayed_start = find_middle(&delayed_refs->root);
#endif
	trans->can_flush_pending_bgs = false;
	ret = __btrfs_run_delayed_refs(trans, root, count);
	if (ret < 0) {
		btrfs_abort_transaction(trans, root, ret);
		return ret;
	}

	if (run_all) {
		if (!list_empty(&trans->new_bgs))
			btrfs_create_pending_block_groups(trans, root);

		spin_lock(&delayed_refs->lock);
		node = rb_first(&delayed_refs->href_root);
		if (!node) {
			spin_unlock(&delayed_refs->lock);
			goto out;
		}
		count = (unsigned long)-1;

		/*
		 * Any heads left are currently being processed by someone
		 * else; wait on each head's mutex in turn and then restart
		 * the whole run from the top.
		 */
		while (node) {
			head = rb_entry(node, struct btrfs_delayed_ref_head,
					href_node);
			if (btrfs_delayed_ref_is_head(&head->node)) {
				struct btrfs_delayed_ref_node *ref;

				ref = &head->node;
				atomic_inc(&ref->refs);

				spin_unlock(&delayed_refs->lock);
				/*
				 * Mutex was contended, block until it's
				 * released and try again
				 */
				mutex_lock(&head->mutex);
				mutex_unlock(&head->mutex);

				btrfs_put_delayed_ref(ref);
				cond_resched();
				goto again;
			} else {
				WARN_ON(1);
			}
			node = rb_next(node);
		}
		spin_unlock(&delayed_refs->lock);
		cond_resched();
		goto again;
	}
out:
	assert_qgroups_uptodate(trans);
	trans->can_flush_pending_bgs = can_flush_pending_bgs;
	return 0;
}
2912
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002913int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans,
2914 struct btrfs_root *root,
2915 u64 bytenr, u64 num_bytes, u64 flags,
Josef Bacikb1c79e02013-05-09 13:49:30 -04002916 int level, int is_data)
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002917{
2918 struct btrfs_delayed_extent_op *extent_op;
2919 int ret;
2920
Miao Xie78a61842012-11-21 02:21:28 +00002921 extent_op = btrfs_alloc_delayed_extent_op();
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002922 if (!extent_op)
2923 return -ENOMEM;
2924
2925 extent_op->flags_to_set = flags;
2926 extent_op->update_flags = 1;
2927 extent_op->update_key = 0;
2928 extent_op->is_data = is_data ? 1 : 0;
Josef Bacikb1c79e02013-05-09 13:49:30 -04002929 extent_op->level = level;
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002930
Arne Jansen66d7e7f2011-09-12 15:26:38 +02002931 ret = btrfs_add_delayed_extent_op(root->fs_info, trans, bytenr,
2932 num_bytes, extent_op);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002933 if (ret)
Miao Xie78a61842012-11-21 02:21:28 +00002934 btrfs_free_delayed_extent_op(extent_op);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002935 return ret;
2936}
2937
/*
 * Check the queued (not yet committed) delayed refs for @bytenr to see
 * whether the data extent is referenced by anything other than
 * (@root, @objectid, @offset).
 *
 * Returns 1 if a cross reference exists among the delayed refs, 0 if none
 * was found (or no delayed head exists for @bytenr), and -EAGAIN if the
 * head's mutex was contended -- in that case @path is released and the
 * caller must retry after we have waited for the holder.
 */
static noinline int check_delayed_ref(struct btrfs_trans_handle *trans,
				      struct btrfs_root *root,
				      struct btrfs_path *path,
				      u64 objectid, u64 offset, u64 bytenr)
{
	struct btrfs_delayed_ref_head *head;
	struct btrfs_delayed_ref_node *ref;
	struct btrfs_delayed_data_ref *data_ref;
	struct btrfs_delayed_ref_root *delayed_refs;
	int ret = 0;

	delayed_refs = &trans->transaction->delayed_refs;
	spin_lock(&delayed_refs->lock);
	head = btrfs_find_delayed_ref_head(trans, bytenr);
	if (!head) {
		/* no pending delayed refs for this extent at all */
		spin_unlock(&delayed_refs->lock);
		return 0;
	}

	if (!mutex_trylock(&head->mutex)) {
		/* hold a ref so the head can't go away while we sleep */
		atomic_inc(&head->node.refs);
		spin_unlock(&delayed_refs->lock);

		btrfs_release_path(path);

		/*
		 * Mutex was contended, block until it's released and let
		 * caller try again
		 */
		mutex_lock(&head->mutex);
		mutex_unlock(&head->mutex);
		btrfs_put_delayed_ref(&head->node);
		return -EAGAIN;
	}
	spin_unlock(&delayed_refs->lock);

	spin_lock(&head->lock);
	list_for_each_entry(ref, &head->ref_list, list) {
		/* If it's a shared ref we know a cross reference exists */
		if (ref->type != BTRFS_EXTENT_DATA_REF_KEY) {
			ret = 1;
			break;
		}

		data_ref = btrfs_delayed_node_to_data_ref(ref);

		/*
		 * If our ref doesn't match the one we're currently looking at
		 * then we have a cross reference.
		 */
		if (data_ref->root != root->root_key.objectid ||
		    data_ref->objectid != objectid ||
		    data_ref->offset != offset) {
			ret = 1;
			break;
		}
	}
	spin_unlock(&head->lock);
	mutex_unlock(&head->mutex);
	return ret;
}
2999
/*
 * Check the committed extent tree for references to the data extent at
 * @bytenr other than a single inline backref for (@root, @objectid,
 * @offset).
 *
 * Returns 0 when the extent is exclusively referenced as expected, 1 when
 * there may be other references (shared backref, extra inline refs, old
 * generation, or a v0 extent item we can't fully inspect), -ENOENT when no
 * matching extent item exists and other negative errno on search failure.
 */
static noinline int check_committed_ref(struct btrfs_trans_handle *trans,
					struct btrfs_root *root,
					struct btrfs_path *path,
					u64 objectid, u64 offset, u64 bytenr)
{
	struct btrfs_root *extent_root = root->fs_info->extent_root;
	struct extent_buffer *leaf;
	struct btrfs_extent_data_ref *ref;
	struct btrfs_extent_inline_ref *iref;
	struct btrfs_extent_item *ei;
	struct btrfs_key key;
	u32 item_size;
	int ret;

	/* offset (u64)-1 makes the search land just past the extent item */
	key.objectid = bytenr;
	key.offset = (u64)-1;
	key.type = BTRFS_EXTENT_ITEM_KEY;

	ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
	if (ret < 0)
		goto out;
	BUG_ON(ret == 0); /* Corruption */

	ret = -ENOENT;
	if (path->slots[0] == 0)
		goto out;

	/* step back to the slot that would hold the extent item itself */
	path->slots[0]--;
	leaf = path->nodes[0];
	btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);

	if (key.objectid != bytenr || key.type != BTRFS_EXTENT_ITEM_KEY)
		goto out;

	ret = 1;
	item_size = btrfs_item_size_nr(leaf, path->slots[0]);
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
	/* v0 extent items carry no inline refs; treat as cross referenced */
	if (item_size < sizeof(*ei)) {
		WARN_ON(item_size != sizeof(struct btrfs_extent_item_v0));
		goto out;
	}
#endif
	ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);

	/* anything beyond one inline ref means additional backrefs exist */
	if (item_size != sizeof(*ei) +
	    btrfs_extent_inline_ref_size(BTRFS_EXTENT_DATA_REF_KEY))
		goto out;

	/* extents from before the last snapshot may be shared with it */
	if (btrfs_extent_generation(leaf, ei) <=
	    btrfs_root_last_snapshot(&root->root_item))
		goto out;

	iref = (struct btrfs_extent_inline_ref *)(ei + 1);
	if (btrfs_extent_inline_ref_type(leaf, iref) !=
	    BTRFS_EXTENT_DATA_REF_KEY)
		goto out;

	/* the single inline ref must match exactly and account for all refs */
	ref = (struct btrfs_extent_data_ref *)(&iref->offset);
	if (btrfs_extent_refs(leaf, ei) !=
	    btrfs_extent_data_ref_count(leaf, ref) ||
	    btrfs_extent_data_ref_root(leaf, ref) !=
	    root->root_key.objectid ||
	    btrfs_extent_data_ref_objectid(leaf, ref) != objectid ||
	    btrfs_extent_data_ref_offset(leaf, ref) != offset)
		goto out;

	ret = 0;
out:
	return ret;
}
3070
3071int btrfs_cross_ref_exist(struct btrfs_trans_handle *trans,
3072 struct btrfs_root *root,
3073 u64 objectid, u64 offset, u64 bytenr)
3074{
3075 struct btrfs_path *path;
3076 int ret;
3077 int ret2;
3078
3079 path = btrfs_alloc_path();
3080 if (!path)
3081 return -ENOENT;
3082
3083 do {
3084 ret = check_committed_ref(trans, root, path, objectid,
3085 offset, bytenr);
3086 if (ret && ret != -ENOENT)
3087 goto out;
3088
3089 ret2 = check_delayed_ref(trans, root, path, objectid,
3090 offset, bytenr);
3091 } while (ret2 == -EAGAIN);
3092
3093 if (ret2 && ret2 != -ENOENT) {
3094 ret = ret2;
3095 goto out;
3096 }
3097
3098 if (ret != -ENOENT || ret2 != -ENOENT)
3099 ret = 0;
3100out:
Yan Zhengf321e492008-07-30 09:26:11 -04003101 btrfs_free_path(path);
Yan, Zhengf0486c62010-05-16 10:46:25 -04003102 if (root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
3103 WARN_ON(ret > 0);
Yan Zhengf321e492008-07-30 09:26:11 -04003104 return ret;
3105}
3106
/*
 * Add (inc=1) or drop (inc=0) one extent reference for everything pointed to
 * by the tree block @buf: disk extents of non-inline file extent items when
 * @buf is a leaf, or child tree blocks when @buf is a node.
 *
 * @full_backref selects shared backrefs keyed by buf->start as the parent
 * instead of backrefs keyed by the owning root.
 *
 * Returns 0 on success or the first error from the per-extent ref update.
 */
static int __btrfs_mod_ref(struct btrfs_trans_handle *trans,
			   struct btrfs_root *root,
			   struct extent_buffer *buf,
			   int full_backref, int inc)
{
	u64 bytenr;
	u64 num_bytes;
	u64 parent;
	u64 ref_root;
	u32 nritems;
	struct btrfs_key key;
	struct btrfs_file_extent_item *fi;
	int i;
	int level;
	int ret = 0;
	/* Either btrfs_inc_extent_ref or btrfs_free_extent, chosen by @inc. */
	int (*process_func)(struct btrfs_trans_handle *, struct btrfs_root *,
			    u64, u64, u64, u64, u64, u64, int);


	/* Sanity-test roots carry no on-disk extents to track. */
	if (btrfs_test_is_dummy_root(root))
		return 0;

	ref_root = btrfs_header_owner(buf);
	nritems = btrfs_header_nritems(buf);
	level = btrfs_header_level(buf);

	/* Non-reference-counted roots only track refs for tree blocks. */
	if (!test_bit(BTRFS_ROOT_REF_COWS, &root->state) && level == 0)
		return 0;

	if (inc)
		process_func = btrfs_inc_extent_ref;
	else
		process_func = btrfs_free_extent;

	/* full backrefs are keyed on the block itself, not the owning root */
	if (full_backref)
		parent = buf->start;
	else
		parent = 0;

	for (i = 0; i < nritems; i++) {
		if (level == 0) {
			btrfs_item_key_to_cpu(buf, &key, i);
			if (key.type != BTRFS_EXTENT_DATA_KEY)
				continue;
			fi = btrfs_item_ptr(buf, i,
					    struct btrfs_file_extent_item);
			/* inline extents live in the leaf, no separate ref */
			if (btrfs_file_extent_type(buf, fi) ==
			    BTRFS_FILE_EXTENT_INLINE)
				continue;
			/* bytenr 0 marks a hole (sparse extent) */
			bytenr = btrfs_file_extent_disk_bytenr(buf, fi);
			if (bytenr == 0)
				continue;

			num_bytes = btrfs_file_extent_disk_num_bytes(buf, fi);
			/*
			 * Backrefs record the logical file offset of the start
			 * of the on-disk extent, so undo any in-extent offset.
			 */
			key.offset -= btrfs_file_extent_offset(buf, fi);
			ret = process_func(trans, root, bytenr, num_bytes,
					   parent, ref_root, key.objectid,
					   key.offset, 1);
			if (ret)
				goto fail;
		} else {
			bytenr = btrfs_node_blockptr(buf, i);
			num_bytes = root->nodesize;
			ret = process_func(trans, root, bytenr, num_bytes,
					   parent, ref_root, level - 1, 0,
					   1);
			if (ret)
				goto fail;
		}
	}
	return 0;
fail:
	return ret;
}
3181
/*
 * Add one reference for every extent pointed to by the tree block @buf.
 * Thin wrapper around __btrfs_mod_ref() with inc=1.
 */
int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
		  struct extent_buffer *buf, int full_backref)
{
	return __btrfs_mod_ref(trans, root, buf, full_backref, 1);
}
Zheng Yan31840ae2008-09-23 13:14:14 -04003187
/*
 * Drop one reference for every extent pointed to by the tree block @buf.
 * Thin wrapper around __btrfs_mod_ref() with inc=0.
 */
int btrfs_dec_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
		  struct extent_buffer *buf, int full_backref)
{
	return __btrfs_mod_ref(trans, root, buf, full_backref, 0);
}
3193
Chris Mason9078a3e2007-04-26 16:46:15 -04003194static int write_one_cache_group(struct btrfs_trans_handle *trans,
3195 struct btrfs_root *root,
3196 struct btrfs_path *path,
3197 struct btrfs_block_group_cache *cache)
3198{
3199 int ret;
Chris Mason9078a3e2007-04-26 16:46:15 -04003200 struct btrfs_root *extent_root = root->fs_info->extent_root;
Chris Mason5f39d392007-10-15 16:14:19 -04003201 unsigned long bi;
3202 struct extent_buffer *leaf;
Chris Mason9078a3e2007-04-26 16:46:15 -04003203
Chris Mason9078a3e2007-04-26 16:46:15 -04003204 ret = btrfs_search_slot(trans, extent_root, &cache->key, path, 0, 1);
Josef Bacikdf95e7f2014-12-12 16:02:20 -05003205 if (ret) {
3206 if (ret > 0)
3207 ret = -ENOENT;
Chris Mason54aa1f42007-06-22 14:16:25 -04003208 goto fail;
Josef Bacikdf95e7f2014-12-12 16:02:20 -05003209 }
Chris Mason5f39d392007-10-15 16:14:19 -04003210
3211 leaf = path->nodes[0];
3212 bi = btrfs_item_ptr_offset(leaf, path->slots[0]);
3213 write_extent_buffer(leaf, &cache->item, bi, sizeof(cache->item));
3214 btrfs_mark_buffer_dirty(leaf);
Chris Mason54aa1f42007-06-22 14:16:25 -04003215fail:
Filipe Manana24b89d02015-04-25 18:31:05 +01003216 btrfs_release_path(path);
Josef Bacikdf95e7f2014-12-12 16:02:20 -05003217 return ret;
Chris Mason9078a3e2007-04-26 16:46:15 -04003218
3219}
3220
/*
 * Return the block group that follows @cache in bytenr order, or NULL if
 * @cache was the last one.
 *
 * Consumes the caller's reference on @cache and returns the next block group
 * with a new reference held (reference hand-off iterator).  If @cache was
 * removed from the rbtree meanwhile, falls back to a full lookup starting at
 * the end of @cache's range.
 */
static struct btrfs_block_group_cache *
next_block_group(struct btrfs_root *root,
		 struct btrfs_block_group_cache *cache)
{
	struct rb_node *node;

	spin_lock(&root->fs_info->block_group_cache_lock);

	/* If our block group was removed, we need a full search. */
	if (RB_EMPTY_NODE(&cache->cache_node)) {
		const u64 next_bytenr = cache->key.objectid + cache->key.offset;

		/* drop the lock before the lookup; it takes it itself */
		spin_unlock(&root->fs_info->block_group_cache_lock);
		btrfs_put_block_group(cache);
		cache = btrfs_lookup_first_block_group(root->fs_info,
						       next_bytenr);
		return cache;
	}
	node = rb_next(&cache->cache_node);
	/* safe: the rbtree lock keeps the next node alive while we ref it */
	btrfs_put_block_group(cache);
	if (node) {
		cache = rb_entry(node, struct btrfs_block_group_cache,
				 cache_node);
		btrfs_get_block_group(cache);
	} else
		cache = NULL;
	spin_unlock(&root->fs_info->block_group_cache_lock);
	return cache;
}
3250
/*
 * Prepare the free space cache inode of @block_group for this transaction:
 * look up (or create) the cache inode, invalidate/truncate any stale
 * contents, and preallocate room for the new cache data.
 *
 * The outcome is recorded in block_group->disk_cache_state:
 *   BTRFS_DC_SETUP   - cache inode ready, write it out later
 *   BTRFS_DC_WRITTEN - caching skipped (tiny group, not cached, nospace_cache)
 *   BTRFS_DC_ERROR   - something failed; the cache must not be trusted
 *
 * Returns 0 or a negative error.  @path is released before returning.
 */
static int cache_save_setup(struct btrfs_block_group_cache *block_group,
			    struct btrfs_trans_handle *trans,
			    struct btrfs_path *path)
{
	struct btrfs_root *root = block_group->fs_info->tree_root;
	struct inode *inode = NULL;
	u64 alloc_hint = 0;
	int dcs = BTRFS_DC_ERROR;
	u64 num_pages = 0;
	int retries = 0;
	int ret = 0;

	/*
	 * If this block group is smaller than 100 megs don't bother caching the
	 * block group.
	 */
	if (block_group->key.offset < (100 * 1024 * 1024)) {
		spin_lock(&block_group->lock);
		block_group->disk_cache_state = BTRFS_DC_WRITTEN;
		spin_unlock(&block_group->lock);
		return 0;
	}

	/* aborted transaction: no point in setting anything up */
	if (trans->aborted)
		return 0;
again:
	inode = lookup_free_space_inode(root, block_group, path);
	if (IS_ERR(inode) && PTR_ERR(inode) != -ENOENT) {
		ret = PTR_ERR(inode);
		btrfs_release_path(path);
		goto out;
	}

	if (IS_ERR(inode)) {
		/* -ENOENT: create the cache inode, then retry exactly once */
		BUG_ON(retries);
		retries++;

		if (block_group->ro)
			goto out_free;

		ret = create_free_space_inode(root, trans, block_group, path);
		if (ret)
			goto out_free;
		goto again;
	}

	/* We've already setup this transaction, go ahead and exit */
	if (block_group->cache_generation == trans->transid &&
	    i_size_read(inode)) {
		dcs = BTRFS_DC_SETUP;
		goto out_put;
	}

	/*
	 * We want to set the generation to 0, that way if anything goes wrong
	 * from here on out we know not to trust this cache when we load up next
	 * time.
	 */
	BTRFS_I(inode)->generation = 0;
	ret = btrfs_update_inode(trans, root, inode);
	if (ret) {
		/*
		 * So theoretically we could recover from this, simply set the
		 * super cache generation to 0 so we know to invalidate the
		 * cache, but then we'd have to keep track of the block groups
		 * that fail this way so we know we _have_ to reset this cache
		 * before the next commit or risk reading stale cache. So to
		 * limit our exposure to horrible edge cases lets just abort the
		 * transaction, this only happens in really bad situations
		 * anyway.
		 */
		btrfs_abort_transaction(trans, root, ret);
		goto out_put;
	}
	WARN_ON(ret);

	/* throw away the old cache contents before re-filling it */
	if (i_size_read(inode) > 0) {
		ret = btrfs_check_trunc_cache_free_space(root,
					&root->fs_info->global_block_rsv);
		if (ret)
			goto out_put;

		ret = btrfs_truncate_free_space_cache(root, trans, NULL, inode);
		if (ret)
			goto out_put;
	}

	spin_lock(&block_group->lock);
	if (block_group->cached != BTRFS_CACHE_FINISHED ||
	    !btrfs_test_opt(root, SPACE_CACHE)) {
		/*
		 * don't bother trying to write stuff out _if_
		 * a) we're not cached,
		 * b) we're with nospace_cache mount option.
		 */
		dcs = BTRFS_DC_WRITTEN;
		spin_unlock(&block_group->lock);
		goto out_put;
	}
	spin_unlock(&block_group->lock);

	/*
	 * Try to preallocate enough space based on how big the block group is.
	 * Keep in mind this has to include any pinned space which could end up
	 * taking up quite a bit since it's not folded into the other space
	 * cache.
	 */
	num_pages = div_u64(block_group->key.offset, 256 * 1024 * 1024);
	if (!num_pages)
		num_pages = 1;

	/* 16 pages per 256MB of block group; converted to bytes below */
	num_pages *= 16;
	num_pages *= PAGE_CACHE_SIZE;

	ret = btrfs_check_data_free_space(inode, num_pages, num_pages);
	if (ret)
		goto out_put;

	ret = btrfs_prealloc_file_range_trans(inode, trans, 0, 0, num_pages,
					      num_pages, num_pages,
					      &alloc_hint);
	if (!ret)
		dcs = BTRFS_DC_SETUP;
	/* reservation was only needed for the prealloc call itself */
	btrfs_free_reserved_data_space(inode, num_pages);

out_put:
	iput(inode);
out_free:
	btrfs_release_path(path);
out:
	spin_lock(&block_group->lock);
	if (!ret && dcs == BTRFS_DC_SETUP)
		block_group->cache_generation = trans->transid;
	block_group->disk_cache_state = dcs;
	spin_unlock(&block_group->lock);

	return ret;
}
3389
Josef Bacikdcdf7f62015-03-02 16:37:31 -05003390int btrfs_setup_space_cache(struct btrfs_trans_handle *trans,
3391 struct btrfs_root *root)
3392{
3393 struct btrfs_block_group_cache *cache, *tmp;
3394 struct btrfs_transaction *cur_trans = trans->transaction;
3395 struct btrfs_path *path;
3396
3397 if (list_empty(&cur_trans->dirty_bgs) ||
3398 !btrfs_test_opt(root, SPACE_CACHE))
3399 return 0;
3400
3401 path = btrfs_alloc_path();
3402 if (!path)
3403 return -ENOMEM;
3404
3405 /* Could add new block groups, use _safe just in case */
3406 list_for_each_entry_safe(cache, tmp, &cur_trans->dirty_bgs,
3407 dirty_list) {
3408 if (cache->disk_cache_state == BTRFS_DC_CLEAR)
3409 cache_save_setup(cache, trans, path);
3410 }
3411
3412 btrfs_free_path(path);
3413 return 0;
3414}
3415
/*
 * transaction commit does final block group cache writeback during a
 * critical section where nothing is allowed to change the FS. This is
 * required in order for the cache to actually match the block group,
 * but can introduce a lot of latency into the commit.
 *
 * So, btrfs_start_dirty_block_groups is here to kick off block group
 * cache IO. There's a chance we'll have to redo some of it if the
 * block group changes again during the commit, but it greatly reduces
 * the commit latency by getting rid of the easy block groups while
 * we're still allowing others to join the commit.
 */
int btrfs_start_dirty_block_groups(struct btrfs_trans_handle *trans,
				   struct btrfs_root *root)
{
	struct btrfs_block_group_cache *cache;
	struct btrfs_transaction *cur_trans = trans->transaction;
	int ret = 0;
	int should_put;
	struct btrfs_path *path = NULL;
	LIST_HEAD(dirty);	/* private snapshot of cur_trans->dirty_bgs */
	struct list_head *io = &cur_trans->io_bgs;
	int num_started = 0;
	int loops = 0;

	/* grab everything dirty so far; new dirtiers go to the trans list */
	spin_lock(&cur_trans->dirty_bgs_lock);
	if (list_empty(&cur_trans->dirty_bgs)) {
		spin_unlock(&cur_trans->dirty_bgs_lock);
		return 0;
	}
	list_splice_init(&cur_trans->dirty_bgs, &dirty);
	spin_unlock(&cur_trans->dirty_bgs_lock);

again:
	/*
	 * make sure all the block groups on our dirty list actually
	 * exist
	 */
	btrfs_create_pending_block_groups(trans, root);

	if (!path) {
		path = btrfs_alloc_path();
		if (!path)
			return -ENOMEM;
	}

	/*
	 * cache_write_mutex is here only to save us from balance or automatic
	 * removal of empty block groups deleting this block group while we are
	 * writing out the cache
	 */
	mutex_lock(&trans->transaction->cache_write_mutex);
	while (!list_empty(&dirty)) {
		cache = list_first_entry(&dirty,
					 struct btrfs_block_group_cache,
					 dirty_list);
		/*
		 * this can happen if something re-dirties a block
		 * group that is already under IO. Just wait for it to
		 * finish and then do it all again
		 */
		if (!list_empty(&cache->io_list)) {
			list_del_init(&cache->io_list);
			btrfs_wait_cache_io(root, trans, cache,
					    &cache->io_ctl, path,
					    cache->key.objectid);
			btrfs_put_block_group(cache);
		}


		/*
		 * btrfs_wait_cache_io uses the cache->dirty_list to decide
		 * if it should update the cache_state. Don't delete
		 * until after we wait.
		 *
		 * Since we're not running in the commit critical section
		 * we need the dirty_bgs_lock to protect from update_block_group
		 */
		spin_lock(&cur_trans->dirty_bgs_lock);
		list_del_init(&cache->dirty_list);
		spin_unlock(&cur_trans->dirty_bgs_lock);

		should_put = 1;

		cache_save_setup(cache, trans, path);

		if (cache->disk_cache_state == BTRFS_DC_SETUP) {
			cache->io_ctl.inode = NULL;
			ret = btrfs_write_out_cache(root, trans, cache, path);
			if (ret == 0 && cache->io_ctl.inode) {
				num_started++;
				should_put = 0;

				/*
				 * the cache_write_mutex is protecting
				 * the io_list
				 */
				list_add_tail(&cache->io_list, io);
			} else {
				/*
				 * if we failed to write the cache, the
				 * generation will be bad and life goes on
				 */
				ret = 0;
			}
		}
		if (!ret) {
			ret = write_one_cache_group(trans, root, path, cache);
			/*
			 * Our block group might still be attached to the list
			 * of new block groups in the transaction handle of some
			 * other task (struct btrfs_trans_handle->new_bgs). This
			 * means its block group item isn't yet in the extent
			 * tree. If this happens ignore the error, as we will
			 * try again later in the critical section of the
			 * transaction commit.
			 */
			if (ret == -ENOENT) {
				ret = 0;
				spin_lock(&cur_trans->dirty_bgs_lock);
				if (list_empty(&cache->dirty_list)) {
					list_add_tail(&cache->dirty_list,
						      &cur_trans->dirty_bgs);
					btrfs_get_block_group(cache);
				}
				spin_unlock(&cur_trans->dirty_bgs_lock);
			} else if (ret) {
				btrfs_abort_transaction(trans, root, ret);
			}
		}

		/* if its not on the io list, we need to put the block group */
		if (should_put)
			btrfs_put_block_group(cache);

		/*
		 * NOTE(review): breaking out here leaves any remaining
		 * entries on the local 'dirty' list instead of splicing
		 * them back to cur_trans->dirty_bgs — confirm they are
		 * reclaimed elsewhere on the abort path.
		 */
		if (ret)
			break;

		/*
		 * Avoid blocking other tasks for too long. It might even save
		 * us from writing caches for block groups that are going to be
		 * removed.
		 */
		mutex_unlock(&trans->transaction->cache_write_mutex);
		mutex_lock(&trans->transaction->cache_write_mutex);
	}
	mutex_unlock(&trans->transaction->cache_write_mutex);

	/*
	 * go through delayed refs for all the stuff we've just kicked off
	 * and then loop back (just once)
	 *
	 * NOTE(review): this assignment overwrites an error 'ret' from the
	 * loop above; presumably the transaction is already aborted in that
	 * case so delayed refs fail too — verify.
	 */
	ret = btrfs_run_delayed_refs(trans, root, 0);
	if (!ret && loops == 0) {
		loops++;
		spin_lock(&cur_trans->dirty_bgs_lock);
		list_splice_init(&cur_trans->dirty_bgs, &dirty);
		/*
		 * dirty_bgs_lock protects us from concurrent block group
		 * deletes too (not just cache_write_mutex).
		 */
		if (!list_empty(&dirty)) {
			spin_unlock(&cur_trans->dirty_bgs_lock);
			goto again;
		}
		spin_unlock(&cur_trans->dirty_bgs_lock);
	}

	btrfs_free_path(path);
	return ret;
}
3587
/*
 * Final block group writeback, called from the transaction commit critical
 * section (no concurrent FS modification).  Writes out the free space cache
 * and the block group items for every block group still on
 * cur_trans->dirty_bgs, then waits for all cache IO started here (and any
 * left over from btrfs_start_dirty_block_groups) to finish.
 */
int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
				   struct btrfs_root *root)
{
	struct btrfs_block_group_cache *cache;
	struct btrfs_transaction *cur_trans = trans->transaction;
	int ret = 0;
	int should_put;
	struct btrfs_path *path;
	struct list_head *io = &cur_trans->io_bgs;
	int num_started = 0;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	/*
	 * We don't need the lock here since we are protected by the transaction
	 * commit. We want to do the cache_save_setup first and then run the
	 * delayed refs to make sure we have the best chance at doing this all
	 * in one shot.
	 */
	while (!list_empty(&cur_trans->dirty_bgs)) {
		cache = list_first_entry(&cur_trans->dirty_bgs,
					 struct btrfs_block_group_cache,
					 dirty_list);

		/*
		 * this can happen if cache_save_setup re-dirties a block
		 * group that is already under IO. Just wait for it to
		 * finish and then do it all again
		 */
		if (!list_empty(&cache->io_list)) {
			list_del_init(&cache->io_list);
			btrfs_wait_cache_io(root, trans, cache,
					    &cache->io_ctl, path,
					    cache->key.objectid);
			btrfs_put_block_group(cache);
		}

		/*
		 * don't remove from the dirty list until after we've waited
		 * on any pending IO
		 */
		list_del_init(&cache->dirty_list);
		should_put = 1;

		cache_save_setup(cache, trans, path);

		/* flush all delayed refs so the block group items are final */
		if (!ret)
			ret = btrfs_run_delayed_refs(trans, root, (unsigned long) -1);

		if (!ret && cache->disk_cache_state == BTRFS_DC_SETUP) {
			cache->io_ctl.inode = NULL;
			ret = btrfs_write_out_cache(root, trans, cache, path);
			if (ret == 0 && cache->io_ctl.inode) {
				num_started++;
				should_put = 0;
				list_add_tail(&cache->io_list, io);
			} else {
				/*
				 * if we failed to write the cache, the
				 * generation will be bad and life goes on
				 */
				ret = 0;
			}
		}
		if (!ret) {
			ret = write_one_cache_group(trans, root, path, cache);
			if (ret)
				btrfs_abort_transaction(trans, root, ret);
		}

		/* if its not on the io list, we need to put the block group */
		if (should_put)
			btrfs_put_block_group(cache);
	}

	/* wait for every cache write started above (or earlier) to finish */
	while (!list_empty(io)) {
		cache = list_first_entry(io, struct btrfs_block_group_cache,
					 io_list);
		list_del_init(&cache->io_list);
		btrfs_wait_cache_io(root, trans, cache,
				    &cache->io_ctl, path, cache->key.objectid);
		btrfs_put_block_group(cache);
	}

	btrfs_free_path(path);
	return ret;
}
3677
Yan Zhengd2fb3432008-12-11 16:30:39 -05003678int btrfs_extent_readonly(struct btrfs_root *root, u64 bytenr)
3679{
3680 struct btrfs_block_group_cache *block_group;
3681 int readonly = 0;
3682
3683 block_group = btrfs_lookup_block_group(root->fs_info, bytenr);
3684 if (!block_group || block_group->ro)
3685 readonly = 1;
3686 if (block_group)
Chris Masonfa9c0d792009-04-03 09:47:43 -04003687 btrfs_put_block_group(block_group);
Yan Zhengd2fb3432008-12-11 16:30:39 -05003688 return readonly;
3689}
3690
Jeff Mahoney6ab0a202013-11-01 13:07:04 -04003691static const char *alloc_name(u64 flags)
3692{
3693 switch (flags) {
3694 case BTRFS_BLOCK_GROUP_METADATA|BTRFS_BLOCK_GROUP_DATA:
3695 return "mixed";
3696 case BTRFS_BLOCK_GROUP_METADATA:
3697 return "metadata";
3698 case BTRFS_BLOCK_GROUP_DATA:
3699 return "data";
3700 case BTRFS_BLOCK_GROUP_SYSTEM:
3701 return "system";
3702 default:
3703 WARN_ON(1);
3704 return "invalid-combination";
3705 };
3706}
3707
Chris Mason593060d2008-03-25 16:50:33 -04003708static int update_space_info(struct btrfs_fs_info *info, u64 flags,
3709 u64 total_bytes, u64 bytes_used,
3710 struct btrfs_space_info **space_info)
3711{
3712 struct btrfs_space_info *found;
Yan, Zhengb742bb822010-05-16 10:46:24 -04003713 int i;
3714 int factor;
Josef Bacikb150a4f2013-06-19 15:00:04 -04003715 int ret;
Yan, Zhengb742bb822010-05-16 10:46:24 -04003716
3717 if (flags & (BTRFS_BLOCK_GROUP_DUP | BTRFS_BLOCK_GROUP_RAID1 |
3718 BTRFS_BLOCK_GROUP_RAID10))
3719 factor = 2;
3720 else
3721 factor = 1;
Chris Mason593060d2008-03-25 16:50:33 -04003722
3723 found = __find_space_info(info, flags);
3724 if (found) {
Josef Bacik25179202008-10-29 14:49:05 -04003725 spin_lock(&found->lock);
Chris Mason593060d2008-03-25 16:50:33 -04003726 found->total_bytes += total_bytes;
Josef Bacik89a55892010-10-14 14:52:27 -04003727 found->disk_total += total_bytes * factor;
Chris Mason593060d2008-03-25 16:50:33 -04003728 found->bytes_used += bytes_used;
Yan, Zhengb742bb822010-05-16 10:46:24 -04003729 found->disk_used += bytes_used * factor;
Filipe Manana2e6e5182015-05-12 00:28:11 +01003730 if (total_bytes > 0)
3731 found->full = 0;
Josef Bacik25179202008-10-29 14:49:05 -04003732 spin_unlock(&found->lock);
Chris Mason593060d2008-03-25 16:50:33 -04003733 *space_info = found;
3734 return 0;
3735 }
Yan Zhengc146afa2008-11-12 14:34:12 -05003736 found = kzalloc(sizeof(*found), GFP_NOFS);
Chris Mason593060d2008-03-25 16:50:33 -04003737 if (!found)
3738 return -ENOMEM;
3739
Tejun Heo908c7f12014-09-08 09:51:29 +09003740 ret = percpu_counter_init(&found->total_bytes_pinned, 0, GFP_KERNEL);
Josef Bacikb150a4f2013-06-19 15:00:04 -04003741 if (ret) {
3742 kfree(found);
3743 return ret;
3744 }
3745
Jeff Mahoneyc1895442014-05-27 12:59:57 -04003746 for (i = 0; i < BTRFS_NR_RAID_TYPES; i++)
Yan, Zhengb742bb822010-05-16 10:46:24 -04003747 INIT_LIST_HEAD(&found->block_groups[i]);
Josef Bacik80eb2342008-10-29 14:49:05 -04003748 init_rwsem(&found->groups_sem);
Josef Bacik0f9dd462008-09-23 13:14:11 -04003749 spin_lock_init(&found->lock);
Ilya Dryomov52ba6922012-01-16 22:04:47 +02003750 found->flags = flags & BTRFS_BLOCK_GROUP_TYPE_MASK;
Chris Mason593060d2008-03-25 16:50:33 -04003751 found->total_bytes = total_bytes;
Josef Bacik89a55892010-10-14 14:52:27 -04003752 found->disk_total = total_bytes * factor;
Chris Mason593060d2008-03-25 16:50:33 -04003753 found->bytes_used = bytes_used;
Yan, Zhengb742bb822010-05-16 10:46:24 -04003754 found->disk_used = bytes_used * factor;
Chris Mason593060d2008-03-25 16:50:33 -04003755 found->bytes_pinned = 0;
Zheng Yane8569812008-09-26 10:05:48 -04003756 found->bytes_reserved = 0;
Yan Zhengc146afa2008-11-12 14:34:12 -05003757 found->bytes_readonly = 0;
Yan, Zhengf0486c62010-05-16 10:46:25 -04003758 found->bytes_may_use = 0;
Filipe Manana6af3e3a2015-09-07 10:41:12 +01003759 found->full = 0;
Chris Mason0e4f8f82011-04-15 16:05:44 -04003760 found->force_alloc = CHUNK_ALLOC_NO_FORCE;
Josef Bacik6d741192011-04-11 20:20:11 -04003761 found->chunk_alloc = 0;
Josef Bacikfdb5eff2011-06-07 16:07:44 -04003762 found->flush = 0;
3763 init_waitqueue_head(&found->wait);
Josef Bacik633c0aa2014-10-31 09:49:34 -04003764 INIT_LIST_HEAD(&found->ro_bgs);
Jeff Mahoney6ab0a202013-11-01 13:07:04 -04003765
3766 ret = kobject_init_and_add(&found->kobj, &space_info_ktype,
3767 info->space_info_kobj, "%s",
3768 alloc_name(found->flags));
3769 if (ret) {
3770 kfree(found);
3771 return ret;
3772 }
3773
Chris Mason593060d2008-03-25 16:50:33 -04003774 *space_info = found;
Chris Mason4184ea72009-03-10 12:39:20 -04003775 list_add_rcu(&found->list, &info->space_info);
Li Zefanb4d7c3c2012-07-09 20:21:07 -06003776 if (flags & BTRFS_BLOCK_GROUP_DATA)
3777 info->data_sinfo = found;
Jeff Mahoney6ab0a202013-11-01 13:07:04 -04003778
3779 return ret;
Chris Mason593060d2008-03-25 16:50:33 -04003780}
3781
Chris Mason8790d502008-04-03 16:29:03 -04003782static void set_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags)
3783{
Ilya Dryomov899c81e2012-03-27 17:09:16 +03003784 u64 extra_flags = chunk_to_extended(flags) &
3785 BTRFS_EXTENDED_PROFILE_MASK;
Ilya Dryomova46d11a2012-01-16 22:04:47 +02003786
Miao Xiede98ced2013-01-29 10:13:12 +00003787 write_seqlock(&fs_info->profiles_lock);
Ilya Dryomova46d11a2012-01-16 22:04:47 +02003788 if (flags & BTRFS_BLOCK_GROUP_DATA)
3789 fs_info->avail_data_alloc_bits |= extra_flags;
3790 if (flags & BTRFS_BLOCK_GROUP_METADATA)
3791 fs_info->avail_metadata_alloc_bits |= extra_flags;
3792 if (flags & BTRFS_BLOCK_GROUP_SYSTEM)
3793 fs_info->avail_system_alloc_bits |= extra_flags;
Miao Xiede98ced2013-01-29 10:13:12 +00003794 write_sequnlock(&fs_info->profiles_lock);
Chris Mason8790d502008-04-03 16:29:03 -04003795}
Chris Mason593060d2008-03-25 16:50:33 -04003796
Ilya Dryomova46d11a2012-01-16 22:04:47 +02003797/*
Ilya Dryomovfc67c452012-03-27 17:09:17 +03003798 * returns target flags in extended format or 0 if restripe for this
3799 * chunk_type is not in progress
Ilya Dryomovc6664b42012-04-12 16:03:56 -04003800 *
3801 * should be called with either volume_mutex or balance_lock held
Ilya Dryomovfc67c452012-03-27 17:09:17 +03003802 */
3803static u64 get_restripe_target(struct btrfs_fs_info *fs_info, u64 flags)
3804{
3805 struct btrfs_balance_control *bctl = fs_info->balance_ctl;
3806 u64 target = 0;
3807
Ilya Dryomovfc67c452012-03-27 17:09:17 +03003808 if (!bctl)
3809 return 0;
3810
3811 if (flags & BTRFS_BLOCK_GROUP_DATA &&
3812 bctl->data.flags & BTRFS_BALANCE_ARGS_CONVERT) {
3813 target = BTRFS_BLOCK_GROUP_DATA | bctl->data.target;
3814 } else if (flags & BTRFS_BLOCK_GROUP_SYSTEM &&
3815 bctl->sys.flags & BTRFS_BALANCE_ARGS_CONVERT) {
3816 target = BTRFS_BLOCK_GROUP_SYSTEM | bctl->sys.target;
3817 } else if (flags & BTRFS_BLOCK_GROUP_METADATA &&
3818 bctl->meta.flags & BTRFS_BALANCE_ARGS_CONVERT) {
3819 target = BTRFS_BLOCK_GROUP_METADATA | bctl->meta.target;
3820 }
3821
3822 return target;
3823}
3824
3825/*
Ilya Dryomova46d11a2012-01-16 22:04:47 +02003826 * @flags: available profiles in extended format (see ctree.h)
3827 *
Ilya Dryomove4d8ec02012-01-16 22:04:48 +02003828 * Returns reduced profile in chunk format. If profile changing is in
3829 * progress (either running or paused) picks the target profile (if it's
3830 * already available), otherwise falls back to plain reducing.
Ilya Dryomova46d11a2012-01-16 22:04:47 +02003831 */
Eric Sandeen48a3b632013-04-25 20:41:01 +00003832static u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags)
Chris Masonec44a352008-04-28 15:29:52 -04003833{
Miao Xie95669972014-07-24 11:37:14 +08003834 u64 num_devices = root->fs_info->fs_devices->rw_devices;
Ilya Dryomovfc67c452012-03-27 17:09:17 +03003835 u64 target;
David Woodhouse53b381b2013-01-29 18:40:14 -05003836 u64 tmp;
Chris Masona061fc82008-05-07 11:43:44 -04003837
Ilya Dryomovfc67c452012-03-27 17:09:17 +03003838 /*
3839 * see if restripe for this chunk_type is in progress, if so
3840 * try to reduce to the target profile
3841 */
Ilya Dryomove4d8ec02012-01-16 22:04:48 +02003842 spin_lock(&root->fs_info->balance_lock);
Ilya Dryomovfc67c452012-03-27 17:09:17 +03003843 target = get_restripe_target(root->fs_info, flags);
3844 if (target) {
3845 /* pick target profile only if it's already available */
3846 if ((flags & target) & BTRFS_EXTENDED_PROFILE_MASK) {
Ilya Dryomove4d8ec02012-01-16 22:04:48 +02003847 spin_unlock(&root->fs_info->balance_lock);
Ilya Dryomovfc67c452012-03-27 17:09:17 +03003848 return extended_to_chunk(target);
Ilya Dryomove4d8ec02012-01-16 22:04:48 +02003849 }
3850 }
3851 spin_unlock(&root->fs_info->balance_lock);
3852
David Woodhouse53b381b2013-01-29 18:40:14 -05003853 /* First, mask out the RAID levels which aren't possible */
Chris Masona061fc82008-05-07 11:43:44 -04003854 if (num_devices == 1)
David Woodhouse53b381b2013-01-29 18:40:14 -05003855 flags &= ~(BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID0 |
3856 BTRFS_BLOCK_GROUP_RAID5);
3857 if (num_devices < 3)
3858 flags &= ~BTRFS_BLOCK_GROUP_RAID6;
Chris Masona061fc82008-05-07 11:43:44 -04003859 if (num_devices < 4)
3860 flags &= ~BTRFS_BLOCK_GROUP_RAID10;
3861
David Woodhouse53b381b2013-01-29 18:40:14 -05003862 tmp = flags & (BTRFS_BLOCK_GROUP_DUP | BTRFS_BLOCK_GROUP_RAID0 |
3863 BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID5 |
3864 BTRFS_BLOCK_GROUP_RAID6 | BTRFS_BLOCK_GROUP_RAID10);
3865 flags &= ~tmp;
Chris Masonec44a352008-04-28 15:29:52 -04003866
David Woodhouse53b381b2013-01-29 18:40:14 -05003867 if (tmp & BTRFS_BLOCK_GROUP_RAID6)
3868 tmp = BTRFS_BLOCK_GROUP_RAID6;
3869 else if (tmp & BTRFS_BLOCK_GROUP_RAID5)
3870 tmp = BTRFS_BLOCK_GROUP_RAID5;
3871 else if (tmp & BTRFS_BLOCK_GROUP_RAID10)
3872 tmp = BTRFS_BLOCK_GROUP_RAID10;
3873 else if (tmp & BTRFS_BLOCK_GROUP_RAID1)
3874 tmp = BTRFS_BLOCK_GROUP_RAID1;
3875 else if (tmp & BTRFS_BLOCK_GROUP_RAID0)
3876 tmp = BTRFS_BLOCK_GROUP_RAID0;
Chris Masonec44a352008-04-28 15:29:52 -04003877
David Woodhouse53b381b2013-01-29 18:40:14 -05003878 return extended_to_chunk(flags | tmp);
Chris Masonec44a352008-04-28 15:29:52 -04003879}
3880
Filipe Mananaf8213bd2014-04-24 15:15:29 +01003881static u64 get_alloc_profile(struct btrfs_root *root, u64 orig_flags)
Josef Bacik6a632092009-02-20 11:00:09 -05003882{
Miao Xiede98ced2013-01-29 10:13:12 +00003883 unsigned seq;
Filipe Mananaf8213bd2014-04-24 15:15:29 +01003884 u64 flags;
Miao Xiede98ced2013-01-29 10:13:12 +00003885
3886 do {
Filipe Mananaf8213bd2014-04-24 15:15:29 +01003887 flags = orig_flags;
Miao Xiede98ced2013-01-29 10:13:12 +00003888 seq = read_seqbegin(&root->fs_info->profiles_lock);
3889
3890 if (flags & BTRFS_BLOCK_GROUP_DATA)
3891 flags |= root->fs_info->avail_data_alloc_bits;
3892 else if (flags & BTRFS_BLOCK_GROUP_SYSTEM)
3893 flags |= root->fs_info->avail_system_alloc_bits;
3894 else if (flags & BTRFS_BLOCK_GROUP_METADATA)
3895 flags |= root->fs_info->avail_metadata_alloc_bits;
3896 } while (read_seqretry(&root->fs_info->profiles_lock, seq));
Ilya Dryomov6fef8df2012-01-16 22:04:47 +02003897
Yan, Zhengb742bb822010-05-16 10:46:24 -04003898 return btrfs_reduce_alloc_profile(root, flags);
3899}
Josef Bacik6a632092009-02-20 11:00:09 -05003900
Miao Xie6d07bce2011-01-05 10:07:31 +00003901u64 btrfs_get_alloc_profile(struct btrfs_root *root, int data)
Yan, Zhengb742bb822010-05-16 10:46:24 -04003902{
3903 u64 flags;
David Woodhouse53b381b2013-01-29 18:40:14 -05003904 u64 ret;
Josef Bacik6a632092009-02-20 11:00:09 -05003905
Yan, Zhengb742bb822010-05-16 10:46:24 -04003906 if (data)
3907 flags = BTRFS_BLOCK_GROUP_DATA;
3908 else if (root == root->fs_info->chunk_root)
3909 flags = BTRFS_BLOCK_GROUP_SYSTEM;
3910 else
3911 flags = BTRFS_BLOCK_GROUP_METADATA;
3912
David Woodhouse53b381b2013-01-29 18:40:14 -05003913 ret = get_alloc_profile(root, flags);
3914 return ret;
Josef Bacik6a632092009-02-20 11:00:09 -05003915}
3916
Josef Bacik6a632092009-02-20 11:00:09 -05003917/*
3918 * This will check the space that the inode allocates from to make sure we have
3919 * enough space for bytes.
3920 */
Dongsheng Yange2d1f922015-02-06 10:26:52 -05003921int btrfs_check_data_free_space(struct inode *inode, u64 bytes, u64 write_bytes)
Josef Bacik6a632092009-02-20 11:00:09 -05003922{
3923 struct btrfs_space_info *data_sinfo;
Yan, Zheng0ca1f7c2010-05-16 10:48:47 -04003924 struct btrfs_root *root = BTRFS_I(inode)->root;
Li Zefanb4d7c3c2012-07-09 20:21:07 -06003925 struct btrfs_fs_info *fs_info = root->fs_info;
Josef Bacikab6e24102010-03-19 14:38:13 +00003926 u64 used;
Zhao Lei94b947b2015-02-14 13:23:45 +08003927 int ret = 0;
Zhao Leic99f1b02015-03-02 19:32:20 +08003928 int need_commit = 2;
3929 int have_pinned_space;
Josef Bacik6a632092009-02-20 11:00:09 -05003930
3931 /* make sure bytes are sectorsize aligned */
Qu Wenruofda28322013-02-26 08:10:22 +00003932 bytes = ALIGN(bytes, root->sectorsize);
Josef Bacik6a632092009-02-20 11:00:09 -05003933
Miao Xie9dced182013-10-25 17:33:36 +08003934 if (btrfs_is_free_space_inode(inode)) {
Zhao Leic99f1b02015-03-02 19:32:20 +08003935 need_commit = 0;
Miao Xie9dced182013-10-25 17:33:36 +08003936 ASSERT(current->journal_info);
Josef Bacik0af3d002010-06-21 14:48:16 -04003937 }
3938
Li Zefanb4d7c3c2012-07-09 20:21:07 -06003939 data_sinfo = fs_info->data_sinfo;
Chris Mason33b4d472009-09-22 14:45:50 -04003940 if (!data_sinfo)
3941 goto alloc;
3942
Josef Bacik6a632092009-02-20 11:00:09 -05003943again:
3944 /* make sure we have enough space to handle the data first */
3945 spin_lock(&data_sinfo->lock);
Yan, Zheng8929ecfa2010-05-16 10:49:58 -04003946 used = data_sinfo->bytes_used + data_sinfo->bytes_reserved +
3947 data_sinfo->bytes_pinned + data_sinfo->bytes_readonly +
3948 data_sinfo->bytes_may_use;
Josef Bacikab6e24102010-03-19 14:38:13 +00003949
3950 if (used + bytes > data_sinfo->total_bytes) {
Josef Bacik4e06bdd2009-02-20 10:59:53 -05003951 struct btrfs_trans_handle *trans;
3952
Josef Bacik6a632092009-02-20 11:00:09 -05003953 /*
3954 * if we don't have enough free bytes in this space then we need
3955 * to alloc a new chunk.
3956 */
Zhao Leib9fd47c2015-02-09 14:40:20 +08003957 if (!data_sinfo->full) {
Josef Bacik6a632092009-02-20 11:00:09 -05003958 u64 alloc_target;
Josef Bacik6a632092009-02-20 11:00:09 -05003959
Chris Mason0e4f8f82011-04-15 16:05:44 -04003960 data_sinfo->force_alloc = CHUNK_ALLOC_FORCE;
Josef Bacik6a632092009-02-20 11:00:09 -05003961 spin_unlock(&data_sinfo->lock);
Chris Mason33b4d472009-09-22 14:45:50 -04003962alloc:
Josef Bacik6a632092009-02-20 11:00:09 -05003963 alloc_target = btrfs_get_alloc_profile(root, 1);
Miao Xie9dced182013-10-25 17:33:36 +08003964 /*
3965 * It is ugly that we don't call nolock join
3966 * transaction for the free space inode case here.
3967 * But it is safe because we only do the data space
3968 * reservation for the free space cache in the
3969 * transaction context, the common join transaction
3970 * just increase the counter of the current transaction
3971 * handler, doesn't try to acquire the trans_lock of
3972 * the fs.
3973 */
Josef Bacik7a7eaa42011-04-13 12:54:33 -04003974 trans = btrfs_join_transaction(root);
Yan, Zhenga22285a2010-05-16 10:48:46 -04003975 if (IS_ERR(trans))
3976 return PTR_ERR(trans);
Josef Bacik6a632092009-02-20 11:00:09 -05003977
3978 ret = do_chunk_alloc(trans, root->fs_info->extent_root,
Chris Mason0e4f8f82011-04-15 16:05:44 -04003979 alloc_target,
3980 CHUNK_ALLOC_NO_FORCE);
Josef Bacik6a632092009-02-20 11:00:09 -05003981 btrfs_end_transaction(trans, root);
Miao Xied52a5b52011-01-05 10:07:18 +00003982 if (ret < 0) {
3983 if (ret != -ENOSPC)
3984 return ret;
Zhao Leic99f1b02015-03-02 19:32:20 +08003985 else {
3986 have_pinned_space = 1;
Miao Xied52a5b52011-01-05 10:07:18 +00003987 goto commit_trans;
Zhao Leic99f1b02015-03-02 19:32:20 +08003988 }
Miao Xied52a5b52011-01-05 10:07:18 +00003989 }
Chris Mason33b4d472009-09-22 14:45:50 -04003990
Li Zefanb4d7c3c2012-07-09 20:21:07 -06003991 if (!data_sinfo)
3992 data_sinfo = fs_info->data_sinfo;
3993
Josef Bacik6a632092009-02-20 11:00:09 -05003994 goto again;
3995 }
Josef Bacikf2bb8f52011-05-25 13:10:16 -04003996
3997 /*
Josef Bacikb150a4f2013-06-19 15:00:04 -04003998 * If we don't have enough pinned space to deal with this
Zhao Lei94b947b2015-02-14 13:23:45 +08003999 * allocation, and no removed chunk in current transaction,
4000 * don't bother committing the transaction.
Josef Bacikf2bb8f52011-05-25 13:10:16 -04004001 */
Zhao Leic99f1b02015-03-02 19:32:20 +08004002 have_pinned_space = percpu_counter_compare(
4003 &data_sinfo->total_bytes_pinned,
4004 used + bytes - data_sinfo->total_bytes);
Josef Bacik6a632092009-02-20 11:00:09 -05004005 spin_unlock(&data_sinfo->lock);
Josef Bacik4e06bdd2009-02-20 10:59:53 -05004006
4007 /* commit the current transaction and try again */
Miao Xied52a5b52011-01-05 10:07:18 +00004008commit_trans:
Zhao Leic99f1b02015-03-02 19:32:20 +08004009 if (need_commit &&
Josef Bacika4abeea2011-04-11 17:25:13 -04004010 !atomic_read(&root->fs_info->open_ioctl_trans)) {
Zhao Leic99f1b02015-03-02 19:32:20 +08004011 need_commit--;
Josef Bacikb150a4f2013-06-19 15:00:04 -04004012
Zhao Lei9a4e7272015-04-09 12:34:43 +08004013 if (need_commit > 0)
4014 btrfs_wait_ordered_roots(fs_info, -1);
4015
Josef Bacik7a7eaa42011-04-13 12:54:33 -04004016 trans = btrfs_join_transaction(root);
Yan, Zhenga22285a2010-05-16 10:48:46 -04004017 if (IS_ERR(trans))
4018 return PTR_ERR(trans);
Zhao Leic99f1b02015-03-02 19:32:20 +08004019 if (have_pinned_space >= 0 ||
4020 trans->transaction->have_free_bgs ||
4021 need_commit > 0) {
Zhao Lei94b947b2015-02-14 13:23:45 +08004022 ret = btrfs_commit_transaction(trans, root);
4023 if (ret)
4024 return ret;
Zhao Leid7c15172015-02-26 10:49:20 +08004025 /*
4026 * make sure that all running delayed iput are
4027 * done
4028 */
4029 down_write(&root->fs_info->delayed_iput_sem);
4030 up_write(&root->fs_info->delayed_iput_sem);
Zhao Lei94b947b2015-02-14 13:23:45 +08004031 goto again;
4032 } else {
4033 btrfs_end_transaction(trans, root);
4034 }
Josef Bacik4e06bdd2009-02-20 10:59:53 -05004035 }
4036
Jeff Mahoneycab45e22013-10-16 16:27:01 -04004037 trace_btrfs_space_reservation(root->fs_info,
4038 "space_info:enospc",
4039 data_sinfo->flags, bytes, 1);
Josef Bacik6a632092009-02-20 11:00:09 -05004040 return -ENOSPC;
4041 }
Dongsheng Yange2d1f922015-02-06 10:26:52 -05004042 ret = btrfs_qgroup_reserve(root, write_bytes);
Dongsheng Yang237c0e92014-12-29 06:23:05 -05004043 if (ret)
4044 goto out;
Josef Bacik6a632092009-02-20 11:00:09 -05004045 data_sinfo->bytes_may_use += bytes;
Josef Bacik8c2a3ca2012-01-10 10:31:31 -05004046 trace_btrfs_space_reservation(root->fs_info, "space_info",
Liu Bo2bcc0322012-03-29 09:57:44 -04004047 data_sinfo->flags, bytes, 1);
Dongsheng Yang237c0e92014-12-29 06:23:05 -05004048out:
Josef Bacik6a632092009-02-20 11:00:09 -05004049 spin_unlock(&data_sinfo->lock);
4050
Dongsheng Yang237c0e92014-12-29 06:23:05 -05004051 return ret;
Josef Bacik6a632092009-02-20 11:00:09 -05004052}
4053
4054/*
Josef Bacikfb25e912011-07-26 17:00:46 -04004055 * Called if we need to clear a data reservation for this inode.
Josef Bacik6a632092009-02-20 11:00:09 -05004056 */
Yan, Zheng0ca1f7c2010-05-16 10:48:47 -04004057void btrfs_free_reserved_data_space(struct inode *inode, u64 bytes)
Josef Bacik6a632092009-02-20 11:00:09 -05004058{
Yan, Zheng0ca1f7c2010-05-16 10:48:47 -04004059 struct btrfs_root *root = BTRFS_I(inode)->root;
Josef Bacik6a632092009-02-20 11:00:09 -05004060 struct btrfs_space_info *data_sinfo;
4061
4062 /* make sure bytes are sectorsize aligned */
Qu Wenruofda28322013-02-26 08:10:22 +00004063 bytes = ALIGN(bytes, root->sectorsize);
Josef Bacik6a632092009-02-20 11:00:09 -05004064
Li Zefanb4d7c3c2012-07-09 20:21:07 -06004065 data_sinfo = root->fs_info->data_sinfo;
Josef Bacik6a632092009-02-20 11:00:09 -05004066 spin_lock(&data_sinfo->lock);
Josef Bacik7ee9e442013-06-21 16:37:03 -04004067 WARN_ON(data_sinfo->bytes_may_use < bytes);
Josef Bacik6a632092009-02-20 11:00:09 -05004068 data_sinfo->bytes_may_use -= bytes;
Josef Bacik8c2a3ca2012-01-10 10:31:31 -05004069 trace_btrfs_space_reservation(root->fs_info, "space_info",
Liu Bo2bcc0322012-03-29 09:57:44 -04004070 data_sinfo->flags, bytes, 0);
Josef Bacik6a632092009-02-20 11:00:09 -05004071 spin_unlock(&data_sinfo->lock);
4072}
4073
Josef Bacik97e728d2009-04-21 17:40:57 -04004074static void force_metadata_allocation(struct btrfs_fs_info *info)
4075{
4076 struct list_head *head = &info->space_info;
4077 struct btrfs_space_info *found;
4078
4079 rcu_read_lock();
4080 list_for_each_entry_rcu(found, head, list) {
4081 if (found->flags & BTRFS_BLOCK_GROUP_METADATA)
Chris Mason0e4f8f82011-04-15 16:05:44 -04004082 found->force_alloc = CHUNK_ALLOC_FORCE;
Josef Bacik97e728d2009-04-21 17:40:57 -04004083 }
4084 rcu_read_unlock();
4085}
4086
Miao Xie3c76cd82013-04-25 10:12:38 +00004087static inline u64 calc_global_rsv_need_space(struct btrfs_block_rsv *global)
4088{
4089 return (global->size << 1);
4090}
4091
Chris Masone5bc2452010-10-26 13:37:56 -04004092static int should_alloc_chunk(struct btrfs_root *root,
Josef Bacik698d0082012-09-12 14:08:47 -04004093 struct btrfs_space_info *sinfo, int force)
Yan, Zheng424499d2010-05-16 10:46:25 -04004094{
Josef Bacikfb25e912011-07-26 17:00:46 -04004095 struct btrfs_block_rsv *global_rsv = &root->fs_info->global_block_rsv;
Yan, Zheng424499d2010-05-16 10:46:25 -04004096 u64 num_bytes = sinfo->total_bytes - sinfo->bytes_readonly;
Chris Mason0e4f8f82011-04-15 16:05:44 -04004097 u64 num_allocated = sinfo->bytes_used + sinfo->bytes_reserved;
Chris Masone5bc2452010-10-26 13:37:56 -04004098 u64 thresh;
Yan, Zheng424499d2010-05-16 10:46:25 -04004099
Chris Mason0e4f8f82011-04-15 16:05:44 -04004100 if (force == CHUNK_ALLOC_FORCE)
4101 return 1;
4102
4103 /*
Josef Bacikfb25e912011-07-26 17:00:46 -04004104 * We need to take into account the global rsv because for all intents
4105 * and purposes it's used space. Don't worry about locking the
4106 * global_rsv, it doesn't change except when the transaction commits.
4107 */
Josef Bacik54338b52012-08-14 16:20:52 -04004108 if (sinfo->flags & BTRFS_BLOCK_GROUP_METADATA)
Miao Xie3c76cd82013-04-25 10:12:38 +00004109 num_allocated += calc_global_rsv_need_space(global_rsv);
Josef Bacikfb25e912011-07-26 17:00:46 -04004110
4111 /*
Chris Mason0e4f8f82011-04-15 16:05:44 -04004112 * in limited mode, we want to have some free space up to
4113 * about 1% of the FS size.
4114 */
4115 if (force == CHUNK_ALLOC_LIMITED) {
David Sterba6c417612011-04-13 15:41:04 +02004116 thresh = btrfs_super_total_bytes(root->fs_info->super_copy);
Chris Mason0e4f8f82011-04-15 16:05:44 -04004117 thresh = max_t(u64, 64 * 1024 * 1024,
4118 div_factor_fine(thresh, 1));
4119
4120 if (num_bytes - num_allocated < thresh)
4121 return 1;
4122 }
Chris Mason0e4f8f82011-04-15 16:05:44 -04004123
Josef Bacik698d0082012-09-12 14:08:47 -04004124 if (num_allocated + 2 * 1024 * 1024 < div_factor(num_bytes, 8))
Josef Bacik14ed0ca2010-10-15 15:23:48 -04004125 return 0;
Yan, Zheng424499d2010-05-16 10:46:25 -04004126 return 1;
4127}
4128
Filipe Manana39c2d7f2015-05-20 14:01:55 +01004129static u64 get_profile_num_devs(struct btrfs_root *root, u64 type)
Liu Bo15d1ff82012-03-29 09:57:44 -04004130{
4131 u64 num_dev;
4132
David Woodhouse53b381b2013-01-29 18:40:14 -05004133 if (type & (BTRFS_BLOCK_GROUP_RAID10 |
4134 BTRFS_BLOCK_GROUP_RAID0 |
4135 BTRFS_BLOCK_GROUP_RAID5 |
4136 BTRFS_BLOCK_GROUP_RAID6))
Liu Bo15d1ff82012-03-29 09:57:44 -04004137 num_dev = root->fs_info->fs_devices->rw_devices;
4138 else if (type & BTRFS_BLOCK_GROUP_RAID1)
4139 num_dev = 2;
4140 else
4141 num_dev = 1; /* DUP or single */
4142
Filipe Manana39c2d7f2015-05-20 14:01:55 +01004143 return num_dev;
Liu Bo15d1ff82012-03-29 09:57:44 -04004144}
4145
Filipe Manana39c2d7f2015-05-20 14:01:55 +01004146/*
4147 * If @is_allocation is true, reserve space in the system space info necessary
4148 * for allocating a chunk, otherwise if it's false, reserve space necessary for
4149 * removing a chunk.
4150 */
4151void check_system_chunk(struct btrfs_trans_handle *trans,
4152 struct btrfs_root *root,
Filipe Manana4617ea32015-06-09 17:48:21 +01004153 u64 type)
Liu Bo15d1ff82012-03-29 09:57:44 -04004154{
4155 struct btrfs_space_info *info;
4156 u64 left;
4157 u64 thresh;
Filipe Manana4fbcdf62015-05-20 14:01:54 +01004158 int ret = 0;
Filipe Manana39c2d7f2015-05-20 14:01:55 +01004159 u64 num_devs;
Filipe Manana4fbcdf62015-05-20 14:01:54 +01004160
4161 /*
4162 * Needed because we can end up allocating a system chunk and for an
4163 * atomic and race free space reservation in the chunk block reserve.
4164 */
4165 ASSERT(mutex_is_locked(&root->fs_info->chunk_mutex));
Liu Bo15d1ff82012-03-29 09:57:44 -04004166
4167 info = __find_space_info(root->fs_info, BTRFS_BLOCK_GROUP_SYSTEM);
4168 spin_lock(&info->lock);
4169 left = info->total_bytes - info->bytes_used - info->bytes_pinned -
Filipe Manana4fbcdf62015-05-20 14:01:54 +01004170 info->bytes_reserved - info->bytes_readonly -
4171 info->bytes_may_use;
Liu Bo15d1ff82012-03-29 09:57:44 -04004172 spin_unlock(&info->lock);
4173
Filipe Manana39c2d7f2015-05-20 14:01:55 +01004174 num_devs = get_profile_num_devs(root, type);
4175
4176 /* num_devs device items to update and 1 chunk item to add or remove */
Filipe Manana4617ea32015-06-09 17:48:21 +01004177 thresh = btrfs_calc_trunc_metadata_size(root, num_devs) +
4178 btrfs_calc_trans_metadata_size(root, 1);
Filipe Manana39c2d7f2015-05-20 14:01:55 +01004179
Liu Bo15d1ff82012-03-29 09:57:44 -04004180 if (left < thresh && btrfs_test_opt(root, ENOSPC_DEBUG)) {
Simon Kirbyc2cf52e2013-03-19 22:41:23 +00004181 btrfs_info(root->fs_info, "left=%llu, need=%llu, flags=%llu",
4182 left, thresh, type);
Liu Bo15d1ff82012-03-29 09:57:44 -04004183 dump_space_info(info, 0, 0);
4184 }
4185
4186 if (left < thresh) {
4187 u64 flags;
4188
4189 flags = btrfs_get_alloc_profile(root->fs_info->chunk_root, 0);
Filipe Manana4fbcdf62015-05-20 14:01:54 +01004190 /*
4191 * Ignore failure to create system chunk. We might end up not
4192 * needing it, as we might not need to COW all nodes/leafs from
4193 * the paths we visit in the chunk tree (they were already COWed
4194 * or created in the current transaction for example).
4195 */
4196 ret = btrfs_alloc_chunk(trans, root, flags);
4197 }
4198
4199 if (!ret) {
4200 ret = btrfs_block_rsv_add(root->fs_info->chunk_root,
4201 &root->fs_info->chunk_block_rsv,
4202 thresh, BTRFS_RESERVE_NO_FLUSH);
4203 if (!ret)
4204 trans->chunk_bytes_reserved += thresh;
Liu Bo15d1ff82012-03-29 09:57:44 -04004205 }
4206}
4207
Chris Mason6324fbf2008-03-24 15:01:59 -04004208static int do_chunk_alloc(struct btrfs_trans_handle *trans,
Josef Bacik698d0082012-09-12 14:08:47 -04004209 struct btrfs_root *extent_root, u64 flags, int force)
Chris Mason6324fbf2008-03-24 15:01:59 -04004210{
4211 struct btrfs_space_info *space_info;
Josef Bacik97e728d2009-04-21 17:40:57 -04004212 struct btrfs_fs_info *fs_info = extent_root->fs_info;
Josef Bacik6d741192011-04-11 20:20:11 -04004213 int wait_for_alloc = 0;
Yan Zhengc146afa2008-11-12 14:34:12 -05004214 int ret = 0;
4215
Josef Bacikc6b305a2012-12-18 09:16:16 -05004216 /* Don't re-enter if we're already allocating a chunk */
4217 if (trans->allocating_chunk)
4218 return -ENOSPC;
4219
Chris Mason6324fbf2008-03-24 15:01:59 -04004220 space_info = __find_space_info(extent_root->fs_info, flags);
Chris Mason593060d2008-03-25 16:50:33 -04004221 if (!space_info) {
4222 ret = update_space_info(extent_root->fs_info, flags,
4223 0, 0, &space_info);
Jeff Mahoney79787ea2012-03-12 16:03:00 +01004224 BUG_ON(ret); /* -ENOMEM */
Chris Mason593060d2008-03-25 16:50:33 -04004225 }
Jeff Mahoney79787ea2012-03-12 16:03:00 +01004226 BUG_ON(!space_info); /* Logic error */
Chris Mason6324fbf2008-03-24 15:01:59 -04004227
Josef Bacik6d741192011-04-11 20:20:11 -04004228again:
Josef Bacik25179202008-10-29 14:49:05 -04004229 spin_lock(&space_info->lock);
Miao Xie9e622d62012-01-26 15:01:12 -05004230 if (force < space_info->force_alloc)
Chris Mason0e4f8f82011-04-15 16:05:44 -04004231 force = space_info->force_alloc;
Josef Bacik25179202008-10-29 14:49:05 -04004232 if (space_info->full) {
Filipe David Borba Manana09fb99a2013-08-05 16:25:12 +01004233 if (should_alloc_chunk(extent_root, space_info, force))
4234 ret = -ENOSPC;
4235 else
4236 ret = 0;
Josef Bacik25179202008-10-29 14:49:05 -04004237 spin_unlock(&space_info->lock);
Filipe David Borba Manana09fb99a2013-08-05 16:25:12 +01004238 return ret;
Josef Bacik25179202008-10-29 14:49:05 -04004239 }
Chris Mason6324fbf2008-03-24 15:01:59 -04004240
Josef Bacik698d0082012-09-12 14:08:47 -04004241 if (!should_alloc_chunk(extent_root, space_info, force)) {
Josef Bacik25179202008-10-29 14:49:05 -04004242 spin_unlock(&space_info->lock);
Josef Bacik6d741192011-04-11 20:20:11 -04004243 return 0;
4244 } else if (space_info->chunk_alloc) {
4245 wait_for_alloc = 1;
4246 } else {
4247 space_info->chunk_alloc = 1;
Josef Bacik25179202008-10-29 14:49:05 -04004248 }
Chris Mason0e4f8f82011-04-15 16:05:44 -04004249
Josef Bacik25179202008-10-29 14:49:05 -04004250 spin_unlock(&space_info->lock);
4251
Josef Bacik6d741192011-04-11 20:20:11 -04004252 mutex_lock(&fs_info->chunk_mutex);
4253
4254 /*
4255 * The chunk_mutex is held throughout the entirety of a chunk
4256 * allocation, so once we've acquired the chunk_mutex we know that the
4257 * other guy is done and we need to recheck and see if we should
4258 * allocate.
4259 */
4260 if (wait_for_alloc) {
4261 mutex_unlock(&fs_info->chunk_mutex);
4262 wait_for_alloc = 0;
4263 goto again;
4264 }
4265
Josef Bacikc6b305a2012-12-18 09:16:16 -05004266 trans->allocating_chunk = true;
4267
Josef Bacik97e728d2009-04-21 17:40:57 -04004268 /*
Josef Bacik67377732010-09-16 16:19:09 -04004269 * If we have mixed data/metadata chunks we want to make sure we keep
4270 * allocating mixed chunks instead of individual chunks.
4271 */
4272 if (btrfs_mixed_space_info(space_info))
4273 flags |= (BTRFS_BLOCK_GROUP_DATA | BTRFS_BLOCK_GROUP_METADATA);
4274
4275 /*
Josef Bacik97e728d2009-04-21 17:40:57 -04004276 * if we're doing a data chunk, go ahead and make sure that
4277 * we keep a reasonable number of metadata chunks allocated in the
4278 * FS as well.
4279 */
Josef Bacik9ed74f22009-09-11 16:12:44 -04004280 if (flags & BTRFS_BLOCK_GROUP_DATA && fs_info->metadata_ratio) {
Josef Bacik97e728d2009-04-21 17:40:57 -04004281 fs_info->data_chunk_allocations++;
4282 if (!(fs_info->data_chunk_allocations %
4283 fs_info->metadata_ratio))
4284 force_metadata_allocation(fs_info);
4285 }
4286
Liu Bo15d1ff82012-03-29 09:57:44 -04004287 /*
4288 * Check if we have enough space in SYSTEM chunk because we may need
4289 * to update devices.
4290 */
Filipe Manana4617ea32015-06-09 17:48:21 +01004291 check_system_chunk(trans, extent_root, flags);
Liu Bo15d1ff82012-03-29 09:57:44 -04004292
Yan Zheng2b820322008-11-17 21:11:30 -05004293 ret = btrfs_alloc_chunk(trans, extent_root, flags);
Josef Bacikc6b305a2012-12-18 09:16:16 -05004294 trans->allocating_chunk = false;
Mark Fasheh92b8e8972011-07-12 10:57:59 -07004295
Josef Bacik9ed74f22009-09-11 16:12:44 -04004296 spin_lock(&space_info->lock);
Alexandre Olivaa81cb9a2013-02-21 21:15:14 +00004297 if (ret < 0 && ret != -ENOSPC)
4298 goto out;
Chris Masond3977122009-01-05 21:25:51 -05004299 if (ret)
Chris Mason6324fbf2008-03-24 15:01:59 -04004300 space_info->full = 1;
Yan, Zheng424499d2010-05-16 10:46:25 -04004301 else
4302 ret = 1;
Josef Bacik6d741192011-04-11 20:20:11 -04004303
Chris Mason0e4f8f82011-04-15 16:05:44 -04004304 space_info->force_alloc = CHUNK_ALLOC_NO_FORCE;
Alexandre Olivaa81cb9a2013-02-21 21:15:14 +00004305out:
Josef Bacik6d741192011-04-11 20:20:11 -04004306 space_info->chunk_alloc = 0;
Josef Bacik9ed74f22009-09-11 16:12:44 -04004307 spin_unlock(&space_info->lock);
Dan Carpentera25c75d2012-04-18 09:59:29 +03004308 mutex_unlock(&fs_info->chunk_mutex);
Filipe Manana00d80e32015-07-20 14:56:20 +01004309 /*
4310 * When we allocate a new chunk we reserve space in the chunk block
4311 * reserve to make sure we can COW nodes/leafs in the chunk tree or
4312 * add new nodes/leafs to it if we end up needing to do it when
4313 * inserting the chunk item and updating device items as part of the
4314 * second phase of chunk allocation, performed by
4315 * btrfs_finish_chunk_alloc(). So make sure we don't accumulate a
4316 * large number of new block groups to create in our transaction
4317 * handle's new_bgs list to avoid exhausting the chunk block reserve
4318 * in extreme cases - like having a single transaction create many new
4319 * block groups when starting to write out the free space caches of all
4320 * the block groups that were made dirty during the lifetime of the
4321 * transaction.
4322 */
Filipe Mananad9a05402015-10-03 13:13:13 +01004323 if (trans->can_flush_pending_bgs &&
4324 trans->chunk_bytes_reserved >= (2 * 1024 * 1024ull)) {
Filipe Manana00d80e32015-07-20 14:56:20 +01004325 btrfs_create_pending_block_groups(trans, trans->root);
4326 btrfs_trans_release_chunk_metadata(trans);
4327 }
Josef Bacik0f9dd462008-09-23 13:14:11 -04004328 return ret;
Chris Mason6324fbf2008-03-24 15:01:59 -04004329}
4330
Josef Bacika80c8dcf2012-09-06 16:59:33 -04004331static int can_overcommit(struct btrfs_root *root,
4332 struct btrfs_space_info *space_info, u64 bytes,
Miao Xie08e007d2012-10-16 11:33:38 +00004333 enum btrfs_reserve_flush_enum flush)
Josef Bacika80c8dcf2012-09-06 16:59:33 -04004334{
Josef Bacik96f1bb52013-01-30 17:02:51 -05004335 struct btrfs_block_rsv *global_rsv = &root->fs_info->global_block_rsv;
Josef Bacika80c8dcf2012-09-06 16:59:33 -04004336 u64 profile = btrfs_get_alloc_profile(root, 0);
Miao Xie3c76cd82013-04-25 10:12:38 +00004337 u64 space_size;
Josef Bacika80c8dcf2012-09-06 16:59:33 -04004338 u64 avail;
4339 u64 used;
4340
4341 used = space_info->bytes_used + space_info->bytes_reserved +
Josef Bacik96f1bb52013-01-30 17:02:51 -05004342 space_info->bytes_pinned + space_info->bytes_readonly;
4343
Josef Bacik96f1bb52013-01-30 17:02:51 -05004344 /*
4345 * We only want to allow over committing if we have lots of actual space
4346 * free, but if we don't have enough space to handle the global reserve
4347 * space then we could end up having a real enospc problem when trying
4348 * to allocate a chunk or some other such important allocation.
4349 */
Miao Xie3c76cd82013-04-25 10:12:38 +00004350 spin_lock(&global_rsv->lock);
4351 space_size = calc_global_rsv_need_space(global_rsv);
4352 spin_unlock(&global_rsv->lock);
4353 if (used + space_size >= space_info->total_bytes)
Josef Bacik96f1bb52013-01-30 17:02:51 -05004354 return 0;
4355
4356 used += space_info->bytes_may_use;
Josef Bacika80c8dcf2012-09-06 16:59:33 -04004357
4358 spin_lock(&root->fs_info->free_chunk_lock);
4359 avail = root->fs_info->free_chunk_space;
4360 spin_unlock(&root->fs_info->free_chunk_lock);
4361
4362 /*
4363 * If we have dup, raid1 or raid10 then only half of the free
David Woodhouse53b381b2013-01-29 18:40:14 -05004364 * space is actually useable. For raid56, the space info used
4365 * doesn't include the parity drive, so we don't have to
4366 * change the math
Josef Bacika80c8dcf2012-09-06 16:59:33 -04004367 */
4368 if (profile & (BTRFS_BLOCK_GROUP_DUP |
4369 BTRFS_BLOCK_GROUP_RAID1 |
4370 BTRFS_BLOCK_GROUP_RAID10))
4371 avail >>= 1;
4372
4373 /*
Miao Xie561c2942012-10-16 11:32:18 +00004374 * If we aren't flushing all things, let us overcommit up to
4375 * 1/2th of the space. If we can flush, don't let us overcommit
4376 * too much, let it overcommit up to 1/8 of the space.
Josef Bacika80c8dcf2012-09-06 16:59:33 -04004377 */
Miao Xie08e007d2012-10-16 11:33:38 +00004378 if (flush == BTRFS_RESERVE_FLUSH_ALL)
Josef Bacik14575ae2013-09-17 10:48:00 -04004379 avail >>= 3;
Josef Bacika80c8dcf2012-09-06 16:59:33 -04004380 else
Josef Bacik14575ae2013-09-17 10:48:00 -04004381 avail >>= 1;
Josef Bacika80c8dcf2012-09-06 16:59:33 -04004382
Josef Bacik14575ae2013-09-17 10:48:00 -04004383 if (used + bytes < space_info->total_bytes + avail)
Josef Bacika80c8dcf2012-09-06 16:59:33 -04004384 return 1;
4385 return 0;
4386}
4387
Eric Sandeen48a3b632013-04-25 20:41:01 +00004388static void btrfs_writeback_inodes_sb_nr(struct btrfs_root *root,
Miao Xie6c255e62014-03-06 13:55:01 +08004389 unsigned long nr_pages, int nr_items)
Miao Xieda633a42012-12-20 11:19:09 +00004390{
4391 struct super_block *sb = root->fs_info->sb;
Miao Xieda633a42012-12-20 11:19:09 +00004392
Josef Bacik925a6ef2013-06-20 12:31:27 -04004393 if (down_read_trylock(&sb->s_umount)) {
4394 writeback_inodes_sb_nr(sb, nr_pages, WB_REASON_FS_FREE_SPACE);
4395 up_read(&sb->s_umount);
4396 } else {
Miao Xieda633a42012-12-20 11:19:09 +00004397 /*
4398 * We needn't worry the filesystem going from r/w to r/o though
4399 * we don't acquire ->s_umount mutex, because the filesystem
4400 * should guarantee the delalloc inodes list be empty after
4401 * the filesystem is readonly(all dirty pages are written to
4402 * the disk).
4403 */
Miao Xie6c255e62014-03-06 13:55:01 +08004404 btrfs_start_delalloc_roots(root->fs_info, 0, nr_items);
Josef Bacik98ad69c2013-04-04 11:55:49 -04004405 if (!current->journal_info)
Miao Xie6c255e62014-03-06 13:55:01 +08004406 btrfs_wait_ordered_roots(root->fs_info, nr_items);
Miao Xieda633a42012-12-20 11:19:09 +00004407 }
4408}
4409
Miao Xie18cd8ea2013-11-04 23:13:22 +08004410static inline int calc_reclaim_items_nr(struct btrfs_root *root, u64 to_reclaim)
4411{
4412 u64 bytes;
4413 int nr;
4414
4415 bytes = btrfs_calc_trans_metadata_size(root, 1);
4416 nr = (int)div64_u64(to_reclaim, bytes);
4417 if (!nr)
4418 nr = 1;
4419 return nr;
4420}
4421
Miao Xiec61a16a2013-11-04 23:13:23 +08004422#define EXTENT_SIZE_PER_ITEM (256 * 1024)
4423
Yan, Zheng5da9d012010-05-16 10:46:25 -04004424/*
4425 * shrink metadata reservation for delalloc
4426 */
Josef Bacikf4c738c2012-07-02 17:10:51 -04004427static void shrink_delalloc(struct btrfs_root *root, u64 to_reclaim, u64 orig,
4428 bool wait_ordered)
Yan, Zheng5da9d012010-05-16 10:46:25 -04004429{
Yan, Zheng0ca1f7c2010-05-16 10:48:47 -04004430 struct btrfs_block_rsv *block_rsv;
Josef Bacik0019f102010-10-15 15:18:40 -04004431 struct btrfs_space_info *space_info;
Josef Bacik663350a2011-11-03 22:54:25 -04004432 struct btrfs_trans_handle *trans;
Josef Bacikf4c738c2012-07-02 17:10:51 -04004433 u64 delalloc_bytes;
Yan, Zheng5da9d012010-05-16 10:46:25 -04004434 u64 max_reclaim;
Josef Bacikb1953bc2011-01-21 21:10:01 +00004435 long time_left;
Miao Xied3ee29e32013-11-04 23:13:20 +08004436 unsigned long nr_pages;
4437 int loops;
Miao Xieb0244192013-11-04 23:13:25 +08004438 int items;
Miao Xie08e007d2012-10-16 11:33:38 +00004439 enum btrfs_reserve_flush_enum flush;
Yan, Zheng5da9d012010-05-16 10:46:25 -04004440
Miao Xiec61a16a2013-11-04 23:13:23 +08004441 /* Calc the number of the pages we need flush for space reservation */
Miao Xieb0244192013-11-04 23:13:25 +08004442 items = calc_reclaim_items_nr(root, to_reclaim);
4443 to_reclaim = items * EXTENT_SIZE_PER_ITEM;
Miao Xiec61a16a2013-11-04 23:13:23 +08004444
Josef Bacik663350a2011-11-03 22:54:25 -04004445 trans = (struct btrfs_trans_handle *)current->journal_info;
Yan, Zheng0ca1f7c2010-05-16 10:48:47 -04004446 block_rsv = &root->fs_info->delalloc_block_rsv;
Josef Bacik0019f102010-10-15 15:18:40 -04004447 space_info = block_rsv->space_info;
Chris Masonbf9022e2010-10-26 13:40:45 -04004448
Miao Xie963d6782013-01-29 10:10:51 +00004449 delalloc_bytes = percpu_counter_sum_positive(
4450 &root->fs_info->delalloc_bytes);
Josef Bacikf4c738c2012-07-02 17:10:51 -04004451 if (delalloc_bytes == 0) {
Josef Bacikfdb5eff2011-06-07 16:07:44 -04004452 if (trans)
Josef Bacikf4c738c2012-07-02 17:10:51 -04004453 return;
Miao Xie38c135a2013-11-04 23:13:21 +08004454 if (wait_ordered)
Miao Xieb0244192013-11-04 23:13:25 +08004455 btrfs_wait_ordered_roots(root->fs_info, items);
Josef Bacikf4c738c2012-07-02 17:10:51 -04004456 return;
Josef Bacikfdb5eff2011-06-07 16:07:44 -04004457 }
4458
Miao Xied3ee29e32013-11-04 23:13:20 +08004459 loops = 0;
Josef Bacikf4c738c2012-07-02 17:10:51 -04004460 while (delalloc_bytes && loops < 3) {
4461 max_reclaim = min(delalloc_bytes, to_reclaim);
4462 nr_pages = max_reclaim >> PAGE_CACHE_SHIFT;
Miao Xie6c255e62014-03-06 13:55:01 +08004463 btrfs_writeback_inodes_sb_nr(root, nr_pages, items);
Josef Bacikdea31f52012-09-06 16:47:00 -04004464 /*
4465 * We need to wait for the async pages to actually start before
4466 * we do anything.
4467 */
Miao Xie9f3a0742013-11-04 23:13:24 +08004468 max_reclaim = atomic_read(&root->fs_info->async_delalloc_pages);
4469 if (!max_reclaim)
4470 goto skip_async;
Josef Bacikdea31f52012-09-06 16:47:00 -04004471
Miao Xie9f3a0742013-11-04 23:13:24 +08004472 if (max_reclaim <= nr_pages)
4473 max_reclaim = 0;
4474 else
4475 max_reclaim -= nr_pages;
4476
4477 wait_event(root->fs_info->async_submit_wait,
4478 atomic_read(&root->fs_info->async_delalloc_pages) <=
4479 (int)max_reclaim);
4480skip_async:
Miao Xie08e007d2012-10-16 11:33:38 +00004481 if (!trans)
4482 flush = BTRFS_RESERVE_FLUSH_ALL;
4483 else
4484 flush = BTRFS_RESERVE_NO_FLUSH;
Josef Bacik0019f102010-10-15 15:18:40 -04004485 spin_lock(&space_info->lock);
Miao Xie08e007d2012-10-16 11:33:38 +00004486 if (can_overcommit(root, space_info, orig, flush)) {
Josef Bacikf4c738c2012-07-02 17:10:51 -04004487 spin_unlock(&space_info->lock);
4488 break;
4489 }
Josef Bacik0019f102010-10-15 15:18:40 -04004490 spin_unlock(&space_info->lock);
Yan, Zheng5da9d012010-05-16 10:46:25 -04004491
Chris Mason36e39c42011-03-12 07:08:42 -05004492 loops++;
Josef Bacikf104d042011-10-14 13:56:58 -04004493 if (wait_ordered && !trans) {
Miao Xieb0244192013-11-04 23:13:25 +08004494 btrfs_wait_ordered_roots(root->fs_info, items);
Josef Bacikf104d042011-10-14 13:56:58 -04004495 } else {
Josef Bacikf4c738c2012-07-02 17:10:51 -04004496 time_left = schedule_timeout_killable(1);
Josef Bacikf104d042011-10-14 13:56:58 -04004497 if (time_left)
4498 break;
4499 }
Miao Xie963d6782013-01-29 10:10:51 +00004500 delalloc_bytes = percpu_counter_sum_positive(
4501 &root->fs_info->delalloc_bytes);
Yan, Zheng5da9d012010-05-16 10:46:25 -04004502 }
Yan, Zheng5da9d012010-05-16 10:46:25 -04004503}
4504
Josef Bacik4a92b1b2011-08-30 12:34:28 -04004505/**
Josef Bacik663350a2011-11-03 22:54:25 -04004506 * maybe_commit_transaction - possibly commit the transaction if its ok to
4507 * @root - the root we're allocating for
4508 * @bytes - the number of bytes we want to reserve
4509 * @force - force the commit
Josef Bacik8bb8ab22010-10-15 16:52:49 -04004510 *
Josef Bacik663350a2011-11-03 22:54:25 -04004511 * This will check to make sure that committing the transaction will actually
4512 * get us somewhere and then commit the transaction if it does. Otherwise it
4513 * will return -ENOSPC.
Josef Bacik8bb8ab22010-10-15 16:52:49 -04004514 */
Josef Bacik663350a2011-11-03 22:54:25 -04004515static int may_commit_transaction(struct btrfs_root *root,
4516 struct btrfs_space_info *space_info,
4517 u64 bytes, int force)
4518{
4519 struct btrfs_block_rsv *delayed_rsv = &root->fs_info->delayed_block_rsv;
4520 struct btrfs_trans_handle *trans;
4521
4522 trans = (struct btrfs_trans_handle *)current->journal_info;
4523 if (trans)
4524 return -EAGAIN;
4525
4526 if (force)
4527 goto commit;
4528
4529 /* See if there is enough pinned space to make this reservation */
Josef Bacikb150a4f2013-06-19 15:00:04 -04004530 if (percpu_counter_compare(&space_info->total_bytes_pinned,
Miao Xie0424c542014-03-06 13:54:59 +08004531 bytes) >= 0)
Josef Bacik663350a2011-11-03 22:54:25 -04004532 goto commit;
Josef Bacik663350a2011-11-03 22:54:25 -04004533
4534 /*
4535 * See if there is some space in the delayed insertion reservation for
4536 * this reservation.
4537 */
4538 if (space_info != delayed_rsv->space_info)
4539 return -ENOSPC;
4540
4541 spin_lock(&delayed_rsv->lock);
Josef Bacikb150a4f2013-06-19 15:00:04 -04004542 if (percpu_counter_compare(&space_info->total_bytes_pinned,
4543 bytes - delayed_rsv->size) >= 0) {
Josef Bacik663350a2011-11-03 22:54:25 -04004544 spin_unlock(&delayed_rsv->lock);
4545 return -ENOSPC;
4546 }
4547 spin_unlock(&delayed_rsv->lock);
4548
4549commit:
4550 trans = btrfs_join_transaction(root);
4551 if (IS_ERR(trans))
4552 return -ENOSPC;
4553
4554 return btrfs_commit_transaction(trans, root);
4555}
4556
Josef Bacik96c3f432012-06-21 14:05:49 -04004557enum flush_state {
Josef Bacik67b0fd62012-09-24 13:42:00 -04004558 FLUSH_DELAYED_ITEMS_NR = 1,
4559 FLUSH_DELAYED_ITEMS = 2,
4560 FLUSH_DELALLOC = 3,
4561 FLUSH_DELALLOC_WAIT = 4,
Josef Bacikea658ba2012-09-11 16:57:25 -04004562 ALLOC_CHUNK = 5,
4563 COMMIT_TRANS = 6,
Josef Bacik96c3f432012-06-21 14:05:49 -04004564};
4565
/*
 * Run one stage of space reclaim against @space_info, selected by @state
 * (enum flush_state).  @num_bytes is the amount we are trying to free and
 * @orig_bytes the original reservation request.  Returns 0 on success or a
 * negative errno; a stage that cannot help yields -ENOSPC.
 */
static int flush_space(struct btrfs_root *root,
		       struct btrfs_space_info *space_info, u64 num_bytes,
		       u64 orig_bytes, int state)
{
	struct btrfs_trans_handle *trans;
	int nr;
	int ret = 0;

	switch (state) {
	case FLUSH_DELAYED_ITEMS_NR:
	case FLUSH_DELAYED_ITEMS:
		/* nr == -1 means flush every delayed item. */
		if (state == FLUSH_DELAYED_ITEMS_NR)
			nr = calc_reclaim_items_nr(root, num_bytes) * 2;
		else
			nr = -1;

		trans = btrfs_join_transaction(root);
		if (IS_ERR(trans)) {
			ret = PTR_ERR(trans);
			break;
		}
		ret = btrfs_run_delayed_items_nr(trans, root, nr);
		btrfs_end_transaction(trans, root);
		break;
	case FLUSH_DELALLOC:
	case FLUSH_DELALLOC_WAIT:
		shrink_delalloc(root, num_bytes * 2, orig_bytes,
				state == FLUSH_DELALLOC_WAIT);
		break;
	case ALLOC_CHUNK:
		trans = btrfs_join_transaction(root);
		if (IS_ERR(trans)) {
			ret = PTR_ERR(trans);
			break;
		}
		ret = do_chunk_alloc(trans, root->fs_info->extent_root,
				     btrfs_get_alloc_profile(root, 0),
				     CHUNK_ALLOC_NO_FORCE);
		btrfs_end_transaction(trans, root);
		/* No room for a new chunk is not fatal for this stage. */
		if (ret == -ENOSPC)
			ret = 0;
		break;
	case COMMIT_TRANS:
		ret = may_commit_transaction(root, space_info, orig_bytes, 0);
		break;
	default:
		ret = -ENOSPC;
		break;
	}

	return ret;
}
Miao Xie21c7e752014-05-13 17:29:04 -07004618
/*
 * Work out how many bytes the async reclaim worker should try to free from
 * @space_info.  Returns 0 when the space_info can already overcommit a
 * reasonable amount, i.e. background reclaim is unnecessary.
 */
static inline u64
btrfs_calc_reclaim_metadata_size(struct btrfs_root *root,
				 struct btrfs_space_info *space_info)
{
	u64 used;
	u64 expected;
	u64 to_reclaim;

	/* Scale the reclaim target with CPU count, capped at 16M. */
	to_reclaim = min_t(u64, num_online_cpus() * 1024 * 1024,
				16 * 1024 * 1024);
	spin_lock(&space_info->lock);
	if (can_overcommit(root, space_info, to_reclaim,
			   BTRFS_RESERVE_FLUSH_ALL)) {
		to_reclaim = 0;
		goto out;
	}

	used = space_info->bytes_used + space_info->bytes_reserved +
	       space_info->bytes_pinned + space_info->bytes_readonly +
	       space_info->bytes_may_use;
	/* Target 95% usage while small overcommits still succeed, else 90%. */
	if (can_overcommit(root, space_info, 1024 * 1024,
			   BTRFS_RESERVE_FLUSH_ALL))
		expected = div_factor_fine(space_info->total_bytes, 95);
	else
		expected = div_factor_fine(space_info->total_bytes, 90);

	if (used > expected)
		to_reclaim = used - expected;
	else
		to_reclaim = 0;
	/* Never aim for more than what reservations could actually return. */
	to_reclaim = min(to_reclaim, space_info->bytes_may_use +
				     space_info->bytes_reserved);
out:
	spin_unlock(&space_info->lock);

	return to_reclaim;
}
4656
4657static inline int need_do_async_reclaim(struct btrfs_space_info *space_info,
4658 struct btrfs_fs_info *fs_info, u64 used)
4659{
Josef Bacik365c5312015-02-18 13:58:15 -08004660 u64 thresh = div_factor_fine(space_info->total_bytes, 98);
4661
4662 /* If we're just plain full then async reclaim just slows us down. */
4663 if (space_info->bytes_used >= thresh)
4664 return 0;
4665
4666 return (used >= thresh && !btrfs_fs_closing(fs_info) &&
Miao Xie21c7e752014-05-13 17:29:04 -07004667 !test_bit(BTRFS_FS_STATE_REMOUNTING, &fs_info->fs_state));
4668}
4669
4670static int btrfs_need_do_async_reclaim(struct btrfs_space_info *space_info,
Liu Bo25ce4592014-09-10 12:58:50 +08004671 struct btrfs_fs_info *fs_info,
4672 int flush_state)
Miao Xie21c7e752014-05-13 17:29:04 -07004673{
4674 u64 used;
4675
4676 spin_lock(&space_info->lock);
Liu Bo25ce4592014-09-10 12:58:50 +08004677 /*
4678 * We run out of space and have not got any free space via flush_space,
4679 * so don't bother doing async reclaim.
4680 */
4681 if (flush_state > COMMIT_TRANS && space_info->full) {
4682 spin_unlock(&space_info->lock);
4683 return 0;
4684 }
4685
Miao Xie21c7e752014-05-13 17:29:04 -07004686 used = space_info->bytes_used + space_info->bytes_reserved +
4687 space_info->bytes_pinned + space_info->bytes_readonly +
4688 space_info->bytes_may_use;
4689 if (need_do_async_reclaim(space_info, fs_info, used)) {
4690 spin_unlock(&space_info->lock);
4691 return 1;
4692 }
4693 spin_unlock(&space_info->lock);
4694
4695 return 0;
4696}
4697
4698static void btrfs_async_reclaim_metadata_space(struct work_struct *work)
4699{
4700 struct btrfs_fs_info *fs_info;
4701 struct btrfs_space_info *space_info;
4702 u64 to_reclaim;
4703 int flush_state;
4704
4705 fs_info = container_of(work, struct btrfs_fs_info, async_reclaim_work);
4706 space_info = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA);
4707
4708 to_reclaim = btrfs_calc_reclaim_metadata_size(fs_info->fs_root,
4709 space_info);
4710 if (!to_reclaim)
4711 return;
4712
4713 flush_state = FLUSH_DELAYED_ITEMS_NR;
4714 do {
4715 flush_space(fs_info->fs_root, space_info, to_reclaim,
4716 to_reclaim, flush_state);
4717 flush_state++;
Liu Bo25ce4592014-09-10 12:58:50 +08004718 if (!btrfs_need_do_async_reclaim(space_info, fs_info,
4719 flush_state))
Miao Xie21c7e752014-05-13 17:29:04 -07004720 return;
Josef Bacik365c5312015-02-18 13:58:15 -08004721 } while (flush_state < COMMIT_TRANS);
Miao Xie21c7e752014-05-13 17:29:04 -07004722}
4723
/*
 * Initialize @work to run the background metadata reclaim handler.  The
 * work item is queued from reserve_metadata_bytes() when the metadata
 * space_info comes under pressure.
 */
void btrfs_init_async_reclaim_work(struct work_struct *work)
{
	INIT_WORK(work, btrfs_async_reclaim_metadata_space);
}
4728
Josef Bacik663350a2011-11-03 22:54:25 -04004729/**
Josef Bacik4a92b1b2011-08-30 12:34:28 -04004730 * reserve_metadata_bytes - try to reserve bytes from the block_rsv's space
4731 * @root - the root we're allocating for
4732 * @block_rsv - the block_rsv we're allocating for
4733 * @orig_bytes - the number of bytes we want
Adam Buchbinder48fc7f72012-09-19 21:48:00 -04004734 * @flush - whether or not we can flush to make our reservation
Josef Bacik8bb8ab22010-10-15 16:52:49 -04004735 *
Josef Bacik4a92b1b2011-08-30 12:34:28 -04004736 * This will reserve orgi_bytes number of bytes from the space info associated
4737 * with the block_rsv. If there is not enough space it will make an attempt to
4738 * flush out space to make room. It will do this by flushing delalloc if
4739 * possible or committing the transaction. If flush is 0 then no attempts to
4740 * regain reservations will be made and this will fail if there is not enough
4741 * space already.
Josef Bacik8bb8ab22010-10-15 16:52:49 -04004742 */
Josef Bacik4a92b1b2011-08-30 12:34:28 -04004743static int reserve_metadata_bytes(struct btrfs_root *root,
Josef Bacik8bb8ab22010-10-15 16:52:49 -04004744 struct btrfs_block_rsv *block_rsv,
Miao Xie08e007d2012-10-16 11:33:38 +00004745 u64 orig_bytes,
4746 enum btrfs_reserve_flush_enum flush)
Yan, Zhengf0486c62010-05-16 10:46:25 -04004747{
4748 struct btrfs_space_info *space_info = block_rsv->space_info;
Josef Bacik2bf64752011-09-26 17:12:22 -04004749 u64 used;
Josef Bacik8bb8ab22010-10-15 16:52:49 -04004750 u64 num_bytes = orig_bytes;
Josef Bacik67b0fd62012-09-24 13:42:00 -04004751 int flush_state = FLUSH_DELAYED_ITEMS_NR;
Josef Bacik8bb8ab22010-10-15 16:52:49 -04004752 int ret = 0;
Josef Bacikfdb5eff2011-06-07 16:07:44 -04004753 bool flushing = false;
Josef Bacik8bb8ab22010-10-15 16:52:49 -04004754
4755again:
Josef Bacikfdb5eff2011-06-07 16:07:44 -04004756 ret = 0;
Yan, Zhengf0486c62010-05-16 10:46:25 -04004757 spin_lock(&space_info->lock);
Josef Bacikfdb5eff2011-06-07 16:07:44 -04004758 /*
Miao Xie08e007d2012-10-16 11:33:38 +00004759 * We only want to wait if somebody other than us is flushing and we
4760 * are actually allowed to flush all things.
Josef Bacikfdb5eff2011-06-07 16:07:44 -04004761 */
Miao Xie08e007d2012-10-16 11:33:38 +00004762 while (flush == BTRFS_RESERVE_FLUSH_ALL && !flushing &&
4763 space_info->flush) {
Josef Bacikfdb5eff2011-06-07 16:07:44 -04004764 spin_unlock(&space_info->lock);
4765 /*
4766 * If we have a trans handle we can't wait because the flusher
4767 * may have to commit the transaction, which would mean we would
4768 * deadlock since we are waiting for the flusher to finish, but
4769 * hold the current transaction open.
4770 */
Josef Bacik663350a2011-11-03 22:54:25 -04004771 if (current->journal_info)
Josef Bacikfdb5eff2011-06-07 16:07:44 -04004772 return -EAGAIN;
Arne Jansenb9688bb2012-04-18 10:27:16 +02004773 ret = wait_event_killable(space_info->wait, !space_info->flush);
4774 /* Must have been killed, return */
4775 if (ret)
Josef Bacikfdb5eff2011-06-07 16:07:44 -04004776 return -EINTR;
4777
4778 spin_lock(&space_info->lock);
4779 }
4780
4781 ret = -ENOSPC;
Josef Bacik2bf64752011-09-26 17:12:22 -04004782 used = space_info->bytes_used + space_info->bytes_reserved +
4783 space_info->bytes_pinned + space_info->bytes_readonly +
4784 space_info->bytes_may_use;
Yan, Zhengf0486c62010-05-16 10:46:25 -04004785
Josef Bacik8bb8ab22010-10-15 16:52:49 -04004786 /*
4787 * The idea here is that we've not already over-reserved the block group
4788 * then we can go ahead and save our reservation first and then start
4789 * flushing if we need to. Otherwise if we've already overcommitted
4790 * lets start flushing stuff first and then come back and try to make
4791 * our reservation.
4792 */
Josef Bacik2bf64752011-09-26 17:12:22 -04004793 if (used <= space_info->total_bytes) {
4794 if (used + orig_bytes <= space_info->total_bytes) {
Josef Bacikfb25e912011-07-26 17:00:46 -04004795 space_info->bytes_may_use += orig_bytes;
Josef Bacik8c2a3ca2012-01-10 10:31:31 -05004796 trace_btrfs_space_reservation(root->fs_info,
Liu Bo2bcc0322012-03-29 09:57:44 -04004797 "space_info", space_info->flags, orig_bytes, 1);
Yan, Zhengf0486c62010-05-16 10:46:25 -04004798 ret = 0;
4799 } else {
Josef Bacik8bb8ab22010-10-15 16:52:49 -04004800 /*
4801 * Ok set num_bytes to orig_bytes since we aren't
4802 * overocmmitted, this way we only try and reclaim what
4803 * we need.
4804 */
4805 num_bytes = orig_bytes;
Yan, Zhengf0486c62010-05-16 10:46:25 -04004806 }
Josef Bacik8bb8ab22010-10-15 16:52:49 -04004807 } else {
4808 /*
4809 * Ok we're over committed, set num_bytes to the overcommitted
4810 * amount plus the amount of bytes that we need for this
4811 * reservation.
4812 */
Josef Bacik2bf64752011-09-26 17:12:22 -04004813 num_bytes = used - space_info->total_bytes +
Josef Bacik96c3f432012-06-21 14:05:49 -04004814 (orig_bytes * 2);
Yan, Zhengf0486c62010-05-16 10:46:25 -04004815 }
Josef Bacik8bb8ab22010-10-15 16:52:49 -04004816
Josef Bacik44734ed2012-09-28 16:04:19 -04004817 if (ret && can_overcommit(root, space_info, orig_bytes, flush)) {
4818 space_info->bytes_may_use += orig_bytes;
4819 trace_btrfs_space_reservation(root->fs_info, "space_info",
4820 space_info->flags, orig_bytes,
4821 1);
4822 ret = 0;
Josef Bacik2bf64752011-09-26 17:12:22 -04004823 }
4824
Josef Bacik8bb8ab22010-10-15 16:52:49 -04004825 /*
4826 * Couldn't make our reservation, save our place so while we're trying
4827 * to reclaim space we can actually use it instead of somebody else
4828 * stealing it from us.
Miao Xie08e007d2012-10-16 11:33:38 +00004829 *
4830 * We make the other tasks wait for the flush only when we can flush
4831 * all things.
Josef Bacik8bb8ab22010-10-15 16:52:49 -04004832 */
Josef Bacik72bcd992012-12-18 15:16:34 -05004833 if (ret && flush != BTRFS_RESERVE_NO_FLUSH) {
Josef Bacikfdb5eff2011-06-07 16:07:44 -04004834 flushing = true;
4835 space_info->flush = 1;
Miao Xie21c7e752014-05-13 17:29:04 -07004836 } else if (!ret && space_info->flags & BTRFS_BLOCK_GROUP_METADATA) {
4837 used += orig_bytes;
Josef Bacikf6acfd52014-09-18 11:27:17 -04004838 /*
4839 * We will do the space reservation dance during log replay,
4840 * which means we won't have fs_info->fs_root set, so don't do
4841 * the async reclaim as we will panic.
4842 */
4843 if (!root->fs_info->log_root_recovering &&
4844 need_do_async_reclaim(space_info, root->fs_info, used) &&
Miao Xie21c7e752014-05-13 17:29:04 -07004845 !work_busy(&root->fs_info->async_reclaim_work))
4846 queue_work(system_unbound_wq,
4847 &root->fs_info->async_reclaim_work);
Josef Bacik8bb8ab22010-10-15 16:52:49 -04004848 }
Yan, Zhengf0486c62010-05-16 10:46:25 -04004849 spin_unlock(&space_info->lock);
4850
Miao Xie08e007d2012-10-16 11:33:38 +00004851 if (!ret || flush == BTRFS_RESERVE_NO_FLUSH)
Josef Bacik8bb8ab22010-10-15 16:52:49 -04004852 goto out;
4853
Josef Bacik96c3f432012-06-21 14:05:49 -04004854 ret = flush_space(root, space_info, num_bytes, orig_bytes,
4855 flush_state);
4856 flush_state++;
Miao Xie08e007d2012-10-16 11:33:38 +00004857
4858 /*
4859 * If we are FLUSH_LIMIT, we can not flush delalloc, or the deadlock
4860 * would happen. So skip delalloc flush.
4861 */
4862 if (flush == BTRFS_RESERVE_FLUSH_LIMIT &&
4863 (flush_state == FLUSH_DELALLOC ||
4864 flush_state == FLUSH_DELALLOC_WAIT))
4865 flush_state = ALLOC_CHUNK;
4866
Josef Bacik96c3f432012-06-21 14:05:49 -04004867 if (!ret)
Josef Bacik8bb8ab22010-10-15 16:52:49 -04004868 goto again;
Miao Xie08e007d2012-10-16 11:33:38 +00004869 else if (flush == BTRFS_RESERVE_FLUSH_LIMIT &&
4870 flush_state < COMMIT_TRANS)
4871 goto again;
4872 else if (flush == BTRFS_RESERVE_FLUSH_ALL &&
4873 flush_state <= COMMIT_TRANS)
Josef Bacik8bb8ab22010-10-15 16:52:49 -04004874 goto again;
4875
4876out:
Josef Bacik5d803662013-02-07 16:06:02 -05004877 if (ret == -ENOSPC &&
4878 unlikely(root->orphan_cleanup_state == ORPHAN_CLEANUP_STARTED)) {
4879 struct btrfs_block_rsv *global_rsv =
4880 &root->fs_info->global_block_rsv;
4881
4882 if (block_rsv != global_rsv &&
4883 !block_rsv_use_bytes(global_rsv, orig_bytes))
4884 ret = 0;
4885 }
Jeff Mahoneycab45e22013-10-16 16:27:01 -04004886 if (ret == -ENOSPC)
4887 trace_btrfs_space_reservation(root->fs_info,
4888 "space_info:enospc",
4889 space_info->flags, orig_bytes, 1);
Josef Bacikfdb5eff2011-06-07 16:07:44 -04004890 if (flushing) {
Josef Bacik8bb8ab22010-10-15 16:52:49 -04004891 spin_lock(&space_info->lock);
Josef Bacikfdb5eff2011-06-07 16:07:44 -04004892 space_info->flush = 0;
4893 wake_up_all(&space_info->wait);
Josef Bacik8bb8ab22010-10-15 16:52:49 -04004894 spin_unlock(&space_info->lock);
4895 }
Yan, Zhengf0486c62010-05-16 10:46:25 -04004896 return ret;
4897}
4898
Jeff Mahoney79787ea2012-03-12 16:03:00 +01004899static struct btrfs_block_rsv *get_block_rsv(
4900 const struct btrfs_trans_handle *trans,
4901 const struct btrfs_root *root)
Yan, Zhengf0486c62010-05-16 10:46:25 -04004902{
Josef Bacik4c13d752011-08-30 11:31:29 -04004903 struct btrfs_block_rsv *block_rsv = NULL;
4904
Miao Xie27cdeb72014-04-02 19:51:05 +08004905 if (test_bit(BTRFS_ROOT_REF_COWS, &root->state))
Josef Bacik0e721102012-06-26 16:13:18 -04004906 block_rsv = trans->block_rsv;
4907
4908 if (root == root->fs_info->csum_root && trans->adding_csums)
Yan, Zhengf0486c62010-05-16 10:46:25 -04004909 block_rsv = trans->block_rsv;
Josef Bacik4c13d752011-08-30 11:31:29 -04004910
Stefan Behrensf7a81ea2013-08-15 17:11:19 +02004911 if (root == root->fs_info->uuid_root)
4912 block_rsv = trans->block_rsv;
4913
Josef Bacik4c13d752011-08-30 11:31:29 -04004914 if (!block_rsv)
Yan, Zhengf0486c62010-05-16 10:46:25 -04004915 block_rsv = root->block_rsv;
4916
4917 if (!block_rsv)
4918 block_rsv = &root->fs_info->empty_block_rsv;
4919
4920 return block_rsv;
4921}
4922
4923static int block_rsv_use_bytes(struct btrfs_block_rsv *block_rsv,
4924 u64 num_bytes)
4925{
4926 int ret = -ENOSPC;
4927 spin_lock(&block_rsv->lock);
4928 if (block_rsv->reserved >= num_bytes) {
4929 block_rsv->reserved -= num_bytes;
4930 if (block_rsv->reserved < block_rsv->size)
4931 block_rsv->full = 0;
4932 ret = 0;
4933 }
4934 spin_unlock(&block_rsv->lock);
4935 return ret;
4936}
4937
4938static void block_rsv_add_bytes(struct btrfs_block_rsv *block_rsv,
4939 u64 num_bytes, int update_size)
4940{
4941 spin_lock(&block_rsv->lock);
4942 block_rsv->reserved += num_bytes;
4943 if (update_size)
4944 block_rsv->size += num_bytes;
4945 else if (block_rsv->reserved >= block_rsv->size)
4946 block_rsv->full = 1;
4947 spin_unlock(&block_rsv->lock);
4948}
4949
Josef Bacikd52be812013-05-29 14:54:47 -04004950int btrfs_cond_migrate_bytes(struct btrfs_fs_info *fs_info,
4951 struct btrfs_block_rsv *dest, u64 num_bytes,
4952 int min_factor)
4953{
4954 struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
4955 u64 min_bytes;
4956
4957 if (global_rsv->space_info != dest->space_info)
4958 return -ENOSPC;
4959
4960 spin_lock(&global_rsv->lock);
4961 min_bytes = div_factor(global_rsv->size, min_factor);
4962 if (global_rsv->reserved < min_bytes + num_bytes) {
4963 spin_unlock(&global_rsv->lock);
4964 return -ENOSPC;
4965 }
4966 global_rsv->reserved -= num_bytes;
4967 if (global_rsv->reserved < global_rsv->size)
4968 global_rsv->full = 0;
4969 spin_unlock(&global_rsv->lock);
4970
4971 block_rsv_add_bytes(dest, num_bytes, 1);
4972 return 0;
4973}
4974
Josef Bacik8c2a3ca2012-01-10 10:31:31 -05004975static void block_rsv_release_bytes(struct btrfs_fs_info *fs_info,
4976 struct btrfs_block_rsv *block_rsv,
David Sterba62a45b62011-04-20 15:52:26 +02004977 struct btrfs_block_rsv *dest, u64 num_bytes)
Yan, Zhengf0486c62010-05-16 10:46:25 -04004978{
4979 struct btrfs_space_info *space_info = block_rsv->space_info;
4980
4981 spin_lock(&block_rsv->lock);
4982 if (num_bytes == (u64)-1)
4983 num_bytes = block_rsv->size;
4984 block_rsv->size -= num_bytes;
4985 if (block_rsv->reserved >= block_rsv->size) {
4986 num_bytes = block_rsv->reserved - block_rsv->size;
4987 block_rsv->reserved = block_rsv->size;
4988 block_rsv->full = 1;
4989 } else {
4990 num_bytes = 0;
4991 }
4992 spin_unlock(&block_rsv->lock);
4993
4994 if (num_bytes > 0) {
4995 if (dest) {
Josef Bacike9e22892011-01-24 21:43:19 +00004996 spin_lock(&dest->lock);
4997 if (!dest->full) {
4998 u64 bytes_to_add;
4999
5000 bytes_to_add = dest->size - dest->reserved;
5001 bytes_to_add = min(num_bytes, bytes_to_add);
5002 dest->reserved += bytes_to_add;
5003 if (dest->reserved >= dest->size)
5004 dest->full = 1;
5005 num_bytes -= bytes_to_add;
5006 }
5007 spin_unlock(&dest->lock);
5008 }
5009 if (num_bytes) {
Yan, Zhengf0486c62010-05-16 10:46:25 -04005010 spin_lock(&space_info->lock);
Josef Bacikfb25e912011-07-26 17:00:46 -04005011 space_info->bytes_may_use -= num_bytes;
Josef Bacik8c2a3ca2012-01-10 10:31:31 -05005012 trace_btrfs_space_reservation(fs_info, "space_info",
Liu Bo2bcc0322012-03-29 09:57:44 -04005013 space_info->flags, num_bytes, 0);
Yan, Zhengf0486c62010-05-16 10:46:25 -04005014 spin_unlock(&space_info->lock);
5015 }
5016 }
5017}
5018
5019static int block_rsv_migrate_bytes(struct btrfs_block_rsv *src,
5020 struct btrfs_block_rsv *dst, u64 num_bytes)
5021{
5022 int ret;
5023
5024 ret = block_rsv_use_bytes(src, num_bytes);
5025 if (ret)
5026 return ret;
5027
5028 block_rsv_add_bytes(dst, num_bytes, 1);
5029 return 0;
5030}
5031
Miao Xie66d8f3d2012-09-06 04:02:28 -06005032void btrfs_init_block_rsv(struct btrfs_block_rsv *rsv, unsigned short type)
Yan, Zhengf0486c62010-05-16 10:46:25 -04005033{
5034 memset(rsv, 0, sizeof(*rsv));
5035 spin_lock_init(&rsv->lock);
Miao Xie66d8f3d2012-09-06 04:02:28 -06005036 rsv->type = type;
Yan, Zhengf0486c62010-05-16 10:46:25 -04005037}
5038
Miao Xie66d8f3d2012-09-06 04:02:28 -06005039struct btrfs_block_rsv *btrfs_alloc_block_rsv(struct btrfs_root *root,
5040 unsigned short type)
Yan, Zhengf0486c62010-05-16 10:46:25 -04005041{
5042 struct btrfs_block_rsv *block_rsv;
5043 struct btrfs_fs_info *fs_info = root->fs_info;
Yan, Zhengf0486c62010-05-16 10:46:25 -04005044
5045 block_rsv = kmalloc(sizeof(*block_rsv), GFP_NOFS);
5046 if (!block_rsv)
5047 return NULL;
5048
Miao Xie66d8f3d2012-09-06 04:02:28 -06005049 btrfs_init_block_rsv(block_rsv, type);
Yan, Zhengf0486c62010-05-16 10:46:25 -04005050 block_rsv->space_info = __find_space_info(fs_info,
5051 BTRFS_BLOCK_GROUP_METADATA);
Yan, Zhengf0486c62010-05-16 10:46:25 -04005052 return block_rsv;
5053}
5054
5055void btrfs_free_block_rsv(struct btrfs_root *root,
5056 struct btrfs_block_rsv *rsv)
5057{
Josef Bacik2aaa6652012-08-29 14:27:18 -04005058 if (!rsv)
5059 return;
Josef Bacikdabdb642011-08-08 12:50:18 -04005060 btrfs_block_rsv_release(root, rsv, (u64)-1);
5061 kfree(rsv);
Yan, Zhengf0486c62010-05-16 10:46:25 -04005062}
5063
Chris Masoncdfb0802015-04-06 18:17:00 -07005064void __btrfs_free_block_rsv(struct btrfs_block_rsv *rsv)
5065{
5066 kfree(rsv);
5067}
5068
Miao Xie08e007d2012-10-16 11:33:38 +00005069int btrfs_block_rsv_add(struct btrfs_root *root,
5070 struct btrfs_block_rsv *block_rsv, u64 num_bytes,
5071 enum btrfs_reserve_flush_enum flush)
Yan, Zhengf0486c62010-05-16 10:46:25 -04005072{
5073 int ret;
5074
5075 if (num_bytes == 0)
5076 return 0;
Josef Bacik8bb8ab22010-10-15 16:52:49 -04005077
Miao Xie61b520a2011-11-10 20:45:05 -05005078 ret = reserve_metadata_bytes(root, block_rsv, num_bytes, flush);
Yan, Zhengf0486c62010-05-16 10:46:25 -04005079 if (!ret) {
5080 block_rsv_add_bytes(block_rsv, num_bytes, 1);
5081 return 0;
5082 }
5083
Yan, Zhengf0486c62010-05-16 10:46:25 -04005084 return ret;
5085}
5086
Josef Bacik4a92b1b2011-08-30 12:34:28 -04005087int btrfs_block_rsv_check(struct btrfs_root *root,
Josef Bacik36ba0222011-10-18 12:15:48 -04005088 struct btrfs_block_rsv *block_rsv, int min_factor)
Yan, Zhengf0486c62010-05-16 10:46:25 -04005089{
5090 u64 num_bytes = 0;
Yan, Zhengf0486c62010-05-16 10:46:25 -04005091 int ret = -ENOSPC;
5092
5093 if (!block_rsv)
5094 return 0;
5095
5096 spin_lock(&block_rsv->lock);
Josef Bacik36ba0222011-10-18 12:15:48 -04005097 num_bytes = div_factor(block_rsv->size, min_factor);
5098 if (block_rsv->reserved >= num_bytes)
Yan, Zhengf0486c62010-05-16 10:46:25 -04005099 ret = 0;
Yan, Zhengf0486c62010-05-16 10:46:25 -04005100 spin_unlock(&block_rsv->lock);
Yan, Zhengf0486c62010-05-16 10:46:25 -04005101
Josef Bacik36ba0222011-10-18 12:15:48 -04005102 return ret;
5103}
5104
Miao Xie08e007d2012-10-16 11:33:38 +00005105int btrfs_block_rsv_refill(struct btrfs_root *root,
5106 struct btrfs_block_rsv *block_rsv, u64 min_reserved,
5107 enum btrfs_reserve_flush_enum flush)
Josef Bacik36ba0222011-10-18 12:15:48 -04005108{
5109 u64 num_bytes = 0;
5110 int ret = -ENOSPC;
5111
5112 if (!block_rsv)
5113 return 0;
5114
5115 spin_lock(&block_rsv->lock);
5116 num_bytes = min_reserved;
Josef Bacik13553e52011-08-08 13:33:21 -04005117 if (block_rsv->reserved >= num_bytes)
Yan, Zhengf0486c62010-05-16 10:46:25 -04005118 ret = 0;
Josef Bacik13553e52011-08-08 13:33:21 -04005119 else
Yan, Zhengf0486c62010-05-16 10:46:25 -04005120 num_bytes -= block_rsv->reserved;
Yan, Zhengf0486c62010-05-16 10:46:25 -04005121 spin_unlock(&block_rsv->lock);
Josef Bacik13553e52011-08-08 13:33:21 -04005122
Yan, Zhengf0486c62010-05-16 10:46:25 -04005123 if (!ret)
5124 return 0;
5125
Miao Xieaa38a712011-11-18 17:43:00 +08005126 ret = reserve_metadata_bytes(root, block_rsv, num_bytes, flush);
Josef Bacikdabdb642011-08-08 12:50:18 -04005127 if (!ret) {
5128 block_rsv_add_bytes(block_rsv, num_bytes, 0);
Yan, Zhengf0486c62010-05-16 10:46:25 -04005129 return 0;
5130 }
5131
Josef Bacik13553e52011-08-08 13:33:21 -04005132 return ret;
Yan, Zhengf0486c62010-05-16 10:46:25 -04005133}
5134
/*
 * Move @num_bytes of reservation from @src_rsv to @dst_rsv, growing the
 * destination's size.  Returns -ENOSPC if @src_rsv doesn't hold enough.
 */
int btrfs_block_rsv_migrate(struct btrfs_block_rsv *src_rsv,
			    struct btrfs_block_rsv *dst_rsv,
			    u64 num_bytes)
{
	return block_rsv_migrate_bytes(src_rsv, dst_rsv, num_bytes);
}
5141
5142void btrfs_block_rsv_release(struct btrfs_root *root,
5143 struct btrfs_block_rsv *block_rsv,
5144 u64 num_bytes)
5145{
5146 struct btrfs_block_rsv *global_rsv = &root->fs_info->global_block_rsv;
Liu Bo17504582013-12-29 21:44:50 +08005147 if (global_rsv == block_rsv ||
Yan, Zhengf0486c62010-05-16 10:46:25 -04005148 block_rsv->space_info != global_rsv->space_info)
5149 global_rsv = NULL;
Josef Bacik8c2a3ca2012-01-10 10:31:31 -05005150 block_rsv_release_bytes(root->fs_info, block_rsv, global_rsv,
5151 num_bytes);
Yan, Zhengf0486c62010-05-16 10:46:25 -04005152}
5153
Yan, Zheng8929ecfa2010-05-16 10:49:58 -04005154/*
5155 * helper to calculate size of global block reservation.
5156 * the desired value is sum of space used by extent tree,
5157 * checksum tree and root tree
5158 */
5159static u64 calc_global_metadata_size(struct btrfs_fs_info *fs_info)
5160{
5161 struct btrfs_space_info *sinfo;
5162 u64 num_bytes;
5163 u64 meta_used;
5164 u64 data_used;
David Sterba6c417612011-04-13 15:41:04 +02005165 int csum_size = btrfs_super_csum_size(fs_info->super_copy);
Yan, Zheng8929ecfa2010-05-16 10:49:58 -04005166
Yan, Zheng8929ecfa2010-05-16 10:49:58 -04005167 sinfo = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_DATA);
5168 spin_lock(&sinfo->lock);
5169 data_used = sinfo->bytes_used;
5170 spin_unlock(&sinfo->lock);
5171
5172 sinfo = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA);
5173 spin_lock(&sinfo->lock);
Josef Bacik6d487552010-10-15 15:13:32 -04005174 if (sinfo->flags & BTRFS_BLOCK_GROUP_DATA)
5175 data_used = 0;
Yan, Zheng8929ecfa2010-05-16 10:49:58 -04005176 meta_used = sinfo->bytes_used;
5177 spin_unlock(&sinfo->lock);
5178
5179 num_bytes = (data_used >> fs_info->sb->s_blocksize_bits) *
5180 csum_size * 2;
David Sterbaf8c269d2015-01-16 17:21:12 +01005181 num_bytes += div_u64(data_used + meta_used, 50);
Yan, Zheng8929ecfa2010-05-16 10:49:58 -04005182
5183 if (num_bytes * 3 > meta_used)
David Sterbaf8c269d2015-01-16 17:21:12 +01005184 num_bytes = div_u64(meta_used, 3);
Yan, Zheng8929ecfa2010-05-16 10:49:58 -04005185
David Sterba707e8a02014-06-04 19:22:26 +02005186 return ALIGN(num_bytes, fs_info->extent_root->nodesize << 10);
Yan, Zheng8929ecfa2010-05-16 10:49:58 -04005187}
5188
/*
 * Recompute the size of the global block reservation and top it up from
 * any unallocated room in its space_info, trimming back any excess.
 * Takes sinfo->lock before block_rsv->lock; callers rely on this order.
 */
static void update_global_block_rsv(struct btrfs_fs_info *fs_info)
{
	struct btrfs_block_rsv *block_rsv = &fs_info->global_block_rsv;
	struct btrfs_space_info *sinfo = block_rsv->space_info;
	u64 num_bytes;

	num_bytes = calc_global_metadata_size(fs_info);

	spin_lock(&sinfo->lock);
	spin_lock(&block_rsv->lock);

	/* Never let the global reserve grow beyond 512M. */
	block_rsv->size = min_t(u64, num_bytes, 512 * 1024 * 1024);

	/* Total bytes in this space_info that are already spoken for. */
	num_bytes = sinfo->bytes_used + sinfo->bytes_pinned +
		    sinfo->bytes_reserved + sinfo->bytes_readonly +
		    sinfo->bytes_may_use;

	if (sinfo->total_bytes > num_bytes) {
		/* Claim the remaining free room for the global reserve. */
		num_bytes = sinfo->total_bytes - num_bytes;
		block_rsv->reserved += num_bytes;
		sinfo->bytes_may_use += num_bytes;
		trace_btrfs_space_reservation(fs_info, "space_info",
				      sinfo->flags, num_bytes, 1);
	}

	if (block_rsv->reserved >= block_rsv->size) {
		/* Give any excess back to the space_info. */
		num_bytes = block_rsv->reserved - block_rsv->size;
		sinfo->bytes_may_use -= num_bytes;
		trace_btrfs_space_reservation(fs_info, "space_info",
				      sinfo->flags, num_bytes, 0);
		block_rsv->reserved = block_rsv->size;
		block_rsv->full = 1;
	}

	spin_unlock(&block_rsv->lock);
	spin_unlock(&sinfo->lock);
}
5226
Yan, Zhengf0486c62010-05-16 10:46:25 -04005227static void init_global_block_rsv(struct btrfs_fs_info *fs_info)
5228{
5229 struct btrfs_space_info *space_info;
5230
5231 space_info = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_SYSTEM);
5232 fs_info->chunk_block_rsv.space_info = space_info;
Yan, Zhengf0486c62010-05-16 10:46:25 -04005233
5234 space_info = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA);
Yan, Zheng8929ecfa2010-05-16 10:49:58 -04005235 fs_info->global_block_rsv.space_info = space_info;
Yan, Zheng8929ecfa2010-05-16 10:49:58 -04005236 fs_info->delalloc_block_rsv.space_info = space_info;
Yan, Zhengf0486c62010-05-16 10:46:25 -04005237 fs_info->trans_block_rsv.space_info = space_info;
5238 fs_info->empty_block_rsv.space_info = space_info;
Josef Bacik6d668dd2011-11-03 22:54:25 -04005239 fs_info->delayed_block_rsv.space_info = space_info;
Yan, Zhengf0486c62010-05-16 10:46:25 -04005240
Yan, Zheng8929ecfa2010-05-16 10:49:58 -04005241 fs_info->extent_root->block_rsv = &fs_info->global_block_rsv;
5242 fs_info->csum_root->block_rsv = &fs_info->global_block_rsv;
5243 fs_info->dev_root->block_rsv = &fs_info->global_block_rsv;
5244 fs_info->tree_root->block_rsv = &fs_info->global_block_rsv;
Stefan Behrens3a6cad92013-05-16 14:48:19 +00005245 if (fs_info->quota_root)
5246 fs_info->quota_root->block_rsv = &fs_info->global_block_rsv;
Yan, Zhengf0486c62010-05-16 10:46:25 -04005247 fs_info->chunk_root->block_rsv = &fs_info->chunk_block_rsv;
Yan, Zheng8929ecfa2010-05-16 10:49:58 -04005248
Yan, Zheng8929ecfa2010-05-16 10:49:58 -04005249 update_global_block_rsv(fs_info);
5250}
5251
/*
 * Drain the global block reservation completely and sanity-check that
 * every other standing reservation has already been emptied; a firing
 * WARN_ON here indicates a space accounting leak.
 */
static void release_global_block_rsv(struct btrfs_fs_info *fs_info)
{
	block_rsv_release_bytes(fs_info, &fs_info->global_block_rsv, NULL,
				(u64)-1);
	WARN_ON(fs_info->delalloc_block_rsv.size > 0);
	WARN_ON(fs_info->delalloc_block_rsv.reserved > 0);
	WARN_ON(fs_info->trans_block_rsv.size > 0);
	WARN_ON(fs_info->trans_block_rsv.reserved > 0);
	WARN_ON(fs_info->chunk_block_rsv.size > 0);
	WARN_ON(fs_info->chunk_block_rsv.reserved > 0);
	WARN_ON(fs_info->delayed_block_rsv.size > 0);
	WARN_ON(fs_info->delayed_block_rsv.reserved > 0);
}
5265
Yan, Zhenga22285a2010-05-16 10:48:46 -04005266void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans,
5267 struct btrfs_root *root)
5268{
Josef Bacik0e721102012-06-26 16:13:18 -04005269 if (!trans->block_rsv)
5270 return;
5271
Yan, Zhenga22285a2010-05-16 10:48:46 -04005272 if (!trans->bytes_reserved)
5273 return;
5274
Chris Masone77266e2012-02-24 10:39:05 -05005275 trace_btrfs_space_reservation(root->fs_info, "transaction",
Liu Bo2bcc0322012-03-29 09:57:44 -04005276 trans->transid, trans->bytes_reserved, 0);
Josef Bacikb24e03d2011-10-14 14:40:17 -04005277 btrfs_block_rsv_release(root, trans->block_rsv, trans->bytes_reserved);
Yan, Zhenga22285a2010-05-16 10:48:46 -04005278 trans->bytes_reserved = 0;
5279}
5280
Filipe Manana4fbcdf62015-05-20 14:01:54 +01005281/*
5282 * To be called after all the new block groups attached to the transaction
5283 * handle have been created (btrfs_create_pending_block_groups()).
5284 */
5285void btrfs_trans_release_chunk_metadata(struct btrfs_trans_handle *trans)
5286{
5287 struct btrfs_fs_info *fs_info = trans->root->fs_info;
5288
5289 if (!trans->chunk_bytes_reserved)
5290 return;
5291
5292 WARN_ON_ONCE(!list_empty(&trans->new_bgs));
5293
5294 block_rsv_release_bytes(fs_info, &fs_info->chunk_block_rsv, NULL,
5295 trans->chunk_bytes_reserved);
5296 trans->chunk_bytes_reserved = 0;
5297}
5298
Jeff Mahoney79787ea2012-03-12 16:03:00 +01005299/* Can only return 0 or -ENOSPC */
Yan, Zhengd68fc572010-05-16 10:49:58 -04005300int btrfs_orphan_reserve_metadata(struct btrfs_trans_handle *trans,
5301 struct inode *inode)
5302{
5303 struct btrfs_root *root = BTRFS_I(inode)->root;
5304 struct btrfs_block_rsv *src_rsv = get_block_rsv(trans, root);
5305 struct btrfs_block_rsv *dst_rsv = root->orphan_block_rsv;
5306
5307 /*
Josef Bacikfcb80c22011-05-03 10:40:22 -04005308 * We need to hold space in order to delete our orphan item once we've
5309 * added it, so this takes the reservation so we can release it later
5310 * when we are truly done with the orphan item.
Yan, Zhengd68fc572010-05-16 10:49:58 -04005311 */
Chris Masonff5714c2011-05-28 07:00:39 -04005312 u64 num_bytes = btrfs_calc_trans_metadata_size(root, 1);
Josef Bacik8c2a3ca2012-01-10 10:31:31 -05005313 trace_btrfs_space_reservation(root->fs_info, "orphan",
5314 btrfs_ino(inode), num_bytes, 1);
Yan, Zhengd68fc572010-05-16 10:49:58 -04005315 return block_rsv_migrate_bytes(src_rsv, dst_rsv, num_bytes);
5316}
5317
5318void btrfs_orphan_release_metadata(struct inode *inode)
5319{
5320 struct btrfs_root *root = BTRFS_I(inode)->root;
Chris Masonff5714c2011-05-28 07:00:39 -04005321 u64 num_bytes = btrfs_calc_trans_metadata_size(root, 1);
Josef Bacik8c2a3ca2012-01-10 10:31:31 -05005322 trace_btrfs_space_reservation(root->fs_info, "orphan",
5323 btrfs_ino(inode), num_bytes, 0);
Yan, Zhengd68fc572010-05-16 10:49:58 -04005324 btrfs_block_rsv_release(root, root->orphan_block_rsv, num_bytes);
5325}
5326
Miao Xied5c12072013-02-28 10:04:33 +00005327/*
5328 * btrfs_subvolume_reserve_metadata() - reserve space for subvolume operation
5329 * root: the root of the parent directory
5330 * rsv: block reservation
5331 * items: the number of items that we need do reservation
5332 * qgroup_reserved: used to return the reserved size in qgroup
5333 *
5334 * This function is used to reserve the space for snapshot/subvolume
5335 * creation and deletion. Those operations are different with the
5336 * common file/directory operations, they change two fs/file trees
5337 * and root tree, the number of items that the qgroup reserves is
5338 * different with the free space reservation. So we can not use
5339 * the space reseravtion mechanism in start_transaction().
5340 */
5341int btrfs_subvolume_reserve_metadata(struct btrfs_root *root,
5342 struct btrfs_block_rsv *rsv,
5343 int items,
Jeff Mahoneyee3441b2013-07-09 16:37:21 -04005344 u64 *qgroup_reserved,
5345 bool use_global_rsv)
Yan, Zhenga22285a2010-05-16 10:48:46 -04005346{
Miao Xied5c12072013-02-28 10:04:33 +00005347 u64 num_bytes;
5348 int ret;
Jeff Mahoneyee3441b2013-07-09 16:37:21 -04005349 struct btrfs_block_rsv *global_rsv = &root->fs_info->global_block_rsv;
Miao Xied5c12072013-02-28 10:04:33 +00005350
5351 if (root->fs_info->quota_enabled) {
5352 /* One for parent inode, two for dir entries */
David Sterba707e8a02014-06-04 19:22:26 +02005353 num_bytes = 3 * root->nodesize;
Miao Xied5c12072013-02-28 10:04:33 +00005354 ret = btrfs_qgroup_reserve(root, num_bytes);
5355 if (ret)
5356 return ret;
5357 } else {
5358 num_bytes = 0;
5359 }
5360
5361 *qgroup_reserved = num_bytes;
5362
5363 num_bytes = btrfs_calc_trans_metadata_size(root, items);
5364 rsv->space_info = __find_space_info(root->fs_info,
5365 BTRFS_BLOCK_GROUP_METADATA);
5366 ret = btrfs_block_rsv_add(root, rsv, num_bytes,
5367 BTRFS_RESERVE_FLUSH_ALL);
Jeff Mahoneyee3441b2013-07-09 16:37:21 -04005368
5369 if (ret == -ENOSPC && use_global_rsv)
5370 ret = btrfs_block_rsv_migrate(global_rsv, rsv, num_bytes);
5371
Miao Xied5c12072013-02-28 10:04:33 +00005372 if (ret) {
5373 if (*qgroup_reserved)
5374 btrfs_qgroup_free(root, *qgroup_reserved);
5375 }
5376
5377 return ret;
5378}
5379
/*
 * Release whatever is left of a subvolume operation's reservation.
 * NOTE(review): qgroup_reserved is unused here — presumably the qgroup
 * bytes are freed elsewhere by the caller; confirm against callers.
 */
void btrfs_subvolume_release_metadata(struct btrfs_root *root,
				      struct btrfs_block_rsv *rsv,
				      u64 qgroup_reserved)
{
	btrfs_block_rsv_release(root, rsv, (u64)-1);
}
5386
/**
 * drop_outstanding_extent - drop an outstanding extent
 * @inode: the inode we're dropping the extent for
 * @num_bytes: the number of bytes we're releasing.
 *
 * This is called when we are freeing up an outstanding extent, either called
 * after an error or after an extent is written.  This will return the number of
 * reserved extents that need to be freed.  This must be called with
 * BTRFS_I(inode)->lock held.
 */
static unsigned drop_outstanding_extent(struct inode *inode, u64 num_bytes)
{
	unsigned drop_inode_space = 0;
	unsigned dropped_extents = 0;
	unsigned num_extents = 0;

	/* Round up: how many BTRFS_MAX_EXTENT_SIZE extents does this cover? */
	num_extents = (unsigned)div64_u64(num_bytes +
					  BTRFS_MAX_EXTENT_SIZE - 1,
					  BTRFS_MAX_EXTENT_SIZE);
	ASSERT(num_extents);
	ASSERT(BTRFS_I(inode)->outstanding_extents >= num_extents);
	BTRFS_I(inode)->outstanding_extents -= num_extents;

	/* Also release the inode-update item once nothing is outstanding. */
	if (BTRFS_I(inode)->outstanding_extents == 0 &&
	    test_and_clear_bit(BTRFS_INODE_DELALLOC_META_RESERVED,
			       &BTRFS_I(inode)->runtime_flags))
		drop_inode_space = 1;

	/*
	 * If we have more or the same amount of outstanding extents than we have
	 * reserved then we need to leave the reserved extents count alone.
	 */
	if (BTRFS_I(inode)->outstanding_extents >=
	    BTRFS_I(inode)->reserved_extents)
		return drop_inode_space;

	dropped_extents = BTRFS_I(inode)->reserved_extents -
			  BTRFS_I(inode)->outstanding_extents;
	BTRFS_I(inode)->reserved_extents -= dropped_extents;
	return dropped_extents + drop_inode_space;
}
5428
/**
 * calc_csum_metadata_size - return the amount of metadata space that must be
 * reserved/free'd for the given bytes.
 * @inode: the inode we're manipulating
 * @num_bytes: the number of bytes in question
 * @reserve: 1 if we are reserving space, 0 if we are freeing space
 *
 * This adjusts the number of csum_bytes in the inode and then returns the
 * correct amount of metadata that must either be reserved or freed.  We
 * calculate how many checksums we can fit into one leaf and then divide the
 * number of bytes that will need to be checksummed by this value to figure out
 * how many checksums will be required.  If we are adding bytes then the number
 * may go up and we will return the number of additional bytes that must be
 * reserved.  If it is going down we will return the number of bytes that must
 * be freed.
 *
 * This must be called with BTRFS_I(inode)->lock held.
 */
static u64 calc_csum_metadata_size(struct inode *inode, u64 num_bytes,
				   int reserve)
{
	struct btrfs_root *root = BTRFS_I(inode)->root;
	u64 old_csums, num_csums;

	/* NODATASUM inodes with no pending csum bytes need no space. */
	if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM &&
	    BTRFS_I(inode)->csum_bytes == 0)
		return 0;

	old_csums = btrfs_csum_bytes_to_leaves(root, BTRFS_I(inode)->csum_bytes);
	if (reserve)
		BTRFS_I(inode)->csum_bytes += num_bytes;
	else
		BTRFS_I(inode)->csum_bytes -= num_bytes;
	num_csums = btrfs_csum_bytes_to_leaves(root, BTRFS_I(inode)->csum_bytes);

	/* No change, no need to reserve more */
	if (old_csums == num_csums)
		return 0;

	if (reserve)
		return btrfs_calc_trans_metadata_size(root,
						      num_csums - old_csums);

	return btrfs_calc_trans_metadata_size(root, old_csums - num_csums);
}
5474
/*
 * Reserve metadata space for num_bytes of delalloc on an inode: bumps
 * outstanding_extents, reserves room for the extent items, csums and
 * (once per inode) the inode-update item, then charges the delalloc
 * block reserve.  On failure the accounting done under the inode lock
 * is carefully unwound, compensating for any concurrent frees that
 * happened while the lock was dropped.  Returns 0 or an error (-ENOSPC).
 */
int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
{
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct btrfs_block_rsv *block_rsv = &root->fs_info->delalloc_block_rsv;
	u64 to_reserve = 0;
	u64 csum_bytes;
	unsigned nr_extents = 0;
	int extra_reserve = 0;
	enum btrfs_reserve_flush_enum flush = BTRFS_RESERVE_FLUSH_ALL;
	int ret = 0;
	bool delalloc_lock = true;
	u64 to_free = 0;
	unsigned dropped;

	/* If we are a free space inode we need to not flush since we will be in
	 * the middle of a transaction commit.  We also don't need the delalloc
	 * mutex since we won't race with anybody.  We need this mostly to make
	 * lockdep shut its filthy mouth.
	 */
	if (btrfs_is_free_space_inode(inode)) {
		flush = BTRFS_RESERVE_NO_FLUSH;
		delalloc_lock = false;
	}

	if (flush != BTRFS_RESERVE_NO_FLUSH &&
	    btrfs_transaction_in_commit(root->fs_info))
		schedule_timeout(1);

	if (delalloc_lock)
		mutex_lock(&BTRFS_I(inode)->delalloc_mutex);

	num_bytes = ALIGN(num_bytes, root->sectorsize);

	spin_lock(&BTRFS_I(inode)->lock);
	/* Round up to the number of BTRFS_MAX_EXTENT_SIZE extents covered. */
	nr_extents = (unsigned)div64_u64(num_bytes +
					 BTRFS_MAX_EXTENT_SIZE - 1,
					 BTRFS_MAX_EXTENT_SIZE);
	BTRFS_I(inode)->outstanding_extents += nr_extents;
	nr_extents = 0;

	/* Only reserve for extents not already covered by a reservation. */
	if (BTRFS_I(inode)->outstanding_extents >
	    BTRFS_I(inode)->reserved_extents)
		nr_extents = BTRFS_I(inode)->outstanding_extents -
			BTRFS_I(inode)->reserved_extents;

	/*
	 * Add an item to reserve for updating the inode when we complete the
	 * delalloc io.
	 */
	if (!test_bit(BTRFS_INODE_DELALLOC_META_RESERVED,
		      &BTRFS_I(inode)->runtime_flags)) {
		nr_extents++;
		extra_reserve = 1;
	}

	to_reserve = btrfs_calc_trans_metadata_size(root, nr_extents);
	to_reserve += calc_csum_metadata_size(inode, num_bytes, 1);
	/* Remember csum_bytes so we can detect races after unlocking. */
	csum_bytes = BTRFS_I(inode)->csum_bytes;
	spin_unlock(&BTRFS_I(inode)->lock);

	if (root->fs_info->quota_enabled) {
		ret = btrfs_qgroup_reserve(root, nr_extents * root->nodesize);
		if (ret)
			goto out_fail;
	}

	ret = reserve_metadata_bytes(root, block_rsv, to_reserve, flush);
	if (unlikely(ret)) {
		if (root->fs_info->quota_enabled)
			btrfs_qgroup_free(root, nr_extents * root->nodesize);
		goto out_fail;
	}

	spin_lock(&BTRFS_I(inode)->lock);
	if (extra_reserve) {
		set_bit(BTRFS_INODE_DELALLOC_META_RESERVED,
			&BTRFS_I(inode)->runtime_flags);
		nr_extents--;
	}
	BTRFS_I(inode)->reserved_extents += nr_extents;
	spin_unlock(&BTRFS_I(inode)->lock);

	if (delalloc_lock)
		mutex_unlock(&BTRFS_I(inode)->delalloc_mutex);

	if (to_reserve)
		trace_btrfs_space_reservation(root->fs_info, "delalloc",
					      btrfs_ino(inode), to_reserve, 1);
	block_rsv_add_bytes(block_rsv, to_reserve, 1);

	return 0;

out_fail:
	spin_lock(&BTRFS_I(inode)->lock);
	dropped = drop_outstanding_extent(inode, num_bytes);
	/*
	 * If the inodes csum_bytes is the same as the original
	 * csum_bytes then we know we haven't raced with any free()ers
	 * so we can just reduce our inodes csum bytes and carry on.
	 */
	if (BTRFS_I(inode)->csum_bytes == csum_bytes) {
		calc_csum_metadata_size(inode, num_bytes, 0);
	} else {
		u64 orig_csum_bytes = BTRFS_I(inode)->csum_bytes;
		u64 bytes;

		/*
		 * This is tricky, but first we need to figure out how much we
		 * free'd from any free-ers that occurred during this
		 * reservation, so we reset ->csum_bytes to the csum_bytes
		 * before we dropped our lock, and then call the free for the
		 * number of bytes that were freed while we were trying our
		 * reservation.
		 */
		bytes = csum_bytes - BTRFS_I(inode)->csum_bytes;
		BTRFS_I(inode)->csum_bytes = csum_bytes;
		to_free = calc_csum_metadata_size(inode, bytes, 0);


		/*
		 * Now we need to see how much we would have freed had we not
		 * been making this reservation and our ->csum_bytes were not
		 * artificially inflated.
		 */
		BTRFS_I(inode)->csum_bytes = csum_bytes - num_bytes;
		bytes = csum_bytes - orig_csum_bytes;
		bytes = calc_csum_metadata_size(inode, bytes, 0);

		/*
		 * Now reset ->csum_bytes to what it should be.  If bytes is
		 * more than to_free then we would have free'd more space had we
		 * not had an artificially high ->csum_bytes, so we need to free
		 * the remainder.  If bytes is the same or less then we don't
		 * need to do anything, the other free-ers did the correct
		 * thing.
		 */
		BTRFS_I(inode)->csum_bytes = orig_csum_bytes - num_bytes;
		if (bytes > to_free)
			to_free = bytes - to_free;
		else
			to_free = 0;
	}
	spin_unlock(&BTRFS_I(inode)->lock);
	if (dropped)
		to_free += btrfs_calc_trans_metadata_size(root, dropped);

	if (to_free) {
		btrfs_block_rsv_release(root, block_rsv, to_free);
		trace_btrfs_space_reservation(root->fs_info, "delalloc",
					      btrfs_ino(inode), to_free, 0);
	}
	if (delalloc_lock)
		mutex_unlock(&BTRFS_I(inode)->delalloc_mutex);
	return ret;
}
5630
/**
 * btrfs_delalloc_release_metadata - release a metadata reservation for an inode
 * @inode: the inode to release the reservation for
 * @num_bytes: the number of bytes we're releasing
 *
 * This will release the metadata reservation for an inode.  This can be called
 * once we complete IO for a given set of bytes to release their metadata
 * reservations.
 */
void btrfs_delalloc_release_metadata(struct inode *inode, u64 num_bytes)
{
	struct btrfs_root *root = BTRFS_I(inode)->root;
	u64 to_free = 0;
	unsigned dropped;

	num_bytes = ALIGN(num_bytes, root->sectorsize);
	spin_lock(&BTRFS_I(inode)->lock);
	dropped = drop_outstanding_extent(inode, num_bytes);

	if (num_bytes)
		to_free = calc_csum_metadata_size(inode, num_bytes, 0);
	spin_unlock(&BTRFS_I(inode)->lock);
	if (dropped > 0)
		to_free += btrfs_calc_trans_metadata_size(root, dropped);

	/*
	 * NOTE(review): dummy (test) roots return early, skipping the trace
	 * and the actual block reserve release — presumably the sanity-test
	 * infrastructure has no real reservations; confirm.
	 */
	if (btrfs_test_is_dummy_root(root))
		return;

	trace_btrfs_space_reservation(root->fs_info, "delalloc",
				      btrfs_ino(inode), to_free, 0);

	btrfs_block_rsv_release(root, &root->fs_info->delalloc_block_rsv,
				to_free);
}
5665
Josef Bacik7709cde2011-08-04 10:25:02 -04005666/**
5667 * btrfs_delalloc_reserve_space - reserve data and metadata space for delalloc
5668 * @inode: inode we're writing to
5669 * @num_bytes: the number of bytes we want to allocate
5670 *
5671 * This will do the following things
5672 *
5673 * o reserve space in the data space info for num_bytes
5674 * o reserve space in the metadata space info based on number of outstanding
5675 * extents and how much csums will be needed
5676 * o add to the inodes ->delalloc_bytes
5677 * o add it to the fs_info's delalloc inodes list.
5678 *
5679 * This will return 0 for success and -ENOSPC if there is no space left.
5680 */
Yan, Zheng0ca1f7c2010-05-16 10:48:47 -04005681int btrfs_delalloc_reserve_space(struct inode *inode, u64 num_bytes)
5682{
5683 int ret;
5684
Dongsheng Yange2d1f922015-02-06 10:26:52 -05005685 ret = btrfs_check_data_free_space(inode, num_bytes, num_bytes);
Yan, Zheng0ca1f7c2010-05-16 10:48:47 -04005686 if (ret)
5687 return ret;
5688
5689 ret = btrfs_delalloc_reserve_metadata(inode, num_bytes);
5690 if (ret) {
5691 btrfs_free_reserved_data_space(inode, num_bytes);
5692 return ret;
5693 }
5694
5695 return 0;
5696}
5697
/**
 * btrfs_delalloc_release_space - release data and metadata space for delalloc
 * @inode: inode we're releasing space for
 * @num_bytes: the number of bytes we want to free up
 *
 * This must be matched with a call to btrfs_delalloc_reserve_space.  This is
 * called in the case that we don't need the metadata AND data reservations
 * anymore.  So if there is an error or we insert an inline extent.
 *
 * This function will release the metadata space that was not used and will
 * decrement ->delalloc_bytes and remove it from the fs_info delalloc_inodes
 * list if there are no delalloc bytes left.
 */
void btrfs_delalloc_release_space(struct inode *inode, u64 num_bytes)
{
	btrfs_delalloc_release_metadata(inode, num_bytes);
	btrfs_free_reserved_data_space(inode, num_bytes);
}
5716
/*
 * Adjust the used-bytes accounting for the block group(s) covering
 * [bytenr, bytenr + num_bytes): on alloc the range moves from reserved
 * to used; otherwise it is pinned and marked dirty in pinned_extents.
 * Every touched block group is queued on the transaction's dirty list,
 * and a block group whose usage drops to zero is queued for deletion.
 * Returns 0 on success or -ENOENT if no block group covers bytenr.
 */
static int update_block_group(struct btrfs_trans_handle *trans,
			      struct btrfs_root *root, u64 bytenr,
			      u64 num_bytes, int alloc)
{
	struct btrfs_block_group_cache *cache = NULL;
	struct btrfs_fs_info *info = root->fs_info;
	u64 total = num_bytes;
	u64 old_val;
	u64 byte_in_group;
	int factor;

	/* block accounting for super block */
	spin_lock(&info->delalloc_root_lock);
	old_val = btrfs_super_bytes_used(info->super_copy);
	if (alloc)
		old_val += num_bytes;
	else
		old_val -= num_bytes;
	btrfs_set_super_bytes_used(info->super_copy, old_val);
	spin_unlock(&info->delalloc_root_lock);

	/* The range may span several block groups; walk them in order. */
	while (total) {
		cache = btrfs_lookup_block_group(info, bytenr);
		if (!cache)
			return -ENOENT;
		/* Duplicating profiles consume two disk bytes per byte. */
		if (cache->flags & (BTRFS_BLOCK_GROUP_DUP |
				    BTRFS_BLOCK_GROUP_RAID1 |
				    BTRFS_BLOCK_GROUP_RAID10))
			factor = 2;
		else
			factor = 1;
		/*
		 * If this block group has free space cache written out, we
		 * need to make sure to load it if we are removing space.  This
		 * is because we need the unpinning stage to actually add the
		 * space back to the block group, otherwise we will leak space.
		 */
		if (!alloc && cache->cached == BTRFS_CACHE_NO)
			cache_block_group(cache, 1);

		byte_in_group = bytenr - cache->key.objectid;
		WARN_ON(byte_in_group > cache->key.offset);

		/* space_info lock is taken before the block group lock. */
		spin_lock(&cache->space_info->lock);
		spin_lock(&cache->lock);

		if (btrfs_test_opt(root, SPACE_CACHE) &&
		    cache->disk_cache_state < BTRFS_DC_CLEAR)
			cache->disk_cache_state = BTRFS_DC_CLEAR;

		old_val = btrfs_block_group_used(&cache->item);
		/* Only the part of the range inside this block group. */
		num_bytes = min(total, cache->key.offset - byte_in_group);
		if (alloc) {
			old_val += num_bytes;
			btrfs_set_block_group_used(&cache->item, old_val);
			cache->reserved -= num_bytes;
			cache->space_info->bytes_reserved -= num_bytes;
			cache->space_info->bytes_used += num_bytes;
			cache->space_info->disk_used += num_bytes * factor;
			spin_unlock(&cache->lock);
			spin_unlock(&cache->space_info->lock);
		} else {
			old_val -= num_bytes;
			btrfs_set_block_group_used(&cache->item, old_val);
			cache->pinned += num_bytes;
			cache->space_info->bytes_pinned += num_bytes;
			cache->space_info->bytes_used -= num_bytes;
			cache->space_info->disk_used -= num_bytes * factor;
			spin_unlock(&cache->lock);
			spin_unlock(&cache->space_info->lock);

			set_extent_dirty(info->pinned_extents,
					 bytenr, bytenr + num_bytes - 1,
					 GFP_NOFS | __GFP_NOFAIL);
			/*
			 * No longer have used bytes in this block group, queue
			 * it for deletion.
			 */
			if (old_val == 0) {
				spin_lock(&info->unused_bgs_lock);
				if (list_empty(&cache->bg_list)) {
					btrfs_get_block_group(cache);
					list_add_tail(&cache->bg_list,
						      &info->unused_bgs);
				}
				spin_unlock(&info->unused_bgs_lock);
			}
		}

		/* Queue the block group for write-out with this transaction. */
		spin_lock(&trans->transaction->dirty_bgs_lock);
		if (list_empty(&cache->dirty_list)) {
			list_add_tail(&cache->dirty_list,
				      &trans->transaction->dirty_bgs);
			trans->transaction->num_dirty_bgs++;
			btrfs_get_block_group(cache);
		}
		spin_unlock(&trans->transaction->dirty_bgs_lock);

		btrfs_put_block_group(cache);
		total -= num_bytes;
		bytenr += num_bytes;
	}
	return 0;
}
Chris Mason6324fbf2008-03-24 15:01:59 -04005821
Chris Masona061fc82008-05-07 11:43:44 -04005822static u64 first_logical_byte(struct btrfs_root *root, u64 search_start)
5823{
Josef Bacik0f9dd462008-09-23 13:14:11 -04005824 struct btrfs_block_group_cache *cache;
Yan Zhengd2fb3432008-12-11 16:30:39 -05005825 u64 bytenr;
Josef Bacik0f9dd462008-09-23 13:14:11 -04005826
Liu Boa1897fd2012-12-27 09:01:23 +00005827 spin_lock(&root->fs_info->block_group_cache_lock);
5828 bytenr = root->fs_info->first_logical_byte;
5829 spin_unlock(&root->fs_info->block_group_cache_lock);
5830
5831 if (bytenr < (u64)-1)
5832 return bytenr;
5833
Josef Bacik0f9dd462008-09-23 13:14:11 -04005834 cache = btrfs_lookup_first_block_group(root->fs_info, search_start);
5835 if (!cache)
Chris Masona061fc82008-05-07 11:43:44 -04005836 return 0;
Josef Bacik0f9dd462008-09-23 13:14:11 -04005837
Yan Zhengd2fb3432008-12-11 16:30:39 -05005838 bytenr = cache->key.objectid;
Chris Masonfa9c0d792009-04-03 09:47:43 -04005839 btrfs_put_block_group(cache);
Yan Zhengd2fb3432008-12-11 16:30:39 -05005840
5841 return bytenr;
Chris Masona061fc82008-05-07 11:43:44 -04005842}
5843
Yan, Zhengf0486c62010-05-16 10:46:25 -04005844static int pin_down_extent(struct btrfs_root *root,
5845 struct btrfs_block_group_cache *cache,
5846 u64 bytenr, u64 num_bytes, int reserved)
Yan324ae4d2007-11-16 14:57:08 -05005847{
Yan Zheng11833d62009-09-11 16:11:19 -04005848 spin_lock(&cache->space_info->lock);
5849 spin_lock(&cache->lock);
5850 cache->pinned += num_bytes;
5851 cache->space_info->bytes_pinned += num_bytes;
5852 if (reserved) {
5853 cache->reserved -= num_bytes;
5854 cache->space_info->bytes_reserved -= num_bytes;
Yan324ae4d2007-11-16 14:57:08 -05005855 }
Yan Zheng11833d62009-09-11 16:11:19 -04005856 spin_unlock(&cache->lock);
5857 spin_unlock(&cache->space_info->lock);
5858
Yan, Zhengf0486c62010-05-16 10:46:25 -04005859 set_extent_dirty(root->fs_info->pinned_extents, bytenr,
5860 bytenr + num_bytes - 1, GFP_NOFS | __GFP_NOFAIL);
Dongsheng Yange2d1f922015-02-06 10:26:52 -05005861 if (reserved)
Josef Bacik0be5dc62013-10-07 15:18:52 -04005862 trace_btrfs_reserved_extent_free(root, bytenr, num_bytes);
Yan324ae4d2007-11-16 14:57:08 -05005863 return 0;
5864}
Chris Mason9078a3e2007-04-26 16:46:15 -04005865
Yan, Zhengf0486c62010-05-16 10:46:25 -04005866/*
5867 * this function must be called within transaction
5868 */
5869int btrfs_pin_extent(struct btrfs_root *root,
5870 u64 bytenr, u64 num_bytes, int reserved)
Zheng Yane8569812008-09-26 10:05:48 -04005871{
Yan, Zhengf0486c62010-05-16 10:46:25 -04005872 struct btrfs_block_group_cache *cache;
5873
5874 cache = btrfs_lookup_block_group(root->fs_info, bytenr);
Jeff Mahoney79787ea2012-03-12 16:03:00 +01005875 BUG_ON(!cache); /* Logic error */
Yan, Zhengf0486c62010-05-16 10:46:25 -04005876
5877 pin_down_extent(root, cache, bytenr, num_bytes, reserved);
5878
5879 btrfs_put_block_group(cache);
Yan Zheng11833d62009-09-11 16:11:19 -04005880 return 0;
5881}
Zheng Yane8569812008-09-26 10:05:48 -04005882
Yan, Zhengf0486c62010-05-16 10:46:25 -04005883/*
Chris Masone688b7252011-10-31 20:52:39 -04005884 * this function must be called within transaction
Yan, Zhengf0486c62010-05-16 10:46:25 -04005885 */
Liu Bodcfac412012-12-27 09:01:20 +00005886int btrfs_pin_extent_for_log_replay(struct btrfs_root *root,
Chris Masone688b7252011-10-31 20:52:39 -04005887 u64 bytenr, u64 num_bytes)
Yan, Zhengf0486c62010-05-16 10:46:25 -04005888{
Chris Masone688b7252011-10-31 20:52:39 -04005889 struct btrfs_block_group_cache *cache;
Josef Bacikb50c6e22013-04-25 15:55:30 -04005890 int ret;
Chris Masone688b7252011-10-31 20:52:39 -04005891
5892 cache = btrfs_lookup_block_group(root->fs_info, bytenr);
Josef Bacikb50c6e22013-04-25 15:55:30 -04005893 if (!cache)
5894 return -EINVAL;
Chris Masone688b7252011-10-31 20:52:39 -04005895
5896 /*
5897 * pull in the free space cache (if any) so that our pin
5898 * removes the free space from the cache. We have load_only set
5899 * to one because the slow code to read in the free extents does check
5900 * the pinned extents.
5901 */
Liu Bof6373bf2012-12-27 09:01:18 +00005902 cache_block_group(cache, 1);
Chris Masone688b7252011-10-31 20:52:39 -04005903
5904 pin_down_extent(root, cache, bytenr, num_bytes, 0);
5905
5906 /* remove us from the free space cache (if we're there at all) */
Josef Bacikb50c6e22013-04-25 15:55:30 -04005907 ret = btrfs_remove_free_space(cache, bytenr, num_bytes);
Chris Masone688b7252011-10-31 20:52:39 -04005908 btrfs_put_block_group(cache);
Josef Bacikb50c6e22013-04-25 15:55:30 -04005909 return ret;
Chris Masone688b7252011-10-31 20:52:39 -04005910}
5911
Josef Bacik8c2a1a32013-06-06 13:19:32 -04005912static int __exclude_logged_extent(struct btrfs_root *root, u64 start, u64 num_bytes)
5913{
5914 int ret;
5915 struct btrfs_block_group_cache *block_group;
5916 struct btrfs_caching_control *caching_ctl;
5917
5918 block_group = btrfs_lookup_block_group(root->fs_info, start);
5919 if (!block_group)
5920 return -EINVAL;
5921
5922 cache_block_group(block_group, 0);
5923 caching_ctl = get_caching_control(block_group);
5924
5925 if (!caching_ctl) {
5926 /* Logic error */
5927 BUG_ON(!block_group_cache_done(block_group));
5928 ret = btrfs_remove_free_space(block_group, start, num_bytes);
5929 } else {
5930 mutex_lock(&caching_ctl->mutex);
5931
5932 if (start >= caching_ctl->progress) {
5933 ret = add_excluded_extent(root, start, num_bytes);
5934 } else if (start + num_bytes <= caching_ctl->progress) {
5935 ret = btrfs_remove_free_space(block_group,
5936 start, num_bytes);
5937 } else {
5938 num_bytes = caching_ctl->progress - start;
5939 ret = btrfs_remove_free_space(block_group,
5940 start, num_bytes);
5941 if (ret)
5942 goto out_lock;
5943
5944 num_bytes = (start + num_bytes) -
5945 caching_ctl->progress;
5946 start = caching_ctl->progress;
5947 ret = add_excluded_extent(root, start, num_bytes);
5948 }
5949out_lock:
5950 mutex_unlock(&caching_ctl->mutex);
5951 put_caching_control(caching_ctl);
5952 }
5953 btrfs_put_block_group(block_group);
5954 return ret;
5955}
5956
5957int btrfs_exclude_logged_extents(struct btrfs_root *log,
5958 struct extent_buffer *eb)
5959{
5960 struct btrfs_file_extent_item *item;
5961 struct btrfs_key key;
5962 int found_type;
5963 int i;
5964
5965 if (!btrfs_fs_incompat(log->fs_info, MIXED_GROUPS))
5966 return 0;
5967
5968 for (i = 0; i < btrfs_header_nritems(eb); i++) {
5969 btrfs_item_key_to_cpu(eb, &key, i);
5970 if (key.type != BTRFS_EXTENT_DATA_KEY)
5971 continue;
5972 item = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
5973 found_type = btrfs_file_extent_type(eb, item);
5974 if (found_type == BTRFS_FILE_EXTENT_INLINE)
5975 continue;
5976 if (btrfs_file_extent_disk_bytenr(eb, item) == 0)
5977 continue;
5978 key.objectid = btrfs_file_extent_disk_bytenr(eb, item);
5979 key.offset = btrfs_file_extent_disk_num_bytes(eb, item);
5980 __exclude_logged_extent(log, key.objectid, key.offset);
5981 }
5982
5983 return 0;
5984}
5985
Josef Bacikfb25e912011-07-26 17:00:46 -04005986/**
5987 * btrfs_update_reserved_bytes - update the block_group and space info counters
5988 * @cache: The cache we are manipulating
5989 * @num_bytes: The number of bytes in question
5990 * @reserve: One of the reservation enums
Miao Xiee570fd22014-06-19 10:42:50 +08005991 * @delalloc: The blocks are allocated for the delalloc write
Josef Bacikfb25e912011-07-26 17:00:46 -04005992 *
5993 * This is called by the allocator when it reserves space, or by somebody who is
5994 * freeing space that was never actually used on disk. For example if you
5995 * reserve some space for a new leaf in transaction A and before transaction A
5996 * commits you free that leaf, you call this with reserve set to 0 in order to
5997 * clear the reservation.
5998 *
5999 * Metadata reservations should be called with RESERVE_ALLOC so we do the proper
6000 * ENOSPC accounting. For data we handle the reservation through clearing the
6001 * delalloc bits in the io_tree. We have to do this since we could end up
6002 * allocating less disk space for the amount of data we have reserved in the
6003 * case of compression.
6004 *
6005 * If this is a reservation and the block group has become read only we cannot
6006 * make the reservation and return -EAGAIN, otherwise this function always
6007 * succeeds.
Yan, Zhengf0486c62010-05-16 10:46:25 -04006008 */
Josef Bacikfb25e912011-07-26 17:00:46 -04006009static int btrfs_update_reserved_bytes(struct btrfs_block_group_cache *cache,
Miao Xiee570fd22014-06-19 10:42:50 +08006010 u64 num_bytes, int reserve, int delalloc)
Yan, Zhengf0486c62010-05-16 10:46:25 -04006011{
Josef Bacikfb25e912011-07-26 17:00:46 -04006012 struct btrfs_space_info *space_info = cache->space_info;
Yan, Zhengf0486c62010-05-16 10:46:25 -04006013 int ret = 0;
Jeff Mahoney79787ea2012-03-12 16:03:00 +01006014
Josef Bacikfb25e912011-07-26 17:00:46 -04006015 spin_lock(&space_info->lock);
6016 spin_lock(&cache->lock);
6017 if (reserve != RESERVE_FREE) {
Yan, Zhengf0486c62010-05-16 10:46:25 -04006018 if (cache->ro) {
6019 ret = -EAGAIN;
6020 } else {
Josef Bacikfb25e912011-07-26 17:00:46 -04006021 cache->reserved += num_bytes;
6022 space_info->bytes_reserved += num_bytes;
6023 if (reserve == RESERVE_ALLOC) {
Josef Bacik8c2a3ca2012-01-10 10:31:31 -05006024 trace_btrfs_space_reservation(cache->fs_info,
Liu Bo2bcc0322012-03-29 09:57:44 -04006025 "space_info", space_info->flags,
6026 num_bytes, 0);
Josef Bacikfb25e912011-07-26 17:00:46 -04006027 space_info->bytes_may_use -= num_bytes;
6028 }
Miao Xiee570fd22014-06-19 10:42:50 +08006029
6030 if (delalloc)
6031 cache->delalloc_bytes += num_bytes;
Yan, Zhengf0486c62010-05-16 10:46:25 -04006032 }
Josef Bacikfb25e912011-07-26 17:00:46 -04006033 } else {
6034 if (cache->ro)
6035 space_info->bytes_readonly += num_bytes;
6036 cache->reserved -= num_bytes;
6037 space_info->bytes_reserved -= num_bytes;
Miao Xiee570fd22014-06-19 10:42:50 +08006038
6039 if (delalloc)
6040 cache->delalloc_bytes -= num_bytes;
Yan, Zhengf0486c62010-05-16 10:46:25 -04006041 }
Josef Bacikfb25e912011-07-26 17:00:46 -04006042 spin_unlock(&cache->lock);
6043 spin_unlock(&space_info->lock);
Yan, Zhengf0486c62010-05-16 10:46:25 -04006044 return ret;
6045}
6046
Jeff Mahoney143bede2012-03-01 14:56:26 +01006047void btrfs_prepare_extent_commit(struct btrfs_trans_handle *trans,
Yan Zheng11833d62009-09-11 16:11:19 -04006048 struct btrfs_root *root)
6049{
6050 struct btrfs_fs_info *fs_info = root->fs_info;
6051 struct btrfs_caching_control *next;
6052 struct btrfs_caching_control *caching_ctl;
6053 struct btrfs_block_group_cache *cache;
6054
Josef Bacik9e351cc2014-03-13 15:42:13 -04006055 down_write(&fs_info->commit_root_sem);
Yan Zheng11833d62009-09-11 16:11:19 -04006056
6057 list_for_each_entry_safe(caching_ctl, next,
6058 &fs_info->caching_block_groups, list) {
6059 cache = caching_ctl->block_group;
6060 if (block_group_cache_done(cache)) {
6061 cache->last_byte_to_unpin = (u64)-1;
6062 list_del_init(&caching_ctl->list);
6063 put_caching_control(caching_ctl);
6064 } else {
6065 cache->last_byte_to_unpin = caching_ctl->progress;
6066 }
6067 }
6068
6069 if (fs_info->pinned_extents == &fs_info->freed_extents[0])
6070 fs_info->pinned_extents = &fs_info->freed_extents[1];
6071 else
6072 fs_info->pinned_extents = &fs_info->freed_extents[0];
6073
Josef Bacik9e351cc2014-03-13 15:42:13 -04006074 up_write(&fs_info->commit_root_sem);
Yan, Zheng8929ecfa2010-05-16 10:49:58 -04006075
6076 update_global_block_rsv(fs_info);
Yan Zheng11833d62009-09-11 16:11:19 -04006077}
6078
Filipe Manana678886b2014-12-07 21:31:47 +00006079static int unpin_extent_range(struct btrfs_root *root, u64 start, u64 end,
6080 const bool return_free_space)
Yan Zheng11833d62009-09-11 16:11:19 -04006081{
6082 struct btrfs_fs_info *fs_info = root->fs_info;
6083 struct btrfs_block_group_cache *cache = NULL;
Josef Bacik7b398f82012-10-22 15:52:28 -04006084 struct btrfs_space_info *space_info;
6085 struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
Yan Zheng11833d62009-09-11 16:11:19 -04006086 u64 len;
Josef Bacik7b398f82012-10-22 15:52:28 -04006087 bool readonly;
Yan Zheng11833d62009-09-11 16:11:19 -04006088
6089 while (start <= end) {
Josef Bacik7b398f82012-10-22 15:52:28 -04006090 readonly = false;
Yan Zheng11833d62009-09-11 16:11:19 -04006091 if (!cache ||
6092 start >= cache->key.objectid + cache->key.offset) {
6093 if (cache)
6094 btrfs_put_block_group(cache);
6095 cache = btrfs_lookup_block_group(fs_info, start);
Jeff Mahoney79787ea2012-03-12 16:03:00 +01006096 BUG_ON(!cache); /* Logic error */
Yan Zheng11833d62009-09-11 16:11:19 -04006097 }
6098
6099 len = cache->key.objectid + cache->key.offset - start;
6100 len = min(len, end + 1 - start);
6101
6102 if (start < cache->last_byte_to_unpin) {
6103 len = min(len, cache->last_byte_to_unpin - start);
Filipe Manana678886b2014-12-07 21:31:47 +00006104 if (return_free_space)
6105 btrfs_add_free_space(cache, start, len);
Yan Zheng11833d62009-09-11 16:11:19 -04006106 }
Josef Bacik25179202008-10-29 14:49:05 -04006107
Yan, Zhengf0486c62010-05-16 10:46:25 -04006108 start += len;
Josef Bacik7b398f82012-10-22 15:52:28 -04006109 space_info = cache->space_info;
Yan, Zhengf0486c62010-05-16 10:46:25 -04006110
Josef Bacik7b398f82012-10-22 15:52:28 -04006111 spin_lock(&space_info->lock);
Josef Bacik25179202008-10-29 14:49:05 -04006112 spin_lock(&cache->lock);
Yan Zheng11833d62009-09-11 16:11:19 -04006113 cache->pinned -= len;
Josef Bacik7b398f82012-10-22 15:52:28 -04006114 space_info->bytes_pinned -= len;
Liu Bod288db52014-07-02 16:58:01 +08006115 percpu_counter_add(&space_info->total_bytes_pinned, -len);
Josef Bacik7b398f82012-10-22 15:52:28 -04006116 if (cache->ro) {
6117 space_info->bytes_readonly += len;
6118 readonly = true;
6119 }
Josef Bacik25179202008-10-29 14:49:05 -04006120 spin_unlock(&cache->lock);
Josef Bacik7b398f82012-10-22 15:52:28 -04006121 if (!readonly && global_rsv->space_info == space_info) {
6122 spin_lock(&global_rsv->lock);
6123 if (!global_rsv->full) {
6124 len = min(len, global_rsv->size -
6125 global_rsv->reserved);
6126 global_rsv->reserved += len;
6127 space_info->bytes_may_use += len;
6128 if (global_rsv->reserved >= global_rsv->size)
6129 global_rsv->full = 1;
6130 }
6131 spin_unlock(&global_rsv->lock);
6132 }
6133 spin_unlock(&space_info->lock);
Yan Zheng11833d62009-09-11 16:11:19 -04006134 }
6135
6136 if (cache)
Chris Masonfa9c0d792009-04-03 09:47:43 -04006137 btrfs_put_block_group(cache);
Chris Masonccd467d2007-06-28 15:57:36 -04006138 return 0;
6139}
6140
/*
 * Late stage of the transaction commit: discard (when mount option
 * DISCARD is set) and unpin every extent pinned during this transaction,
 * then discard and release the block groups that were deleted in it.
 *
 * The unpin loop stops early if the transaction was aborted; the deleted
 * block groups are still cleaned up in that case.  Always returns 0.
 */
int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
			       struct btrfs_root *root)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct btrfs_block_group_cache *block_group, *tmp;
	struct list_head *deleted_bgs;
	struct extent_io_tree *unpin;
	u64 start;
	u64 end;
	int ret;

	/*
	 * Process the tree that is NOT currently collecting pinned extents;
	 * the trees were swapped in btrfs_prepare_extent_commit.
	 */
	if (fs_info->pinned_extents == &fs_info->freed_extents[0])
		unpin = &fs_info->freed_extents[1];
	else
		unpin = &fs_info->freed_extents[0];

	while (!trans->aborted) {
		/* Serialize against deletion of unused block groups. */
		mutex_lock(&fs_info->unused_bg_unpin_mutex);
		ret = find_first_extent_bit(unpin, 0, &start, &end,
					    EXTENT_DIRTY, NULL);
		if (ret) {
			mutex_unlock(&fs_info->unused_bg_unpin_mutex);
			break;
		}

		if (btrfs_test_opt(root, DISCARD))
			ret = btrfs_discard_extent(root, start,
						   end + 1 - start, NULL);

		clear_extent_dirty(unpin, start, end, GFP_NOFS);
		unpin_extent_range(root, start, end, true);
		mutex_unlock(&fs_info->unused_bg_unpin_mutex);
		cond_resched();
	}

	/*
	 * Transaction is finished.  We don't need the lock anymore.  We
	 * do need to clean up the block groups in case of a transaction
	 * abort.
	 */
	deleted_bgs = &trans->transaction->deleted_bgs;
	list_for_each_entry_safe(block_group, tmp, deleted_bgs, bg_list) {
		u64 trimmed = 0;

		/* On abort, skip the discard but still drop our references. */
		ret = -EROFS;
		if (!trans->aborted)
			ret = btrfs_discard_extent(root,
						   block_group->key.objectid,
						   block_group->key.offset,
						   &trimmed);

		list_del_init(&block_group->bg_list);
		btrfs_put_block_group_trimming(block_group);
		btrfs_put_block_group(block_group);

		if (ret) {
			const char *errstr = btrfs_decode_error(ret);
			btrfs_warn(fs_info,
				   "Discard failed while removing blockgroup: errno=%d %s\n",
				   ret, errstr);
		}
	}

	return 0;
}
6206
Josef Bacikb150a4f2013-06-19 15:00:04 -04006207static void add_pinned_bytes(struct btrfs_fs_info *fs_info, u64 num_bytes,
6208 u64 owner, u64 root_objectid)
6209{
6210 struct btrfs_space_info *space_info;
6211 u64 flags;
6212
6213 if (owner < BTRFS_FIRST_FREE_OBJECTID) {
6214 if (root_objectid == BTRFS_CHUNK_TREE_OBJECTID)
6215 flags = BTRFS_BLOCK_GROUP_SYSTEM;
6216 else
6217 flags = BTRFS_BLOCK_GROUP_METADATA;
6218 } else {
6219 flags = BTRFS_BLOCK_GROUP_DATA;
6220 }
6221
6222 space_info = __find_space_info(fs_info, flags);
6223 BUG_ON(!space_info); /* Logic bug */
6224 percpu_counter_add(&space_info->total_bytes_pinned, num_bytes);
6225}
6226
6227
Yan Zheng5d4f98a2009-06-10 10:45:14 -04006228static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
6229 struct btrfs_root *root,
Qu Wenruoc682f9b2015-03-17 16:59:47 +08006230 struct btrfs_delayed_ref_node *node, u64 parent,
Yan Zheng5d4f98a2009-06-10 10:45:14 -04006231 u64 root_objectid, u64 owner_objectid,
6232 u64 owner_offset, int refs_to_drop,
Qu Wenruoc682f9b2015-03-17 16:59:47 +08006233 struct btrfs_delayed_extent_op *extent_op)
Chris Masona28ec192007-03-06 20:08:01 -05006234{
Chris Masone2fa7222007-03-12 16:22:34 -04006235 struct btrfs_key key;
Yan Zheng5d4f98a2009-06-10 10:45:14 -04006236 struct btrfs_path *path;
Chris Mason1261ec42007-03-20 20:35:03 -04006237 struct btrfs_fs_info *info = root->fs_info;
6238 struct btrfs_root *extent_root = info->extent_root;
Chris Mason5f39d392007-10-15 16:14:19 -04006239 struct extent_buffer *leaf;
Yan Zheng5d4f98a2009-06-10 10:45:14 -04006240 struct btrfs_extent_item *ei;
6241 struct btrfs_extent_inline_ref *iref;
Chris Masona28ec192007-03-06 20:08:01 -05006242 int ret;
Yan Zheng5d4f98a2009-06-10 10:45:14 -04006243 int is_data;
Chris Mason952fcca2008-02-18 16:33:44 -05006244 int extent_slot = 0;
6245 int found_extent = 0;
6246 int num_to_del = 1;
Qu Wenruoc682f9b2015-03-17 16:59:47 +08006247 int no_quota = node->no_quota;
Yan Zheng5d4f98a2009-06-10 10:45:14 -04006248 u32 item_size;
6249 u64 refs;
Qu Wenruoc682f9b2015-03-17 16:59:47 +08006250 u64 bytenr = node->bytenr;
6251 u64 num_bytes = node->num_bytes;
Josef Bacikfcebe452014-05-13 17:30:47 -07006252 int last_ref = 0;
Josef Bacik3173a182013-03-07 14:22:04 -05006253 bool skinny_metadata = btrfs_fs_incompat(root->fs_info,
6254 SKINNY_METADATA);
Chris Mason037e6392007-03-07 11:50:24 -05006255
Josef Bacikfcebe452014-05-13 17:30:47 -07006256 if (!info->quota_enabled || !is_fstree(root_objectid))
6257 no_quota = 1;
6258
Chris Mason5caf2a02007-04-02 11:20:42 -04006259 path = btrfs_alloc_path();
Chris Mason54aa1f42007-06-22 14:16:25 -04006260 if (!path)
6261 return -ENOMEM;
6262
Chris Mason3c12ac72008-04-21 12:01:38 -04006263 path->reada = 1;
Chris Masonb9473432009-03-13 11:00:37 -04006264 path->leave_spinning = 1;
Yan Zheng5d4f98a2009-06-10 10:45:14 -04006265
6266 is_data = owner_objectid >= BTRFS_FIRST_FREE_OBJECTID;
6267 BUG_ON(!is_data && refs_to_drop != 1);
6268
Josef Bacik3173a182013-03-07 14:22:04 -05006269 if (is_data)
6270 skinny_metadata = 0;
6271
Yan Zheng5d4f98a2009-06-10 10:45:14 -04006272 ret = lookup_extent_backref(trans, extent_root, path, &iref,
6273 bytenr, num_bytes, parent,
6274 root_objectid, owner_objectid,
6275 owner_offset);
Chris Mason7bb86312007-12-11 09:25:06 -05006276 if (ret == 0) {
Chris Mason952fcca2008-02-18 16:33:44 -05006277 extent_slot = path->slots[0];
Yan Zheng5d4f98a2009-06-10 10:45:14 -04006278 while (extent_slot >= 0) {
6279 btrfs_item_key_to_cpu(path->nodes[0], &key,
Chris Mason952fcca2008-02-18 16:33:44 -05006280 extent_slot);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04006281 if (key.objectid != bytenr)
Chris Mason952fcca2008-02-18 16:33:44 -05006282 break;
Yan Zheng5d4f98a2009-06-10 10:45:14 -04006283 if (key.type == BTRFS_EXTENT_ITEM_KEY &&
6284 key.offset == num_bytes) {
Chris Mason952fcca2008-02-18 16:33:44 -05006285 found_extent = 1;
6286 break;
6287 }
Josef Bacik3173a182013-03-07 14:22:04 -05006288 if (key.type == BTRFS_METADATA_ITEM_KEY &&
6289 key.offset == owner_objectid) {
6290 found_extent = 1;
6291 break;
6292 }
Chris Mason952fcca2008-02-18 16:33:44 -05006293 if (path->slots[0] - extent_slot > 5)
6294 break;
Yan Zheng5d4f98a2009-06-10 10:45:14 -04006295 extent_slot--;
Chris Mason952fcca2008-02-18 16:33:44 -05006296 }
Yan Zheng5d4f98a2009-06-10 10:45:14 -04006297#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
6298 item_size = btrfs_item_size_nr(path->nodes[0], extent_slot);
6299 if (found_extent && item_size < sizeof(*ei))
6300 found_extent = 0;
6301#endif
Zheng Yan31840ae2008-09-23 13:14:14 -04006302 if (!found_extent) {
Yan Zheng5d4f98a2009-06-10 10:45:14 -04006303 BUG_ON(iref);
Chris Mason56bec292009-03-13 10:10:06 -04006304 ret = remove_extent_backref(trans, extent_root, path,
Yan Zheng5d4f98a2009-06-10 10:45:14 -04006305 NULL, refs_to_drop,
Josef Bacikfcebe452014-05-13 17:30:47 -07006306 is_data, &last_ref);
David Sterba005d6422012-09-18 07:52:32 -06006307 if (ret) {
6308 btrfs_abort_transaction(trans, extent_root, ret);
6309 goto out;
6310 }
David Sterbab3b4aa72011-04-21 01:20:15 +02006311 btrfs_release_path(path);
Chris Masonb9473432009-03-13 11:00:37 -04006312 path->leave_spinning = 1;
Yan Zheng5d4f98a2009-06-10 10:45:14 -04006313
6314 key.objectid = bytenr;
6315 key.type = BTRFS_EXTENT_ITEM_KEY;
6316 key.offset = num_bytes;
6317
Josef Bacik3173a182013-03-07 14:22:04 -05006318 if (!is_data && skinny_metadata) {
6319 key.type = BTRFS_METADATA_ITEM_KEY;
6320 key.offset = owner_objectid;
6321 }
6322
Zheng Yan31840ae2008-09-23 13:14:14 -04006323 ret = btrfs_search_slot(trans, extent_root,
6324 &key, path, -1, 1);
Josef Bacik3173a182013-03-07 14:22:04 -05006325 if (ret > 0 && skinny_metadata && path->slots[0]) {
6326 /*
6327 * Couldn't find our skinny metadata item,
6328 * see if we have ye olde extent item.
6329 */
6330 path->slots[0]--;
6331 btrfs_item_key_to_cpu(path->nodes[0], &key,
6332 path->slots[0]);
6333 if (key.objectid == bytenr &&
6334 key.type == BTRFS_EXTENT_ITEM_KEY &&
6335 key.offset == num_bytes)
6336 ret = 0;
6337 }
6338
6339 if (ret > 0 && skinny_metadata) {
6340 skinny_metadata = false;
Filipe Manana9ce49a02014-04-24 15:15:28 +01006341 key.objectid = bytenr;
Josef Bacik3173a182013-03-07 14:22:04 -05006342 key.type = BTRFS_EXTENT_ITEM_KEY;
6343 key.offset = num_bytes;
6344 btrfs_release_path(path);
6345 ret = btrfs_search_slot(trans, extent_root,
6346 &key, path, -1, 1);
6347 }
6348
Josef Bacikf3465ca2008-11-12 14:19:50 -05006349 if (ret) {
Simon Kirbyc2cf52e2013-03-19 22:41:23 +00006350 btrfs_err(info, "umm, got %d back from search, was looking for %llu",
Geert Uytterhoevenc1c9ff72013-08-20 13:20:07 +02006351 ret, bytenr);
Josef Bacikb783e622011-07-13 15:03:50 +00006352 if (ret > 0)
6353 btrfs_print_leaf(extent_root,
6354 path->nodes[0]);
Josef Bacikf3465ca2008-11-12 14:19:50 -05006355 }
David Sterba005d6422012-09-18 07:52:32 -06006356 if (ret < 0) {
6357 btrfs_abort_transaction(trans, extent_root, ret);
6358 goto out;
6359 }
Zheng Yan31840ae2008-09-23 13:14:14 -04006360 extent_slot = path->slots[0];
6361 }
Dulshani Gunawardhanafae7f212013-10-31 10:30:08 +05306362 } else if (WARN_ON(ret == -ENOENT)) {
Chris Mason7bb86312007-12-11 09:25:06 -05006363 btrfs_print_leaf(extent_root, path->nodes[0]);
Simon Kirbyc2cf52e2013-03-19 22:41:23 +00006364 btrfs_err(info,
6365 "unable to find ref byte nr %llu parent %llu root %llu owner %llu offset %llu",
Geert Uytterhoevenc1c9ff72013-08-20 13:20:07 +02006366 bytenr, parent, root_objectid, owner_objectid,
6367 owner_offset);
Josef Bacikc4a050b2014-03-14 16:36:53 -04006368 btrfs_abort_transaction(trans, extent_root, ret);
6369 goto out;
Jeff Mahoney79787ea2012-03-12 16:03:00 +01006370 } else {
David Sterba005d6422012-09-18 07:52:32 -06006371 btrfs_abort_transaction(trans, extent_root, ret);
6372 goto out;
Chris Mason7bb86312007-12-11 09:25:06 -05006373 }
Chris Mason5f39d392007-10-15 16:14:19 -04006374
6375 leaf = path->nodes[0];
Yan Zheng5d4f98a2009-06-10 10:45:14 -04006376 item_size = btrfs_item_size_nr(leaf, extent_slot);
6377#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
6378 if (item_size < sizeof(*ei)) {
6379 BUG_ON(found_extent || extent_slot != path->slots[0]);
6380 ret = convert_extent_item_v0(trans, extent_root, path,
6381 owner_objectid, 0);
David Sterba005d6422012-09-18 07:52:32 -06006382 if (ret < 0) {
6383 btrfs_abort_transaction(trans, extent_root, ret);
6384 goto out;
6385 }
Yan Zheng5d4f98a2009-06-10 10:45:14 -04006386
David Sterbab3b4aa72011-04-21 01:20:15 +02006387 btrfs_release_path(path);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04006388 path->leave_spinning = 1;
6389
6390 key.objectid = bytenr;
6391 key.type = BTRFS_EXTENT_ITEM_KEY;
6392 key.offset = num_bytes;
6393
6394 ret = btrfs_search_slot(trans, extent_root, &key, path,
6395 -1, 1);
6396 if (ret) {
Simon Kirbyc2cf52e2013-03-19 22:41:23 +00006397 btrfs_err(info, "umm, got %d back from search, was looking for %llu",
Geert Uytterhoevenc1c9ff72013-08-20 13:20:07 +02006398 ret, bytenr);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04006399 btrfs_print_leaf(extent_root, path->nodes[0]);
6400 }
David Sterba005d6422012-09-18 07:52:32 -06006401 if (ret < 0) {
6402 btrfs_abort_transaction(trans, extent_root, ret);
6403 goto out;
6404 }
6405
Yan Zheng5d4f98a2009-06-10 10:45:14 -04006406 extent_slot = path->slots[0];
6407 leaf = path->nodes[0];
6408 item_size = btrfs_item_size_nr(leaf, extent_slot);
6409 }
6410#endif
6411 BUG_ON(item_size < sizeof(*ei));
Chris Mason952fcca2008-02-18 16:33:44 -05006412 ei = btrfs_item_ptr(leaf, extent_slot,
Chris Mason123abc82007-03-14 14:14:43 -04006413 struct btrfs_extent_item);
Josef Bacik3173a182013-03-07 14:22:04 -05006414 if (owner_objectid < BTRFS_FIRST_FREE_OBJECTID &&
6415 key.type == BTRFS_EXTENT_ITEM_KEY) {
Yan Zheng5d4f98a2009-06-10 10:45:14 -04006416 struct btrfs_tree_block_info *bi;
6417 BUG_ON(item_size < sizeof(*ei) + sizeof(*bi));
6418 bi = (struct btrfs_tree_block_info *)(ei + 1);
6419 WARN_ON(owner_objectid != btrfs_tree_block_level(leaf, bi));
Chris Mason952fcca2008-02-18 16:33:44 -05006420 }
6421
Yan Zheng5d4f98a2009-06-10 10:45:14 -04006422 refs = btrfs_extent_refs(leaf, ei);
Josef Bacik32b02532013-04-24 16:38:50 -04006423 if (refs < refs_to_drop) {
6424 btrfs_err(info, "trying to drop %d refs but we only have %Lu "
David Sterba351fd352014-05-15 16:48:20 +02006425 "for bytenr %Lu", refs_to_drop, refs, bytenr);
Josef Bacik32b02532013-04-24 16:38:50 -04006426 ret = -EINVAL;
6427 btrfs_abort_transaction(trans, extent_root, ret);
6428 goto out;
6429 }
Yan Zheng5d4f98a2009-06-10 10:45:14 -04006430 refs -= refs_to_drop;
6431
6432 if (refs > 0) {
6433 if (extent_op)
6434 __run_delayed_extent_op(extent_op, leaf, ei);
6435 /*
6436 * In the case of inline back ref, reference count will
6437 * be updated by remove_extent_backref
6438 */
6439 if (iref) {
6440 BUG_ON(!found_extent);
6441 } else {
6442 btrfs_set_extent_refs(leaf, ei, refs);
6443 btrfs_mark_buffer_dirty(leaf);
6444 }
6445 if (found_extent) {
6446 ret = remove_extent_backref(trans, extent_root, path,
6447 iref, refs_to_drop,
Josef Bacikfcebe452014-05-13 17:30:47 -07006448 is_data, &last_ref);
David Sterba005d6422012-09-18 07:52:32 -06006449 if (ret) {
6450 btrfs_abort_transaction(trans, extent_root, ret);
6451 goto out;
6452 }
Yan Zheng5d4f98a2009-06-10 10:45:14 -04006453 }
Josef Bacikb150a4f2013-06-19 15:00:04 -04006454 add_pinned_bytes(root->fs_info, -num_bytes, owner_objectid,
6455 root_objectid);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04006456 } else {
Yan Zheng5d4f98a2009-06-10 10:45:14 -04006457 if (found_extent) {
6458 BUG_ON(is_data && refs_to_drop !=
Zhaolei9ed0dea2015-08-06 22:16:24 +08006459 extent_data_ref_count(path, iref));
Yan Zheng5d4f98a2009-06-10 10:45:14 -04006460 if (iref) {
6461 BUG_ON(path->slots[0] != extent_slot);
6462 } else {
6463 BUG_ON(path->slots[0] != extent_slot + 1);
6464 path->slots[0] = extent_slot;
6465 num_to_del = 2;
6466 }
Chris Mason78fae272007-03-25 11:35:08 -04006467 }
Chris Masonb9473432009-03-13 11:00:37 -04006468
Josef Bacikfcebe452014-05-13 17:30:47 -07006469 last_ref = 1;
Chris Mason952fcca2008-02-18 16:33:44 -05006470 ret = btrfs_del_items(trans, extent_root, path, path->slots[0],
6471 num_to_del);
David Sterba005d6422012-09-18 07:52:32 -06006472 if (ret) {
6473 btrfs_abort_transaction(trans, extent_root, ret);
6474 goto out;
6475 }
David Sterbab3b4aa72011-04-21 01:20:15 +02006476 btrfs_release_path(path);
David Woodhouse21af8042008-08-12 14:13:26 +01006477
Yan Zheng5d4f98a2009-06-10 10:45:14 -04006478 if (is_data) {
Chris Mason459931e2008-12-10 09:10:46 -05006479 ret = btrfs_del_csums(trans, root, bytenr, num_bytes);
David Sterba005d6422012-09-18 07:52:32 -06006480 if (ret) {
6481 btrfs_abort_transaction(trans, extent_root, ret);
6482 goto out;
6483 }
Chris Mason459931e2008-12-10 09:10:46 -05006484 }
6485
Omar Sandoval1e144fb2015-09-29 20:50:37 -07006486 ret = add_to_free_space_tree(trans, root->fs_info, bytenr,
6487 num_bytes);
6488 if (ret) {
6489 btrfs_abort_transaction(trans, extent_root, ret);
6490 goto out;
6491 }
6492
Josef Bacikce93ec52014-11-17 15:45:48 -05006493 ret = update_block_group(trans, root, bytenr, num_bytes, 0);
David Sterba005d6422012-09-18 07:52:32 -06006494 if (ret) {
6495 btrfs_abort_transaction(trans, extent_root, ret);
6496 goto out;
6497 }
Chris Masona28ec192007-03-06 20:08:01 -05006498 }
Josef Bacikfcebe452014-05-13 17:30:47 -07006499 btrfs_release_path(path);
6500
Jeff Mahoney79787ea2012-03-12 16:03:00 +01006501out:
Chris Mason5caf2a02007-04-02 11:20:42 -04006502 btrfs_free_path(path);
Chris Masona28ec192007-03-06 20:08:01 -05006503 return ret;
6504}
6505
6506/*
Yan, Zhengf0486c62010-05-16 10:46:25 -04006507 * when we free an block, it is possible (and likely) that we free the last
Chris Mason1887be62009-03-13 10:11:24 -04006508 * delayed ref for that extent as well. This searches the delayed ref tree for
6509 * a given extent, and if there are no other delayed refs to be processed, it
6510 * removes it from the tree.
6511 */
static noinline int check_ref_cleanup(struct btrfs_trans_handle *trans,
				      struct btrfs_root *root, u64 bytenr)
{
	struct btrfs_delayed_ref_head *head;
	struct btrfs_delayed_ref_root *delayed_refs;
	int ret = 0;

	delayed_refs = &trans->transaction->delayed_refs;
	spin_lock(&delayed_refs->lock);
	head = btrfs_find_delayed_ref_head(trans, bytenr);
	if (!head)
		goto out_delayed_unlock;

	spin_lock(&head->lock);
	/* other delayed refs are still queued for this extent: keep the head */
	if (!list_empty(&head->ref_list))
		goto out;

	if (head->extent_op) {
		/*
		 * A pending extent op means there is still work to do for
		 * this extent, unless it exists only because of
		 * must_insert_reserved, in which case it can be dropped
		 * together with the head.
		 */
		if (!head->must_insert_reserved)
			goto out;
		btrfs_free_delayed_extent_op(head->extent_op);
		head->extent_op = NULL;
	}

	/*
	 * waiting for the lock here would deadlock. If someone else has it
	 * locked they are already in the process of dropping it anyway
	 */
	if (!mutex_trylock(&head->mutex))
		goto out;

	/*
	 * at this point we have a head with no other entries. Go
	 * ahead and process it.
	 */
	head->node.in_tree = 0;
	rb_erase(&head->href_node, &delayed_refs->href_root);

	atomic_dec(&delayed_refs->num_entries);

	/*
	 * we don't take a ref on the node because we're removing it from the
	 * tree, so we just steal the ref the tree was holding.
	 */
	delayed_refs->num_heads--;
	if (head->processing == 0)
		delayed_refs->num_heads_ready--;
	head->processing = 0;
	spin_unlock(&head->lock);
	spin_unlock(&delayed_refs->lock);

	/* extent_op was either absent or freed above */
	BUG_ON(head->extent_op);
	/*
	 * Nonzero return tells the caller (see btrfs_free_tree_block) that
	 * the reserved extent still has to be released/pinned by it.
	 */
	if (head->must_insert_reserved)
		ret = 1;

	mutex_unlock(&head->mutex);
	btrfs_put_delayed_ref(&head->node);
	return ret;
out:
	spin_unlock(&head->lock);

out_delayed_unlock:
	spin_unlock(&delayed_refs->lock);
	return 0;
}
6577
/*
 * Free a tree block: queue a DROP delayed ref (except for log trees) and,
 * if this was the last reference, either return the block's space to the
 * free space cache or account it as pinned until the transaction commits.
 */
void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
			   struct btrfs_root *root,
			   struct extent_buffer *buf,
			   u64 parent, int last_ref)
{
	int pin = 1;
	int ret;

	if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) {
		ret = btrfs_add_delayed_tree_ref(root->fs_info, trans,
					buf->start, buf->len,
					parent, root->root_key.objectid,
					btrfs_header_level(buf),
					BTRFS_DROP_DELAYED_REF, NULL, 0);
		BUG_ON(ret); /* -ENOMEM */
	}

	if (!last_ref)
		return;

	/*
	 * Only blocks allocated in the current transaction can be freed
	 * for reuse right away; older blocks stay pinned (see "out").
	 */
	if (btrfs_header_generation(buf) == trans->transid) {
		struct btrfs_block_group_cache *cache;

		if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) {
			ret = check_ref_cleanup(trans, root, buf->start);
			/* 0 means the delayed ref head was consumed for us */
			if (!ret)
				goto out;
		}

		cache = btrfs_lookup_block_group(root->fs_info, buf->start);

		/*
		 * A block already written in this transaction must stay
		 * pinned so the old on-disk copy is not overwritten before
		 * commit.
		 */
		if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) {
			pin_down_extent(root, cache, buf->start, buf->len, 1);
			btrfs_put_block_group(cache);
			goto out;
		}

		WARN_ON(test_bit(EXTENT_BUFFER_DIRTY, &buf->bflags));

		/* never written out: space can be reused immediately */
		btrfs_add_free_space(cache, buf->start, buf->len);
		btrfs_update_reserved_bytes(cache, buf->len, RESERVE_FREE, 0);
		btrfs_put_block_group(cache);
		trace_btrfs_reserved_extent_free(root, buf->start, buf->len);
		pin = 0;
	}
out:
	if (pin)
		add_pinned_bytes(root->fs_info, buf->len,
				 btrfs_header_level(buf),
				 root->root_key.objectid);

	/*
	 * Deleting the buffer, clear the corrupt flag since it doesn't matter
	 * anymore.
	 */
	clear_bit(EXTENT_BUFFER_CORRUPT, &buf->bflags);
}
6635
Jeff Mahoney79787ea2012-03-12 16:03:00 +01006636/* Can return -ENOMEM */
Arne Jansen66d7e7f2011-09-12 15:26:38 +02006637int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root,
6638 u64 bytenr, u64 num_bytes, u64 parent, u64 root_objectid,
Josef Bacikfcebe452014-05-13 17:30:47 -07006639 u64 owner, u64 offset, int no_quota)
Chris Mason925baed2008-06-25 16:01:30 -04006640{
6641 int ret;
Arne Jansen66d7e7f2011-09-12 15:26:38 +02006642 struct btrfs_fs_info *fs_info = root->fs_info;
Chris Mason925baed2008-06-25 16:01:30 -04006643
David Sterbafccb84c2014-09-29 23:53:21 +02006644 if (btrfs_test_is_dummy_root(root))
Josef Bacikfaa2dbf2014-05-07 17:06:09 -04006645 return 0;
David Sterbafccb84c2014-09-29 23:53:21 +02006646
Josef Bacikb150a4f2013-06-19 15:00:04 -04006647 add_pinned_bytes(root->fs_info, num_bytes, owner, root_objectid);
6648
Chris Mason56bec292009-03-13 10:10:06 -04006649 /*
6650 * tree log blocks never actually go into the extent allocation
6651 * tree, just update pinning info and exit early.
Chris Mason56bec292009-03-13 10:10:06 -04006652 */
Yan Zheng5d4f98a2009-06-10 10:45:14 -04006653 if (root_objectid == BTRFS_TREE_LOG_OBJECTID) {
6654 WARN_ON(owner >= BTRFS_FIRST_FREE_OBJECTID);
Chris Masonb9473432009-03-13 11:00:37 -04006655 /* unlocks the pinned mutex */
Yan Zheng11833d62009-09-11 16:11:19 -04006656 btrfs_pin_extent(root, bytenr, num_bytes, 1);
Chris Mason56bec292009-03-13 10:10:06 -04006657 ret = 0;
Yan Zheng5d4f98a2009-06-10 10:45:14 -04006658 } else if (owner < BTRFS_FIRST_FREE_OBJECTID) {
Arne Jansen66d7e7f2011-09-12 15:26:38 +02006659 ret = btrfs_add_delayed_tree_ref(fs_info, trans, bytenr,
6660 num_bytes,
Yan Zheng5d4f98a2009-06-10 10:45:14 -04006661 parent, root_objectid, (int)owner,
Josef Bacikfcebe452014-05-13 17:30:47 -07006662 BTRFS_DROP_DELAYED_REF, NULL, no_quota);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04006663 } else {
Arne Jansen66d7e7f2011-09-12 15:26:38 +02006664 ret = btrfs_add_delayed_data_ref(fs_info, trans, bytenr,
6665 num_bytes,
6666 parent, root_objectid, owner,
6667 offset, BTRFS_DROP_DELAYED_REF,
Josef Bacikfcebe452014-05-13 17:30:47 -07006668 NULL, no_quota);
Chris Mason56bec292009-03-13 10:10:06 -04006669 }
Chris Mason925baed2008-06-25 16:01:30 -04006670 return ret;
6671}
6672
Chris Masonfec577f2007-02-26 10:40:21 -05006673/*
 * when we wait for progress in the block group caching, it's because
6675 * our allocation attempt failed at least once. So, we must sleep
6676 * and let some progress happen before we try again.
6677 *
6678 * This function will sleep at least once waiting for new free space to
6679 * show up, and then it will check the block group free space numbers
6680 * for our min num_bytes. Another option is to have it go ahead
6681 * and look in the rbtree for a free extent of a given size, but this
6682 * is a good start.
Josef Bacik36cce922013-08-05 11:15:21 -04006683 *
6684 * Callers of this must check if cache->cached == BTRFS_CACHE_ERROR before using
6685 * any of the information in this block group.
Josef Bacik817d52f2009-07-13 21:29:25 -04006686 */
Josef Bacik36cce922013-08-05 11:15:21 -04006687static noinline void
Josef Bacik817d52f2009-07-13 21:29:25 -04006688wait_block_group_cache_progress(struct btrfs_block_group_cache *cache,
6689 u64 num_bytes)
6690{
Yan Zheng11833d62009-09-11 16:11:19 -04006691 struct btrfs_caching_control *caching_ctl;
Josef Bacik817d52f2009-07-13 21:29:25 -04006692
Yan Zheng11833d62009-09-11 16:11:19 -04006693 caching_ctl = get_caching_control(cache);
6694 if (!caching_ctl)
Josef Bacik36cce922013-08-05 11:15:21 -04006695 return;
Josef Bacik817d52f2009-07-13 21:29:25 -04006696
Yan Zheng11833d62009-09-11 16:11:19 -04006697 wait_event(caching_ctl->wait, block_group_cache_done(cache) ||
Li Zefan34d52cb2011-03-29 13:46:06 +08006698 (cache->free_space_ctl->free_space >= num_bytes));
Yan Zheng11833d62009-09-11 16:11:19 -04006699
6700 put_caching_control(caching_ctl);
Yan Zheng11833d62009-09-11 16:11:19 -04006701}
6702
6703static noinline int
6704wait_block_group_cache_done(struct btrfs_block_group_cache *cache)
6705{
6706 struct btrfs_caching_control *caching_ctl;
Josef Bacik36cce922013-08-05 11:15:21 -04006707 int ret = 0;
Yan Zheng11833d62009-09-11 16:11:19 -04006708
6709 caching_ctl = get_caching_control(cache);
6710 if (!caching_ctl)
Josef Bacik36cce922013-08-05 11:15:21 -04006711 return (cache->cached == BTRFS_CACHE_ERROR) ? -EIO : 0;
Yan Zheng11833d62009-09-11 16:11:19 -04006712
6713 wait_event(caching_ctl->wait, block_group_cache_done(cache));
Josef Bacik36cce922013-08-05 11:15:21 -04006714 if (cache->cached == BTRFS_CACHE_ERROR)
6715 ret = -EIO;
Yan Zheng11833d62009-09-11 16:11:19 -04006716 put_caching_control(caching_ctl);
Josef Bacik36cce922013-08-05 11:15:21 -04006717 return ret;
Josef Bacik817d52f2009-07-13 21:29:25 -04006718}
6719
Liu Bo31e50222012-11-21 14:18:10 +00006720int __get_raid_index(u64 flags)
Yan, Zhengb742bb822010-05-16 10:46:24 -04006721{
Ilya Dryomov7738a532012-03-27 17:09:17 +03006722 if (flags & BTRFS_BLOCK_GROUP_RAID10)
Miao Xiee6ec7162013-01-17 05:38:51 +00006723 return BTRFS_RAID_RAID10;
Ilya Dryomov7738a532012-03-27 17:09:17 +03006724 else if (flags & BTRFS_BLOCK_GROUP_RAID1)
Miao Xiee6ec7162013-01-17 05:38:51 +00006725 return BTRFS_RAID_RAID1;
Ilya Dryomov7738a532012-03-27 17:09:17 +03006726 else if (flags & BTRFS_BLOCK_GROUP_DUP)
Miao Xiee6ec7162013-01-17 05:38:51 +00006727 return BTRFS_RAID_DUP;
Ilya Dryomov7738a532012-03-27 17:09:17 +03006728 else if (flags & BTRFS_BLOCK_GROUP_RAID0)
Miao Xiee6ec7162013-01-17 05:38:51 +00006729 return BTRFS_RAID_RAID0;
David Woodhouse53b381b2013-01-29 18:40:14 -05006730 else if (flags & BTRFS_BLOCK_GROUP_RAID5)
Chris Masone942f882013-02-20 14:06:05 -05006731 return BTRFS_RAID_RAID5;
David Woodhouse53b381b2013-01-29 18:40:14 -05006732 else if (flags & BTRFS_BLOCK_GROUP_RAID6)
Chris Masone942f882013-02-20 14:06:05 -05006733 return BTRFS_RAID_RAID6;
Ilya Dryomov7738a532012-03-27 17:09:17 +03006734
Chris Masone942f882013-02-20 14:06:05 -05006735 return BTRFS_RAID_SINGLE; /* BTRFS_BLOCK_GROUP_SINGLE */
Yan, Zhengb742bb822010-05-16 10:46:24 -04006736}
6737
/* Convenience wrapper: raid index for a block group's profile flags. */
int get_block_group_index(struct btrfs_block_group_cache *cache)
{
	return __get_raid_index(cache->flags);
}
6742
/*
 * Human readable names for each raid profile, indexed by
 * enum btrfs_raid_types; looked up via get_raid_name().
 */
static const char *btrfs_raid_type_names[BTRFS_NR_RAID_TYPES] = {
	[BTRFS_RAID_RAID10]	= "raid10",
	[BTRFS_RAID_RAID1]	= "raid1",
	[BTRFS_RAID_DUP]	= "dup",
	[BTRFS_RAID_RAID0]	= "raid0",
	[BTRFS_RAID_SINGLE]	= "single",
	[BTRFS_RAID_RAID5]	= "raid5",
	[BTRFS_RAID_RAID6]	= "raid6",
};
6752
Jeff Mahoney1b8e5df2013-11-20 16:50:23 -05006753static const char *get_raid_name(enum btrfs_raid_types type)
Jeff Mahoney6ab0a202013-11-01 13:07:04 -04006754{
6755 if (type >= BTRFS_NR_RAID_TYPES)
6756 return NULL;
6757
6758 return btrfs_raid_type_names[type];
6759}
6760
/*
 * Escalation stages for find_free_extent(): when a full scan of the block
 * groups fails at one stage, the search retries at the next (higher) one.
 */
enum btrfs_loop_type {
	/* don't wait on block groups that are still caching free space */
	LOOP_CACHING_NOWAIT = 0,
	/* allowed to wait for caching progress before giving up on a group */
	LOOP_CACHING_WAIT = 1,
	/* next escalation step; handled later in find_free_extent() */
	LOOP_ALLOC_CHUNK = 2,
	/* last resort: cluster setup is skipped at this stage */
	LOOP_NO_EMPTY_SIZE = 3,
};
6767
Miao Xiee570fd22014-06-19 10:42:50 +08006768static inline void
6769btrfs_lock_block_group(struct btrfs_block_group_cache *cache,
6770 int delalloc)
6771{
6772 if (delalloc)
6773 down_read(&cache->data_rwsem);
6774}
6775
6776static inline void
6777btrfs_grab_block_group(struct btrfs_block_group_cache *cache,
6778 int delalloc)
6779{
6780 btrfs_get_block_group(cache);
6781 if (delalloc)
6782 down_read(&cache->data_rwsem);
6783}
6784
/*
 * Return the block group the cluster currently points at, holding the
 * cluster's refill_lock.  If that group differs from @block_group, a
 * reference is taken on it and, for delalloc, its data_rwsem is
 * read-locked.  Returns NULL (refill_lock still held) if the cluster has
 * no block group.
 */
static struct btrfs_block_group_cache *
btrfs_lock_cluster(struct btrfs_block_group_cache *block_group,
		   struct btrfs_free_cluster *cluster,
		   int delalloc)
{
	struct btrfs_block_group_cache *used_bg;	/* set before use, guarded by 'locked' */
	bool locked = false;
again:
	spin_lock(&cluster->refill_lock);
	if (locked) {
		/*
		 * We dropped refill_lock to block on used_bg->data_rwsem
		 * below; if the cluster still points at the same group we
		 * are done, otherwise undo the lock/ref and retry with the
		 * cluster's new group.
		 */
		if (used_bg == cluster->block_group)
			return used_bg;

		up_read(&used_bg->data_rwsem);
		btrfs_put_block_group(used_bg);
	}

	used_bg = cluster->block_group;
	if (!used_bg)
		return NULL;

	/* caller already holds a ref (and lock) on its own block group */
	if (used_bg == block_group)
		return used_bg;

	btrfs_get_block_group(used_bg);

	if (!delalloc)
		return used_bg;

	if (down_read_trylock(&used_bg->data_rwsem))
		return used_bg;

	/*
	 * Can't sleep on the rwsem while holding the refill_lock spinlock,
	 * so drop it, block on the rwsem, then revalidate from the top.
	 */
	spin_unlock(&cluster->refill_lock);
	down_read(&used_bg->data_rwsem);
	locked = true;
	goto again;
}
6822
/*
 * Undo btrfs_grab_block_group(): release data_rwsem (delalloc case) before
 * dropping the reference, since the put may be the last one.
 */
static inline void
btrfs_release_block_group(struct btrfs_block_group_cache *cache,
			  int delalloc)
{
	if (delalloc)
		up_read(&cache->data_rwsem);
	btrfs_put_block_group(cache);
}
6831
Josef Bacik817d52f2009-07-13 21:29:25 -04006832/*
Chris Masonfec577f2007-02-26 10:40:21 -05006833 * walks the btree of allocated extents and find a hole of a given size.
6834 * The key ins is changed to record the hole:
Miao Xiea4820392013-09-09 13:19:42 +08006835 * ins->objectid == start position
Chris Mason62e27492007-03-15 12:56:47 -04006836 * ins->flags = BTRFS_EXTENT_ITEM_KEY
Miao Xiea4820392013-09-09 13:19:42 +08006837 * ins->offset == the size of the hole.
Chris Masonfec577f2007-02-26 10:40:21 -05006838 * Any available blocks before search_start are skipped.
Miao Xiea4820392013-09-09 13:19:42 +08006839 *
6840 * If there is no suitable free space, we will record the max size of
6841 * the free space extent currently.
Chris Masonfec577f2007-02-26 10:40:21 -05006842 */
Josef Bacik00361582013-08-14 14:02:47 -04006843static noinline int find_free_extent(struct btrfs_root *orig_root,
Chris Mason98ed5172008-01-03 10:01:48 -05006844 u64 num_bytes, u64 empty_size,
Chris Mason98ed5172008-01-03 10:01:48 -05006845 u64 hint_byte, struct btrfs_key *ins,
Miao Xiee570fd22014-06-19 10:42:50 +08006846 u64 flags, int delalloc)
Chris Masonfec577f2007-02-26 10:40:21 -05006847{
Josef Bacik80eb2342008-10-29 14:49:05 -04006848 int ret = 0;
Chris Masond3977122009-01-05 21:25:51 -05006849 struct btrfs_root *root = orig_root->fs_info->extent_root;
Chris Masonfa9c0d792009-04-03 09:47:43 -04006850 struct btrfs_free_cluster *last_ptr = NULL;
Josef Bacik80eb2342008-10-29 14:49:05 -04006851 struct btrfs_block_group_cache *block_group = NULL;
Josef Bacik81c9ad22012-01-18 10:56:06 -05006852 u64 search_start = 0;
Miao Xiea4820392013-09-09 13:19:42 +08006853 u64 max_extent_size = 0;
Chris Mason239b14b2008-03-24 15:02:07 -04006854 int empty_cluster = 2 * 1024 * 1024;
Josef Bacik80eb2342008-10-29 14:49:05 -04006855 struct btrfs_space_info *space_info;
Chris Masonfa9c0d792009-04-03 09:47:43 -04006856 int loop = 0;
David Sterbab6919a52013-04-29 13:39:40 +00006857 int index = __get_raid_index(flags);
6858 int alloc_type = (flags & BTRFS_BLOCK_GROUP_DATA) ?
Josef Bacikfb25e912011-07-26 17:00:46 -04006859 RESERVE_ALLOC_NO_ACCOUNT : RESERVE_ALLOC;
Josef Bacik0a243252009-09-11 16:11:20 -04006860 bool failed_cluster_refill = false;
Josef Bacik1cdda9b2009-10-06 10:04:28 -04006861 bool failed_alloc = false;
Josef Bacik67377732010-09-16 16:19:09 -04006862 bool use_cluster = true;
Miao Xie60d2adb2011-09-09 17:34:35 +08006863 bool have_caching_bg = false;
Chris Masonfec577f2007-02-26 10:40:21 -05006864
Chris Masondb945352007-10-15 16:15:53 -04006865 WARN_ON(num_bytes < root->sectorsize);
David Sterba962a2982014-06-04 18:41:45 +02006866 ins->type = BTRFS_EXTENT_ITEM_KEY;
Josef Bacik80eb2342008-10-29 14:49:05 -04006867 ins->objectid = 0;
6868 ins->offset = 0;
Chris Masonb1a4d962007-04-04 15:27:52 -04006869
David Sterbab6919a52013-04-29 13:39:40 +00006870 trace_find_free_extent(orig_root, num_bytes, empty_size, flags);
Josef Bacik3f7de032011-11-10 08:29:20 -05006871
David Sterbab6919a52013-04-29 13:39:40 +00006872 space_info = __find_space_info(root->fs_info, flags);
Josef Bacik1b1d1f62010-03-19 20:49:55 +00006873 if (!space_info) {
David Sterbab6919a52013-04-29 13:39:40 +00006874 btrfs_err(root->fs_info, "No space info for %llu", flags);
Josef Bacik1b1d1f62010-03-19 20:49:55 +00006875 return -ENOSPC;
6876 }
Josef Bacik2552d172009-04-03 10:14:19 -04006877
Josef Bacik67377732010-09-16 16:19:09 -04006878 /*
6879 * If the space info is for both data and metadata it means we have a
6880 * small filesystem and we can't use the clustering stuff.
6881 */
6882 if (btrfs_mixed_space_info(space_info))
6883 use_cluster = false;
6884
David Sterbab6919a52013-04-29 13:39:40 +00006885 if (flags & BTRFS_BLOCK_GROUP_METADATA && use_cluster) {
Chris Masonfa9c0d792009-04-03 09:47:43 -04006886 last_ptr = &root->fs_info->meta_alloc_cluster;
Chris Mason536ac8a2009-02-12 09:41:38 -05006887 if (!btrfs_test_opt(root, SSD))
6888 empty_cluster = 64 * 1024;
Chris Mason239b14b2008-03-24 15:02:07 -04006889 }
6890
David Sterbab6919a52013-04-29 13:39:40 +00006891 if ((flags & BTRFS_BLOCK_GROUP_DATA) && use_cluster &&
Josef Bacik67377732010-09-16 16:19:09 -04006892 btrfs_test_opt(root, SSD)) {
Chris Masonfa9c0d792009-04-03 09:47:43 -04006893 last_ptr = &root->fs_info->data_alloc_cluster;
6894 }
Josef Bacik0f9dd462008-09-23 13:14:11 -04006895
Chris Mason239b14b2008-03-24 15:02:07 -04006896 if (last_ptr) {
Chris Masonfa9c0d792009-04-03 09:47:43 -04006897 spin_lock(&last_ptr->lock);
6898 if (last_ptr->block_group)
6899 hint_byte = last_ptr->window_start;
6900 spin_unlock(&last_ptr->lock);
Chris Mason239b14b2008-03-24 15:02:07 -04006901 }
Chris Masonfa9c0d792009-04-03 09:47:43 -04006902
Chris Masona061fc82008-05-07 11:43:44 -04006903 search_start = max(search_start, first_logical_byte(root, 0));
Chris Mason239b14b2008-03-24 15:02:07 -04006904 search_start = max(search_start, hint_byte);
Chris Mason0b86a832008-03-24 15:01:56 -04006905
Josef Bacik817d52f2009-07-13 21:29:25 -04006906 if (!last_ptr)
Chris Masonfa9c0d792009-04-03 09:47:43 -04006907 empty_cluster = 0;
Chris Masonfa9c0d792009-04-03 09:47:43 -04006908
Josef Bacik2552d172009-04-03 10:14:19 -04006909 if (search_start == hint_byte) {
Josef Bacik2552d172009-04-03 10:14:19 -04006910 block_group = btrfs_lookup_block_group(root->fs_info,
6911 search_start);
Josef Bacik817d52f2009-07-13 21:29:25 -04006912 /*
6913 * we don't want to use the block group if it doesn't match our
6914 * allocation bits, or if its not cached.
Josef Bacikccf0e722009-11-10 21:23:48 -05006915 *
6916 * However if we are re-searching with an ideal block group
6917 * picked out then we don't care that the block group is cached.
Josef Bacik817d52f2009-07-13 21:29:25 -04006918 */
David Sterbab6919a52013-04-29 13:39:40 +00006919 if (block_group && block_group_bits(block_group, flags) &&
Josef Bacik285ff5a2012-01-13 15:27:45 -05006920 block_group->cached != BTRFS_CACHE_NO) {
Josef Bacik2552d172009-04-03 10:14:19 -04006921 down_read(&space_info->groups_sem);
Chris Mason44fb5512009-06-04 15:34:51 -04006922 if (list_empty(&block_group->list) ||
6923 block_group->ro) {
6924 /*
6925 * someone is removing this block group,
6926 * we can't jump into the have_block_group
6927 * target because our list pointers are not
6928 * valid
6929 */
6930 btrfs_put_block_group(block_group);
6931 up_read(&space_info->groups_sem);
Josef Bacikccf0e722009-11-10 21:23:48 -05006932 } else {
Yan, Zhengb742bb822010-05-16 10:46:24 -04006933 index = get_block_group_index(block_group);
Miao Xiee570fd22014-06-19 10:42:50 +08006934 btrfs_lock_block_group(block_group, delalloc);
Chris Mason44fb5512009-06-04 15:34:51 -04006935 goto have_block_group;
Josef Bacikccf0e722009-11-10 21:23:48 -05006936 }
Josef Bacik2552d172009-04-03 10:14:19 -04006937 } else if (block_group) {
Chris Masonfa9c0d792009-04-03 09:47:43 -04006938 btrfs_put_block_group(block_group);
Josef Bacik2552d172009-04-03 10:14:19 -04006939 }
Chris Mason42e70e72008-11-07 18:17:11 -05006940 }
Josef Bacik2552d172009-04-03 10:14:19 -04006941search:
Miao Xie60d2adb2011-09-09 17:34:35 +08006942 have_caching_bg = false;
Josef Bacik80eb2342008-10-29 14:49:05 -04006943 down_read(&space_info->groups_sem);
Yan, Zhengb742bb822010-05-16 10:46:24 -04006944 list_for_each_entry(block_group, &space_info->block_groups[index],
6945 list) {
Josef Bacik6226cb02009-04-03 10:14:18 -04006946 u64 offset;
Josef Bacik817d52f2009-07-13 21:29:25 -04006947 int cached;
Chris Mason8a1413a22008-11-10 16:13:54 -05006948
Miao Xiee570fd22014-06-19 10:42:50 +08006949 btrfs_grab_block_group(block_group, delalloc);
Josef Bacik2552d172009-04-03 10:14:19 -04006950 search_start = block_group->key.objectid;
Chris Mason42e70e72008-11-07 18:17:11 -05006951
Chris Mason83a50de2010-12-13 15:06:46 -05006952 /*
6953 * this can happen if we end up cycling through all the
6954 * raid types, but we want to make sure we only allocate
6955 * for the proper type.
6956 */
David Sterbab6919a52013-04-29 13:39:40 +00006957 if (!block_group_bits(block_group, flags)) {
Chris Mason83a50de2010-12-13 15:06:46 -05006958 u64 extra = BTRFS_BLOCK_GROUP_DUP |
6959 BTRFS_BLOCK_GROUP_RAID1 |
David Woodhouse53b381b2013-01-29 18:40:14 -05006960 BTRFS_BLOCK_GROUP_RAID5 |
6961 BTRFS_BLOCK_GROUP_RAID6 |
Chris Mason83a50de2010-12-13 15:06:46 -05006962 BTRFS_BLOCK_GROUP_RAID10;
6963
6964 /*
6965 * if they asked for extra copies and this block group
6966 * doesn't provide them, bail. This does allow us to
6967 * fill raid0 from raid1.
6968 */
David Sterbab6919a52013-04-29 13:39:40 +00006969 if ((flags & extra) && !(block_group->flags & extra))
Chris Mason83a50de2010-12-13 15:06:46 -05006970 goto loop;
6971 }
6972
Josef Bacik2552d172009-04-03 10:14:19 -04006973have_block_group:
Josef Bacik291c7d22011-11-14 13:52:14 -05006974 cached = block_group_cache_done(block_group);
6975 if (unlikely(!cached)) {
Liu Bof6373bf2012-12-27 09:01:18 +00006976 ret = cache_block_group(block_group, 0);
Chris Mason1d4284b2012-03-28 20:31:37 -04006977 BUG_ON(ret < 0);
6978 ret = 0;
Josef Bacikea6a4782008-11-20 12:16:16 -05006979 }
6980
Josef Bacik36cce922013-08-05 11:15:21 -04006981 if (unlikely(block_group->cached == BTRFS_CACHE_ERROR))
6982 goto loop;
Josef Bacikea6a4782008-11-20 12:16:16 -05006983 if (unlikely(block_group->ro))
Josef Bacik2552d172009-04-03 10:14:19 -04006984 goto loop;
Josef Bacik0f9dd462008-09-23 13:14:11 -04006985
Josef Bacik0a243252009-09-11 16:11:20 -04006986 /*
Alexandre Oliva062c05c2011-12-07 19:50:42 -05006987 * Ok we want to try and use the cluster allocator, so
6988 * lets look there
Josef Bacik0a243252009-09-11 16:11:20 -04006989 */
Alexandre Oliva062c05c2011-12-07 19:50:42 -05006990 if (last_ptr) {
Miao Xie215a63d2014-01-15 20:00:56 +08006991 struct btrfs_block_group_cache *used_block_group;
Chris Mason8de972b2013-01-04 15:39:43 -05006992 unsigned long aligned_cluster;
Chris Masonfa9c0d792009-04-03 09:47:43 -04006993 /*
6994 * the refill lock keeps out other
6995 * people trying to start a new cluster
6996 */
Miao Xiee570fd22014-06-19 10:42:50 +08006997 used_block_group = btrfs_lock_cluster(block_group,
6998 last_ptr,
6999 delalloc);
7000 if (!used_block_group)
Chris Mason44fb5512009-06-04 15:34:51 -04007001 goto refill_cluster;
Chris Mason44fb5512009-06-04 15:34:51 -04007002
Miao Xiee570fd22014-06-19 10:42:50 +08007003 if (used_block_group != block_group &&
7004 (used_block_group->ro ||
7005 !block_group_bits(used_block_group, flags)))
7006 goto release_cluster;
Alexandre Oliva274bd4f2011-12-07 20:08:40 -05007007
7008 offset = btrfs_alloc_from_cluster(used_block_group,
Miao Xiea4820392013-09-09 13:19:42 +08007009 last_ptr,
7010 num_bytes,
7011 used_block_group->key.objectid,
7012 &max_extent_size);
Chris Masonfa9c0d792009-04-03 09:47:43 -04007013 if (offset) {
7014 /* we have a block, we're done */
7015 spin_unlock(&last_ptr->refill_lock);
Josef Bacik3f7de032011-11-10 08:29:20 -05007016 trace_btrfs_reserve_extent_cluster(root,
Miao Xie89d43462014-01-15 20:00:57 +08007017 used_block_group,
7018 search_start, num_bytes);
Miao Xie215a63d2014-01-15 20:00:56 +08007019 if (used_block_group != block_group) {
Miao Xiee570fd22014-06-19 10:42:50 +08007020 btrfs_release_block_group(block_group,
7021 delalloc);
Miao Xie215a63d2014-01-15 20:00:56 +08007022 block_group = used_block_group;
7023 }
Chris Masonfa9c0d792009-04-03 09:47:43 -04007024 goto checks;
7025 }
7026
Alexandre Oliva274bd4f2011-12-07 20:08:40 -05007027 WARN_ON(last_ptr->block_group != used_block_group);
Miao Xiee570fd22014-06-19 10:42:50 +08007028release_cluster:
Alexandre Oliva062c05c2011-12-07 19:50:42 -05007029 /* If we are on LOOP_NO_EMPTY_SIZE, we can't
7030 * set up a new clusters, so lets just skip it
7031 * and let the allocator find whatever block
7032 * it can find. If we reach this point, we
7033 * will have tried the cluster allocator
7034 * plenty of times and not have found
7035 * anything, so we are likely way too
7036 * fragmented for the clustering stuff to find
Alexandre Olivaa5f6f712011-12-12 04:48:19 -02007037 * anything.
7038 *
7039 * However, if the cluster is taken from the
7040 * current block group, release the cluster
7041 * first, so that we stand a better chance of
7042 * succeeding in the unclustered
7043 * allocation. */
7044 if (loop >= LOOP_NO_EMPTY_SIZE &&
Miao Xiee570fd22014-06-19 10:42:50 +08007045 used_block_group != block_group) {
Alexandre Oliva062c05c2011-12-07 19:50:42 -05007046 spin_unlock(&last_ptr->refill_lock);
Miao Xiee570fd22014-06-19 10:42:50 +08007047 btrfs_release_block_group(used_block_group,
7048 delalloc);
Alexandre Oliva062c05c2011-12-07 19:50:42 -05007049 goto unclustered_alloc;
7050 }
7051
Chris Masonfa9c0d792009-04-03 09:47:43 -04007052 /*
7053 * this cluster didn't work out, free it and
7054 * start over
7055 */
7056 btrfs_return_cluster_to_free_space(NULL, last_ptr);
7057
Miao Xiee570fd22014-06-19 10:42:50 +08007058 if (used_block_group != block_group)
7059 btrfs_release_block_group(used_block_group,
7060 delalloc);
7061refill_cluster:
Alexandre Olivaa5f6f712011-12-12 04:48:19 -02007062 if (loop >= LOOP_NO_EMPTY_SIZE) {
7063 spin_unlock(&last_ptr->refill_lock);
7064 goto unclustered_alloc;
7065 }
7066
Chris Mason8de972b2013-01-04 15:39:43 -05007067 aligned_cluster = max_t(unsigned long,
7068 empty_cluster + empty_size,
7069 block_group->full_stripe_len);
7070
Chris Masonfa9c0d792009-04-03 09:47:43 -04007071 /* allocate a cluster in this block group */
Josef Bacik00361582013-08-14 14:02:47 -04007072 ret = btrfs_find_space_cluster(root, block_group,
7073 last_ptr, search_start,
7074 num_bytes,
7075 aligned_cluster);
Chris Masonfa9c0d792009-04-03 09:47:43 -04007076 if (ret == 0) {
7077 /*
7078 * now pull our allocation out of this
7079 * cluster
7080 */
7081 offset = btrfs_alloc_from_cluster(block_group,
Miao Xiea4820392013-09-09 13:19:42 +08007082 last_ptr,
7083 num_bytes,
7084 search_start,
7085 &max_extent_size);
Chris Masonfa9c0d792009-04-03 09:47:43 -04007086 if (offset) {
7087 /* we found one, proceed */
7088 spin_unlock(&last_ptr->refill_lock);
Josef Bacik3f7de032011-11-10 08:29:20 -05007089 trace_btrfs_reserve_extent_cluster(root,
7090 block_group, search_start,
7091 num_bytes);
Chris Masonfa9c0d792009-04-03 09:47:43 -04007092 goto checks;
7093 }
Josef Bacik0a243252009-09-11 16:11:20 -04007094 } else if (!cached && loop > LOOP_CACHING_NOWAIT
7095 && !failed_cluster_refill) {
Josef Bacik817d52f2009-07-13 21:29:25 -04007096 spin_unlock(&last_ptr->refill_lock);
7097
Josef Bacik0a243252009-09-11 16:11:20 -04007098 failed_cluster_refill = true;
Josef Bacik817d52f2009-07-13 21:29:25 -04007099 wait_block_group_cache_progress(block_group,
7100 num_bytes + empty_cluster + empty_size);
7101 goto have_block_group;
Chris Masonfa9c0d792009-04-03 09:47:43 -04007102 }
Josef Bacik817d52f2009-07-13 21:29:25 -04007103
Chris Masonfa9c0d792009-04-03 09:47:43 -04007104 /*
7105 * at this point we either didn't find a cluster
7106 * or we weren't able to allocate a block from our
7107 * cluster. Free the cluster we've been trying
7108 * to use, and go to the next block group
7109 */
Josef Bacik0a243252009-09-11 16:11:20 -04007110 btrfs_return_cluster_to_free_space(NULL, last_ptr);
Chris Masonfa9c0d792009-04-03 09:47:43 -04007111 spin_unlock(&last_ptr->refill_lock);
Josef Bacik0a243252009-09-11 16:11:20 -04007112 goto loop;
Chris Masonfa9c0d792009-04-03 09:47:43 -04007113 }
7114
Alexandre Oliva062c05c2011-12-07 19:50:42 -05007115unclustered_alloc:
Alexandre Olivaa5f6f712011-12-12 04:48:19 -02007116 spin_lock(&block_group->free_space_ctl->tree_lock);
7117 if (cached &&
7118 block_group->free_space_ctl->free_space <
7119 num_bytes + empty_cluster + empty_size) {
Miao Xiea4820392013-09-09 13:19:42 +08007120 if (block_group->free_space_ctl->free_space >
7121 max_extent_size)
7122 max_extent_size =
7123 block_group->free_space_ctl->free_space;
Alexandre Olivaa5f6f712011-12-12 04:48:19 -02007124 spin_unlock(&block_group->free_space_ctl->tree_lock);
7125 goto loop;
7126 }
7127 spin_unlock(&block_group->free_space_ctl->tree_lock);
7128
Josef Bacik6226cb02009-04-03 10:14:18 -04007129 offset = btrfs_find_space_for_alloc(block_group, search_start,
Miao Xiea4820392013-09-09 13:19:42 +08007130 num_bytes, empty_size,
7131 &max_extent_size);
Josef Bacik1cdda9b2009-10-06 10:04:28 -04007132 /*
7133 * If we didn't find a chunk, and we haven't failed on this
7134 * block group before, and this block group is in the middle of
7135 * caching and we are ok with waiting, then go ahead and wait
7136 * for progress to be made, and set failed_alloc to true.
7137 *
7138 * If failed_alloc is true then we've already waited on this
7139 * block group once and should move on to the next block group.
7140 */
7141 if (!offset && !failed_alloc && !cached &&
7142 loop > LOOP_CACHING_NOWAIT) {
Josef Bacik817d52f2009-07-13 21:29:25 -04007143 wait_block_group_cache_progress(block_group,
Josef Bacik1cdda9b2009-10-06 10:04:28 -04007144 num_bytes + empty_size);
7145 failed_alloc = true;
Josef Bacik817d52f2009-07-13 21:29:25 -04007146 goto have_block_group;
Josef Bacik1cdda9b2009-10-06 10:04:28 -04007147 } else if (!offset) {
Miao Xie60d2adb2011-09-09 17:34:35 +08007148 if (!cached)
7149 have_caching_bg = true;
Josef Bacik1cdda9b2009-10-06 10:04:28 -04007150 goto loop;
Josef Bacik817d52f2009-07-13 21:29:25 -04007151 }
Chris Masonfa9c0d792009-04-03 09:47:43 -04007152checks:
David Sterba4e54b172014-06-05 01:39:19 +02007153 search_start = ALIGN(offset, root->stripesize);
Chris Masone37c9e62007-05-09 20:13:14 -04007154
Josef Bacik2552d172009-04-03 10:14:19 -04007155 /* move on to the next group */
7156 if (search_start + num_bytes >
Miao Xie215a63d2014-01-15 20:00:56 +08007157 block_group->key.objectid + block_group->key.offset) {
7158 btrfs_add_free_space(block_group, offset, num_bytes);
Josef Bacik2552d172009-04-03 10:14:19 -04007159 goto loop;
Josef Bacik6226cb02009-04-03 10:14:18 -04007160 }
Josef Bacik80eb2342008-10-29 14:49:05 -04007161
Josef Bacik6226cb02009-04-03 10:14:18 -04007162 if (offset < search_start)
Miao Xie215a63d2014-01-15 20:00:56 +08007163 btrfs_add_free_space(block_group, offset,
Josef Bacik6226cb02009-04-03 10:14:18 -04007164 search_start - offset);
7165 BUG_ON(offset > search_start);
7166
Miao Xie215a63d2014-01-15 20:00:56 +08007167 ret = btrfs_update_reserved_bytes(block_group, num_bytes,
Miao Xiee570fd22014-06-19 10:42:50 +08007168 alloc_type, delalloc);
Yan, Zhengf0486c62010-05-16 10:46:25 -04007169 if (ret == -EAGAIN) {
Miao Xie215a63d2014-01-15 20:00:56 +08007170 btrfs_add_free_space(block_group, offset, num_bytes);
Yan, Zhengf0486c62010-05-16 10:46:25 -04007171 goto loop;
7172 }
Yan Zheng11833d62009-09-11 16:11:19 -04007173
Josef Bacik2552d172009-04-03 10:14:19 -04007174 /* we are all good, lets return */
Yan, Zhengf0486c62010-05-16 10:46:25 -04007175 ins->objectid = search_start;
7176 ins->offset = num_bytes;
7177
Josef Bacik3f7de032011-11-10 08:29:20 -05007178 trace_btrfs_reserve_extent(orig_root, block_group,
7179 search_start, num_bytes);
Miao Xiee570fd22014-06-19 10:42:50 +08007180 btrfs_release_block_group(block_group, delalloc);
Josef Bacik2552d172009-04-03 10:14:19 -04007181 break;
7182loop:
Josef Bacik0a243252009-09-11 16:11:20 -04007183 failed_cluster_refill = false;
Josef Bacik1cdda9b2009-10-06 10:04:28 -04007184 failed_alloc = false;
Yan, Zhengb742bb822010-05-16 10:46:24 -04007185 BUG_ON(index != get_block_group_index(block_group));
Miao Xiee570fd22014-06-19 10:42:50 +08007186 btrfs_release_block_group(block_group, delalloc);
Josef Bacik2552d172009-04-03 10:14:19 -04007187 }
7188 up_read(&space_info->groups_sem);
Chris Masonf5a31e12008-11-10 11:47:09 -05007189
Miao Xie60d2adb2011-09-09 17:34:35 +08007190 if (!ins->objectid && loop >= LOOP_CACHING_WAIT && have_caching_bg)
7191 goto search;
7192
Yan, Zhengb742bb822010-05-16 10:46:24 -04007193 if (!ins->objectid && ++index < BTRFS_NR_RAID_TYPES)
7194 goto search;
7195
Josef Bacik285ff5a2012-01-13 15:27:45 -05007196 /*
Josef Bacikccf0e722009-11-10 21:23:48 -05007197 * LOOP_CACHING_NOWAIT, search partially cached block groups, kicking
7198 * caching kthreads as we move along
Josef Bacik817d52f2009-07-13 21:29:25 -04007199 * LOOP_CACHING_WAIT, search everything, and wait if our bg is caching
7200 * LOOP_ALLOC_CHUNK, force a chunk allocation and try again
7201 * LOOP_NO_EMPTY_SIZE, set empty_size and empty_cluster to 0 and try
7202 * again
Chris Masonfa9c0d792009-04-03 09:47:43 -04007203 */
Josef Bacik723bda22011-05-27 16:11:38 -04007204 if (!ins->objectid && loop < LOOP_NO_EMPTY_SIZE) {
Yan, Zhengb742bb822010-05-16 10:46:24 -04007205 index = 0;
Josef Bacik723bda22011-05-27 16:11:38 -04007206 loop++;
Josef Bacik817d52f2009-07-13 21:29:25 -04007207 if (loop == LOOP_ALLOC_CHUNK) {
Josef Bacik00361582013-08-14 14:02:47 -04007208 struct btrfs_trans_handle *trans;
Wang Shilongf017f152014-03-13 13:19:47 +08007209 int exist = 0;
Josef Bacik00361582013-08-14 14:02:47 -04007210
Wang Shilongf017f152014-03-13 13:19:47 +08007211 trans = current->journal_info;
7212 if (trans)
7213 exist = 1;
7214 else
7215 trans = btrfs_join_transaction(root);
7216
Josef Bacik00361582013-08-14 14:02:47 -04007217 if (IS_ERR(trans)) {
7218 ret = PTR_ERR(trans);
7219 goto out;
7220 }
7221
David Sterbab6919a52013-04-29 13:39:40 +00007222 ret = do_chunk_alloc(trans, root, flags,
Josef Bacikea658ba2012-09-11 16:57:25 -04007223 CHUNK_ALLOC_FORCE);
7224 /*
7225 * Do not bail out on ENOSPC since we
7226 * can do more things.
7227 */
Josef Bacik00361582013-08-14 14:02:47 -04007228 if (ret < 0 && ret != -ENOSPC)
Josef Bacikea658ba2012-09-11 16:57:25 -04007229 btrfs_abort_transaction(trans,
7230 root, ret);
Josef Bacik00361582013-08-14 14:02:47 -04007231 else
7232 ret = 0;
Wang Shilongf017f152014-03-13 13:19:47 +08007233 if (!exist)
7234 btrfs_end_transaction(trans, root);
Josef Bacik00361582013-08-14 14:02:47 -04007235 if (ret)
Josef Bacikea658ba2012-09-11 16:57:25 -04007236 goto out;
Josef Bacik723bda22011-05-27 16:11:38 -04007237 }
7238
7239 if (loop == LOOP_NO_EMPTY_SIZE) {
Chris Masonfa9c0d792009-04-03 09:47:43 -04007240 empty_size = 0;
7241 empty_cluster = 0;
7242 }
Chris Mason42e70e72008-11-07 18:17:11 -05007243
Josef Bacik723bda22011-05-27 16:11:38 -04007244 goto search;
Josef Bacik2552d172009-04-03 10:14:19 -04007245 } else if (!ins->objectid) {
7246 ret = -ENOSPC;
Josef Bacikd82a6f1d2011-05-11 15:26:06 -04007247 } else if (ins->objectid) {
Josef Bacik2552d172009-04-03 10:14:19 -04007248 ret = 0;
7249 }
Jeff Mahoney79787ea2012-03-12 16:03:00 +01007250out:
Miao Xiea4820392013-09-09 13:19:42 +08007251 if (ret == -ENOSPC)
7252 ins->offset = max_extent_size;
Chris Mason0f70abe2007-02-28 16:46:22 -05007253 return ret;
Chris Masonfec577f2007-02-26 10:40:21 -05007254}
Chris Masonec44a352008-04-28 15:29:52 -04007255
Josef Bacik9ed74f22009-09-11 16:12:44 -04007256static void dump_space_info(struct btrfs_space_info *info, u64 bytes,
7257 int dump_block_groups)
Josef Bacik0f9dd462008-09-23 13:14:11 -04007258{
7259 struct btrfs_block_group_cache *cache;
Yan, Zhengb742bb822010-05-16 10:46:24 -04007260 int index = 0;
Josef Bacik0f9dd462008-09-23 13:14:11 -04007261
Josef Bacik9ed74f22009-09-11 16:12:44 -04007262 spin_lock(&info->lock);
Frank Holtonefe120a2013-12-20 11:37:06 -05007263 printk(KERN_INFO "BTRFS: space_info %llu has %llu free, is %sfull\n",
Geert Uytterhoevenc1c9ff72013-08-20 13:20:07 +02007264 info->flags,
7265 info->total_bytes - info->bytes_used - info->bytes_pinned -
7266 info->bytes_reserved - info->bytes_readonly,
Chris Masond3977122009-01-05 21:25:51 -05007267 (info->full) ? "" : "not ");
Frank Holtonefe120a2013-12-20 11:37:06 -05007268 printk(KERN_INFO "BTRFS: space_info total=%llu, used=%llu, pinned=%llu, "
Yan, Zheng8929ecfa2010-05-16 10:49:58 -04007269 "reserved=%llu, may_use=%llu, readonly=%llu\n",
Geert Uytterhoevenc1c9ff72013-08-20 13:20:07 +02007270 info->total_bytes, info->bytes_used, info->bytes_pinned,
7271 info->bytes_reserved, info->bytes_may_use,
7272 info->bytes_readonly);
Josef Bacik9ed74f22009-09-11 16:12:44 -04007273 spin_unlock(&info->lock);
7274
7275 if (!dump_block_groups)
7276 return;
Josef Bacik0f9dd462008-09-23 13:14:11 -04007277
Josef Bacik80eb2342008-10-29 14:49:05 -04007278 down_read(&info->groups_sem);
Yan, Zhengb742bb822010-05-16 10:46:24 -04007279again:
7280 list_for_each_entry(cache, &info->block_groups[index], list) {
Josef Bacik0f9dd462008-09-23 13:14:11 -04007281 spin_lock(&cache->lock);
Frank Holtonefe120a2013-12-20 11:37:06 -05007282 printk(KERN_INFO "BTRFS: "
7283 "block group %llu has %llu bytes, "
7284 "%llu used %llu pinned %llu reserved %s\n",
Geert Uytterhoevenc1c9ff72013-08-20 13:20:07 +02007285 cache->key.objectid, cache->key.offset,
7286 btrfs_block_group_used(&cache->item), cache->pinned,
7287 cache->reserved, cache->ro ? "[readonly]" : "");
Josef Bacik0f9dd462008-09-23 13:14:11 -04007288 btrfs_dump_free_space(cache, bytes);
7289 spin_unlock(&cache->lock);
7290 }
Yan, Zhengb742bb822010-05-16 10:46:24 -04007291 if (++index < BTRFS_NR_RAID_TYPES)
7292 goto again;
Josef Bacik80eb2342008-10-29 14:49:05 -04007293 up_read(&info->groups_sem);
Josef Bacik0f9dd462008-09-23 13:14:11 -04007294}
Zheng Yane8569812008-09-26 10:05:48 -04007295
/*
 * Reserve a data/metadata extent of at least @min_alloc_size bytes.
 *
 * Tries find_free_extent() with @num_bytes first; on -ENOSPC it halves the
 * request (clamped to the largest free chunk reported back in ins->offset
 * and rounded down to a sector boundary) and retries, until the request
 * reaches @min_alloc_size, which is attempted exactly once more.
 *
 * On success the reservation is returned in @ins (objectid = start,
 * offset = length).  Returns 0 or a negative errno (-ENOSPC when even the
 * minimum size could not be satisfied).
 */
int btrfs_reserve_extent(struct btrfs_root *root,
			 u64 num_bytes, u64 min_alloc_size,
			 u64 empty_size, u64 hint_byte,
			 struct btrfs_key *ins, int is_data, int delalloc)
{
	bool final_tried = false;
	u64 flags;
	int ret;

	flags = btrfs_get_alloc_profile(root, is_data);
again:
	WARN_ON(num_bytes < root->sectorsize);
	ret = find_free_extent(root, num_bytes, empty_size, hint_byte, ins,
			       flags, delalloc);

	if (ret == -ENOSPC) {
		/*
		 * ins->offset carries the max extent size seen during the
		 * failed search; 0 means there is nothing left to shrink to.
		 */
		if (!final_tried && ins->offset) {
			num_bytes = min(num_bytes >> 1, ins->offset);
			num_bytes = round_down(num_bytes, root->sectorsize);
			num_bytes = max(num_bytes, min_alloc_size);
			if (num_bytes == min_alloc_size)
				final_tried = true;
			goto again;
		} else if (btrfs_test_opt(root, ENOSPC_DEBUG)) {
			struct btrfs_space_info *sinfo;

			sinfo = __find_space_info(root->fs_info, flags);
			btrfs_err(root->fs_info, "allocation failed flags %llu, wanted %llu",
				  flags, num_bytes);
			if (sinfo)
				dump_space_info(sinfo, num_bytes, 1);
		}
	}

	return ret;
}
7332
Chris Masone688b7252011-10-31 20:52:39 -04007333static int __btrfs_free_reserved_extent(struct btrfs_root *root,
Miao Xiee570fd22014-06-19 10:42:50 +08007334 u64 start, u64 len,
7335 int pin, int delalloc)
Chris Mason65b51a02008-08-01 15:11:20 -04007336{
Josef Bacik0f9dd462008-09-23 13:14:11 -04007337 struct btrfs_block_group_cache *cache;
Liu Hui1f3c79a2009-01-05 15:57:51 -05007338 int ret = 0;
Josef Bacik0f9dd462008-09-23 13:14:11 -04007339
Josef Bacik0f9dd462008-09-23 13:14:11 -04007340 cache = btrfs_lookup_block_group(root->fs_info, start);
7341 if (!cache) {
Simon Kirbyc2cf52e2013-03-19 22:41:23 +00007342 btrfs_err(root->fs_info, "Unable to find block group for %llu",
Geert Uytterhoevenc1c9ff72013-08-20 13:20:07 +02007343 start);
Josef Bacik0f9dd462008-09-23 13:14:11 -04007344 return -ENOSPC;
7345 }
Liu Hui1f3c79a2009-01-05 15:57:51 -05007346
Chris Masone688b7252011-10-31 20:52:39 -04007347 if (pin)
7348 pin_down_extent(root, cache, start, len, 1);
7349 else {
Filipe Mananadcc82f42015-03-23 14:07:40 +00007350 if (btrfs_test_opt(root, DISCARD))
7351 ret = btrfs_discard_extent(root, start, len, NULL);
Chris Masone688b7252011-10-31 20:52:39 -04007352 btrfs_add_free_space(cache, start, len);
Miao Xiee570fd22014-06-19 10:42:50 +08007353 btrfs_update_reserved_bytes(cache, len, RESERVE_FREE, delalloc);
Chris Masone688b7252011-10-31 20:52:39 -04007354 }
Dongsheng Yang31193212014-12-12 16:44:35 +08007355
Chris Masonfa9c0d792009-04-03 09:47:43 -04007356 btrfs_put_block_group(cache);
Josef Bacik817d52f2009-07-13 21:29:25 -04007357
liubo1abe9b82011-03-24 11:18:59 +00007358 trace_btrfs_reserved_extent_free(root, start, len);
7359
Chris Masone6dcd2d2008-07-17 12:53:50 -04007360 return ret;
7361}
7362
/*
 * Release a reserved extent back to the free-space cache (no pinning).
 * Thin wrapper around __btrfs_free_reserved_extent() with pin == 0.
 */
int btrfs_free_reserved_extent(struct btrfs_root *root,
			       u64 start, u64 len, int delalloc)
{
	return __btrfs_free_reserved_extent(root, start, len, 0, delalloc);
}
7368
/*
 * Release a reserved extent but pin it until transaction commit.
 * Thin wrapper around __btrfs_free_reserved_extent() with pin == 1
 * (delalloc accounting does not apply on this path, hence 0).
 */
int btrfs_free_and_pin_reserved_extent(struct btrfs_root *root,
				       u64 start, u64 len)
{
	return __btrfs_free_reserved_extent(root, start, len, 1, 0);
}
7374
/*
 * Insert a data EXTENT_ITEM (with one inline backref) into the extent
 * tree for an already-reserved extent described by @ins, then update the
 * free-space tree and block group accounting.
 *
 * @parent > 0 means the reference is shared (full backref); otherwise a
 * normal extent data ref keyed by root/owner/offset is written.
 * @ref_mod is the initial reference count stored in the item.
 */
static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
				      struct btrfs_root *root,
				      u64 parent, u64 root_objectid,
				      u64 flags, u64 owner, u64 offset,
				      struct btrfs_key *ins, int ref_mod)
{
	int ret;
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct btrfs_extent_item *extent_item;
	struct btrfs_extent_inline_ref *iref;
	struct btrfs_path *path;
	struct extent_buffer *leaf;
	int type;
	u32 size;

	if (parent > 0)
		type = BTRFS_SHARED_DATA_REF_KEY;
	else
		type = BTRFS_EXTENT_DATA_REF_KEY;

	/* Item is the extent_item plus exactly one inline ref of @type. */
	size = sizeof(*extent_item) + btrfs_extent_inline_ref_size(type);

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	path->leave_spinning = 1;
	ret = btrfs_insert_empty_item(trans, fs_info->extent_root, path,
				      ins, size);
	if (ret) {
		btrfs_free_path(path);
		return ret;
	}

	leaf = path->nodes[0];
	extent_item = btrfs_item_ptr(leaf, path->slots[0],
				     struct btrfs_extent_item);
	btrfs_set_extent_refs(leaf, extent_item, ref_mod);
	btrfs_set_extent_generation(leaf, extent_item, trans->transid);
	btrfs_set_extent_flags(leaf, extent_item,
			       flags | BTRFS_EXTENT_FLAG_DATA);

	/* The inline ref lives immediately after the extent item. */
	iref = (struct btrfs_extent_inline_ref *)(extent_item + 1);
	btrfs_set_extent_inline_ref_type(leaf, iref, type);
	if (parent > 0) {
		struct btrfs_shared_data_ref *ref;
		ref = (struct btrfs_shared_data_ref *)(iref + 1);
		btrfs_set_extent_inline_ref_offset(leaf, iref, parent);
		btrfs_set_shared_data_ref_count(leaf, ref, ref_mod);
	} else {
		struct btrfs_extent_data_ref *ref;
		ref = (struct btrfs_extent_data_ref *)(&iref->offset);
		btrfs_set_extent_data_ref_root(leaf, ref, root_objectid);
		btrfs_set_extent_data_ref_objectid(leaf, ref, owner);
		btrfs_set_extent_data_ref_offset(leaf, ref, offset);
		btrfs_set_extent_data_ref_count(leaf, ref, ref_mod);
	}

	btrfs_mark_buffer_dirty(path->nodes[0]);
	btrfs_free_path(path);

	/* The extent is now allocated: take it out of the free-space tree. */
	ret = remove_from_free_space_tree(trans, fs_info, ins->objectid,
					  ins->offset);
	if (ret)
		return ret;

	ret = update_block_group(trans, root, ins->objectid, ins->offset, 1);
	if (ret) { /* -ENOENT, logic error */
		btrfs_err(fs_info, "update block group failed for %llu %llu",
			  ins->objectid, ins->offset);
		BUG();
	}
	trace_btrfs_reserved_extent_alloc(root, ins->objectid, ins->offset);
	return ret;
}
7450
/*
 * Insert a metadata (tree block) EXTENT_ITEM for an already-reserved
 * block described by @ins, then update the free-space tree and block
 * group accounting.
 *
 * With the SKINNY_METADATA incompat feature the tree_block_info struct
 * is omitted and the level is encoded in the key (ins->offset), so
 * num_bytes must be taken from root->nodesize rather than ins->offset.
 * On insert failure the reserved block is freed-and-pinned so the space
 * is not leaked.
 */
static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
				     struct btrfs_root *root,
				     u64 parent, u64 root_objectid,
				     u64 flags, struct btrfs_disk_key *key,
				     int level, struct btrfs_key *ins,
				     int no_quota)
{
	int ret;
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct btrfs_extent_item *extent_item;
	struct btrfs_tree_block_info *block_info;
	struct btrfs_extent_inline_ref *iref;
	struct btrfs_path *path;
	struct extent_buffer *leaf;
	u32 size = sizeof(*extent_item) + sizeof(*iref);
	u64 num_bytes = ins->offset;
	bool skinny_metadata = btrfs_fs_incompat(root->fs_info,
						 SKINNY_METADATA);

	if (!skinny_metadata)
		size += sizeof(*block_info);

	path = btrfs_alloc_path();
	if (!path) {
		btrfs_free_and_pin_reserved_extent(root, ins->objectid,
						   root->nodesize);
		return -ENOMEM;
	}

	path->leave_spinning = 1;
	ret = btrfs_insert_empty_item(trans, fs_info->extent_root, path,
				      ins, size);
	if (ret) {
		btrfs_free_path(path);
		btrfs_free_and_pin_reserved_extent(root, ins->objectid,
						   root->nodesize);
		return ret;
	}

	leaf = path->nodes[0];
	extent_item = btrfs_item_ptr(leaf, path->slots[0],
				     struct btrfs_extent_item);
	btrfs_set_extent_refs(leaf, extent_item, 1);
	btrfs_set_extent_generation(leaf, extent_item, trans->transid);
	btrfs_set_extent_flags(leaf, extent_item,
			       flags | BTRFS_EXTENT_FLAG_TREE_BLOCK);

	if (skinny_metadata) {
		/* No tree_block_info; the inline ref follows directly. */
		iref = (struct btrfs_extent_inline_ref *)(extent_item + 1);
		num_bytes = root->nodesize;
	} else {
		block_info = (struct btrfs_tree_block_info *)(extent_item + 1);
		btrfs_set_tree_block_key(leaf, block_info, key);
		btrfs_set_tree_block_level(leaf, block_info, level);
		iref = (struct btrfs_extent_inline_ref *)(block_info + 1);
	}

	if (parent > 0) {
		/* Shared block refs are only valid with full backrefs. */
		BUG_ON(!(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF));
		btrfs_set_extent_inline_ref_type(leaf, iref,
						 BTRFS_SHARED_BLOCK_REF_KEY);
		btrfs_set_extent_inline_ref_offset(leaf, iref, parent);
	} else {
		btrfs_set_extent_inline_ref_type(leaf, iref,
						 BTRFS_TREE_BLOCK_REF_KEY);
		btrfs_set_extent_inline_ref_offset(leaf, iref, root_objectid);
	}

	btrfs_mark_buffer_dirty(leaf);
	btrfs_free_path(path);

	ret = remove_from_free_space_tree(trans, fs_info, ins->objectid,
					  num_bytes);
	if (ret)
		return ret;

	ret = update_block_group(trans, root, ins->objectid, root->nodesize,
				 1);
	if (ret) { /* -ENOENT, logic error */
		btrfs_err(fs_info, "update block group failed for %llu %llu",
			  ins->objectid, ins->offset);
		BUG();
	}

	trace_btrfs_reserved_extent_alloc(root, ins->objectid, root->nodesize);
	return ret;
}
7538
/*
 * Record the allocation of a reserved file (data) extent by queueing a
 * delayed data ref; the extent item itself is inserted later when the
 * delayed refs are run.  Not valid for the tree-log root, which records
 * extents via btrfs_alloc_logged_file_extent() instead.
 */
int btrfs_alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
				     struct btrfs_root *root,
				     u64 root_objectid, u64 owner,
				     u64 offset, struct btrfs_key *ins)
{
	int ret;

	BUG_ON(root_objectid == BTRFS_TREE_LOG_OBJECTID);

	ret = btrfs_add_delayed_data_ref(root->fs_info, trans, ins->objectid,
					 ins->offset, 0,
					 root_objectid, owner, offset,
					 BTRFS_ADD_DELAYED_EXTENT, NULL, 0);
	return ret;
}
Chris Masone02119d2008-09-05 16:13:11 -04007554
/*
 * this is used by the tree logging recovery code. It records that
 * an extent has been allocated and makes sure to clear the free
 * space cache bits as well
 */
int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans,
				   struct btrfs_root *root,
				   u64 root_objectid, u64 owner, u64 offset,
				   struct btrfs_key *ins)
{
	int ret;
	struct btrfs_block_group_cache *block_group;

	/*
	 * Mixed block groups will exclude before processing the log so we only
	 * need to do the exlude dance if this fs isn't mixed.
	 */
	if (!btrfs_fs_incompat(root->fs_info, MIXED_GROUPS)) {
		ret = __exclude_logged_extent(root, ins->objectid, ins->offset);
		if (ret)
			return ret;
	}

	block_group = btrfs_lookup_block_group(root->fs_info, ins->objectid);
	if (!block_group)
		return -EINVAL;

	/* Reserve the range without touching space_info accounting. */
	ret = btrfs_update_reserved_bytes(block_group, ins->offset,
					  RESERVE_ALLOC_NO_ACCOUNT, 0);
	BUG_ON(ret); /* logic error */
	/* Insert the extent item directly with an initial refcount of 1. */
	ret = alloc_reserved_file_extent(trans, root, 0, root_objectid,
					 0, owner, offset, ins, 1);
	btrfs_put_block_group(block_group);
	return ret;
}
7590
/*
 * Find/create the extent buffer for a freshly allocated tree block at
 * @bytenr, lock it, clean it, and mark it dirty in the right extent-io
 * tree.  Returns the buffer locked (blocking lock held) and uptodate,
 * or ERR_PTR(-ENOMEM).
 *
 * Log-tree blocks are tracked in root->dirty_log_pages; since two log
 * transactions can run at once, even/odd log transids use different
 * extent bits (DIRTY vs NEW) so their pages can be told apart.
 */
static struct extent_buffer *
btrfs_init_new_buffer(struct btrfs_trans_handle *trans, struct btrfs_root *root,
		      u64 bytenr, int level)
{
	struct extent_buffer *buf;

	buf = btrfs_find_create_tree_block(root, bytenr);
	if (!buf)
		return ERR_PTR(-ENOMEM);
	btrfs_set_header_generation(buf, trans->transid);
	btrfs_set_buffer_lockdep_class(root->root_key.objectid, buf, level);
	btrfs_tree_lock(buf);
	clean_tree_block(trans, root->fs_info, buf);
	clear_bit(EXTENT_BUFFER_STALE, &buf->bflags);

	btrfs_set_lock_blocking(buf);
	btrfs_set_buffer_uptodate(buf);

	if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID) {
		buf->log_index = root->log_transid % 2;
		/*
		 * we allow two log transactions at a time, use different
		 * EXENT bit to differentiate dirty pages.
		 */
		if (buf->log_index == 0)
			set_extent_dirty(&root->dirty_log_pages, buf->start,
					 buf->start + buf->len - 1, GFP_NOFS);
		else
			set_extent_new(&root->dirty_log_pages, buf->start,
				       buf->start + buf->len - 1, GFP_NOFS);
	} else {
		buf->log_index = -1;
		set_extent_dirty(&trans->transaction->dirty_pages, buf->start,
				 buf->start + buf->len - 1, GFP_NOFS);
	}
	trans->blocks_used++;
	/* this returns a buffer locked for blocking */
	return buf;
}
7630
/*
 * Pick the block reserve to charge a new tree block of @blocksize to,
 * and consume the bytes from it.
 *
 * Fallback order:
 *  1. take the bytes straight from the root's reserve;
 *  2. for the global reserve, refresh it once and retry;
 *  3. reserve fresh metadata bytes (no flushing);
 *  4. as a last resort, steal from the global reserve when it shares
 *     the same space_info.
 * Returns the reserve actually charged, or ERR_PTR on failure.
 * Reserves marked ->failfast skip all fallbacks after the first miss.
 */
static struct btrfs_block_rsv *
use_block_rsv(struct btrfs_trans_handle *trans,
	      struct btrfs_root *root, u32 blocksize)
{
	struct btrfs_block_rsv *block_rsv;
	struct btrfs_block_rsv *global_rsv = &root->fs_info->global_block_rsv;
	int ret;
	bool global_updated = false;

	block_rsv = get_block_rsv(trans, root);

	if (unlikely(block_rsv->size == 0))
		goto try_reserve;
again:
	ret = block_rsv_use_bytes(block_rsv, blocksize);
	if (!ret)
		return block_rsv;

	if (block_rsv->failfast)
		return ERR_PTR(ret);

	if (block_rsv->type == BTRFS_BLOCK_RSV_GLOBAL && !global_updated) {
		global_updated = true;
		update_global_block_rsv(root->fs_info);
		goto again;
	}

	if (btrfs_test_opt(root, ENOSPC_DEBUG)) {
		/* Ratelimited: at most one warning per 10 intervals. */
		static DEFINE_RATELIMIT_STATE(_rs,
					      DEFAULT_RATELIMIT_INTERVAL * 10,
					      /*DEFAULT_RATELIMIT_BURST*/ 1);
		if (__ratelimit(&_rs))
			WARN(1, KERN_DEBUG
				"BTRFS: block rsv returned %d\n", ret);
	}
try_reserve:
	ret = reserve_metadata_bytes(root, block_rsv, blocksize,
				     BTRFS_RESERVE_NO_FLUSH);
	if (!ret)
		return block_rsv;
	/*
	 * If we couldn't reserve metadata bytes try and use some from
	 * the global reserve if its space type is the same as the global
	 * reservation.
	 */
	if (block_rsv->type != BTRFS_BLOCK_RSV_GLOBAL &&
	    block_rsv->space_info == global_rsv->space_info) {
		ret = block_rsv_use_bytes(global_rsv, blocksize);
		if (!ret)
			return global_rsv;
	}
	return ERR_PTR(ret);
}
7684
/*
 * Undo use_block_rsv(): return @blocksize bytes to @block_rsv and
 * release any now-excess reservation back to the space_info.
 */
static void unuse_block_rsv(struct btrfs_fs_info *fs_info,
			    struct btrfs_block_rsv *block_rsv, u32 blocksize)
{
	block_rsv_add_bytes(block_rsv, blocksize, 0);
	block_rsv_release_bytes(fs_info, block_rsv, NULL, 0);
}
7691
/*
 * finds a free extent and does all the dirty work required for allocation
 * returns the tree buffer or an ERR_PTR on error.
 *
 * Steps: charge a block reserve, reserve an extent, initialize the new
 * buffer, and (except for log-tree blocks) queue a delayed tree ref so
 * the extent item is inserted when delayed refs run.  Errors unwind in
 * reverse order via the out_* labels.
 */
struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans,
					struct btrfs_root *root,
					u64 parent, u64 root_objectid,
					struct btrfs_disk_key *key, int level,
					u64 hint, u64 empty_size)
{
	struct btrfs_key ins;
	struct btrfs_block_rsv *block_rsv;
	struct extent_buffer *buf;
	struct btrfs_delayed_extent_op *extent_op;
	u64 flags = 0;
	int ret;
	u32 blocksize = root->nodesize;
	bool skinny_metadata = btrfs_fs_incompat(root->fs_info,
						 SKINNY_METADATA);

	/* Test-infrastructure roots allocate from a simple bump pointer. */
	if (btrfs_test_is_dummy_root(root)) {
		buf = btrfs_init_new_buffer(trans, root, root->alloc_bytenr,
					    level);
		if (!IS_ERR(buf))
			root->alloc_bytenr += blocksize;
		return buf;
	}

	block_rsv = use_block_rsv(trans, root, blocksize);
	if (IS_ERR(block_rsv))
		return ERR_CAST(block_rsv);

	ret = btrfs_reserve_extent(root, blocksize, blocksize,
				   empty_size, hint, &ins, 0, 0);
	if (ret)
		goto out_unuse;

	buf = btrfs_init_new_buffer(trans, root, ins.objectid, level);
	if (IS_ERR(buf)) {
		ret = PTR_ERR(buf);
		goto out_free_reserved;
	}

	if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
		if (parent == 0)
			parent = ins.objectid;
		flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
	} else
		BUG_ON(parent > 0);

	if (root_objectid != BTRFS_TREE_LOG_OBJECTID) {
		extent_op = btrfs_alloc_delayed_extent_op();
		if (!extent_op) {
			ret = -ENOMEM;
			goto out_free_buf;
		}
		if (key)
			memcpy(&extent_op->key, key, sizeof(extent_op->key));
		else
			memset(&extent_op->key, 0, sizeof(extent_op->key));
		extent_op->flags_to_set = flags;
		/* Skinny metadata items carry no key to update. */
		if (skinny_metadata)
			extent_op->update_key = 0;
		else
			extent_op->update_key = 1;
		extent_op->update_flags = 1;
		extent_op->is_data = 0;
		extent_op->level = level;

		ret = btrfs_add_delayed_tree_ref(root->fs_info, trans,
						 ins.objectid, ins.offset,
						 parent, root_objectid, level,
						 BTRFS_ADD_DELAYED_EXTENT,
						 extent_op, 0);
		if (ret)
			goto out_free_delayed;
	}
	return buf;

out_free_delayed:
	btrfs_free_delayed_extent_op(extent_op);
out_free_buf:
	free_extent_buffer(buf);
out_free_reserved:
	btrfs_free_reserved_extent(root, ins.objectid, ins.offset, 0);
out_unuse:
	unuse_block_rsv(root->fs_info, block_rsv, blocksize);
	return ERR_PTR(ret);
}
Chris Masona28ec192007-03-06 20:08:01 -05007781
/*
 * State carried through one subtree walk (walk_down_tree()/walk_up_tree()
 * and their *_proc helpers below).
 */
struct walk_control {
	/* reference count of the block at each level, filled in by
	 * btrfs_lookup_extent_info() as the walk visits it */
	u64 refs[BTRFS_MAX_LEVEL];
	/* extent flags (e.g. BTRFS_BLOCK_FLAG_FULL_BACKREF) per level,
	 * looked up together with refs[] */
	u64 flags[BTRFS_MAX_LEVEL];
	/* key bounding how far the UPDATE_BACKREF pass has progressed;
	 * compared against node keys with btrfs_comp_cpu_keys() */
	struct btrfs_key update_progress;
	/* DROP_REFERENCE or UPDATE_BACKREF */
	int stage;
	/* level currently being processed */
	int level;
	/* level of the shared block that switched us to UPDATE_BACKREF;
	 * -1 once the stage drops back to DROP_REFERENCE */
	int shared_level;
	/* nonzero while back refs for newer blocks still need updating */
	int update_ref;
	/* keep tree locks held while walking down (see walk_down_proc()) */
	int keep_locks;
	/* readahead window state for reada_walk_down() */
	int reada_slot;
	int reada_count;
	/* presumably set when the walk is done on behalf of relocation;
	 * not referenced in this part of the file — see callers */
	int for_reloc;
};

/* walk stages: dropping our reference vs. updating back refs first */
#define DROP_REFERENCE	1
#define UPDATE_BACKREF	2
7798
Yan, Zheng1c4850e2009-09-21 15:55:59 -04007799static noinline void reada_walk_down(struct btrfs_trans_handle *trans,
7800 struct btrfs_root *root,
7801 struct walk_control *wc,
7802 struct btrfs_path *path)
7803{
7804 u64 bytenr;
7805 u64 generation;
7806 u64 refs;
Yan, Zheng94fcca92009-10-09 09:25:16 -04007807 u64 flags;
Yan, Zheng1c4850e2009-09-21 15:55:59 -04007808 u32 nritems;
7809 u32 blocksize;
7810 struct btrfs_key key;
7811 struct extent_buffer *eb;
7812 int ret;
7813 int slot;
7814 int nread = 0;
7815
7816 if (path->slots[wc->level] < wc->reada_slot) {
7817 wc->reada_count = wc->reada_count * 2 / 3;
7818 wc->reada_count = max(wc->reada_count, 2);
7819 } else {
7820 wc->reada_count = wc->reada_count * 3 / 2;
7821 wc->reada_count = min_t(int, wc->reada_count,
7822 BTRFS_NODEPTRS_PER_BLOCK(root));
7823 }
7824
7825 eb = path->nodes[wc->level];
7826 nritems = btrfs_header_nritems(eb);
David Sterba707e8a02014-06-04 19:22:26 +02007827 blocksize = root->nodesize;
Yan, Zheng1c4850e2009-09-21 15:55:59 -04007828
7829 for (slot = path->slots[wc->level]; slot < nritems; slot++) {
7830 if (nread >= wc->reada_count)
7831 break;
7832
7833 cond_resched();
7834 bytenr = btrfs_node_blockptr(eb, slot);
7835 generation = btrfs_node_ptr_generation(eb, slot);
7836
7837 if (slot == path->slots[wc->level])
7838 goto reada;
7839
7840 if (wc->stage == UPDATE_BACKREF &&
7841 generation <= root->root_key.offset)
7842 continue;
7843
Yan, Zheng94fcca92009-10-09 09:25:16 -04007844 /* We don't lock the tree block, it's OK to be racy here */
Josef Bacik3173a182013-03-07 14:22:04 -05007845 ret = btrfs_lookup_extent_info(trans, root, bytenr,
7846 wc->level - 1, 1, &refs,
7847 &flags);
Jeff Mahoney79787ea2012-03-12 16:03:00 +01007848 /* We don't care about errors in readahead. */
7849 if (ret < 0)
7850 continue;
Yan, Zheng94fcca92009-10-09 09:25:16 -04007851 BUG_ON(refs == 0);
7852
Yan, Zheng1c4850e2009-09-21 15:55:59 -04007853 if (wc->stage == DROP_REFERENCE) {
Yan, Zheng1c4850e2009-09-21 15:55:59 -04007854 if (refs == 1)
7855 goto reada;
7856
Yan, Zheng94fcca92009-10-09 09:25:16 -04007857 if (wc->level == 1 &&
7858 (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF))
7859 continue;
Yan, Zheng1c4850e2009-09-21 15:55:59 -04007860 if (!wc->update_ref ||
7861 generation <= root->root_key.offset)
7862 continue;
7863 btrfs_node_key_to_cpu(eb, &key, slot);
7864 ret = btrfs_comp_cpu_keys(&key,
7865 &wc->update_progress);
7866 if (ret < 0)
7867 continue;
Yan, Zheng94fcca92009-10-09 09:25:16 -04007868 } else {
7869 if (wc->level == 1 &&
7870 (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF))
7871 continue;
Yan, Zheng1c4850e2009-09-21 15:55:59 -04007872 }
7873reada:
David Sterbad3e46fe2014-06-15 02:04:19 +02007874 readahead_tree_block(root, bytenr);
Yan, Zheng1c4850e2009-09-21 15:55:59 -04007875 nread++;
7876 }
7877 wc->reada_slot = slot;
7878}
7879
Qu Wenruo0ed47922015-04-16 16:55:08 +08007880/*
7881 * TODO: Modify related function to add related node/leaf to dirty_extent_root,
7882 * for later qgroup accounting.
7883 *
7884 * Current, this function does nothing.
7885 */
Mark Fasheh11526512014-07-17 12:39:01 -07007886static int account_leaf_items(struct btrfs_trans_handle *trans,
7887 struct btrfs_root *root,
7888 struct extent_buffer *eb)
7889{
7890 int nr = btrfs_header_nritems(eb);
Qu Wenruo0ed47922015-04-16 16:55:08 +08007891 int i, extent_type;
Mark Fasheh11526512014-07-17 12:39:01 -07007892 struct btrfs_key key;
7893 struct btrfs_file_extent_item *fi;
7894 u64 bytenr, num_bytes;
7895
7896 for (i = 0; i < nr; i++) {
7897 btrfs_item_key_to_cpu(eb, &key, i);
7898
7899 if (key.type != BTRFS_EXTENT_DATA_KEY)
7900 continue;
7901
7902 fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
7903 /* filter out non qgroup-accountable extents */
7904 extent_type = btrfs_file_extent_type(eb, fi);
7905
7906 if (extent_type == BTRFS_FILE_EXTENT_INLINE)
7907 continue;
7908
7909 bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
7910 if (!bytenr)
7911 continue;
7912
7913 num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
Mark Fasheh11526512014-07-17 12:39:01 -07007914 }
7915 return 0;
7916}
7917
7918/*
7919 * Walk up the tree from the bottom, freeing leaves and any interior
7920 * nodes which have had all slots visited. If a node (leaf or
7921 * interior) is freed, the node above it will have it's slot
7922 * incremented. The root node will never be freed.
7923 *
7924 * At the end of this function, we should have a path which has all
7925 * slots incremented to the next position for a search. If we need to
7926 * read a new node it will be NULL and the node above it will have the
7927 * correct slot selected for a later read.
7928 *
7929 * If we increment the root nodes slot counter past the number of
7930 * elements, 1 is returned to signal completion of the search.
7931 */
7932static int adjust_slots_upwards(struct btrfs_root *root,
7933 struct btrfs_path *path, int root_level)
7934{
7935 int level = 0;
7936 int nr, slot;
7937 struct extent_buffer *eb;
7938
7939 if (root_level == 0)
7940 return 1;
7941
7942 while (level <= root_level) {
7943 eb = path->nodes[level];
7944 nr = btrfs_header_nritems(eb);
7945 path->slots[level]++;
7946 slot = path->slots[level];
7947 if (slot >= nr || level == 0) {
7948 /*
7949 * Don't free the root - we will detect this
7950 * condition after our loop and return a
7951 * positive value for caller to stop walking the tree.
7952 */
7953 if (level != root_level) {
7954 btrfs_tree_unlock_rw(eb, path->locks[level]);
7955 path->locks[level] = 0;
7956
7957 free_extent_buffer(eb);
7958 path->nodes[level] = NULL;
7959 path->slots[level] = 0;
7960 }
7961 } else {
7962 /*
7963 * We have a valid slot to walk back down
7964 * from. Stop here so caller can process these
7965 * new nodes.
7966 */
7967 break;
7968 }
7969
7970 level++;
7971 }
7972
7973 eb = path->nodes[root_level];
7974 if (path->slots[root_level] >= btrfs_header_nritems(eb))
7975 return 1;
7976
7977 return 0;
7978}
7979
7980/*
7981 * root_eb is the subtree root and is locked before this function is called.
Qu Wenruo0ed47922015-04-16 16:55:08 +08007982 * TODO: Modify this function to mark all (including complete shared node)
7983 * to dirty_extent_root to allow it get accounted in qgroup.
Mark Fasheh11526512014-07-17 12:39:01 -07007984 */
7985static int account_shared_subtree(struct btrfs_trans_handle *trans,
7986 struct btrfs_root *root,
7987 struct extent_buffer *root_eb,
7988 u64 root_gen,
7989 int root_level)
7990{
7991 int ret = 0;
7992 int level;
7993 struct extent_buffer *eb = root_eb;
7994 struct btrfs_path *path = NULL;
7995
7996 BUG_ON(root_level < 0 || root_level > BTRFS_MAX_LEVEL);
7997 BUG_ON(root_eb == NULL);
7998
7999 if (!root->fs_info->quota_enabled)
8000 return 0;
8001
8002 if (!extent_buffer_uptodate(root_eb)) {
8003 ret = btrfs_read_buffer(root_eb, root_gen);
8004 if (ret)
8005 goto out;
8006 }
8007
8008 if (root_level == 0) {
8009 ret = account_leaf_items(trans, root, root_eb);
8010 goto out;
8011 }
8012
8013 path = btrfs_alloc_path();
8014 if (!path)
8015 return -ENOMEM;
8016
8017 /*
8018 * Walk down the tree. Missing extent blocks are filled in as
8019 * we go. Metadata is accounted every time we read a new
8020 * extent block.
8021 *
8022 * When we reach a leaf, we account for file extent items in it,
8023 * walk back up the tree (adjusting slot pointers as we go)
8024 * and restart the search process.
8025 */
8026 extent_buffer_get(root_eb); /* For path */
8027 path->nodes[root_level] = root_eb;
8028 path->slots[root_level] = 0;
8029 path->locks[root_level] = 0; /* so release_path doesn't try to unlock */
8030walk_down:
8031 level = root_level;
8032 while (level >= 0) {
8033 if (path->nodes[level] == NULL) {
Mark Fasheh11526512014-07-17 12:39:01 -07008034 int parent_slot;
8035 u64 child_gen;
8036 u64 child_bytenr;
8037
8038 /* We need to get child blockptr/gen from
8039 * parent before we can read it. */
8040 eb = path->nodes[level + 1];
8041 parent_slot = path->slots[level + 1];
8042 child_bytenr = btrfs_node_blockptr(eb, parent_slot);
8043 child_gen = btrfs_node_ptr_generation(eb, parent_slot);
8044
David Sterbace86cd52014-06-15 01:07:32 +02008045 eb = read_tree_block(root, child_bytenr, child_gen);
Liu Bo64c043d2015-05-25 17:30:15 +08008046 if (IS_ERR(eb)) {
8047 ret = PTR_ERR(eb);
8048 goto out;
8049 } else if (!extent_buffer_uptodate(eb)) {
Liu Bo8635eda2015-05-25 17:30:14 +08008050 free_extent_buffer(eb);
Liu Bo64c043d2015-05-25 17:30:15 +08008051 ret = -EIO;
Mark Fasheh11526512014-07-17 12:39:01 -07008052 goto out;
8053 }
8054
8055 path->nodes[level] = eb;
8056 path->slots[level] = 0;
8057
8058 btrfs_tree_read_lock(eb);
8059 btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK);
8060 path->locks[level] = BTRFS_READ_LOCK_BLOCKING;
Mark Fasheh11526512014-07-17 12:39:01 -07008061 }
8062
8063 if (level == 0) {
8064 ret = account_leaf_items(trans, root, path->nodes[level]);
8065 if (ret)
8066 goto out;
8067
8068 /* Nonzero return here means we completed our search */
8069 ret = adjust_slots_upwards(root, path, root_level);
8070 if (ret)
8071 break;
8072
8073 /* Restart search with new slots */
8074 goto walk_down;
8075 }
8076
8077 level--;
8078 }
8079
8080 ret = 0;
8081out:
8082 btrfs_free_path(path);
8083
8084 return ret;
8085}
8086
/*
 * helper to process tree block while walking down the tree.
 *
 * when wc->stage == UPDATE_BACKREF, this function updates
 * back refs for pointers in the block.
 *
 * when wc->stage == DROP_REFERENCE, it releases the lock on blocks we
 * fully own (unless wc->keep_locks is set) so the walk can descend.
 *
 * NOTE: return value 1 means we should stop walking down.
 */
static noinline int walk_down_proc(struct btrfs_trans_handle *trans,
				   struct btrfs_root *root,
				   struct btrfs_path *path,
				   struct walk_control *wc, int lookup_info)
{
	int level = wc->level;
	struct extent_buffer *eb = path->nodes[level];
	u64 flag = BTRFS_BLOCK_FLAG_FULL_BACKREF;
	int ret;

	/* in UPDATE_BACKREF only blocks this root owns need processing */
	if (wc->stage == UPDATE_BACKREF &&
	    btrfs_header_owner(eb) != root->root_key.objectid)
		return 1;

	/*
	 * when reference count of tree block is 1, it won't increase
	 * again. once full backref flag is set, we never clear it.
	 */
	if (lookup_info &&
	    ((wc->stage == DROP_REFERENCE && wc->refs[level] != 1) ||
	     (wc->stage == UPDATE_BACKREF && !(wc->flags[level] & flag)))) {
		BUG_ON(!path->locks[level]);
		/* refresh this level's refcount and flags from the extent tree */
		ret = btrfs_lookup_extent_info(trans, root,
					       eb->start, level, 1,
					       &wc->refs[level],
					       &wc->flags[level]);
		BUG_ON(ret == -ENOMEM);
		if (ret)
			return ret;
		BUG_ON(wc->refs[level] == 0);
	}

	if (wc->stage == DROP_REFERENCE) {
		/* shared block: caller must decide whether to switch stage */
		if (wc->refs[level] > 1)
			return 1;

		/* exclusively owned: safe to drop the lock while descending */
		if (path->locks[level] && !wc->keep_locks) {
			btrfs_tree_unlock_rw(eb, path->locks[level]);
			path->locks[level] = 0;
		}
		return 0;
	}

	/* wc->stage == UPDATE_BACKREF */
	if (!(wc->flags[level] & flag)) {
		BUG_ON(!path->locks[level]);
		/*
		 * convert this block to FULL_BACKREF: add full backrefs for
		 * all pointers, drop the old owner-style refs, then record
		 * the flag both on disk and in wc->flags.
		 */
		ret = btrfs_inc_ref(trans, root, eb, 1);
		BUG_ON(ret); /* -ENOMEM */
		ret = btrfs_dec_ref(trans, root, eb, 0);
		BUG_ON(ret); /* -ENOMEM */
		ret = btrfs_set_disk_extent_flags(trans, root, eb->start,
						  eb->len, flag,
						  btrfs_header_level(eb), 0);
		BUG_ON(ret); /* -ENOMEM */
		wc->flags[level] |= flag;
	}

	/*
	 * the block is shared by multiple trees, so it's not good to
	 * keep the tree lock
	 */
	if (path->locks[level] && level > 0) {
		btrfs_tree_unlock_rw(eb, path->locks[level]);
		path->locks[level] = 0;
	}
	return 0;
}
8162
/*
 * helper to process tree block pointer.
 *
 * when wc->stage == DROP_REFERENCE, this function checks
 * reference count of the block pointed to. if the block
 * is shared and we need update back refs for the subtree
 * rooted at the block, this function changes wc->stage to
 * UPDATE_BACKREF. if the block is shared and there is no
 * need to update back, this function drops the reference
 * to the block.
 *
 * NOTE: return value 1 means we should stop walking down.
 */
static noinline int do_walk_down(struct btrfs_trans_handle *trans,
				 struct btrfs_root *root,
				 struct btrfs_path *path,
				 struct walk_control *wc, int *lookup_info)
{
	u64 bytenr;
	u64 generation;
	u64 parent;
	u32 blocksize;
	struct btrfs_key key;
	struct extent_buffer *next;
	int level = wc->level;
	int reada = 0;
	int ret = 0;
	bool need_account = false;

	generation = btrfs_node_ptr_generation(path->nodes[level],
					       path->slots[level]);
	/*
	 * if the lower level block was created before the snapshot
	 * was created, we know there is no need to update back refs
	 * for the subtree
	 */
	if (wc->stage == UPDATE_BACKREF &&
	    generation <= root->root_key.offset) {
		*lookup_info = 1;
		return 1;
	}

	bytenr = btrfs_node_blockptr(path->nodes[level], path->slots[level]);
	blocksize = root->nodesize;

	/* grab the child buffer if cached; otherwise create it and plan
	 * to issue readahead before reading it in */
	next = btrfs_find_tree_block(root->fs_info, bytenr);
	if (!next) {
		next = btrfs_find_create_tree_block(root, bytenr);
		if (!next)
			return -ENOMEM;
		btrfs_set_buffer_lockdep_class(root->root_key.objectid, next,
					       level - 1);
		reada = 1;
	}
	btrfs_tree_lock(next);
	btrfs_set_lock_blocking(next);

	/* look up the child's refcount and flags before deciding its fate */
	ret = btrfs_lookup_extent_info(trans, root, bytenr, level - 1, 1,
				       &wc->refs[level - 1],
				       &wc->flags[level - 1]);
	if (ret < 0) {
		btrfs_tree_unlock(next);
		return ret;
	}

	if (unlikely(wc->refs[level - 1] == 0)) {
		btrfs_err(root->fs_info, "Missing references.");
		BUG();
	}
	*lookup_info = 0;

	if (wc->stage == DROP_REFERENCE) {
		if (wc->refs[level - 1] > 1) {
			/* shared subtree: if we end up skipping it, its
			 * extents still need qgroup accounting */
			need_account = true;
			if (level == 1 &&
			    (wc->flags[0] & BTRFS_BLOCK_FLAG_FULL_BACKREF))
				goto skip;

			if (!wc->update_ref ||
			    generation <= root->root_key.offset)
				goto skip;

			/* already past this key in a previous pass? */
			btrfs_node_key_to_cpu(path->nodes[level], &key,
					      path->slots[level]);
			ret = btrfs_comp_cpu_keys(&key, &wc->update_progress);
			if (ret < 0)
				goto skip;

			/* shared block needs backref updates: switch stage
			 * and keep walking down into it */
			wc->stage = UPDATE_BACKREF;
			wc->shared_level = level - 1;
		}
	} else {
		if (level == 1 &&
		    (wc->flags[0] & BTRFS_BLOCK_FLAG_FULL_BACKREF))
			goto skip;
	}

	/* stale buffer (wrong generation): drop it and force a re-read */
	if (!btrfs_buffer_uptodate(next, generation, 0)) {
		btrfs_tree_unlock(next);
		free_extent_buffer(next);
		next = NULL;
		*lookup_info = 1;
	}

	if (!next) {
		if (reada && level == 1)
			reada_walk_down(trans, root, wc, path);
		next = read_tree_block(root, bytenr, generation);
		if (IS_ERR(next)) {
			return PTR_ERR(next);
		} else if (!extent_buffer_uptodate(next)) {
			free_extent_buffer(next);
			return -EIO;
		}
		btrfs_tree_lock(next);
		btrfs_set_lock_blocking(next);
	}

	/* descend: make the child the current level of the path */
	level--;
	BUG_ON(level != btrfs_header_level(next));
	path->nodes[level] = next;
	path->slots[level] = 0;
	path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
	wc->level = level;
	if (wc->level == 1)
		wc->reada_slot = 0;
	return 0;
skip:
	/* we are not descending into this child */
	wc->refs[level - 1] = 0;
	wc->flags[level - 1] = 0;
	if (wc->stage == DROP_REFERENCE) {
		if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
			parent = path->nodes[level]->start;
		} else {
			BUG_ON(root->root_key.objectid !=
			       btrfs_header_owner(path->nodes[level]));
			parent = 0;
		}

		if (need_account) {
			ret = account_shared_subtree(trans, root, next,
						     generation, level - 1);
			if (ret) {
				/* accounting failure only desyncs qgroups;
				 * the drop itself still proceeds */
				printk_ratelimited(KERN_ERR "BTRFS: %s Error "
					"%d accounting shared subtree. Quota "
					"is out of sync, rescan required.\n",
					root->fs_info->sb->s_id, ret);
			}
		}
		ret = btrfs_free_extent(trans, root, bytenr, blocksize, parent,
				root->root_key.objectid, level - 1, 0, 0);
		BUG_ON(ret); /* -ENOMEM */
	}
	btrfs_tree_unlock(next);
	free_extent_buffer(next);
	*lookup_info = 1;
	return 1;
}
8321
/*
 * helper to process tree block while walking up the tree.
 *
 * when wc->stage == DROP_REFERENCE, this function drops
 * reference count on the block.
 *
 * when wc->stage == UPDATE_BACKREF, this function changes
 * wc->stage back to DROP_REFERENCE if we changed wc->stage
 * to UPDATE_BACKREF previously while processing the block.
 *
 * NOTE: return value 1 means we should stop walking up.
 */
static noinline int walk_up_proc(struct btrfs_trans_handle *trans,
				 struct btrfs_root *root,
				 struct btrfs_path *path,
				 struct walk_control *wc)
{
	int ret;
	int level = wc->level;
	struct extent_buffer *eb = path->nodes[level];
	u64 parent = 0;

	if (wc->stage == UPDATE_BACKREF) {
		BUG_ON(wc->shared_level < level);
		/* still below the shared block: nothing to finish here */
		if (level < wc->shared_level)
			goto out;

		/* record where the backref update stopped so a later pass
		 * can resume from wc->update_progress */
		ret = find_next_key(path, level + 1, &wc->update_progress);
		if (ret > 0)
			wc->update_ref = 0;

		/* backref update for this subtree is done: resume dropping */
		wc->stage = DROP_REFERENCE;
		wc->shared_level = -1;
		path->slots[level] = 0;

		/*
		 * check reference count again if the block isn't locked.
		 * we should start walking down the tree again if reference
		 * count is one.
		 */
		if (!path->locks[level]) {
			BUG_ON(level == 0);
			btrfs_tree_lock(eb);
			btrfs_set_lock_blocking(eb);
			path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;

			ret = btrfs_lookup_extent_info(trans, root,
						       eb->start, level, 1,
						       &wc->refs[level],
						       &wc->flags[level]);
			if (ret < 0) {
				btrfs_tree_unlock_rw(eb, path->locks[level]);
				path->locks[level] = 0;
				return ret;
			}
			BUG_ON(wc->refs[level] == 0);
			if (wc->refs[level] == 1) {
				btrfs_tree_unlock_rw(eb, path->locks[level]);
				path->locks[level] = 0;
				return 1;
			}
		}
	}

	/* wc->stage == DROP_REFERENCE */
	BUG_ON(wc->refs[level] > 1 && !path->locks[level]);

	if (wc->refs[level] == 1) {
		if (level == 0) {
			/* last ref on a leaf: drop refs on the extents it
			 * points to (shared vs. owned decides the mode) */
			if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF)
				ret = btrfs_dec_ref(trans, root, eb, 1);
			else
				ret = btrfs_dec_ref(trans, root, eb, 0);
			BUG_ON(ret); /* -ENOMEM */
			ret = account_leaf_items(trans, root, eb);
			if (ret) {
				/* qgroup desync is logged, not fatal */
				printk_ratelimited(KERN_ERR "BTRFS: %s Error "
					"%d accounting leaf items. Quota "
					"is out of sync, rescan required.\n",
					root->fs_info->sb->s_id, ret);
			}
		}
		/* make block locked assertion in clean_tree_block happy */
		if (!path->locks[level] &&
		    btrfs_header_generation(eb) == trans->transid) {
			btrfs_tree_lock(eb);
			btrfs_set_lock_blocking(eb);
			path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
		}
		clean_tree_block(trans, root->fs_info, eb);
	}

	/* work out the parent bytenr needed to free this block's extent */
	if (eb == root->node) {
		if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF)
			parent = eb->start;
		else
			BUG_ON(root->root_key.objectid !=
			       btrfs_header_owner(eb));
	} else {
		if (wc->flags[level + 1] & BTRFS_BLOCK_FLAG_FULL_BACKREF)
			parent = path->nodes[level + 1]->start;
		else
			BUG_ON(root->root_key.objectid !=
			       btrfs_header_owner(path->nodes[level + 1]));
	}

	btrfs_free_tree_block(trans, root, eb, parent, wc->refs[level] == 1);
out:
	wc->refs[level] = 0;
	wc->flags[level] = 0;
	return 0;
}
8434
Yan Zheng5d4f98a2009-06-10 10:45:14 -04008435static noinline int walk_down_tree(struct btrfs_trans_handle *trans,
8436 struct btrfs_root *root,
Yan Zheng2c47e6052009-06-27 21:07:35 -04008437 struct btrfs_path *path,
8438 struct walk_control *wc)
Yan Zhengf82d02d2008-10-29 14:49:05 -04008439{
Yan Zheng2c47e6052009-06-27 21:07:35 -04008440 int level = wc->level;
Yan, Zheng94fcca92009-10-09 09:25:16 -04008441 int lookup_info = 1;
Yan Zhengf82d02d2008-10-29 14:49:05 -04008442 int ret;
8443
Yan Zheng2c47e6052009-06-27 21:07:35 -04008444 while (level >= 0) {
Yan, Zheng94fcca92009-10-09 09:25:16 -04008445 ret = walk_down_proc(trans, root, path, wc, lookup_info);
Yan Zheng2c47e6052009-06-27 21:07:35 -04008446 if (ret > 0)
Yan Zhengf82d02d2008-10-29 14:49:05 -04008447 break;
Yan Zhengf82d02d2008-10-29 14:49:05 -04008448
Yan Zheng2c47e6052009-06-27 21:07:35 -04008449 if (level == 0)
8450 break;
8451
Yan, Zheng7a7965f2010-02-01 02:41:17 +00008452 if (path->slots[level] >=
8453 btrfs_header_nritems(path->nodes[level]))
8454 break;
8455
Yan, Zheng94fcca92009-10-09 09:25:16 -04008456 ret = do_walk_down(trans, root, path, wc, &lookup_info);
Yan, Zheng1c4850e2009-09-21 15:55:59 -04008457 if (ret > 0) {
8458 path->slots[level]++;
8459 continue;
Miao Xie90d2c51d2010-03-25 12:37:12 +00008460 } else if (ret < 0)
8461 return ret;
Yan, Zheng1c4850e2009-09-21 15:55:59 -04008462 level = wc->level;
Yan Zhengf82d02d2008-10-29 14:49:05 -04008463 }
Yan Zhengf82d02d2008-10-29 14:49:05 -04008464 return 0;
8465}
8466
Chris Masond3977122009-01-05 21:25:51 -05008467static noinline int walk_up_tree(struct btrfs_trans_handle *trans,
Chris Mason98ed5172008-01-03 10:01:48 -05008468 struct btrfs_root *root,
Yan Zhengf82d02d2008-10-29 14:49:05 -04008469 struct btrfs_path *path,
Yan Zheng2c47e6052009-06-27 21:07:35 -04008470 struct walk_control *wc, int max_level)
Chris Mason20524f02007-03-10 06:35:47 -05008471{
Yan Zheng2c47e6052009-06-27 21:07:35 -04008472 int level = wc->level;
Chris Mason20524f02007-03-10 06:35:47 -05008473 int ret;
Chris Mason9f3a7422007-08-07 15:52:19 -04008474
Yan Zheng2c47e6052009-06-27 21:07:35 -04008475 path->slots[level] = btrfs_header_nritems(path->nodes[level]);
8476 while (level < max_level && path->nodes[level]) {
8477 wc->level = level;
8478 if (path->slots[level] + 1 <
8479 btrfs_header_nritems(path->nodes[level])) {
8480 path->slots[level]++;
Chris Mason20524f02007-03-10 06:35:47 -05008481 return 0;
8482 } else {
Yan Zheng2c47e6052009-06-27 21:07:35 -04008483 ret = walk_up_proc(trans, root, path, wc);
8484 if (ret > 0)
8485 return 0;
Chris Masonbd56b302009-02-04 09:27:02 -05008486
Yan Zheng2c47e6052009-06-27 21:07:35 -04008487 if (path->locks[level]) {
Chris Masonbd681512011-07-16 15:23:14 -04008488 btrfs_tree_unlock_rw(path->nodes[level],
8489 path->locks[level]);
Yan Zheng2c47e6052009-06-27 21:07:35 -04008490 path->locks[level] = 0;
Yan Zhengf82d02d2008-10-29 14:49:05 -04008491 }
Yan Zheng2c47e6052009-06-27 21:07:35 -04008492 free_extent_buffer(path->nodes[level]);
8493 path->nodes[level] = NULL;
8494 level++;
Chris Mason20524f02007-03-10 06:35:47 -05008495 }
8496 }
8497 return 1;
8498}
8499
Chris Mason9aca1d52007-03-13 11:09:37 -04008500/*
Yan Zheng2c47e6052009-06-27 21:07:35 -04008501 * drop a subvolume tree.
8502 *
8503 * this function traverses the tree freeing any blocks that only
8504 * referenced by the tree.
8505 *
8506 * when a shared tree block is found. this function decreases its
8507 * reference count by one. if update_ref is true, this function
8508 * also make sure backrefs for the shared block and all lower level
8509 * blocks are properly updated.
David Sterba9d1a2a32013-03-12 15:13:28 +00008510 *
8511 * If called with for_reloc == 0, may exit early with -EAGAIN
Chris Mason9aca1d52007-03-13 11:09:37 -04008512 */
Jeff Mahoney2c536792011-10-03 23:22:41 -04008513int btrfs_drop_snapshot(struct btrfs_root *root,
Arne Jansen66d7e7f2011-09-12 15:26:38 +02008514 struct btrfs_block_rsv *block_rsv, int update_ref,
8515 int for_reloc)
Chris Mason20524f02007-03-10 06:35:47 -05008516{
Chris Mason5caf2a02007-04-02 11:20:42 -04008517 struct btrfs_path *path;
Yan Zheng2c47e6052009-06-27 21:07:35 -04008518 struct btrfs_trans_handle *trans;
8519 struct btrfs_root *tree_root = root->fs_info->tree_root;
Chris Mason9f3a7422007-08-07 15:52:19 -04008520 struct btrfs_root_item *root_item = &root->root_item;
Yan Zheng2c47e6052009-06-27 21:07:35 -04008521 struct walk_control *wc;
8522 struct btrfs_key key;
8523 int err = 0;
8524 int ret;
8525 int level;
Josef Bacikd29a9f62013-07-17 19:30:20 -04008526 bool root_dropped = false;
Chris Mason20524f02007-03-10 06:35:47 -05008527
Mark Fasheh11526512014-07-17 12:39:01 -07008528 btrfs_debug(root->fs_info, "Drop subvolume %llu", root->objectid);
8529
Chris Mason5caf2a02007-04-02 11:20:42 -04008530 path = btrfs_alloc_path();
Tsutomu Itohcb1b69f2011-08-09 07:11:13 +00008531 if (!path) {
8532 err = -ENOMEM;
8533 goto out;
8534 }
Chris Mason20524f02007-03-10 06:35:47 -05008535
Yan Zheng2c47e6052009-06-27 21:07:35 -04008536 wc = kzalloc(sizeof(*wc), GFP_NOFS);
Mark Fasheh38a1a912011-07-13 10:59:59 -07008537 if (!wc) {
8538 btrfs_free_path(path);
Tsutomu Itohcb1b69f2011-08-09 07:11:13 +00008539 err = -ENOMEM;
8540 goto out;
Mark Fasheh38a1a912011-07-13 10:59:59 -07008541 }
Yan Zheng2c47e6052009-06-27 21:07:35 -04008542
Yan, Zhenga22285a2010-05-16 10:48:46 -04008543 trans = btrfs_start_transaction(tree_root, 0);
Jeff Mahoney79787ea2012-03-12 16:03:00 +01008544 if (IS_ERR(trans)) {
8545 err = PTR_ERR(trans);
8546 goto out_free;
8547 }
Tsutomu Itoh98d5dc12011-01-20 06:19:37 +00008548
Yan, Zheng3fd0a552010-05-16 10:49:59 -04008549 if (block_rsv)
8550 trans->block_rsv = block_rsv;
Yan Zheng2c47e6052009-06-27 21:07:35 -04008551
Chris Mason9f3a7422007-08-07 15:52:19 -04008552 if (btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
Yan Zheng2c47e6052009-06-27 21:07:35 -04008553 level = btrfs_header_level(root->node);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04008554 path->nodes[level] = btrfs_lock_root_node(root);
8555 btrfs_set_lock_blocking(path->nodes[level]);
Chris Mason9f3a7422007-08-07 15:52:19 -04008556 path->slots[level] = 0;
Chris Masonbd681512011-07-16 15:23:14 -04008557 path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
Yan Zheng2c47e6052009-06-27 21:07:35 -04008558 memset(&wc->update_progress, 0,
8559 sizeof(wc->update_progress));
Chris Mason9f3a7422007-08-07 15:52:19 -04008560 } else {
Chris Mason9f3a7422007-08-07 15:52:19 -04008561 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
Yan Zheng2c47e6052009-06-27 21:07:35 -04008562 memcpy(&wc->update_progress, &key,
8563 sizeof(wc->update_progress));
8564
Chris Mason6702ed42007-08-07 16:15:09 -04008565 level = root_item->drop_level;
Yan Zheng2c47e6052009-06-27 21:07:35 -04008566 BUG_ON(level == 0);
Chris Mason6702ed42007-08-07 16:15:09 -04008567 path->lowest_level = level;
Yan Zheng2c47e6052009-06-27 21:07:35 -04008568 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
8569 path->lowest_level = 0;
8570 if (ret < 0) {
8571 err = ret;
Jeff Mahoney79787ea2012-03-12 16:03:00 +01008572 goto out_end_trans;
Chris Mason9f3a7422007-08-07 15:52:19 -04008573 }
Yan, Zheng1c4850e2009-09-21 15:55:59 -04008574 WARN_ON(ret > 0);
Yan Zheng2c47e6052009-06-27 21:07:35 -04008575
Chris Mason7d9eb122008-07-08 14:19:17 -04008576 /*
8577 * unlock our path, this is safe because only this
8578 * function is allowed to delete this snapshot
8579 */
Yan Zheng5d4f98a2009-06-10 10:45:14 -04008580 btrfs_unlock_up_safe(path, 0);
Chris Mason9aca1d52007-03-13 11:09:37 -04008581
Yan Zheng2c47e6052009-06-27 21:07:35 -04008582 level = btrfs_header_level(root->node);
8583 while (1) {
8584 btrfs_tree_lock(path->nodes[level]);
8585 btrfs_set_lock_blocking(path->nodes[level]);
Josef Bacikfec386a2013-07-15 12:41:42 -04008586 path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
Yan Zheng2c47e6052009-06-27 21:07:35 -04008587
8588 ret = btrfs_lookup_extent_info(trans, root,
8589 path->nodes[level]->start,
Josef Bacik3173a182013-03-07 14:22:04 -05008590 level, 1, &wc->refs[level],
Yan Zheng2c47e6052009-06-27 21:07:35 -04008591 &wc->flags[level]);
Jeff Mahoney79787ea2012-03-12 16:03:00 +01008592 if (ret < 0) {
8593 err = ret;
8594 goto out_end_trans;
8595 }
Yan Zheng2c47e6052009-06-27 21:07:35 -04008596 BUG_ON(wc->refs[level] == 0);
8597
8598 if (level == root_item->drop_level)
8599 break;
8600
8601 btrfs_tree_unlock(path->nodes[level]);
Josef Bacikfec386a2013-07-15 12:41:42 -04008602 path->locks[level] = 0;
Yan Zheng2c47e6052009-06-27 21:07:35 -04008603 WARN_ON(wc->refs[level] != 1);
8604 level--;
8605 }
8606 }
8607
8608 wc->level = level;
8609 wc->shared_level = -1;
8610 wc->stage = DROP_REFERENCE;
8611 wc->update_ref = update_ref;
8612 wc->keep_locks = 0;
Arne Jansen66d7e7f2011-09-12 15:26:38 +02008613 wc->for_reloc = for_reloc;
Yan, Zheng1c4850e2009-09-21 15:55:59 -04008614 wc->reada_count = BTRFS_NODEPTRS_PER_BLOCK(root);
Yan Zheng2c47e6052009-06-27 21:07:35 -04008615
8616 while (1) {
David Sterba9d1a2a32013-03-12 15:13:28 +00008617
Yan Zheng2c47e6052009-06-27 21:07:35 -04008618 ret = walk_down_tree(trans, root, path, wc);
8619 if (ret < 0) {
8620 err = ret;
Chris Masone7a84562008-06-25 16:01:31 -04008621 break;
8622 }
Yan Zheng2c47e6052009-06-27 21:07:35 -04008623
8624 ret = walk_up_tree(trans, root, path, wc, BTRFS_MAX_LEVEL);
8625 if (ret < 0) {
8626 err = ret;
8627 break;
8628 }
8629
8630 if (ret > 0) {
8631 BUG_ON(wc->stage != DROP_REFERENCE);
8632 break;
8633 }
8634
8635 if (wc->stage == DROP_REFERENCE) {
8636 level = wc->level;
8637 btrfs_node_key(path->nodes[level],
8638 &root_item->drop_progress,
8639 path->slots[level]);
8640 root_item->drop_level = level;
8641 }
8642
8643 BUG_ON(wc->level == 0);
Josef Bacik3c8f2422013-07-15 11:57:06 -04008644 if (btrfs_should_end_transaction(trans, tree_root) ||
8645 (!for_reloc && btrfs_need_cleaner_sleep(root))) {
Yan Zheng2c47e6052009-06-27 21:07:35 -04008646 ret = btrfs_update_root(trans, tree_root,
8647 &root->root_key,
8648 root_item);
Jeff Mahoney79787ea2012-03-12 16:03:00 +01008649 if (ret) {
8650 btrfs_abort_transaction(trans, tree_root, ret);
8651 err = ret;
8652 goto out_end_trans;
8653 }
Yan Zheng2c47e6052009-06-27 21:07:35 -04008654
Yan, Zheng3fd0a552010-05-16 10:49:59 -04008655 btrfs_end_transaction_throttle(trans, tree_root);
Josef Bacik3c8f2422013-07-15 11:57:06 -04008656 if (!for_reloc && btrfs_need_cleaner_sleep(root)) {
Frank Holtonefe120a2013-12-20 11:37:06 -05008657 pr_debug("BTRFS: drop snapshot early exit\n");
Josef Bacik3c8f2422013-07-15 11:57:06 -04008658 err = -EAGAIN;
8659 goto out_free;
8660 }
8661
Yan, Zhenga22285a2010-05-16 10:48:46 -04008662 trans = btrfs_start_transaction(tree_root, 0);
Jeff Mahoney79787ea2012-03-12 16:03:00 +01008663 if (IS_ERR(trans)) {
8664 err = PTR_ERR(trans);
8665 goto out_free;
8666 }
Yan, Zheng3fd0a552010-05-16 10:49:59 -04008667 if (block_rsv)
8668 trans->block_rsv = block_rsv;
Chris Masonc3e69d52009-03-13 10:17:05 -04008669 }
Chris Mason20524f02007-03-10 06:35:47 -05008670 }
David Sterbab3b4aa72011-04-21 01:20:15 +02008671 btrfs_release_path(path);
Jeff Mahoney79787ea2012-03-12 16:03:00 +01008672 if (err)
8673 goto out_end_trans;
Yan Zheng2c47e6052009-06-27 21:07:35 -04008674
8675 ret = btrfs_del_root(trans, tree_root, &root->root_key);
Jeff Mahoney79787ea2012-03-12 16:03:00 +01008676 if (ret) {
8677 btrfs_abort_transaction(trans, tree_root, ret);
8678 goto out_end_trans;
8679 }
Yan Zheng2c47e6052009-06-27 21:07:35 -04008680
Yan, Zheng76dda932009-09-21 16:00:26 -04008681 if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
Miao Xiecb517ea2013-05-15 07:48:19 +00008682 ret = btrfs_find_root(tree_root, &root->root_key, path,
8683 NULL, NULL);
Jeff Mahoney79787ea2012-03-12 16:03:00 +01008684 if (ret < 0) {
8685 btrfs_abort_transaction(trans, tree_root, ret);
8686 err = ret;
8687 goto out_end_trans;
8688 } else if (ret > 0) {
Josef Bacik84cd9482010-12-08 12:24:01 -05008689 /* if we fail to delete the orphan item this time
8690 * around, it'll get picked up the next time.
8691 *
8692 * The most common failure here is just -ENOENT.
8693 */
8694 btrfs_del_orphan_item(trans, tree_root,
8695 root->root_key.objectid);
Yan, Zheng76dda932009-09-21 16:00:26 -04008696 }
8697 }
8698
Miao Xie27cdeb72014-04-02 19:51:05 +08008699 if (test_bit(BTRFS_ROOT_IN_RADIX, &root->state)) {
Josef Bacik2b9dbef2015-09-15 10:07:04 -04008700 btrfs_add_dropped_root(trans, root);
Yan, Zheng76dda932009-09-21 16:00:26 -04008701 } else {
8702 free_extent_buffer(root->node);
8703 free_extent_buffer(root->commit_root);
Miao Xieb0feb9d2013-05-15 07:48:20 +00008704 btrfs_put_fs_root(root);
Yan, Zheng76dda932009-09-21 16:00:26 -04008705 }
Josef Bacikd29a9f62013-07-17 19:30:20 -04008706 root_dropped = true;
Jeff Mahoney79787ea2012-03-12 16:03:00 +01008707out_end_trans:
Yan, Zheng3fd0a552010-05-16 10:49:59 -04008708 btrfs_end_transaction_throttle(trans, tree_root);
Jeff Mahoney79787ea2012-03-12 16:03:00 +01008709out_free:
Yan Zheng2c47e6052009-06-27 21:07:35 -04008710 kfree(wc);
Chris Mason5caf2a02007-04-02 11:20:42 -04008711 btrfs_free_path(path);
Tsutomu Itohcb1b69f2011-08-09 07:11:13 +00008712out:
Josef Bacikd29a9f62013-07-17 19:30:20 -04008713 /*
8714 * So if we need to stop dropping the snapshot for whatever reason we
8715 * need to make sure to add it back to the dead root list so that we
8716 * keep trying to do the work later. This also cleans up roots if we
8717 * don't have it in the radix (like when we recover after a power fail
8718 * or unmount) so we don't leak memory.
8719 */
Josef Bacikb37b39c2013-07-23 16:57:15 -04008720 if (!for_reloc && root_dropped == false)
Josef Bacikd29a9f62013-07-17 19:30:20 -04008721 btrfs_add_dead_root(root);
Wang Shilong90515e72014-01-07 17:26:58 +08008722 if (err && err != -EAGAIN)
Tsutomu Itohcb1b69f2011-08-09 07:11:13 +00008723 btrfs_std_error(root->fs_info, err);
Jeff Mahoney2c536792011-10-03 23:22:41 -04008724 return err;
Chris Mason20524f02007-03-10 06:35:47 -05008725}
Chris Mason9078a3e2007-04-26 16:46:15 -04008726
Yan Zheng2c47e6052009-06-27 21:07:35 -04008727/*
8728 * drop subtree rooted at tree block 'node'.
8729 *
8730 * NOTE: this function will unlock and release tree block 'node'
Arne Jansen66d7e7f2011-09-12 15:26:38 +02008731 * only used by relocation code
Yan Zheng2c47e6052009-06-27 21:07:35 -04008732 */
Yan Zhengf82d02d2008-10-29 14:49:05 -04008733int btrfs_drop_subtree(struct btrfs_trans_handle *trans,
8734 struct btrfs_root *root,
8735 struct extent_buffer *node,
8736 struct extent_buffer *parent)
8737{
8738 struct btrfs_path *path;
Yan Zheng2c47e6052009-06-27 21:07:35 -04008739 struct walk_control *wc;
Yan Zhengf82d02d2008-10-29 14:49:05 -04008740 int level;
8741 int parent_level;
8742 int ret = 0;
8743 int wret;
8744
Yan Zheng2c47e6052009-06-27 21:07:35 -04008745 BUG_ON(root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID);
8746
Yan Zhengf82d02d2008-10-29 14:49:05 -04008747 path = btrfs_alloc_path();
Tsutomu Itohdb5b4932011-03-23 08:14:16 +00008748 if (!path)
8749 return -ENOMEM;
Yan Zhengf82d02d2008-10-29 14:49:05 -04008750
Yan Zheng2c47e6052009-06-27 21:07:35 -04008751 wc = kzalloc(sizeof(*wc), GFP_NOFS);
Tsutomu Itohdb5b4932011-03-23 08:14:16 +00008752 if (!wc) {
8753 btrfs_free_path(path);
8754 return -ENOMEM;
8755 }
Yan Zheng2c47e6052009-06-27 21:07:35 -04008756
Chris Masonb9447ef82009-03-09 11:45:38 -04008757 btrfs_assert_tree_locked(parent);
Yan Zhengf82d02d2008-10-29 14:49:05 -04008758 parent_level = btrfs_header_level(parent);
8759 extent_buffer_get(parent);
8760 path->nodes[parent_level] = parent;
8761 path->slots[parent_level] = btrfs_header_nritems(parent);
8762
Chris Masonb9447ef82009-03-09 11:45:38 -04008763 btrfs_assert_tree_locked(node);
Yan Zhengf82d02d2008-10-29 14:49:05 -04008764 level = btrfs_header_level(node);
Yan Zhengf82d02d2008-10-29 14:49:05 -04008765 path->nodes[level] = node;
8766 path->slots[level] = 0;
Chris Masonbd681512011-07-16 15:23:14 -04008767 path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
Yan Zheng2c47e6052009-06-27 21:07:35 -04008768
8769 wc->refs[parent_level] = 1;
8770 wc->flags[parent_level] = BTRFS_BLOCK_FLAG_FULL_BACKREF;
8771 wc->level = level;
8772 wc->shared_level = -1;
8773 wc->stage = DROP_REFERENCE;
8774 wc->update_ref = 0;
8775 wc->keep_locks = 1;
Arne Jansen66d7e7f2011-09-12 15:26:38 +02008776 wc->for_reloc = 1;
Yan, Zheng1c4850e2009-09-21 15:55:59 -04008777 wc->reada_count = BTRFS_NODEPTRS_PER_BLOCK(root);
Yan Zhengf82d02d2008-10-29 14:49:05 -04008778
8779 while (1) {
Yan Zheng2c47e6052009-06-27 21:07:35 -04008780 wret = walk_down_tree(trans, root, path, wc);
8781 if (wret < 0) {
Yan Zhengf82d02d2008-10-29 14:49:05 -04008782 ret = wret;
Yan Zhengf82d02d2008-10-29 14:49:05 -04008783 break;
Yan Zheng2c47e6052009-06-27 21:07:35 -04008784 }
Yan Zhengf82d02d2008-10-29 14:49:05 -04008785
Yan Zheng2c47e6052009-06-27 21:07:35 -04008786 wret = walk_up_tree(trans, root, path, wc, parent_level);
Yan Zhengf82d02d2008-10-29 14:49:05 -04008787 if (wret < 0)
8788 ret = wret;
8789 if (wret != 0)
8790 break;
8791 }
8792
Yan Zheng2c47e6052009-06-27 21:07:35 -04008793 kfree(wc);
Yan Zhengf82d02d2008-10-29 14:49:05 -04008794 btrfs_free_path(path);
8795 return ret;
8796}
8797
Chris Masonec44a352008-04-28 15:29:52 -04008798static u64 update_block_group_flags(struct btrfs_root *root, u64 flags)
8799{
8800 u64 num_devices;
Ilya Dryomovfc67c452012-03-27 17:09:17 +03008801 u64 stripped;
Chris Masonec44a352008-04-28 15:29:52 -04008802
Ilya Dryomovfc67c452012-03-27 17:09:17 +03008803 /*
8804 * if restripe for this chunk_type is on pick target profile and
8805 * return, otherwise do the usual balance
8806 */
8807 stripped = get_restripe_target(root->fs_info, flags);
8808 if (stripped)
8809 return extended_to_chunk(stripped);
Ilya Dryomove4d8ec02012-01-16 22:04:48 +02008810
Miao Xie95669972014-07-24 11:37:14 +08008811 num_devices = root->fs_info->fs_devices->rw_devices;
Chris Masoncd02dca2010-12-13 14:56:23 -05008812
Ilya Dryomovfc67c452012-03-27 17:09:17 +03008813 stripped = BTRFS_BLOCK_GROUP_RAID0 |
David Woodhouse53b381b2013-01-29 18:40:14 -05008814 BTRFS_BLOCK_GROUP_RAID5 | BTRFS_BLOCK_GROUP_RAID6 |
Ilya Dryomovfc67c452012-03-27 17:09:17 +03008815 BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID10;
8816
Chris Masonec44a352008-04-28 15:29:52 -04008817 if (num_devices == 1) {
8818 stripped |= BTRFS_BLOCK_GROUP_DUP;
8819 stripped = flags & ~stripped;
8820
8821 /* turn raid0 into single device chunks */
8822 if (flags & BTRFS_BLOCK_GROUP_RAID0)
8823 return stripped;
8824
8825 /* turn mirroring into duplication */
8826 if (flags & (BTRFS_BLOCK_GROUP_RAID1 |
8827 BTRFS_BLOCK_GROUP_RAID10))
8828 return stripped | BTRFS_BLOCK_GROUP_DUP;
Chris Masonec44a352008-04-28 15:29:52 -04008829 } else {
8830 /* they already had raid on here, just return */
Chris Masonec44a352008-04-28 15:29:52 -04008831 if (flags & stripped)
8832 return flags;
8833
8834 stripped |= BTRFS_BLOCK_GROUP_DUP;
8835 stripped = flags & ~stripped;
8836
8837 /* switch duplicated blocks with raid1 */
8838 if (flags & BTRFS_BLOCK_GROUP_DUP)
8839 return stripped | BTRFS_BLOCK_GROUP_RAID1;
8840
Ilya Dryomove3176ca2012-03-27 17:09:16 +03008841 /* this is drive concat, leave it alone */
Chris Masonec44a352008-04-28 15:29:52 -04008842 }
Ilya Dryomove3176ca2012-03-27 17:09:16 +03008843
Chris Masonec44a352008-04-28 15:29:52 -04008844 return flags;
8845}
8846
Zhaolei868f4012015-08-05 16:43:27 +08008847static int inc_block_group_ro(struct btrfs_block_group_cache *cache, int force)
Chris Mason0ef3e662008-05-24 14:04:53 -04008848{
Yan, Zhengf0486c62010-05-16 10:46:25 -04008849 struct btrfs_space_info *sinfo = cache->space_info;
8850 u64 num_bytes;
Miao Xie199c36e2011-07-15 10:34:36 +00008851 u64 min_allocable_bytes;
Yan, Zhengf0486c62010-05-16 10:46:25 -04008852 int ret = -ENOSPC;
Chris Mason0ef3e662008-05-24 14:04:53 -04008853
Miao Xie199c36e2011-07-15 10:34:36 +00008854 /*
8855 * We need some metadata space and system metadata space for
8856 * allocating chunks in some corner cases until we force to set
8857 * it to be readonly.
8858 */
8859 if ((sinfo->flags &
8860 (BTRFS_BLOCK_GROUP_SYSTEM | BTRFS_BLOCK_GROUP_METADATA)) &&
8861 !force)
8862 min_allocable_bytes = 1 * 1024 * 1024;
8863 else
8864 min_allocable_bytes = 0;
8865
Yan, Zhengf0486c62010-05-16 10:46:25 -04008866 spin_lock(&sinfo->lock);
8867 spin_lock(&cache->lock);
WuBo61cfea92011-07-26 03:30:11 +00008868
8869 if (cache->ro) {
Zhaolei868f4012015-08-05 16:43:27 +08008870 cache->ro++;
WuBo61cfea92011-07-26 03:30:11 +00008871 ret = 0;
8872 goto out;
8873 }
8874
Yan, Zhengf0486c62010-05-16 10:46:25 -04008875 num_bytes = cache->key.offset - cache->reserved - cache->pinned -
8876 cache->bytes_super - btrfs_block_group_used(&cache->item);
Chris Mason7d9eb122008-07-08 14:19:17 -04008877
Yan, Zhengf0486c62010-05-16 10:46:25 -04008878 if (sinfo->bytes_used + sinfo->bytes_reserved + sinfo->bytes_pinned +
Josef Bacik37be25b2011-08-05 10:25:38 -04008879 sinfo->bytes_may_use + sinfo->bytes_readonly + num_bytes +
8880 min_allocable_bytes <= sinfo->total_bytes) {
Yan, Zhengf0486c62010-05-16 10:46:25 -04008881 sinfo->bytes_readonly += num_bytes;
Zhaolei868f4012015-08-05 16:43:27 +08008882 cache->ro++;
Josef Bacik633c0aa2014-10-31 09:49:34 -04008883 list_add_tail(&cache->ro_list, &sinfo->ro_bgs);
Yan, Zhengf0486c62010-05-16 10:46:25 -04008884 ret = 0;
8885 }
WuBo61cfea92011-07-26 03:30:11 +00008886out:
Yan, Zhengf0486c62010-05-16 10:46:25 -04008887 spin_unlock(&cache->lock);
8888 spin_unlock(&sinfo->lock);
8889 return ret;
Chris Mason0ef3e662008-05-24 14:04:53 -04008890}
8891
Zhaolei868f4012015-08-05 16:43:27 +08008892int btrfs_inc_block_group_ro(struct btrfs_root *root,
Yan, Zhengf0486c62010-05-16 10:46:25 -04008893 struct btrfs_block_group_cache *cache)
Yan Zheng5d4f98a2009-06-10 10:45:14 -04008894
8895{
Yan, Zhengf0486c62010-05-16 10:46:25 -04008896 struct btrfs_trans_handle *trans;
8897 u64 alloc_flags;
8898 int ret;
8899
Chris Mason1bbc6212015-04-06 12:46:08 -07008900again:
Josef Bacik7a7eaa42011-04-13 12:54:33 -04008901 trans = btrfs_join_transaction(root);
Jeff Mahoney79787ea2012-03-12 16:03:00 +01008902 if (IS_ERR(trans))
8903 return PTR_ERR(trans);
Yan, Zhengf0486c62010-05-16 10:46:25 -04008904
Chris Mason1bbc6212015-04-06 12:46:08 -07008905 /*
8906 * we're not allowed to set block groups readonly after the dirty
8907 * block groups cache has started writing. If it already started,
8908 * back off and let this transaction commit
8909 */
8910 mutex_lock(&root->fs_info->ro_block_group_mutex);
8911 if (trans->transaction->dirty_bg_run) {
8912 u64 transid = trans->transid;
8913
8914 mutex_unlock(&root->fs_info->ro_block_group_mutex);
8915 btrfs_end_transaction(trans, root);
8916
8917 ret = btrfs_wait_for_commit(root, transid);
8918 if (ret)
8919 return ret;
8920 goto again;
8921 }
8922
Chris Mason153c35b2015-05-19 18:54:41 -07008923 /*
8924 * if we are changing raid levels, try to allocate a corresponding
8925 * block group with the new raid level.
8926 */
8927 alloc_flags = update_block_group_flags(root, cache->flags);
8928 if (alloc_flags != cache->flags) {
8929 ret = do_chunk_alloc(trans, root, alloc_flags,
8930 CHUNK_ALLOC_FORCE);
8931 /*
8932 * ENOSPC is allowed here, we may have enough space
8933 * already allocated at the new raid level to
8934 * carry on
8935 */
8936 if (ret == -ENOSPC)
8937 ret = 0;
8938 if (ret < 0)
8939 goto out;
8940 }
Chris Mason1bbc6212015-04-06 12:46:08 -07008941
Zhaolei868f4012015-08-05 16:43:27 +08008942 ret = inc_block_group_ro(cache, 0);
Yan, Zhengf0486c62010-05-16 10:46:25 -04008943 if (!ret)
8944 goto out;
8945 alloc_flags = get_alloc_profile(root, cache->space_info->flags);
Josef Bacik698d0082012-09-12 14:08:47 -04008946 ret = do_chunk_alloc(trans, root, alloc_flags,
Chris Mason0e4f8f82011-04-15 16:05:44 -04008947 CHUNK_ALLOC_FORCE);
Yan, Zhengf0486c62010-05-16 10:46:25 -04008948 if (ret < 0)
8949 goto out;
Zhaolei868f4012015-08-05 16:43:27 +08008950 ret = inc_block_group_ro(cache, 0);
Yan, Zhengf0486c62010-05-16 10:46:25 -04008951out:
Shaohua Li2f081082015-01-09 10:40:15 -08008952 if (cache->flags & BTRFS_BLOCK_GROUP_SYSTEM) {
8953 alloc_flags = update_block_group_flags(root, cache->flags);
Filipe Mananaa9629592015-05-18 19:11:40 +01008954 lock_chunks(root->fs_info->chunk_root);
Filipe Manana4617ea32015-06-09 17:48:21 +01008955 check_system_chunk(trans, root, alloc_flags);
Filipe Mananaa9629592015-05-18 19:11:40 +01008956 unlock_chunks(root->fs_info->chunk_root);
Shaohua Li2f081082015-01-09 10:40:15 -08008957 }
Chris Mason1bbc6212015-04-06 12:46:08 -07008958 mutex_unlock(&root->fs_info->ro_block_group_mutex);
Shaohua Li2f081082015-01-09 10:40:15 -08008959
Yan, Zhengf0486c62010-05-16 10:46:25 -04008960 btrfs_end_transaction(trans, root);
8961 return ret;
8962}
8963
Chris Masonc87f08c2011-02-16 13:57:04 -05008964int btrfs_force_chunk_alloc(struct btrfs_trans_handle *trans,
8965 struct btrfs_root *root, u64 type)
8966{
8967 u64 alloc_flags = get_alloc_profile(root, type);
Josef Bacik698d0082012-09-12 14:08:47 -04008968 return do_chunk_alloc(trans, root, alloc_flags,
Chris Mason0e4f8f82011-04-15 16:05:44 -04008969 CHUNK_ALLOC_FORCE);
Chris Masonc87f08c2011-02-16 13:57:04 -05008970}
8971
Miao Xie6d07bce2011-01-05 10:07:31 +00008972/*
8973 * helper to account the unused space of all the readonly block group in the
Josef Bacik633c0aa2014-10-31 09:49:34 -04008974 * space_info. takes mirrors into account.
Miao Xie6d07bce2011-01-05 10:07:31 +00008975 */
Josef Bacik633c0aa2014-10-31 09:49:34 -04008976u64 btrfs_account_ro_block_groups_free_space(struct btrfs_space_info *sinfo)
Miao Xie6d07bce2011-01-05 10:07:31 +00008977{
8978 struct btrfs_block_group_cache *block_group;
8979 u64 free_bytes = 0;
8980 int factor;
8981
Josef Bacik633c0aa2014-10-31 09:49:34 -04008982 /* It's df, we don't care if it's racey */
8983 if (list_empty(&sinfo->ro_bgs))
8984 return 0;
8985
8986 spin_lock(&sinfo->lock);
8987 list_for_each_entry(block_group, &sinfo->ro_bgs, ro_list) {
Miao Xie6d07bce2011-01-05 10:07:31 +00008988 spin_lock(&block_group->lock);
8989
8990 if (!block_group->ro) {
8991 spin_unlock(&block_group->lock);
8992 continue;
8993 }
8994
8995 if (block_group->flags & (BTRFS_BLOCK_GROUP_RAID1 |
8996 BTRFS_BLOCK_GROUP_RAID10 |
8997 BTRFS_BLOCK_GROUP_DUP))
8998 factor = 2;
8999 else
9000 factor = 1;
9001
9002 free_bytes += (block_group->key.offset -
9003 btrfs_block_group_used(&block_group->item)) *
9004 factor;
9005
9006 spin_unlock(&block_group->lock);
9007 }
Miao Xie6d07bce2011-01-05 10:07:31 +00009008 spin_unlock(&sinfo->lock);
9009
9010 return free_bytes;
9011}
9012
Zhaolei868f4012015-08-05 16:43:27 +08009013void btrfs_dec_block_group_ro(struct btrfs_root *root,
Yan, Zhengf0486c62010-05-16 10:46:25 -04009014 struct btrfs_block_group_cache *cache)
9015{
9016 struct btrfs_space_info *sinfo = cache->space_info;
9017 u64 num_bytes;
9018
9019 BUG_ON(!cache->ro);
9020
9021 spin_lock(&sinfo->lock);
9022 spin_lock(&cache->lock);
Zhaolei868f4012015-08-05 16:43:27 +08009023 if (!--cache->ro) {
9024 num_bytes = cache->key.offset - cache->reserved -
9025 cache->pinned - cache->bytes_super -
9026 btrfs_block_group_used(&cache->item);
9027 sinfo->bytes_readonly -= num_bytes;
9028 list_del_init(&cache->ro_list);
9029 }
Yan, Zhengf0486c62010-05-16 10:46:25 -04009030 spin_unlock(&cache->lock);
9031 spin_unlock(&sinfo->lock);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04009032}
9033
Josef Bacikba1bf482009-09-11 16:11:19 -04009034/*
9035 * checks to see if its even possible to relocate this block group.
9036 *
9037 * @return - -1 if it's not a good idea to relocate this block group, 0 if its
9038 * ok to go ahead and try.
9039 */
9040int btrfs_can_relocate(struct btrfs_root *root, u64 bytenr)
Zheng Yan1a40e232008-09-26 10:09:34 -04009041{
Zheng Yan1a40e232008-09-26 10:09:34 -04009042 struct btrfs_block_group_cache *block_group;
Josef Bacikba1bf482009-09-11 16:11:19 -04009043 struct btrfs_space_info *space_info;
9044 struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices;
9045 struct btrfs_device *device;
Josef Bacik6df9a952013-06-27 13:22:46 -04009046 struct btrfs_trans_handle *trans;
liubocdcb7252011-08-03 10:15:25 +00009047 u64 min_free;
Josef Bacik6719db62011-08-20 08:29:51 -04009048 u64 dev_min = 1;
9049 u64 dev_nr = 0;
Ilya Dryomov4a5e98f2012-03-27 17:09:17 +03009050 u64 target;
liubocdcb7252011-08-03 10:15:25 +00009051 int index;
Josef Bacikba1bf482009-09-11 16:11:19 -04009052 int full = 0;
9053 int ret = 0;
Chris Masonedbd8d42007-12-21 16:27:24 -05009054
Josef Bacikba1bf482009-09-11 16:11:19 -04009055 block_group = btrfs_lookup_block_group(root->fs_info, bytenr);
Zheng Yan1a40e232008-09-26 10:09:34 -04009056
Josef Bacikba1bf482009-09-11 16:11:19 -04009057 /* odd, couldn't find the block group, leave it alone */
9058 if (!block_group)
9059 return -1;
Chris Masonedbd8d42007-12-21 16:27:24 -05009060
liubocdcb7252011-08-03 10:15:25 +00009061 min_free = btrfs_block_group_used(&block_group->item);
9062
Josef Bacikba1bf482009-09-11 16:11:19 -04009063 /* no bytes used, we're good */
liubocdcb7252011-08-03 10:15:25 +00009064 if (!min_free)
Josef Bacikba1bf482009-09-11 16:11:19 -04009065 goto out;
Chris Mason323da792008-05-09 11:46:48 -04009066
Josef Bacikba1bf482009-09-11 16:11:19 -04009067 space_info = block_group->space_info;
9068 spin_lock(&space_info->lock);
Chris Mason323da792008-05-09 11:46:48 -04009069
Josef Bacikba1bf482009-09-11 16:11:19 -04009070 full = space_info->full;
Zheng Yan1a40e232008-09-26 10:09:34 -04009071
Josef Bacikba1bf482009-09-11 16:11:19 -04009072 /*
9073 * if this is the last block group we have in this space, we can't
Chris Mason7ce618d2009-09-22 14:48:44 -04009074 * relocate it unless we're able to allocate a new chunk below.
9075 *
9076 * Otherwise, we need to make sure we have room in the space to handle
9077 * all of the extents from this block group. If we can, we're good
Josef Bacikba1bf482009-09-11 16:11:19 -04009078 */
Chris Mason7ce618d2009-09-22 14:48:44 -04009079 if ((space_info->total_bytes != block_group->key.offset) &&
liubocdcb7252011-08-03 10:15:25 +00009080 (space_info->bytes_used + space_info->bytes_reserved +
9081 space_info->bytes_pinned + space_info->bytes_readonly +
9082 min_free < space_info->total_bytes)) {
Josef Bacikba1bf482009-09-11 16:11:19 -04009083 spin_unlock(&space_info->lock);
9084 goto out;
9085 }
9086 spin_unlock(&space_info->lock);
Zheng Yan1a40e232008-09-26 10:09:34 -04009087
Josef Bacikba1bf482009-09-11 16:11:19 -04009088 /*
9089 * ok we don't have enough space, but maybe we have free space on our
9090 * devices to allocate new chunks for relocation, so loop through our
Ilya Dryomov4a5e98f2012-03-27 17:09:17 +03009091 * alloc devices and guess if we have enough space. if this block
9092 * group is going to be restriped, run checks against the target
9093 * profile instead of the current one.
Josef Bacikba1bf482009-09-11 16:11:19 -04009094 */
9095 ret = -1;
Chris Mason4313b392008-01-03 09:08:48 -05009096
liubocdcb7252011-08-03 10:15:25 +00009097 /*
9098 * index:
9099 * 0: raid10
9100 * 1: raid1
9101 * 2: dup
9102 * 3: raid0
9103 * 4: single
9104 */
Ilya Dryomov4a5e98f2012-03-27 17:09:17 +03009105 target = get_restripe_target(root->fs_info, block_group->flags);
9106 if (target) {
Liu Bo31e50222012-11-21 14:18:10 +00009107 index = __get_raid_index(extended_to_chunk(target));
Ilya Dryomov4a5e98f2012-03-27 17:09:17 +03009108 } else {
9109 /*
9110 * this is just a balance, so if we were marked as full
9111 * we know there is no space for a new chunk
9112 */
9113 if (full)
9114 goto out;
9115
9116 index = get_block_group_index(block_group);
9117 }
9118
Miao Xiee6ec7162013-01-17 05:38:51 +00009119 if (index == BTRFS_RAID_RAID10) {
liubocdcb7252011-08-03 10:15:25 +00009120 dev_min = 4;
Josef Bacik6719db62011-08-20 08:29:51 -04009121 /* Divide by 2 */
9122 min_free >>= 1;
Miao Xiee6ec7162013-01-17 05:38:51 +00009123 } else if (index == BTRFS_RAID_RAID1) {
liubocdcb7252011-08-03 10:15:25 +00009124 dev_min = 2;
Miao Xiee6ec7162013-01-17 05:38:51 +00009125 } else if (index == BTRFS_RAID_DUP) {
Josef Bacik6719db62011-08-20 08:29:51 -04009126 /* Multiply by 2 */
9127 min_free <<= 1;
Miao Xiee6ec7162013-01-17 05:38:51 +00009128 } else if (index == BTRFS_RAID_RAID0) {
liubocdcb7252011-08-03 10:15:25 +00009129 dev_min = fs_devices->rw_devices;
David Sterba47c57132015-02-20 18:43:47 +01009130 min_free = div64_u64(min_free, dev_min);
liubocdcb7252011-08-03 10:15:25 +00009131 }
9132
Josef Bacik6df9a952013-06-27 13:22:46 -04009133 /* We need to do this so that we can look at pending chunks */
9134 trans = btrfs_join_transaction(root);
9135 if (IS_ERR(trans)) {
9136 ret = PTR_ERR(trans);
9137 goto out;
9138 }
9139
Josef Bacikba1bf482009-09-11 16:11:19 -04009140 mutex_lock(&root->fs_info->chunk_mutex);
9141 list_for_each_entry(device, &fs_devices->alloc_list, dev_alloc_list) {
Miao Xie7bfc8372011-01-05 10:07:26 +00009142 u64 dev_offset;
Chris Masonea8c2812008-08-04 23:17:27 -04009143
Josef Bacikba1bf482009-09-11 16:11:19 -04009144 /*
9145 * check to make sure we can actually find a chunk with enough
9146 * space to fit our block group in.
9147 */
Stefan Behrens63a212a2012-11-05 18:29:28 +01009148 if (device->total_bytes > device->bytes_used + min_free &&
9149 !device->is_tgtdev_for_dev_replace) {
Josef Bacik6df9a952013-06-27 13:22:46 -04009150 ret = find_free_dev_extent(trans, device, min_free,
Miao Xie7bfc8372011-01-05 10:07:26 +00009151 &dev_offset, NULL);
Josef Bacikba1bf482009-09-11 16:11:19 -04009152 if (!ret)
liubocdcb7252011-08-03 10:15:25 +00009153 dev_nr++;
9154
9155 if (dev_nr >= dev_min)
Yan73e48b22008-01-03 14:14:39 -05009156 break;
liubocdcb7252011-08-03 10:15:25 +00009157
Josef Bacikba1bf482009-09-11 16:11:19 -04009158 ret = -1;
Yan73e48b22008-01-03 14:14:39 -05009159 }
Chris Masonedbd8d42007-12-21 16:27:24 -05009160 }
Josef Bacikba1bf482009-09-11 16:11:19 -04009161 mutex_unlock(&root->fs_info->chunk_mutex);
Josef Bacik6df9a952013-06-27 13:22:46 -04009162 btrfs_end_transaction(trans, root);
Chris Masonedbd8d42007-12-21 16:27:24 -05009163out:
Josef Bacikba1bf482009-09-11 16:11:19 -04009164 btrfs_put_block_group(block_group);
Chris Masonedbd8d42007-12-21 16:27:24 -05009165 return ret;
9166}
9167
Christoph Hellwigb2950862008-12-02 09:54:17 -05009168static int find_first_block_group(struct btrfs_root *root,
9169 struct btrfs_path *path, struct btrfs_key *key)
Chris Mason0b86a832008-03-24 15:01:56 -04009170{
Chris Mason925baed2008-06-25 16:01:30 -04009171 int ret = 0;
Chris Mason0b86a832008-03-24 15:01:56 -04009172 struct btrfs_key found_key;
9173 struct extent_buffer *leaf;
9174 int slot;
9175
9176 ret = btrfs_search_slot(NULL, root, key, path, 0, 0);
9177 if (ret < 0)
Chris Mason925baed2008-06-25 16:01:30 -04009178 goto out;
9179
Chris Masond3977122009-01-05 21:25:51 -05009180 while (1) {
Chris Mason0b86a832008-03-24 15:01:56 -04009181 slot = path->slots[0];
9182 leaf = path->nodes[0];
9183 if (slot >= btrfs_header_nritems(leaf)) {
9184 ret = btrfs_next_leaf(root, path);
9185 if (ret == 0)
9186 continue;
9187 if (ret < 0)
Chris Mason925baed2008-06-25 16:01:30 -04009188 goto out;
Chris Mason0b86a832008-03-24 15:01:56 -04009189 break;
9190 }
9191 btrfs_item_key_to_cpu(leaf, &found_key, slot);
9192
9193 if (found_key.objectid >= key->objectid &&
Chris Mason925baed2008-06-25 16:01:30 -04009194 found_key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
9195 ret = 0;
9196 goto out;
9197 }
Chris Mason0b86a832008-03-24 15:01:56 -04009198 path->slots[0]++;
9199 }
Chris Mason925baed2008-06-25 16:01:30 -04009200out:
Chris Mason0b86a832008-03-24 15:01:56 -04009201 return ret;
9202}
9203
Josef Bacik0af3d002010-06-21 14:48:16 -04009204void btrfs_put_block_group_cache(struct btrfs_fs_info *info)
9205{
9206 struct btrfs_block_group_cache *block_group;
9207 u64 last = 0;
9208
9209 while (1) {
9210 struct inode *inode;
9211
9212 block_group = btrfs_lookup_first_block_group(info, last);
9213 while (block_group) {
9214 spin_lock(&block_group->lock);
9215 if (block_group->iref)
9216 break;
9217 spin_unlock(&block_group->lock);
9218 block_group = next_block_group(info->tree_root,
9219 block_group);
9220 }
9221 if (!block_group) {
9222 if (last == 0)
9223 break;
9224 last = 0;
9225 continue;
9226 }
9227
9228 inode = block_group->inode;
9229 block_group->iref = 0;
9230 block_group->inode = NULL;
9231 spin_unlock(&block_group->lock);
9232 iput(inode);
9233 last = block_group->key.objectid + block_group->key.offset;
9234 btrfs_put_block_group(block_group);
9235 }
9236}
9237
/*
 * Tear down all in-memory block group and space_info state.
 *
 * Called during the final stages of unmount, when no other task can race
 * with us: every cached block group, caching control, and space_info is
 * released here.  Always returns 0.
 */
int btrfs_free_block_groups(struct btrfs_fs_info *info)
{
	struct btrfs_block_group_cache *block_group;
	struct btrfs_space_info *space_info;
	struct btrfs_caching_control *caching_ctl;
	struct rb_node *n;

	/* Drop any caching controls still linked on the fs-wide list. */
	down_write(&info->commit_root_sem);
	while (!list_empty(&info->caching_block_groups)) {
		caching_ctl = list_entry(info->caching_block_groups.next,
					 struct btrfs_caching_control, list);
		list_del(&caching_ctl->list);
		put_caching_control(caching_ctl);
	}
	up_write(&info->commit_root_sem);

	/*
	 * Block groups queued for deletion hold an extra reference via
	 * ->bg_list; drop those references before freeing the tree below.
	 */
	spin_lock(&info->unused_bgs_lock);
	while (!list_empty(&info->unused_bgs)) {
		block_group = list_first_entry(&info->unused_bgs,
					       struct btrfs_block_group_cache,
					       bg_list);
		list_del_init(&block_group->bg_list);
		btrfs_put_block_group(block_group);
	}
	spin_unlock(&info->unused_bgs_lock);

	spin_lock(&info->block_group_cache_lock);
	while ((n = rb_last(&info->block_group_cache_tree)) != NULL) {
		block_group = rb_entry(n, struct btrfs_block_group_cache,
				       cache_node);
		rb_erase(&block_group->cache_node,
			 &info->block_group_cache_tree);
		RB_CLEAR_NODE(&block_group->cache_node);
		/*
		 * Drop the spinlock while we sleep (wait_block_group_cache_done
		 * below can block); the node is already unlinked so nobody can
		 * find it through the tree anymore.
		 */
		spin_unlock(&info->block_group_cache_lock);

		down_write(&block_group->space_info->groups_sem);
		list_del(&block_group->list);
		up_write(&block_group->space_info->groups_sem);

		if (block_group->cached == BTRFS_CACHE_STARTED)
			wait_block_group_cache_done(block_group);

		/*
		 * We haven't cached this block group, which means we could
		 * possibly have excluded extents on this block group.
		 */
		if (block_group->cached == BTRFS_CACHE_NO ||
		    block_group->cached == BTRFS_CACHE_ERROR)
			free_excluded_extents(info->extent_root, block_group);

		btrfs_remove_free_space_cache(block_group);
		btrfs_put_block_group(block_group);

		/* Reacquire before examining the tree for the next entry. */
		spin_lock(&info->block_group_cache_lock);
	}
	spin_unlock(&info->block_group_cache_lock);

	/* now that all the block groups are freed, go through and
	 * free all the space_info structs.  This is only called during
	 * the final stages of unmount, and so we know nobody is
	 * using them.  We call synchronize_rcu() once before we start,
	 * just to be on the safe side.
	 */
	synchronize_rcu();

	release_global_block_rsv(info);

	while (!list_empty(&info->space_info)) {
		int i;

		space_info = list_entry(info->space_info.next,
					struct btrfs_space_info,
					list);
		/*
		 * With ENOSPC_DEBUG, leftover reservations at unmount are a
		 * bug worth shouting about; dump the accounting for analysis.
		 */
		if (btrfs_test_opt(info->tree_root, ENOSPC_DEBUG)) {
			if (WARN_ON(space_info->bytes_pinned > 0 ||
			    space_info->bytes_reserved > 0 ||
			    space_info->bytes_may_use > 0)) {
				dump_space_info(space_info, 0, 0);
			}
		}
		list_del(&space_info->list);
		/* Release any per-RAID-type sysfs kobjects still registered. */
		for (i = 0; i < BTRFS_NR_RAID_TYPES; i++) {
			struct kobject *kobj;
			kobj = space_info->block_group_kobjs[i];
			space_info->block_group_kobjs[i] = NULL;
			if (kobj) {
				kobject_del(kobj);
				kobject_put(kobj);
			}
		}
		/* Final put frees the space_info via its kobject release. */
		kobject_del(&space_info->kobj);
		kobject_put(&space_info->kobj);
	}
	return 0;
}
9333
/*
 * Link @cache into its space_info's per-RAID-type block group list.
 *
 * If this is the first block group of that RAID type for the space_info,
 * also create and register the corresponding sysfs raid_kobject.  A sysfs
 * failure is logged and ignored; the list insertion has already happened
 * and is not rolled back.
 */
static void __link_block_group(struct btrfs_space_info *space_info,
			       struct btrfs_block_group_cache *cache)
{
	int index = get_block_group_index(cache);
	bool first = false;

	down_write(&space_info->groups_sem);
	/* Remember emptiness before insertion so we know to add the kobject. */
	if (list_empty(&space_info->block_groups[index]))
		first = true;
	list_add_tail(&cache->list, &space_info->block_groups[index]);
	up_write(&space_info->groups_sem);

	if (first) {
		struct raid_kobject *rkobj;
		int ret;

		rkobj = kzalloc(sizeof(*rkobj), GFP_NOFS);
		if (!rkobj)
			goto out_err;
		rkobj->raid_type = index;
		/* kobject_init must precede kobject_add. */
		kobject_init(&rkobj->kobj, &btrfs_raid_ktype);
		ret = kobject_add(&rkobj->kobj, &space_info->kobj,
				  "%s", get_raid_name(index));
		if (ret) {
			/* Put (not kfree): release via the ktype handler. */
			kobject_put(&rkobj->kobj);
			goto out_err;
		}
		space_info->block_group_kobjs[index] = &rkobj->kobj;
	}

	return;
out_err:
	pr_warn("BTRFS: failed to add kobject for block cache. ignoring.\n");
}
9368
Miao Xie920e4a52014-01-15 20:00:55 +08009369static struct btrfs_block_group_cache *
9370btrfs_create_block_group_cache(struct btrfs_root *root, u64 start, u64 size)
9371{
9372 struct btrfs_block_group_cache *cache;
9373
9374 cache = kzalloc(sizeof(*cache), GFP_NOFS);
9375 if (!cache)
9376 return NULL;
9377
9378 cache->free_space_ctl = kzalloc(sizeof(*cache->free_space_ctl),
9379 GFP_NOFS);
9380 if (!cache->free_space_ctl) {
9381 kfree(cache);
9382 return NULL;
9383 }
9384
9385 cache->key.objectid = start;
9386 cache->key.offset = size;
9387 cache->key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
9388
9389 cache->sectorsize = root->sectorsize;
9390 cache->fs_info = root->fs_info;
9391 cache->full_stripe_len = btrfs_full_stripe_len(root,
9392 &root->fs_info->mapping_tree,
9393 start);
Omar Sandoval1e144fb2015-09-29 20:50:37 -07009394 set_free_space_tree_thresholds(cache);
9395
Miao Xie920e4a52014-01-15 20:00:55 +08009396 atomic_set(&cache->count, 1);
9397 spin_lock_init(&cache->lock);
Miao Xiee570fd22014-06-19 10:42:50 +08009398 init_rwsem(&cache->data_rwsem);
Miao Xie920e4a52014-01-15 20:00:55 +08009399 INIT_LIST_HEAD(&cache->list);
9400 INIT_LIST_HEAD(&cache->cluster_list);
Josef Bacik47ab2a62014-09-18 11:20:02 -04009401 INIT_LIST_HEAD(&cache->bg_list);
Josef Bacik633c0aa2014-10-31 09:49:34 -04009402 INIT_LIST_HEAD(&cache->ro_list);
Josef Bacikce93ec52014-11-17 15:45:48 -05009403 INIT_LIST_HEAD(&cache->dirty_list);
Chris Masonc9dc4c62015-04-04 17:14:42 -07009404 INIT_LIST_HEAD(&cache->io_list);
Miao Xie920e4a52014-01-15 20:00:55 +08009405 btrfs_init_free_space_ctl(cache);
Filipe Manana04216822014-11-27 21:14:15 +00009406 atomic_set(&cache->trimming, 0);
Omar Sandovala5ed9182015-09-29 20:50:35 -07009407 mutex_init(&cache->free_space_lock);
Miao Xie920e4a52014-01-15 20:00:55 +08009408
9409 return cache;
9410}
9411
/*
 * Read every block group item from the extent tree at mount time and build
 * the in-memory block group cache, space_info accounting and sysfs links.
 *
 * Also decides whether the free space cache must be invalidated (stale
 * cache generation or the clear_cache mount option) and marks un-mirrored
 * block groups read-only when mirrored profiles are present.
 *
 * Returns 0 on success or a negative errno on failure.
 */
int btrfs_read_block_groups(struct btrfs_root *root)
{
	struct btrfs_path *path;
	int ret;
	struct btrfs_block_group_cache *cache;
	struct btrfs_fs_info *info = root->fs_info;
	struct btrfs_space_info *space_info;
	struct btrfs_key key;
	struct btrfs_key found_key;
	struct extent_buffer *leaf;
	int need_clear = 0;
	u64 cache_gen;

	/* Block group items live in the extent tree. */
	root = info->extent_root;
	key.objectid = 0;
	key.offset = 0;
	key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;
	path->reada = 1;

	/*
	 * A cache generation that doesn't match the superblock generation
	 * means the space cache is stale and must be rebuilt.
	 */
	cache_gen = btrfs_super_cache_generation(root->fs_info->super_copy);
	if (btrfs_test_opt(root, SPACE_CACHE) &&
	    btrfs_super_generation(root->fs_info->super_copy) != cache_gen)
		need_clear = 1;
	if (btrfs_test_opt(root, CLEAR_CACHE))
		need_clear = 1;

	while (1) {
		ret = find_first_block_group(root, path, &key);
		if (ret > 0)
			break;	/* no more block group items */
		if (ret != 0)
			goto error;

		leaf = path->nodes[0];
		btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);

		cache = btrfs_create_block_group_cache(root, found_key.objectid,
						       found_key.offset);
		if (!cache) {
			ret = -ENOMEM;
			goto error;
		}

		if (need_clear) {
			/*
			 * When we mount with old space cache, we need to
			 * set BTRFS_DC_CLEAR and set dirty flag.
			 *
			 * a) Setting 'BTRFS_DC_CLEAR' makes sure that we
			 *    truncate the old free space cache inode and
			 *    setup a new one.
			 * b) Setting 'dirty flag' makes sure that we flush
			 *    the new space cache info onto disk.
			 */
			if (btrfs_test_opt(root, SPACE_CACHE))
				cache->disk_cache_state = BTRFS_DC_CLEAR;
		}

		read_extent_buffer(leaf, &cache->item,
				   btrfs_item_ptr_offset(leaf, path->slots[0]),
				   sizeof(cache->item));
		cache->flags = btrfs_block_group_flags(&cache->item);

		/* Advance the search key past this block group. */
		key.objectid = found_key.objectid + found_key.offset;
		btrfs_release_path(path);

		/*
		 * We need to exclude the super stripes now so that the space
		 * info has super bytes accounted for, otherwise we'll think
		 * we have more space than we actually do.
		 */
		ret = exclude_super_stripes(root, cache);
		if (ret) {
			/*
			 * We may have excluded something, so call this just in
			 * case.
			 */
			free_excluded_extents(root, cache);
			btrfs_put_block_group(cache);
			goto error;
		}

		/*
		 * check for two cases, either we are full, and therefore
		 * don't need to bother with the caching work since we won't
		 * find any space, or we are empty, and we can just add all
		 * the space in and be done with it.  This saves us _alot_ of
		 * time, particularly in the full case.
		 */
		if (found_key.offset == btrfs_block_group_used(&cache->item)) {
			cache->last_byte_to_unpin = (u64)-1;
			cache->cached = BTRFS_CACHE_FINISHED;
			free_excluded_extents(root, cache);
		} else if (btrfs_block_group_used(&cache->item) == 0) {
			cache->last_byte_to_unpin = (u64)-1;
			cache->cached = BTRFS_CACHE_FINISHED;
			add_new_free_space(cache, root->fs_info,
					   found_key.objectid,
					   found_key.objectid +
					   found_key.offset);
			free_excluded_extents(root, cache);
		}

		ret = btrfs_add_block_group_cache(root->fs_info, cache);
		if (ret) {
			btrfs_remove_free_space_cache(cache);
			btrfs_put_block_group(cache);
			goto error;
		}

		ret = update_space_info(info, cache->flags, found_key.offset,
					btrfs_block_group_used(&cache->item),
					&space_info);
		if (ret) {
			/* Undo the rbtree insertion done just above. */
			btrfs_remove_free_space_cache(cache);
			spin_lock(&info->block_group_cache_lock);
			rb_erase(&cache->cache_node,
				 &info->block_group_cache_tree);
			RB_CLEAR_NODE(&cache->cache_node);
			spin_unlock(&info->block_group_cache_lock);
			btrfs_put_block_group(cache);
			goto error;
		}

		cache->space_info = space_info;
		spin_lock(&cache->space_info->lock);
		cache->space_info->bytes_readonly += cache->bytes_super;
		spin_unlock(&cache->space_info->lock);

		__link_block_group(space_info, cache);

		set_avail_alloc_bits(root->fs_info, cache->flags);
		if (btrfs_chunk_readonly(root, cache->key.objectid)) {
			inc_block_group_ro(cache, 1);
		} else if (btrfs_block_group_used(&cache->item) == 0) {
			/* Empty block group: queue it for possible deletion. */
			spin_lock(&info->unused_bgs_lock);
			/* Should always be true but just in case. */
			if (list_empty(&cache->bg_list)) {
				btrfs_get_block_group(cache);
				list_add_tail(&cache->bg_list,
					      &info->unused_bgs);
			}
			spin_unlock(&info->unused_bgs_lock);
		}
	}

	list_for_each_entry_rcu(space_info, &root->fs_info->space_info, list) {
		if (!(get_alloc_profile(root, space_info->flags) &
		      (BTRFS_BLOCK_GROUP_RAID10 |
		       BTRFS_BLOCK_GROUP_RAID1 |
		       BTRFS_BLOCK_GROUP_RAID5 |
		       BTRFS_BLOCK_GROUP_RAID6 |
		       BTRFS_BLOCK_GROUP_DUP)))
			continue;
		/*
		 * avoid allocating from un-mirrored block group if there are
		 * mirrored block groups.
		 */
		list_for_each_entry(cache,
				&space_info->block_groups[BTRFS_RAID_RAID0],
				list)
			inc_block_group_ro(cache, 1);
		list_for_each_entry(cache,
				&space_info->block_groups[BTRFS_RAID_SINGLE],
				list)
			inc_block_group_ro(cache, 1);
	}

	init_global_block_rsv(info);
	ret = 0;
error:
	btrfs_free_path(path);
	return ret;
}
Chris Mason6324fbf2008-03-24 15:01:59 -04009589
/*
 * Insert the on-disk items for every block group created during this
 * transaction (queued on trans->new_bgs).
 *
 * Any failure aborts the transaction; after the first failure the loop
 * only unlinks the remaining entries without touching disk.  The
 * can_flush_pending_bgs flag is cleared for the duration so the work we
 * trigger here cannot recursively flush this same list, and restored on
 * exit.
 */
void btrfs_create_pending_block_groups(struct btrfs_trans_handle *trans,
				       struct btrfs_root *root)
{
	struct btrfs_block_group_cache *block_group, *tmp;
	struct btrfs_root *extent_root = root->fs_info->extent_root;
	struct btrfs_block_group_item item;
	struct btrfs_key key;
	int ret = 0;
	bool can_flush_pending_bgs = trans->can_flush_pending_bgs;

	trans->can_flush_pending_bgs = false;
	list_for_each_entry_safe(block_group, tmp, &trans->new_bgs, bg_list) {
		/* A previous iteration failed: just drain the list. */
		if (ret)
			goto next;

		/* Snapshot item and key under the lock before writing them. */
		spin_lock(&block_group->lock);
		memcpy(&item, &block_group->item, sizeof(item));
		memcpy(&key, &block_group->key, sizeof(key));
		spin_unlock(&block_group->lock);

		ret = btrfs_insert_item(trans, extent_root, &key, &item,
					sizeof(item));
		if (ret)
			btrfs_abort_transaction(trans, extent_root, ret);
		ret = btrfs_finish_chunk_alloc(trans, extent_root,
					       key.objectid, key.offset);
		if (ret)
			btrfs_abort_transaction(trans, extent_root, ret);
		add_block_group_free_space(trans, root->fs_info, block_group);
		/* already aborted the transaction if it failed. */
next:
		list_del_init(&block_group->bg_list);
	}
	trans->can_flush_pending_bgs = can_flush_pending_bgs;
}
9625
/*
 * Create a new in-memory block group for a freshly allocated chunk at
 * @chunk_offset spanning @size bytes, with @bytes_used already consumed.
 *
 * The block group is inserted into the rbtree, linked to its space_info,
 * and queued on trans->new_bgs; the on-disk item is written later by
 * btrfs_create_pending_block_groups().  Returns 0 or a negative errno,
 * unwinding any partial setup on failure.
 */
int btrfs_make_block_group(struct btrfs_trans_handle *trans,
			   struct btrfs_root *root, u64 bytes_used,
			   u64 type, u64 chunk_objectid, u64 chunk_offset,
			   u64 size)
{
	int ret;
	struct btrfs_root *extent_root;
	struct btrfs_block_group_cache *cache;

	extent_root = root->fs_info->extent_root;

	/* New chunks invalidate any in-progress tree-log shortcut. */
	btrfs_set_log_full_commit(root->fs_info, trans);

	cache = btrfs_create_block_group_cache(root, chunk_offset, size);
	if (!cache)
		return -ENOMEM;

	btrfs_set_block_group_used(&cache->item, bytes_used);
	btrfs_set_block_group_chunk_objectid(&cache->item, chunk_objectid);
	btrfs_set_block_group_flags(&cache->item, type);

	cache->flags = type;
	cache->last_byte_to_unpin = (u64)-1;
	/* A brand-new group has nothing to cache from disk. */
	cache->cached = BTRFS_CACHE_FINISHED;
	cache->needs_free_space = 1;
	ret = exclude_super_stripes(root, cache);
	if (ret) {
		/*
		 * We may have excluded something, so call this just in
		 * case.
		 */
		free_excluded_extents(root, cache);
		btrfs_put_block_group(cache);
		return ret;
	}

	/* Everything except the excluded super stripes is free space. */
	add_new_free_space(cache, root->fs_info, chunk_offset,
			   chunk_offset + size);

	free_excluded_extents(root, cache);

	/*
	 * Call to ensure the corresponding space_info object is created and
	 * assigned to our block group, but don't update its counters just yet.
	 * We want our bg to be added to the rbtree with its ->space_info set.
	 */
	ret = update_space_info(root->fs_info, cache->flags, 0, 0,
				&cache->space_info);
	if (ret) {
		btrfs_remove_free_space_cache(cache);
		btrfs_put_block_group(cache);
		return ret;
	}

	ret = btrfs_add_block_group_cache(root->fs_info, cache);
	if (ret) {
		btrfs_remove_free_space_cache(cache);
		btrfs_put_block_group(cache);
		return ret;
	}

	/*
	 * Now that our block group has its ->space_info set and is inserted in
	 * the rbtree, update the space info's counters.
	 */
	ret = update_space_info(root->fs_info, cache->flags, size, bytes_used,
				&cache->space_info);
	if (ret) {
		/* Undo the rbtree insertion made just above. */
		btrfs_remove_free_space_cache(cache);
		spin_lock(&root->fs_info->block_group_cache_lock);
		rb_erase(&cache->cache_node,
			 &root->fs_info->block_group_cache_tree);
		RB_CLEAR_NODE(&cache->cache_node);
		spin_unlock(&root->fs_info->block_group_cache_lock);
		btrfs_put_block_group(cache);
		return ret;
	}
	update_global_block_rsv(root->fs_info);

	spin_lock(&cache->space_info->lock);
	cache->space_info->bytes_readonly += cache->bytes_super;
	spin_unlock(&cache->space_info->lock);

	__link_block_group(cache->space_info, cache);

	/* Defer the on-disk item insertion to transaction commit time. */
	list_add_tail(&cache->bg_list, &trans->new_bgs);

	set_avail_alloc_bits(extent_root->fs_info, type);

	return 0;
}
Zheng Yan1a40e232008-09-26 10:09:34 -04009717
Ilya Dryomov10ea00f2012-01-16 22:04:47 +02009718static void clear_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags)
9719{
Ilya Dryomov899c81e2012-03-27 17:09:16 +03009720 u64 extra_flags = chunk_to_extended(flags) &
9721 BTRFS_EXTENDED_PROFILE_MASK;
Ilya Dryomov10ea00f2012-01-16 22:04:47 +02009722
Miao Xiede98ced2013-01-29 10:13:12 +00009723 write_seqlock(&fs_info->profiles_lock);
Ilya Dryomov10ea00f2012-01-16 22:04:47 +02009724 if (flags & BTRFS_BLOCK_GROUP_DATA)
9725 fs_info->avail_data_alloc_bits &= ~extra_flags;
9726 if (flags & BTRFS_BLOCK_GROUP_METADATA)
9727 fs_info->avail_metadata_alloc_bits &= ~extra_flags;
9728 if (flags & BTRFS_BLOCK_GROUP_SYSTEM)
9729 fs_info->avail_system_alloc_bits &= ~extra_flags;
Miao Xiede98ced2013-01-29 10:13:12 +00009730 write_sequnlock(&fs_info->profiles_lock);
Ilya Dryomov10ea00f2012-01-16 22:04:47 +02009731}
9732
Zheng Yan1a40e232008-09-26 10:09:34 -04009733int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
Filipe Manana04216822014-11-27 21:14:15 +00009734 struct btrfs_root *root, u64 group_start,
9735 struct extent_map *em)
Zheng Yan1a40e232008-09-26 10:09:34 -04009736{
9737 struct btrfs_path *path;
9738 struct btrfs_block_group_cache *block_group;
Chris Mason44fb5512009-06-04 15:34:51 -04009739 struct btrfs_free_cluster *cluster;
Josef Bacik0af3d002010-06-21 14:48:16 -04009740 struct btrfs_root *tree_root = root->fs_info->tree_root;
Zheng Yan1a40e232008-09-26 10:09:34 -04009741 struct btrfs_key key;
Josef Bacik0af3d002010-06-21 14:48:16 -04009742 struct inode *inode;
Jeff Mahoneyc1895442014-05-27 12:59:57 -04009743 struct kobject *kobj = NULL;
Zheng Yan1a40e232008-09-26 10:09:34 -04009744 int ret;
Ilya Dryomov10ea00f2012-01-16 22:04:47 +02009745 int index;
Josef Bacik89a55892010-10-14 14:52:27 -04009746 int factor;
Filipe Manana4f69cb92014-11-26 15:28:51 +00009747 struct btrfs_caching_control *caching_ctl = NULL;
Filipe Manana04216822014-11-27 21:14:15 +00009748 bool remove_em;
Zheng Yan1a40e232008-09-26 10:09:34 -04009749
Zheng Yan1a40e232008-09-26 10:09:34 -04009750 root = root->fs_info->extent_root;
9751
9752 block_group = btrfs_lookup_block_group(root->fs_info, group_start);
9753 BUG_ON(!block_group);
Yan Zhengc146afa2008-11-12 14:34:12 -05009754 BUG_ON(!block_group->ro);
Zheng Yan1a40e232008-09-26 10:09:34 -04009755
liubo9f7c43c2011-03-07 02:13:33 +00009756 /*
9757 * Free the reserved super bytes from this block group before
9758 * remove it.
9759 */
9760 free_excluded_extents(root, block_group);
9761
Zheng Yan1a40e232008-09-26 10:09:34 -04009762 memcpy(&key, &block_group->key, sizeof(key));
Ilya Dryomov10ea00f2012-01-16 22:04:47 +02009763 index = get_block_group_index(block_group);
Josef Bacik89a55892010-10-14 14:52:27 -04009764 if (block_group->flags & (BTRFS_BLOCK_GROUP_DUP |
9765 BTRFS_BLOCK_GROUP_RAID1 |
9766 BTRFS_BLOCK_GROUP_RAID10))
9767 factor = 2;
9768 else
9769 factor = 1;
Zheng Yan1a40e232008-09-26 10:09:34 -04009770
Chris Mason44fb5512009-06-04 15:34:51 -04009771 /* make sure this block group isn't part of an allocation cluster */
9772 cluster = &root->fs_info->data_alloc_cluster;
9773 spin_lock(&cluster->refill_lock);
9774 btrfs_return_cluster_to_free_space(block_group, cluster);
9775 spin_unlock(&cluster->refill_lock);
9776
9777 /*
9778 * make sure this block group isn't part of a metadata
9779 * allocation cluster
9780 */
9781 cluster = &root->fs_info->meta_alloc_cluster;
9782 spin_lock(&cluster->refill_lock);
9783 btrfs_return_cluster_to_free_space(block_group, cluster);
9784 spin_unlock(&cluster->refill_lock);
9785
Zheng Yan1a40e232008-09-26 10:09:34 -04009786 path = btrfs_alloc_path();
Mark Fashehd8926bb2011-07-13 10:38:47 -07009787 if (!path) {
9788 ret = -ENOMEM;
9789 goto out;
9790 }
Zheng Yan1a40e232008-09-26 10:09:34 -04009791
Chris Mason1bbc6212015-04-06 12:46:08 -07009792 /*
9793 * get the inode first so any iput calls done for the io_list
9794 * aren't the final iput (no unlinks allowed now)
9795 */
Ilya Dryomov10b2f342011-10-02 13:56:53 +03009796 inode = lookup_free_space_inode(tree_root, block_group, path);
Chris Mason1bbc6212015-04-06 12:46:08 -07009797
9798 mutex_lock(&trans->transaction->cache_write_mutex);
9799 /*
9800 * make sure our free spache cache IO is done before remove the
9801 * free space inode
9802 */
9803 spin_lock(&trans->transaction->dirty_bgs_lock);
9804 if (!list_empty(&block_group->io_list)) {
9805 list_del_init(&block_group->io_list);
9806
9807 WARN_ON(!IS_ERR(inode) && inode != block_group->io_ctl.inode);
9808
9809 spin_unlock(&trans->transaction->dirty_bgs_lock);
9810 btrfs_wait_cache_io(root, trans, block_group,
9811 &block_group->io_ctl, path,
9812 block_group->key.objectid);
9813 btrfs_put_block_group(block_group);
9814 spin_lock(&trans->transaction->dirty_bgs_lock);
9815 }
9816
9817 if (!list_empty(&block_group->dirty_list)) {
9818 list_del_init(&block_group->dirty_list);
9819 btrfs_put_block_group(block_group);
9820 }
9821 spin_unlock(&trans->transaction->dirty_bgs_lock);
9822 mutex_unlock(&trans->transaction->cache_write_mutex);
9823
Josef Bacik0af3d002010-06-21 14:48:16 -04009824 if (!IS_ERR(inode)) {
Tsutomu Itohb5324022011-07-19 07:27:20 +00009825 ret = btrfs_orphan_add(trans, inode);
Jeff Mahoney79787ea2012-03-12 16:03:00 +01009826 if (ret) {
9827 btrfs_add_delayed_iput(inode);
9828 goto out;
9829 }
Josef Bacik0af3d002010-06-21 14:48:16 -04009830 clear_nlink(inode);
9831 /* One for the block groups ref */
9832 spin_lock(&block_group->lock);
9833 if (block_group->iref) {
9834 block_group->iref = 0;
9835 block_group->inode = NULL;
9836 spin_unlock(&block_group->lock);
9837 iput(inode);
9838 } else {
9839 spin_unlock(&block_group->lock);
9840 }
9841 /* One for our lookup ref */
Josef Bacik455757c2011-09-19 12:26:24 -04009842 btrfs_add_delayed_iput(inode);
Josef Bacik0af3d002010-06-21 14:48:16 -04009843 }
9844
9845 key.objectid = BTRFS_FREE_SPACE_OBJECTID;
9846 key.offset = block_group->key.objectid;
9847 key.type = 0;
9848
9849 ret = btrfs_search_slot(trans, tree_root, &key, path, -1, 1);
9850 if (ret < 0)
9851 goto out;
9852 if (ret > 0)
David Sterbab3b4aa72011-04-21 01:20:15 +02009853 btrfs_release_path(path);
Josef Bacik0af3d002010-06-21 14:48:16 -04009854 if (ret == 0) {
9855 ret = btrfs_del_item(trans, tree_root, path);
9856 if (ret)
9857 goto out;
David Sterbab3b4aa72011-04-21 01:20:15 +02009858 btrfs_release_path(path);
Josef Bacik0af3d002010-06-21 14:48:16 -04009859 }
9860
Yan Zheng3dfdb932009-01-21 10:49:16 -05009861 spin_lock(&root->fs_info->block_group_cache_lock);
Zheng Yan1a40e232008-09-26 10:09:34 -04009862 rb_erase(&block_group->cache_node,
9863 &root->fs_info->block_group_cache_tree);
Filipe Manana292cbd52014-11-26 15:28:50 +00009864 RB_CLEAR_NODE(&block_group->cache_node);
Liu Boa1897fd2012-12-27 09:01:23 +00009865
9866 if (root->fs_info->first_logical_byte == block_group->key.objectid)
9867 root->fs_info->first_logical_byte = (u64)-1;
Yan Zheng3dfdb932009-01-21 10:49:16 -05009868 spin_unlock(&root->fs_info->block_group_cache_lock);
Josef Bacik817d52f2009-07-13 21:29:25 -04009869
Josef Bacik80eb2342008-10-29 14:49:05 -04009870 down_write(&block_group->space_info->groups_sem);
Chris Mason44fb5512009-06-04 15:34:51 -04009871 /*
9872 * we must use list_del_init so people can check to see if they
9873 * are still on the list after taking the semaphore
9874 */
9875 list_del_init(&block_group->list);
Jeff Mahoney6ab0a202013-11-01 13:07:04 -04009876 if (list_empty(&block_group->space_info->block_groups[index])) {
Jeff Mahoneyc1895442014-05-27 12:59:57 -04009877 kobj = block_group->space_info->block_group_kobjs[index];
9878 block_group->space_info->block_group_kobjs[index] = NULL;
Ilya Dryomov10ea00f2012-01-16 22:04:47 +02009879 clear_avail_alloc_bits(root->fs_info, block_group->flags);
Jeff Mahoney6ab0a202013-11-01 13:07:04 -04009880 }
Josef Bacik80eb2342008-10-29 14:49:05 -04009881 up_write(&block_group->space_info->groups_sem);
Jeff Mahoneyc1895442014-05-27 12:59:57 -04009882 if (kobj) {
9883 kobject_del(kobj);
9884 kobject_put(kobj);
9885 }
Zheng Yan1a40e232008-09-26 10:09:34 -04009886
Filipe Manana4f69cb92014-11-26 15:28:51 +00009887 if (block_group->has_caching_ctl)
9888 caching_ctl = get_caching_control(block_group);
Josef Bacik817d52f2009-07-13 21:29:25 -04009889 if (block_group->cached == BTRFS_CACHE_STARTED)
Yan Zheng11833d62009-09-11 16:11:19 -04009890 wait_block_group_cache_done(block_group);
Filipe Manana4f69cb92014-11-26 15:28:51 +00009891 if (block_group->has_caching_ctl) {
9892 down_write(&root->fs_info->commit_root_sem);
9893 if (!caching_ctl) {
9894 struct btrfs_caching_control *ctl;
9895
9896 list_for_each_entry(ctl,
9897 &root->fs_info->caching_block_groups, list)
9898 if (ctl->block_group == block_group) {
9899 caching_ctl = ctl;
9900 atomic_inc(&caching_ctl->count);
9901 break;
9902 }
9903 }
9904 if (caching_ctl)
9905 list_del_init(&caching_ctl->list);
9906 up_write(&root->fs_info->commit_root_sem);
9907 if (caching_ctl) {
9908 /* Once for the caching bgs list and once for us. */
9909 put_caching_control(caching_ctl);
9910 put_caching_control(caching_ctl);
9911 }
9912 }
Josef Bacik817d52f2009-07-13 21:29:25 -04009913
Josef Bacikce93ec52014-11-17 15:45:48 -05009914 spin_lock(&trans->transaction->dirty_bgs_lock);
9915 if (!list_empty(&block_group->dirty_list)) {
Chris Mason1bbc6212015-04-06 12:46:08 -07009916 WARN_ON(1);
9917 }
9918 if (!list_empty(&block_group->io_list)) {
9919 WARN_ON(1);
Josef Bacikce93ec52014-11-17 15:45:48 -05009920 }
9921 spin_unlock(&trans->transaction->dirty_bgs_lock);
Josef Bacik817d52f2009-07-13 21:29:25 -04009922 btrfs_remove_free_space_cache(block_group);
9923
Yan Zhengc146afa2008-11-12 14:34:12 -05009924 spin_lock(&block_group->space_info->lock);
Filipe Manana75c68e92015-01-16 13:24:40 +00009925 list_del_init(&block_group->ro_list);
Zhao Lei18d018a2015-02-24 20:07:44 +08009926
9927 if (btrfs_test_opt(root, ENOSPC_DEBUG)) {
9928 WARN_ON(block_group->space_info->total_bytes
9929 < block_group->key.offset);
9930 WARN_ON(block_group->space_info->bytes_readonly
9931 < block_group->key.offset);
9932 WARN_ON(block_group->space_info->disk_total
9933 < block_group->key.offset * factor);
9934 }
Yan Zhengc146afa2008-11-12 14:34:12 -05009935 block_group->space_info->total_bytes -= block_group->key.offset;
9936 block_group->space_info->bytes_readonly -= block_group->key.offset;
Josef Bacik89a55892010-10-14 14:52:27 -04009937 block_group->space_info->disk_total -= block_group->key.offset * factor;
Zhao Lei18d018a2015-02-24 20:07:44 +08009938
Yan Zhengc146afa2008-11-12 14:34:12 -05009939 spin_unlock(&block_group->space_info->lock);
Chris Mason283bb192009-07-24 16:30:55 -04009940
Josef Bacik0af3d002010-06-21 14:48:16 -04009941 memcpy(&key, &block_group->key, sizeof(key));
9942
Filipe Manana04216822014-11-27 21:14:15 +00009943 lock_chunks(root);
Filipe Manana495e64f2014-12-02 18:07:30 +00009944 if (!list_empty(&em->list)) {
9945 /* We're in the transaction->pending_chunks list. */
9946 free_extent_map(em);
9947 }
Filipe Manana04216822014-11-27 21:14:15 +00009948 spin_lock(&block_group->lock);
9949 block_group->removed = 1;
9950 /*
9951 * At this point trimming can't start on this block group, because we
9952 * removed the block group from the tree fs_info->block_group_cache_tree
9953 * so no one can't find it anymore and even if someone already got this
9954 * block group before we removed it from the rbtree, they have already
9955 * incremented block_group->trimming - if they didn't, they won't find
9956 * any free space entries because we already removed them all when we
9957 * called btrfs_remove_free_space_cache().
9958 *
9959 * And we must not remove the extent map from the fs_info->mapping_tree
9960 * to prevent the same logical address range and physical device space
9961 * ranges from being reused for a new block group. This is because our
9962 * fs trim operation (btrfs_trim_fs() / btrfs_ioctl_fitrim()) is
9963 * completely transactionless, so while it is trimming a range the
9964 * currently running transaction might finish and a new one start,
9965 * allowing for new block groups to be created that can reuse the same
9966 * physical device locations unless we take this special care.
Jeff Mahoneye33e17e2015-06-15 09:41:19 -04009967 *
9968 * There may also be an implicit trim operation if the file system
9969 * is mounted with -odiscard. The same protections must remain
9970 * in place until the extents have been discarded completely when
9971 * the transaction commit has completed.
Filipe Manana04216822014-11-27 21:14:15 +00009972 */
9973 remove_em = (atomic_read(&block_group->trimming) == 0);
9974 /*
9975 * Make sure a trimmer task always sees the em in the pinned_chunks list
9976 * if it sees block_group->removed == 1 (needs to lock block_group->lock
9977 * before checking block_group->removed).
9978 */
9979 if (!remove_em) {
9980 /*
9981 * Our em might be in trans->transaction->pending_chunks which
9982 * is protected by fs_info->chunk_mutex ([lock|unlock]_chunks),
9983 * and so is the fs_info->pinned_chunks list.
9984 *
9985 * So at this point we must be holding the chunk_mutex to avoid
9986 * any races with chunk allocation (more specifically at
9987 * volumes.c:contains_pending_extent()), to ensure it always
9988 * sees the em, either in the pending_chunks list or in the
9989 * pinned_chunks list.
9990 */
9991 list_move_tail(&em->list, &root->fs_info->pinned_chunks);
9992 }
9993 spin_unlock(&block_group->lock);
Filipe Manana04216822014-11-27 21:14:15 +00009994
9995 if (remove_em) {
9996 struct extent_map_tree *em_tree;
9997
9998 em_tree = &root->fs_info->mapping_tree.map_tree;
9999 write_lock(&em_tree->lock);
Filipe Manana8dbcd102014-12-02 18:07:49 +000010000 /*
10001 * The em might be in the pending_chunks list, so make sure the
10002 * chunk mutex is locked, since remove_extent_mapping() will
10003 * delete us from that list.
10004 */
Filipe Manana04216822014-11-27 21:14:15 +000010005 remove_extent_mapping(em_tree, em);
10006 write_unlock(&em_tree->lock);
10007 /* once for the tree */
10008 free_extent_map(em);
10009 }
10010
Filipe Manana8dbcd102014-12-02 18:07:49 +000010011 unlock_chunks(root);
10012
Omar Sandoval1e144fb2015-09-29 20:50:37 -070010013 ret = remove_block_group_free_space(trans, root->fs_info, block_group);
10014 if (ret)
10015 goto out;
10016
Chris Masonfa9c0d792009-04-03 09:47:43 -040010017 btrfs_put_block_group(block_group);
10018 btrfs_put_block_group(block_group);
Zheng Yan1a40e232008-09-26 10:09:34 -040010019
10020 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
10021 if (ret > 0)
10022 ret = -EIO;
10023 if (ret < 0)
10024 goto out;
10025
10026 ret = btrfs_del_item(trans, root, path);
10027out:
10028 btrfs_free_path(path);
10029 return ret;
10030}
liuboacce9522011-01-06 19:30:25 +080010031
/*
 * Process the unused_bgs list and remove any that don't have any allocated
 * space inside of them.
 */
void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)
{
	struct btrfs_block_group_cache *block_group;
	struct btrfs_space_info *space_info;
	struct btrfs_root *root = fs_info->extent_root;
	struct btrfs_trans_handle *trans;
	int ret = 0;

	/* Bail out if the filesystem is not fully open (e.g. during teardown). */
	if (!fs_info->open)
		return;

	spin_lock(&fs_info->unused_bgs_lock);
	while (!list_empty(&fs_info->unused_bgs)) {
		u64 start, end;
		int trimming;

		block_group = list_first_entry(&fs_info->unused_bgs,
					       struct btrfs_block_group_cache,
					       bg_list);
		space_info = block_group->space_info;
		list_del_init(&block_group->bg_list);
		/*
		 * If a previous iteration failed (ret is still set), only drop
		 * the reference that was taken when the group was queued; also
		 * skip groups belonging to mixed (data+metadata) space infos.
		 */
		if (ret || btrfs_mixed_space_info(space_info)) {
			btrfs_put_block_group(block_group);
			continue;
		}
		spin_unlock(&fs_info->unused_bgs_lock);

		mutex_lock(&root->fs_info->delete_unused_bgs_mutex);

		/* Don't want to race with allocators so take the groups_sem */
		down_write(&space_info->groups_sem);
		spin_lock(&block_group->lock);
		if (block_group->reserved ||
		    btrfs_block_group_used(&block_group->item) ||
		    block_group->ro) {
			/*
			 * We want to bail if we made new allocations or have
			 * outstanding allocations in this block group.  We do
			 * the ro check in case balance is currently acting on
			 * this block group.
			 */
			spin_unlock(&block_group->lock);
			up_write(&space_info->groups_sem);
			goto next;
		}
		spin_unlock(&block_group->lock);

		/* We don't want to force the issue, only flip if it's ok. */
		ret = inc_block_group_ro(block_group, 0);
		up_write(&space_info->groups_sem);
		if (ret < 0) {
			/* Couldn't mark read-only; not an error, just skip it. */
			ret = 0;
			goto next;
		}

		/*
		 * Want to do this before we do anything else so we can recover
		 * properly if we fail to join the transaction.
		 */
		/* 1 for btrfs_orphan_reserve_metadata() */
		trans = btrfs_start_transaction(root, 1);
		if (IS_ERR(trans)) {
			/* Undo the read-only flip taken above before bailing. */
			btrfs_dec_block_group_ro(root, block_group);
			ret = PTR_ERR(trans);
			goto next;
		}

		/*
		 * We could have pending pinned extents for this block group,
		 * just delete them, we don't care about them anymore.
		 */
		start = block_group->key.objectid;
		end = start + block_group->key.offset - 1;
		/*
		 * Hold the unused_bg_unpin_mutex lock to avoid racing with
		 * btrfs_finish_extent_commit(). If we are at transaction N,
		 * another task might be running finish_extent_commit() for the
		 * previous transaction N - 1, and have seen a range belonging
		 * to the block group in freed_extents[] before we were able to
		 * clear the whole block group range from freed_extents[]. This
		 * means that task can lookup for the block group after we
		 * unpinned it from freed_extents[] and removed it, leading to
		 * a BUG_ON() at btrfs_unpin_extent_range().
		 */
		mutex_lock(&fs_info->unused_bg_unpin_mutex);
		ret = clear_extent_bits(&fs_info->freed_extents[0], start, end,
					EXTENT_DIRTY, GFP_NOFS);
		if (ret) {
			mutex_unlock(&fs_info->unused_bg_unpin_mutex);
			btrfs_dec_block_group_ro(root, block_group);
			goto end_trans;
		}
		ret = clear_extent_bits(&fs_info->freed_extents[1], start, end,
					EXTENT_DIRTY, GFP_NOFS);
		if (ret) {
			mutex_unlock(&fs_info->unused_bg_unpin_mutex);
			btrfs_dec_block_group_ro(root, block_group);
			goto end_trans;
		}
		mutex_unlock(&fs_info->unused_bg_unpin_mutex);

		/* Reset pinned so btrfs_put_block_group doesn't complain */
		spin_lock(&space_info->lock);
		spin_lock(&block_group->lock);

		/* Move the group's pinned bytes over to readonly accounting. */
		space_info->bytes_pinned -= block_group->pinned;
		space_info->bytes_readonly += block_group->pinned;
		percpu_counter_add(&space_info->total_bytes_pinned,
				   -block_group->pinned);
		block_group->pinned = 0;

		spin_unlock(&block_group->lock);
		spin_unlock(&space_info->lock);

		/* DISCARD can flip during remount */
		trimming = btrfs_test_opt(root, DISCARD);

		/* Implicit trim during transaction commit. */
		if (trimming)
			btrfs_get_block_group_trimming(block_group);

		/*
		 * Btrfs_remove_chunk will abort the transaction if things go
		 * horribly wrong.
		 */
		ret = btrfs_remove_chunk(trans, root,
					 block_group->key.objectid);

		if (ret) {
			if (trimming)
				btrfs_put_block_group_trimming(block_group);
			goto end_trans;
		}

		/*
		 * If we're not mounted with -odiscard, we can just forget
		 * about this block group. Otherwise we'll need to wait
		 * until transaction commit to do the actual discard.
		 */
		if (trimming) {
			WARN_ON(!list_empty(&block_group->bg_list));
			spin_lock(&trans->transaction->deleted_bgs_lock);
			list_move(&block_group->bg_list,
				  &trans->transaction->deleted_bgs);
			spin_unlock(&trans->transaction->deleted_bgs_lock);
			/* Extra reference held until the commit-time discard. */
			btrfs_get_block_group(block_group);
		}
end_trans:
		btrfs_end_transaction(trans, root);
next:
		mutex_unlock(&root->fs_info->delete_unused_bgs_mutex);
		btrfs_put_block_group(block_group);
		spin_lock(&fs_info->unused_bgs_lock);
	}
	spin_unlock(&fs_info->unused_bgs_lock);
}
10192
liuboc59021f2011-03-07 02:13:14 +000010193int btrfs_init_space_info(struct btrfs_fs_info *fs_info)
10194{
10195 struct btrfs_space_info *space_info;
liubo1aba86d2011-04-08 08:44:37 +000010196 struct btrfs_super_block *disk_super;
10197 u64 features;
10198 u64 flags;
10199 int mixed = 0;
liuboc59021f2011-03-07 02:13:14 +000010200 int ret;
10201
David Sterba6c417612011-04-13 15:41:04 +020010202 disk_super = fs_info->super_copy;
liubo1aba86d2011-04-08 08:44:37 +000010203 if (!btrfs_super_root(disk_super))
10204 return 1;
liuboc59021f2011-03-07 02:13:14 +000010205
liubo1aba86d2011-04-08 08:44:37 +000010206 features = btrfs_super_incompat_flags(disk_super);
10207 if (features & BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)
10208 mixed = 1;
liuboc59021f2011-03-07 02:13:14 +000010209
liubo1aba86d2011-04-08 08:44:37 +000010210 flags = BTRFS_BLOCK_GROUP_SYSTEM;
10211 ret = update_space_info(fs_info, flags, 0, 0, &space_info);
liuboc59021f2011-03-07 02:13:14 +000010212 if (ret)
liubo1aba86d2011-04-08 08:44:37 +000010213 goto out;
liuboc59021f2011-03-07 02:13:14 +000010214
liubo1aba86d2011-04-08 08:44:37 +000010215 if (mixed) {
10216 flags = BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_DATA;
10217 ret = update_space_info(fs_info, flags, 0, 0, &space_info);
10218 } else {
10219 flags = BTRFS_BLOCK_GROUP_METADATA;
10220 ret = update_space_info(fs_info, flags, 0, 0, &space_info);
10221 if (ret)
10222 goto out;
10223
10224 flags = BTRFS_BLOCK_GROUP_DATA;
10225 ret = update_space_info(fs_info, flags, 0, 0, &space_info);
10226 }
10227out:
liuboc59021f2011-03-07 02:13:14 +000010228 return ret;
10229}
10230
liuboacce9522011-01-06 19:30:25 +080010231int btrfs_error_unpin_extent_range(struct btrfs_root *root, u64 start, u64 end)
10232{
Filipe Manana678886b2014-12-07 21:31:47 +000010233 return unpin_extent_range(root, start, end, false);
liuboacce9522011-01-06 19:30:25 +080010234}
10235
/*
 * It used to be that old block groups would be left around forever.
 * Iterating over them would be enough to trim unused space.  Since we
 * now automatically remove them, we also need to iterate over unallocated
 * space.
 *
 * We don't want a transaction for this since the discard may take a
 * substantial amount of time.  We don't require that a transaction be
 * running, but we do need to take a running transaction into account
 * to ensure that we're not discarding chunks that were released in
 * the current transaction.
 *
 * Holding the chunks lock will prevent other threads from allocating
 * or releasing chunks, but it won't prevent a running transaction
 * from committing and releasing the memory that the pending chunks
 * list head uses.  For that, we need to take a reference to the
 * transaction.
 */
static int btrfs_trim_free_extents(struct btrfs_device *device,
				   u64 minlen, u64 *trimmed)
{
	u64 start = 0, len = 0;
	int ret;

	*trimmed = 0;

	/* Not writeable = nothing to do. */
	if (!device->writeable)
		return 0;

	/* No free space = nothing to do. */
	if (device->total_bytes <= device->bytes_used)
		return 0;

	ret = 0;

	/* Walk the device's free extents from offset 0, discarding each. */
	while (1) {
		struct btrfs_fs_info *fs_info = device->dev_root->fs_info;
		struct btrfs_transaction *trans;
		u64 bytes;

		/* Interruptible lock so a signal can abort a long trim. */
		ret = mutex_lock_interruptible(&fs_info->chunk_mutex);
		if (ret)
			return ret;

		down_read(&fs_info->commit_root_sem);

		/*
		 * Take a reference to the running transaction (if any) so
		 * it cannot release its pending-chunks list while
		 * find_free_dev_extent_start() consults it.
		 */
		spin_lock(&fs_info->trans_lock);
		trans = fs_info->running_transaction;
		if (trans)
			atomic_inc(&trans->use_count);
		spin_unlock(&fs_info->trans_lock);

		ret = find_free_dev_extent_start(trans, device, minlen, start,
						 &start, &len);
		if (trans)
			btrfs_put_transaction(trans);

		if (ret) {
			up_read(&fs_info->commit_root_sem);
			mutex_unlock(&fs_info->chunk_mutex);
			/* -ENOSPC just means no more free space to trim. */
			if (ret == -ENOSPC)
				ret = 0;
			break;
		}

		/* Discard while still holding the locks found above. */
		ret = btrfs_issue_discard(device->bdev, start, len, &bytes);
		up_read(&fs_info->commit_root_sem);
		mutex_unlock(&fs_info->chunk_mutex);

		if (ret)
			break;

		start += len;
		*trimmed += bytes;

		if (fatal_signal_pending(current)) {
			ret = -ERESTARTSYS;
			break;
		}

		/* Drop the CPU between iterations; this can be a long walk. */
		cond_resched();
	}

	return ret;
}
10322
/*
 * Implement the FITRIM ioctl: walk every block group overlapping the
 * requested range and discard its free extents, then trim unallocated
 * space on each device in the alloc list.  On return, range->len holds
 * the total number of bytes actually trimmed.
 */
int btrfs_trim_fs(struct btrfs_root *root, struct fstrim_range *range)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct btrfs_block_group_cache *cache = NULL;
	struct btrfs_device *device;
	struct list_head *devices;
	u64 group_trimmed;
	u64 start;
	u64 end;
	u64 trimmed = 0;
	u64 total_bytes = btrfs_super_total_bytes(fs_info->super_copy);
	int ret = 0;

	/*
	 * try to trim all FS space, our block group may start from non-zero.
	 *
	 * NOTE(review): a sub-range request (range->len != total_bytes) uses
	 * btrfs_lookup_block_group(), which presumably finds nothing if
	 * range->start falls below the first block group — verify whether
	 * such requests silently trim nothing.
	 */
	if (range->len == total_bytes)
		cache = btrfs_lookup_first_block_group(fs_info, range->start);
	else
		cache = btrfs_lookup_block_group(fs_info, range->start);

	while (cache) {
		/* Block group starts past the requested range: done. */
		if (cache->key.objectid >= (range->start + range->len)) {
			btrfs_put_block_group(cache);
			break;
		}

		/* Clamp the trim window to this block group's extent. */
		start = max(range->start, cache->key.objectid);
		end = min(range->start + range->len,
			  cache->key.objectid + cache->key.offset);

		if (end - start >= range->minlen) {
			/* Free-space info must be fully cached before trimming. */
			if (!block_group_cache_done(cache)) {
				ret = cache_block_group(cache, 0);
				if (ret) {
					btrfs_put_block_group(cache);
					break;
				}
				ret = wait_block_group_cache_done(cache);
				if (ret) {
					btrfs_put_block_group(cache);
					break;
				}
			}
			ret = btrfs_trim_block_group(cache,
						     &group_trimmed,
						     start,
						     end,
						     range->minlen);

			trimmed += group_trimmed;
			if (ret) {
				btrfs_put_block_group(cache);
				break;
			}
		}

		/* next_block_group() drops the ref on 'cache' for us. */
		cache = next_block_group(fs_info->tree_root, cache);
	}

	/* Also trim device space not allocated to any chunk. */
	mutex_lock(&root->fs_info->fs_devices->device_list_mutex);
	devices = &root->fs_info->fs_devices->alloc_list;
	list_for_each_entry(device, devices, dev_alloc_list) {
		ret = btrfs_trim_free_extents(device, range->minlen,
					      &group_trimmed);
		if (ret)
			break;

		trimmed += group_trimmed;
	}
	mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);

	/* Report back how much was actually trimmed. */
	range->len = trimmed;
	return ret;
}
Miao Xie8257b2d2014-03-06 13:38:19 +080010398
/*
 * btrfs_{start,end}_write_no_snapshoting() are similar to
 * mnt_{want,drop}_write(), they are used to prevent some tasks from writing
 * data into the page cache through nocow before the subvolume is snapshoted,
 * but flush the data into disk after the snapshot creation, or to prevent
 * operations while snapshoting is ongoing and that cause the snapshot to be
 * inconsistent (writes followed by expanding truncates for example).
 */
void btrfs_end_write_no_snapshoting(struct btrfs_root *root)
{
	/* Drop the writer count taken by btrfs_start_write_no_snapshoting(). */
	percpu_counter_dec(&root->subv_writers->counter);
	/*
	 * Make sure counter is updated before we wake up
	 * waiters.
	 */
	smp_mb();
	if (waitqueue_active(&root->subv_writers->wait))
		wake_up(&root->subv_writers->wait);
}
10418
/*
 * Try to enter a "writing, no snapshot" section for this root.
 *
 * Returns 1 on success, in which case the caller must pair it with
 * btrfs_end_write_no_snapshoting(); returns 0 if a snapshot is pending
 * and the write must back off (no counter is held in that case).
 */
int btrfs_start_write_no_snapshoting(struct btrfs_root *root)
{
	/* Fast path: snapshot creation already announced, back off. */
	if (atomic_read(&root->will_be_snapshoted))
		return 0;

	percpu_counter_inc(&root->subv_writers->counter);
	/*
	 * Make sure counter is updated before we check for snapshot creation.
	 */
	smp_mb();
	if (atomic_read(&root->will_be_snapshoted)) {
		/* Lost the race with snapshot start: undo our increment. */
		btrfs_end_write_no_snapshoting(root);
		return 0;
	}
	return 1;
}