blob: 174f4f0311dacb49836cb919e60e1aa65fc98050 [file] [log] [blame]
David Sterbac1d7c512018-04-03 19:23:33 +02001// SPDX-License-Identifier: GPL-2.0
Chris Mason6cbd5572007-06-12 09:07:21 -04002/*
3 * Copyright (C) 2007 Oracle. All rights reserved.
Chris Mason6cbd5572007-06-12 09:07:21 -04004 */
David Sterbac1d7c512018-04-03 19:23:33 +02005
Zach Brownec6b9102007-07-11 10:00:37 -04006#include <linux/sched.h>
Ingo Molnarf361bf42017-02-03 23:47:37 +01007#include <linux/sched/signal.h>
Chris Masonedbd8d42007-12-21 16:27:24 -05008#include <linux/pagemap.h>
Chris Masonec44a352008-04-28 15:29:52 -04009#include <linux/writeback.h>
David Woodhouse21af8042008-08-12 14:13:26 +010010#include <linux/blkdev.h>
Chris Masonb7a9f292009-02-04 09:23:45 -050011#include <linux/sort.h>
Chris Mason4184ea72009-03-10 12:39:20 -040012#include <linux/rcupdate.h>
Josef Bacik817d52f2009-07-13 21:29:25 -040013#include <linux/kthread.h>
Tejun Heo5a0e3ad2010-03-24 17:04:11 +090014#include <linux/slab.h>
David Sterbadff51cd2011-06-14 12:52:17 +020015#include <linux/ratelimit.h>
Josef Bacikb150a4f2013-06-19 15:00:04 -040016#include <linux/percpu_counter.h>
Josef Bacik69fe2d72017-10-19 14:15:57 -040017#include <linux/lockdep.h>
Nikolay Borisov9678c542018-01-08 11:45:05 +020018#include <linux/crc32c.h>
Miao Xie995946d2014-04-02 19:51:06 +080019#include "tree-log.h"
Chris Masonfec577f2007-02-26 10:40:21 -050020#include "disk-io.h"
21#include "print-tree.h"
Chris Mason0b86a832008-03-24 15:01:56 -040022#include "volumes.h"
David Woodhouse53b381b2013-01-29 18:40:14 -050023#include "raid56.h"
Chris Mason925baed2008-06-25 16:01:30 -040024#include "locking.h"
Chris Masonfa9c0d792009-04-03 09:47:43 -040025#include "free-space-cache.h"
Omar Sandoval1e144fb2015-09-29 20:50:37 -070026#include "free-space-tree.h"
Miao Xie3fed40c2012-09-13 04:51:36 -060027#include "math.h"
Jeff Mahoney6ab0a202013-11-01 13:07:04 -040028#include "sysfs.h"
Josef Bacikfcebe452014-05-13 17:30:47 -070029#include "qgroup.h"
Josef Bacikfd708b82017-09-29 15:43:50 -040030#include "ref-verify.h"
Chris Masonfec577f2007-02-26 10:40:21 -050031
Arne Jansen709c0482011-09-12 12:22:57 +020032#undef SCRAMBLE_DELAYED_REFS
33
Miao Xie9e622d62012-01-26 15:01:12 -050034/*
35 * control flags for do_chunk_alloc's force field
Chris Mason0e4f8f82011-04-15 16:05:44 -040036 * CHUNK_ALLOC_NO_FORCE means to only allocate a chunk
37 * if we really need one.
38 *
Chris Mason0e4f8f82011-04-15 16:05:44 -040039 * CHUNK_ALLOC_LIMITED means to only try and allocate one
40 * if we have very few chunks already allocated. This is
41 * used as part of the clustering code to help make sure
42 * we have a good pool of storage to cluster in, without
43 * filling the FS with empty chunks
44 *
Miao Xie9e622d62012-01-26 15:01:12 -050045 * CHUNK_ALLOC_FORCE means it must try to allocate one
46 *
Chris Mason0e4f8f82011-04-15 16:05:44 -040047 */
48enum {
49 CHUNK_ALLOC_NO_FORCE = 0,
Miao Xie9e622d62012-01-26 15:01:12 -050050 CHUNK_ALLOC_LIMITED = 1,
51 CHUNK_ALLOC_FORCE = 2,
Chris Mason0e4f8f82011-04-15 16:05:44 -040052};
53
Qu Wenruo9f9b8e82018-10-24 20:24:01 +080054/*
55 * Declare a helper function to detect underflow of various space info members
56 */
57#define DECLARE_SPACE_INFO_UPDATE(name) \
58static inline void update_##name(struct btrfs_space_info *sinfo, \
59 s64 bytes) \
60{ \
61 if (bytes < 0 && sinfo->name < -bytes) { \
62 WARN_ON(1); \
63 sinfo->name = 0; \
64 return; \
65 } \
66 sinfo->name += bytes; \
67}
68
69DECLARE_SPACE_INFO_UPDATE(bytes_may_use);
Lu Fengqie2907c12018-10-24 20:24:02 +080070DECLARE_SPACE_INFO_UPDATE(bytes_pinned);
Qu Wenruo9f9b8e82018-10-24 20:24:01 +080071
Yan Zheng5d4f98a2009-06-10 10:45:14 -040072static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
Nikolay Borisove72cb922018-06-20 15:48:57 +030073 struct btrfs_delayed_ref_node *node, u64 parent,
74 u64 root_objectid, u64 owner_objectid,
75 u64 owner_offset, int refs_to_drop,
76 struct btrfs_delayed_extent_op *extra_op);
Yan Zheng5d4f98a2009-06-10 10:45:14 -040077static void __run_delayed_extent_op(struct btrfs_delayed_extent_op *extent_op,
78 struct extent_buffer *leaf,
79 struct btrfs_extent_item *ei);
80static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
Yan Zheng5d4f98a2009-06-10 10:45:14 -040081 u64 parent, u64 root_objectid,
82 u64 flags, u64 owner, u64 offset,
83 struct btrfs_key *ins, int ref_mod);
84static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
Nikolay Borisov4e6bd4e2018-05-21 12:27:21 +030085 struct btrfs_delayed_ref_node *node,
Nikolay Borisov21ebfbe2018-05-21 12:27:22 +030086 struct btrfs_delayed_extent_op *extent_op);
Nikolay Borisov01458822018-06-20 15:49:05 +030087static int do_chunk_alloc(struct btrfs_trans_handle *trans, u64 flags,
Josef Bacik698d0082012-09-12 14:08:47 -040088 int force);
Yan Zheng11833d62009-09-11 16:11:19 -040089static int find_next_key(struct btrfs_path *path, int level,
90 struct btrfs_key *key);
Jeff Mahoneyab8d0fc2016-09-20 10:05:02 -040091static void dump_space_info(struct btrfs_fs_info *fs_info,
92 struct btrfs_space_info *info, u64 bytes,
Josef Bacik9ed74f22009-09-11 16:12:44 -040093 int dump_block_groups);
Josef Bacik5d803662013-02-07 16:06:02 -050094static int block_rsv_use_bytes(struct btrfs_block_rsv *block_rsv,
95 u64 num_bytes);
Josef Bacik957780e2016-05-17 13:30:55 -040096static void space_info_add_new_bytes(struct btrfs_fs_info *fs_info,
97 struct btrfs_space_info *space_info,
98 u64 num_bytes);
99static void space_info_add_old_bytes(struct btrfs_fs_info *fs_info,
100 struct btrfs_space_info *space_info,
101 u64 num_bytes);
Josef Bacik6a632092009-02-20 11:00:09 -0500102
Josef Bacik817d52f2009-07-13 21:29:25 -0400103static noinline int
104block_group_cache_done(struct btrfs_block_group_cache *cache)
105{
106 smp_mb();
Josef Bacik36cce922013-08-05 11:15:21 -0400107 return cache->cached == BTRFS_CACHE_FINISHED ||
108 cache->cached == BTRFS_CACHE_ERROR;
Josef Bacik817d52f2009-07-13 21:29:25 -0400109}
110
Josef Bacik0f9dd462008-09-23 13:14:11 -0400111static int block_group_bits(struct btrfs_block_group_cache *cache, u64 bits)
112{
113 return (cache->flags & bits) == bits;
114}
115
Filipe Manana758f2df2015-11-19 11:45:48 +0000116void btrfs_get_block_group(struct btrfs_block_group_cache *cache)
Josef Bacik11dfe352009-11-13 20:12:59 +0000117{
118 atomic_inc(&cache->count);
119}
120
121void btrfs_put_block_group(struct btrfs_block_group_cache *cache)
122{
Yan, Zhengf0486c62010-05-16 10:46:25 -0400123 if (atomic_dec_and_test(&cache->count)) {
124 WARN_ON(cache->pinned > 0);
125 WARN_ON(cache->reserved > 0);
Qu Wenruo0966a7b2017-04-14 08:35:54 +0800126
127 /*
128 * If not empty, someone is still holding mutex of
129 * full_stripe_lock, which can only be released by caller.
130 * And it will definitely cause use-after-free when caller
131 * tries to release full stripe lock.
132 *
133 * No better way to resolve, but only to warn.
134 */
135 WARN_ON(!RB_EMPTY_ROOT(&cache->full_stripe_locks_root.root));
Li Zefan34d52cb2011-03-29 13:46:06 +0800136 kfree(cache->free_space_ctl);
Josef Bacik11dfe352009-11-13 20:12:59 +0000137 kfree(cache);
Yan, Zhengf0486c62010-05-16 10:46:25 -0400138 }
Josef Bacik11dfe352009-11-13 20:12:59 +0000139}
140
Josef Bacik0f9dd462008-09-23 13:14:11 -0400141/*
142 * this adds the block group to the fs_info rb tree for the block group
143 * cache
144 */
Christoph Hellwigb2950862008-12-02 09:54:17 -0500145static int btrfs_add_block_group_cache(struct btrfs_fs_info *info,
Josef Bacik0f9dd462008-09-23 13:14:11 -0400146 struct btrfs_block_group_cache *block_group)
147{
148 struct rb_node **p;
149 struct rb_node *parent = NULL;
150 struct btrfs_block_group_cache *cache;
151
152 spin_lock(&info->block_group_cache_lock);
153 p = &info->block_group_cache_tree.rb_node;
154
155 while (*p) {
156 parent = *p;
157 cache = rb_entry(parent, struct btrfs_block_group_cache,
158 cache_node);
159 if (block_group->key.objectid < cache->key.objectid) {
160 p = &(*p)->rb_left;
161 } else if (block_group->key.objectid > cache->key.objectid) {
162 p = &(*p)->rb_right;
163 } else {
164 spin_unlock(&info->block_group_cache_lock);
165 return -EEXIST;
166 }
167 }
168
169 rb_link_node(&block_group->cache_node, parent, p);
170 rb_insert_color(&block_group->cache_node,
171 &info->block_group_cache_tree);
Liu Boa1897fd2012-12-27 09:01:23 +0000172
173 if (info->first_logical_byte > block_group->key.objectid)
174 info->first_logical_byte = block_group->key.objectid;
175
Josef Bacik0f9dd462008-09-23 13:14:11 -0400176 spin_unlock(&info->block_group_cache_lock);
177
178 return 0;
179}
180
181/*
182 * This will return the block group at or after bytenr if contains is 0, else
183 * it will return the block group that contains the bytenr
184 */
185static struct btrfs_block_group_cache *
186block_group_cache_tree_search(struct btrfs_fs_info *info, u64 bytenr,
187 int contains)
188{
189 struct btrfs_block_group_cache *cache, *ret = NULL;
190 struct rb_node *n;
191 u64 end, start;
192
193 spin_lock(&info->block_group_cache_lock);
194 n = info->block_group_cache_tree.rb_node;
195
196 while (n) {
197 cache = rb_entry(n, struct btrfs_block_group_cache,
198 cache_node);
199 end = cache->key.objectid + cache->key.offset - 1;
200 start = cache->key.objectid;
201
202 if (bytenr < start) {
203 if (!contains && (!ret || start < ret->key.objectid))
204 ret = cache;
205 n = n->rb_left;
206 } else if (bytenr > start) {
207 if (contains && bytenr <= end) {
208 ret = cache;
209 break;
210 }
211 n = n->rb_right;
212 } else {
213 ret = cache;
214 break;
215 }
216 }
Liu Boa1897fd2012-12-27 09:01:23 +0000217 if (ret) {
Josef Bacik11dfe352009-11-13 20:12:59 +0000218 btrfs_get_block_group(ret);
Liu Boa1897fd2012-12-27 09:01:23 +0000219 if (bytenr == 0 && info->first_logical_byte > ret->key.objectid)
220 info->first_logical_byte = ret->key.objectid;
221 }
Josef Bacik0f9dd462008-09-23 13:14:11 -0400222 spin_unlock(&info->block_group_cache_lock);
223
224 return ret;
225}
226
Jeff Mahoney2ff7e612016-06-22 18:54:24 -0400227static int add_excluded_extent(struct btrfs_fs_info *fs_info,
Yan Zheng11833d62009-09-11 16:11:19 -0400228 u64 start, u64 num_bytes)
Josef Bacik817d52f2009-07-13 21:29:25 -0400229{
Yan Zheng11833d62009-09-11 16:11:19 -0400230 u64 end = start + num_bytes - 1;
Jeff Mahoney0b246af2016-06-22 18:54:23 -0400231 set_extent_bits(&fs_info->freed_extents[0],
David Sterbaceeb0ae2016-04-26 23:54:39 +0200232 start, end, EXTENT_UPTODATE);
Jeff Mahoney0b246af2016-06-22 18:54:23 -0400233 set_extent_bits(&fs_info->freed_extents[1],
David Sterbaceeb0ae2016-04-26 23:54:39 +0200234 start, end, EXTENT_UPTODATE);
Yan Zheng11833d62009-09-11 16:11:19 -0400235 return 0;
Josef Bacik817d52f2009-07-13 21:29:25 -0400236}
237
Nikolay Borisov9e715da2018-06-20 15:49:08 +0300238static void free_excluded_extents(struct btrfs_block_group_cache *cache)
Josef Bacik817d52f2009-07-13 21:29:25 -0400239{
Nikolay Borisov9e715da2018-06-20 15:49:08 +0300240 struct btrfs_fs_info *fs_info = cache->fs_info;
Yan Zheng11833d62009-09-11 16:11:19 -0400241 u64 start, end;
242
243 start = cache->key.objectid;
244 end = start + cache->key.offset - 1;
245
Jeff Mahoney0b246af2016-06-22 18:54:23 -0400246 clear_extent_bits(&fs_info->freed_extents[0],
David Sterba91166212016-04-26 23:54:39 +0200247 start, end, EXTENT_UPTODATE);
Jeff Mahoney0b246af2016-06-22 18:54:23 -0400248 clear_extent_bits(&fs_info->freed_extents[1],
David Sterba91166212016-04-26 23:54:39 +0200249 start, end, EXTENT_UPTODATE);
Yan Zheng11833d62009-09-11 16:11:19 -0400250}
251
Nikolay Borisov3c4da652018-06-20 15:49:09 +0300252static int exclude_super_stripes(struct btrfs_block_group_cache *cache)
Yan Zheng11833d62009-09-11 16:11:19 -0400253{
Nikolay Borisov3c4da652018-06-20 15:49:09 +0300254 struct btrfs_fs_info *fs_info = cache->fs_info;
Josef Bacik817d52f2009-07-13 21:29:25 -0400255 u64 bytenr;
256 u64 *logical;
257 int stripe_len;
258 int i, nr, ret;
259
Yan, Zheng06b23312009-11-26 09:31:11 +0000260 if (cache->key.objectid < BTRFS_SUPER_INFO_OFFSET) {
261 stripe_len = BTRFS_SUPER_INFO_OFFSET - cache->key.objectid;
262 cache->bytes_super += stripe_len;
Jeff Mahoney2ff7e612016-06-22 18:54:24 -0400263 ret = add_excluded_extent(fs_info, cache->key.objectid,
Yan, Zheng06b23312009-11-26 09:31:11 +0000264 stripe_len);
Josef Bacik835d9742013-03-19 12:13:25 -0400265 if (ret)
266 return ret;
Yan, Zheng06b23312009-11-26 09:31:11 +0000267 }
268
Josef Bacik817d52f2009-07-13 21:29:25 -0400269 for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
270 bytenr = btrfs_sb_offset(i);
Jeff Mahoney0b246af2016-06-22 18:54:23 -0400271 ret = btrfs_rmap_block(fs_info, cache->key.objectid,
Nikolay Borisov63a9c7b2018-05-04 10:53:05 +0300272 bytenr, &logical, &nr, &stripe_len);
Josef Bacik835d9742013-03-19 12:13:25 -0400273 if (ret)
274 return ret;
Yan Zheng11833d62009-09-11 16:11:19 -0400275
Josef Bacik817d52f2009-07-13 21:29:25 -0400276 while (nr--) {
Josef Bacik51bf5f02013-04-23 12:55:21 -0400277 u64 start, len;
278
279 if (logical[nr] > cache->key.objectid +
280 cache->key.offset)
281 continue;
282
283 if (logical[nr] + stripe_len <= cache->key.objectid)
284 continue;
285
286 start = logical[nr];
287 if (start < cache->key.objectid) {
288 start = cache->key.objectid;
289 len = (logical[nr] + stripe_len) - start;
290 } else {
291 len = min_t(u64, stripe_len,
292 cache->key.objectid +
293 cache->key.offset - start);
294 }
295
296 cache->bytes_super += len;
Jeff Mahoney2ff7e612016-06-22 18:54:24 -0400297 ret = add_excluded_extent(fs_info, start, len);
Josef Bacik835d9742013-03-19 12:13:25 -0400298 if (ret) {
299 kfree(logical);
300 return ret;
301 }
Josef Bacik817d52f2009-07-13 21:29:25 -0400302 }
Yan Zheng11833d62009-09-11 16:11:19 -0400303
Josef Bacik817d52f2009-07-13 21:29:25 -0400304 kfree(logical);
305 }
Josef Bacik817d52f2009-07-13 21:29:25 -0400306 return 0;
307}
308
Yan Zheng11833d62009-09-11 16:11:19 -0400309static struct btrfs_caching_control *
310get_caching_control(struct btrfs_block_group_cache *cache)
311{
312 struct btrfs_caching_control *ctl;
313
314 spin_lock(&cache->lock);
Josef Bacikdde5abe2010-09-16 16:17:03 -0400315 if (!cache->caching_ctl) {
316 spin_unlock(&cache->lock);
317 return NULL;
318 }
319
Yan Zheng11833d62009-09-11 16:11:19 -0400320 ctl = cache->caching_ctl;
Elena Reshetova1e4f4712017-03-03 10:55:14 +0200321 refcount_inc(&ctl->count);
Yan Zheng11833d62009-09-11 16:11:19 -0400322 spin_unlock(&cache->lock);
323 return ctl;
324}
325
326static void put_caching_control(struct btrfs_caching_control *ctl)
327{
Elena Reshetova1e4f4712017-03-03 10:55:14 +0200328 if (refcount_dec_and_test(&ctl->count))
Yan Zheng11833d62009-09-11 16:11:19 -0400329 kfree(ctl);
330}
331
Josef Bacikd0bd4562015-09-23 14:54:14 -0400332#ifdef CONFIG_BTRFS_DEBUG
Jeff Mahoney2ff7e612016-06-22 18:54:24 -0400333static void fragment_free_space(struct btrfs_block_group_cache *block_group)
Josef Bacikd0bd4562015-09-23 14:54:14 -0400334{
Jeff Mahoney2ff7e612016-06-22 18:54:24 -0400335 struct btrfs_fs_info *fs_info = block_group->fs_info;
Josef Bacikd0bd4562015-09-23 14:54:14 -0400336 u64 start = block_group->key.objectid;
337 u64 len = block_group->key.offset;
338 u64 chunk = block_group->flags & BTRFS_BLOCK_GROUP_METADATA ?
Jeff Mahoney0b246af2016-06-22 18:54:23 -0400339 fs_info->nodesize : fs_info->sectorsize;
Josef Bacikd0bd4562015-09-23 14:54:14 -0400340 u64 step = chunk << 1;
341
342 while (len > chunk) {
343 btrfs_remove_free_space(block_group, start, chunk);
344 start += step;
345 if (len < step)
346 len = 0;
347 else
348 len -= step;
349 }
350}
351#endif
352
Josef Bacik0f9dd462008-09-23 13:14:11 -0400353/*
354 * this is only called by cache_block_group, since we could have freed extents
355 * we need to check the pinned_extents for any extents that can't be used yet
356 * since their free space will be released as soon as the transaction commits.
357 */
Omar Sandovala5ed9182015-09-29 20:50:35 -0700358u64 add_new_free_space(struct btrfs_block_group_cache *block_group,
Nikolay Borisov4457c1c2018-05-10 15:44:45 +0300359 u64 start, u64 end)
Josef Bacik0f9dd462008-09-23 13:14:11 -0400360{
Nikolay Borisov4457c1c2018-05-10 15:44:45 +0300361 struct btrfs_fs_info *info = block_group->fs_info;
Josef Bacik817d52f2009-07-13 21:29:25 -0400362 u64 extent_start, extent_end, size, total_added = 0;
Josef Bacik0f9dd462008-09-23 13:14:11 -0400363 int ret;
364
365 while (start < end) {
Yan Zheng11833d62009-09-11 16:11:19 -0400366 ret = find_first_extent_bit(info->pinned_extents, start,
Josef Bacik0f9dd462008-09-23 13:14:11 -0400367 &extent_start, &extent_end,
Josef Bacike6138872012-09-27 17:07:30 -0400368 EXTENT_DIRTY | EXTENT_UPTODATE,
369 NULL);
Josef Bacik0f9dd462008-09-23 13:14:11 -0400370 if (ret)
371 break;
372
Yan, Zheng06b23312009-11-26 09:31:11 +0000373 if (extent_start <= start) {
Josef Bacik0f9dd462008-09-23 13:14:11 -0400374 start = extent_end + 1;
375 } else if (extent_start > start && extent_start < end) {
376 size = extent_start - start;
Josef Bacik817d52f2009-07-13 21:29:25 -0400377 total_added += size;
Josef Bacikea6a4782008-11-20 12:16:16 -0500378 ret = btrfs_add_free_space(block_group, start,
379 size);
Jeff Mahoney79787ea2012-03-12 16:03:00 +0100380 BUG_ON(ret); /* -ENOMEM or logic error */
Josef Bacik0f9dd462008-09-23 13:14:11 -0400381 start = extent_end + 1;
382 } else {
383 break;
384 }
385 }
386
387 if (start < end) {
388 size = end - start;
Josef Bacik817d52f2009-07-13 21:29:25 -0400389 total_added += size;
Josef Bacikea6a4782008-11-20 12:16:16 -0500390 ret = btrfs_add_free_space(block_group, start, size);
Jeff Mahoney79787ea2012-03-12 16:03:00 +0100391 BUG_ON(ret); /* -ENOMEM or logic error */
Josef Bacik0f9dd462008-09-23 13:14:11 -0400392 }
393
Josef Bacik817d52f2009-07-13 21:29:25 -0400394 return total_added;
Josef Bacik0f9dd462008-09-23 13:14:11 -0400395}
396
Omar Sandoval73fa48b2015-09-29 20:50:33 -0700397static int load_extent_tree_free(struct btrfs_caching_control *caching_ctl)
Chris Masone37c9e62007-05-09 20:13:14 -0400398{
Jeff Mahoney0b246af2016-06-22 18:54:23 -0400399 struct btrfs_block_group_cache *block_group = caching_ctl->block_group;
400 struct btrfs_fs_info *fs_info = block_group->fs_info;
401 struct btrfs_root *extent_root = fs_info->extent_root;
Chris Masone37c9e62007-05-09 20:13:14 -0400402 struct btrfs_path *path;
Chris Mason5f39d392007-10-15 16:14:19 -0400403 struct extent_buffer *leaf;
Yan Zheng11833d62009-09-11 16:11:19 -0400404 struct btrfs_key key;
Josef Bacik817d52f2009-07-13 21:29:25 -0400405 u64 total_found = 0;
Yan Zheng11833d62009-09-11 16:11:19 -0400406 u64 last = 0;
407 u32 nritems;
Omar Sandoval73fa48b2015-09-29 20:50:33 -0700408 int ret;
Josef Bacikd0bd4562015-09-23 14:54:14 -0400409 bool wakeup = true;
Chris Masonf510cfe2007-10-15 16:14:48 -0400410
Chris Masone37c9e62007-05-09 20:13:14 -0400411 path = btrfs_alloc_path();
412 if (!path)
Omar Sandoval73fa48b2015-09-29 20:50:33 -0700413 return -ENOMEM;
Yan7d7d6062007-09-14 16:15:28 -0400414
Josef Bacik817d52f2009-07-13 21:29:25 -0400415 last = max_t(u64, block_group->key.objectid, BTRFS_SUPER_INFO_OFFSET);
Yan Zheng11833d62009-09-11 16:11:19 -0400416
Josef Bacikd0bd4562015-09-23 14:54:14 -0400417#ifdef CONFIG_BTRFS_DEBUG
418 /*
419 * If we're fragmenting we don't want to make anybody think we can
420 * allocate from this block group until we've had a chance to fragment
421 * the free space.
422 */
Jeff Mahoney2ff7e612016-06-22 18:54:24 -0400423 if (btrfs_should_fragment_free_space(block_group))
Josef Bacikd0bd4562015-09-23 14:54:14 -0400424 wakeup = false;
425#endif
Chris Mason5cd57b22008-06-25 16:01:30 -0400426 /*
Josef Bacik817d52f2009-07-13 21:29:25 -0400427 * We don't want to deadlock with somebody trying to allocate a new
428 * extent for the extent root while also trying to search the extent
429 * root to add free space. So we skip locking and search the commit
430 * root, since its read-only
Chris Mason5cd57b22008-06-25 16:01:30 -0400431 */
432 path->skip_locking = 1;
Josef Bacik817d52f2009-07-13 21:29:25 -0400433 path->search_commit_root = 1;
David Sterbae4058b52015-11-27 16:31:35 +0100434 path->reada = READA_FORWARD;
Josef Bacik817d52f2009-07-13 21:29:25 -0400435
Yan Zhenge4404d62008-12-12 10:03:26 -0500436 key.objectid = last;
Chris Masone37c9e62007-05-09 20:13:14 -0400437 key.offset = 0;
Yan Zheng11833d62009-09-11 16:11:19 -0400438 key.type = BTRFS_EXTENT_ITEM_KEY;
Chris Mason013f1b12009-07-31 14:57:55 -0400439
Liu Bo52ee28d2013-07-11 17:51:15 +0800440next:
Yan Zheng11833d62009-09-11 16:11:19 -0400441 ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
Chris Masone37c9e62007-05-09 20:13:14 -0400442 if (ret < 0)
Omar Sandoval73fa48b2015-09-29 20:50:33 -0700443 goto out;
Yan Zhenga512bbf2008-12-08 16:46:26 -0500444
Yan Zheng11833d62009-09-11 16:11:19 -0400445 leaf = path->nodes[0];
446 nritems = btrfs_header_nritems(leaf);
447
Chris Masond3977122009-01-05 21:25:51 -0500448 while (1) {
David Sterba7841cb22011-05-31 18:07:27 +0200449 if (btrfs_fs_closing(fs_info) > 1) {
Yan Zhengf25784b2009-07-28 08:41:57 -0400450 last = (u64)-1;
Josef Bacik817d52f2009-07-13 21:29:25 -0400451 break;
Yan Zhengf25784b2009-07-28 08:41:57 -0400452 }
Josef Bacik817d52f2009-07-13 21:29:25 -0400453
Yan Zheng11833d62009-09-11 16:11:19 -0400454 if (path->slots[0] < nritems) {
455 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
456 } else {
457 ret = find_next_key(path, 0, &key);
458 if (ret)
Chris Masone37c9e62007-05-09 20:13:14 -0400459 break;
Josef Bacik817d52f2009-07-13 21:29:25 -0400460
Josef Bacikc9ea7b22013-09-19 10:02:11 -0400461 if (need_resched() ||
Josef Bacik9e351cc2014-03-13 15:42:13 -0400462 rwsem_is_contended(&fs_info->commit_root_sem)) {
Josef Bacikd0bd4562015-09-23 14:54:14 -0400463 if (wakeup)
464 caching_ctl->progress = last;
Chris Masonff5714c2011-05-28 07:00:39 -0400465 btrfs_release_path(path);
Josef Bacik9e351cc2014-03-13 15:42:13 -0400466 up_read(&fs_info->commit_root_sem);
Josef Bacik589d8ad2011-05-11 17:30:53 -0400467 mutex_unlock(&caching_ctl->mutex);
Yan Zheng11833d62009-09-11 16:11:19 -0400468 cond_resched();
Omar Sandoval73fa48b2015-09-29 20:50:33 -0700469 mutex_lock(&caching_ctl->mutex);
470 down_read(&fs_info->commit_root_sem);
471 goto next;
Josef Bacik589d8ad2011-05-11 17:30:53 -0400472 }
Josef Bacik0a3896d2013-04-19 14:37:26 -0400473
474 ret = btrfs_next_leaf(extent_root, path);
475 if (ret < 0)
Omar Sandoval73fa48b2015-09-29 20:50:33 -0700476 goto out;
Josef Bacik0a3896d2013-04-19 14:37:26 -0400477 if (ret)
478 break;
Josef Bacik589d8ad2011-05-11 17:30:53 -0400479 leaf = path->nodes[0];
480 nritems = btrfs_header_nritems(leaf);
481 continue;
Yan Zheng11833d62009-09-11 16:11:19 -0400482 }
Josef Bacik817d52f2009-07-13 21:29:25 -0400483
Liu Bo52ee28d2013-07-11 17:51:15 +0800484 if (key.objectid < last) {
485 key.objectid = last;
486 key.offset = 0;
487 key.type = BTRFS_EXTENT_ITEM_KEY;
488
Josef Bacikd0bd4562015-09-23 14:54:14 -0400489 if (wakeup)
490 caching_ctl->progress = last;
Liu Bo52ee28d2013-07-11 17:51:15 +0800491 btrfs_release_path(path);
492 goto next;
493 }
494
Yan Zheng11833d62009-09-11 16:11:19 -0400495 if (key.objectid < block_group->key.objectid) {
496 path->slots[0]++;
Josef Bacik817d52f2009-07-13 21:29:25 -0400497 continue;
Chris Masone37c9e62007-05-09 20:13:14 -0400498 }
Josef Bacik0f9dd462008-09-23 13:14:11 -0400499
Chris Masone37c9e62007-05-09 20:13:14 -0400500 if (key.objectid >= block_group->key.objectid +
Josef Bacik0f9dd462008-09-23 13:14:11 -0400501 block_group->key.offset)
Yan7d7d6062007-09-14 16:15:28 -0400502 break;
Yan7d7d6062007-09-14 16:15:28 -0400503
Josef Bacik3173a182013-03-07 14:22:04 -0500504 if (key.type == BTRFS_EXTENT_ITEM_KEY ||
505 key.type == BTRFS_METADATA_ITEM_KEY) {
Nikolay Borisov4457c1c2018-05-10 15:44:45 +0300506 total_found += add_new_free_space(block_group, last,
Josef Bacik817d52f2009-07-13 21:29:25 -0400507 key.objectid);
Josef Bacik3173a182013-03-07 14:22:04 -0500508 if (key.type == BTRFS_METADATA_ITEM_KEY)
509 last = key.objectid +
Jeff Mahoneyda170662016-06-15 09:22:56 -0400510 fs_info->nodesize;
Josef Bacik3173a182013-03-07 14:22:04 -0500511 else
512 last = key.objectid + key.offset;
Josef Bacik817d52f2009-07-13 21:29:25 -0400513
Omar Sandoval73fa48b2015-09-29 20:50:33 -0700514 if (total_found > CACHING_CTL_WAKE_UP) {
Yan Zheng11833d62009-09-11 16:11:19 -0400515 total_found = 0;
Josef Bacikd0bd4562015-09-23 14:54:14 -0400516 if (wakeup)
517 wake_up(&caching_ctl->wait);
Yan Zheng11833d62009-09-11 16:11:19 -0400518 }
Josef Bacik817d52f2009-07-13 21:29:25 -0400519 }
Chris Masone37c9e62007-05-09 20:13:14 -0400520 path->slots[0]++;
521 }
Josef Bacikef8bbdf2008-09-23 13:14:11 -0400522 ret = 0;
Josef Bacik817d52f2009-07-13 21:29:25 -0400523
Nikolay Borisov4457c1c2018-05-10 15:44:45 +0300524 total_found += add_new_free_space(block_group, last,
Josef Bacik817d52f2009-07-13 21:29:25 -0400525 block_group->key.objectid +
526 block_group->key.offset);
Yan Zheng11833d62009-09-11 16:11:19 -0400527 caching_ctl->progress = (u64)-1;
Josef Bacik817d52f2009-07-13 21:29:25 -0400528
Omar Sandoval73fa48b2015-09-29 20:50:33 -0700529out:
530 btrfs_free_path(path);
531 return ret;
532}
533
534static noinline void caching_thread(struct btrfs_work *work)
535{
536 struct btrfs_block_group_cache *block_group;
537 struct btrfs_fs_info *fs_info;
538 struct btrfs_caching_control *caching_ctl;
539 int ret;
540
541 caching_ctl = container_of(work, struct btrfs_caching_control, work);
542 block_group = caching_ctl->block_group;
543 fs_info = block_group->fs_info;
544
545 mutex_lock(&caching_ctl->mutex);
546 down_read(&fs_info->commit_root_sem);
547
Omar Sandoval1e144fb2015-09-29 20:50:37 -0700548 if (btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE))
549 ret = load_free_space_tree(caching_ctl);
550 else
551 ret = load_extent_tree_free(caching_ctl);
Omar Sandoval73fa48b2015-09-29 20:50:33 -0700552
Josef Bacik817d52f2009-07-13 21:29:25 -0400553 spin_lock(&block_group->lock);
Yan Zheng11833d62009-09-11 16:11:19 -0400554 block_group->caching_ctl = NULL;
Omar Sandoval73fa48b2015-09-29 20:50:33 -0700555 block_group->cached = ret ? BTRFS_CACHE_ERROR : BTRFS_CACHE_FINISHED;
Josef Bacik817d52f2009-07-13 21:29:25 -0400556 spin_unlock(&block_group->lock);
557
Josef Bacikd0bd4562015-09-23 14:54:14 -0400558#ifdef CONFIG_BTRFS_DEBUG
Jeff Mahoney2ff7e612016-06-22 18:54:24 -0400559 if (btrfs_should_fragment_free_space(block_group)) {
Josef Bacikd0bd4562015-09-23 14:54:14 -0400560 u64 bytes_used;
561
562 spin_lock(&block_group->space_info->lock);
563 spin_lock(&block_group->lock);
564 bytes_used = block_group->key.offset -
565 btrfs_block_group_used(&block_group->item);
566 block_group->space_info->bytes_used += bytes_used >> 1;
567 spin_unlock(&block_group->lock);
568 spin_unlock(&block_group->space_info->lock);
Jeff Mahoney2ff7e612016-06-22 18:54:24 -0400569 fragment_free_space(block_group);
Josef Bacikd0bd4562015-09-23 14:54:14 -0400570 }
571#endif
572
573 caching_ctl->progress = (u64)-1;
Chris Masonf7d3d2f2015-12-18 11:11:10 -0800574
Josef Bacik9e351cc2014-03-13 15:42:13 -0400575 up_read(&fs_info->commit_root_sem);
Nikolay Borisov9e715da2018-06-20 15:49:08 +0300576 free_excluded_extents(block_group);
Yan Zheng11833d62009-09-11 16:11:19 -0400577 mutex_unlock(&caching_ctl->mutex);
Omar Sandoval73fa48b2015-09-29 20:50:33 -0700578
Yan Zheng11833d62009-09-11 16:11:19 -0400579 wake_up(&caching_ctl->wait);
580
581 put_caching_control(caching_ctl);
Josef Bacik11dfe352009-11-13 20:12:59 +0000582 btrfs_put_block_group(block_group);
Josef Bacik817d52f2009-07-13 21:29:25 -0400583}
584
Josef Bacik9d66e232010-08-25 16:54:15 -0400585static int cache_block_group(struct btrfs_block_group_cache *cache,
Josef Bacik9d66e232010-08-25 16:54:15 -0400586 int load_cache_only)
Josef Bacik817d52f2009-07-13 21:29:25 -0400587{
Josef Bacik291c7d22011-11-14 13:52:14 -0500588 DEFINE_WAIT(wait);
Yan Zheng11833d62009-09-11 16:11:19 -0400589 struct btrfs_fs_info *fs_info = cache->fs_info;
590 struct btrfs_caching_control *caching_ctl;
Josef Bacik817d52f2009-07-13 21:29:25 -0400591 int ret = 0;
592
Josef Bacik291c7d22011-11-14 13:52:14 -0500593 caching_ctl = kzalloc(sizeof(*caching_ctl), GFP_NOFS);
Jeff Mahoney79787ea2012-03-12 16:03:00 +0100594 if (!caching_ctl)
595 return -ENOMEM;
Josef Bacik291c7d22011-11-14 13:52:14 -0500596
597 INIT_LIST_HEAD(&caching_ctl->list);
598 mutex_init(&caching_ctl->mutex);
599 init_waitqueue_head(&caching_ctl->wait);
600 caching_ctl->block_group = cache;
601 caching_ctl->progress = cache->key.objectid;
Elena Reshetova1e4f4712017-03-03 10:55:14 +0200602 refcount_set(&caching_ctl->count, 1);
Liu Bo9e0af232014-08-15 23:36:53 +0800603 btrfs_init_work(&caching_ctl->work, btrfs_cache_helper,
604 caching_thread, NULL, NULL);
Josef Bacik291c7d22011-11-14 13:52:14 -0500605
606 spin_lock(&cache->lock);
607 /*
608 * This should be a rare occasion, but this could happen I think in the
609 * case where one thread starts to load the space cache info, and then
610 * some other thread starts a transaction commit which tries to do an
611 * allocation while the other thread is still loading the space cache
612 * info. The previous loop should have kept us from choosing this block
613 * group, but if we've moved to the state where we will wait on caching
614 * block groups we need to first check if we're doing a fast load here,
615 * so we can wait for it to finish, otherwise we could end up allocating
616 * from a block group who's cache gets evicted for one reason or
617 * another.
618 */
619 while (cache->cached == BTRFS_CACHE_FAST) {
620 struct btrfs_caching_control *ctl;
621
622 ctl = cache->caching_ctl;
Elena Reshetova1e4f4712017-03-03 10:55:14 +0200623 refcount_inc(&ctl->count);
Josef Bacik291c7d22011-11-14 13:52:14 -0500624 prepare_to_wait(&ctl->wait, &wait, TASK_UNINTERRUPTIBLE);
625 spin_unlock(&cache->lock);
626
627 schedule();
628
629 finish_wait(&ctl->wait, &wait);
630 put_caching_control(ctl);
631 spin_lock(&cache->lock);
632 }
633
634 if (cache->cached != BTRFS_CACHE_NO) {
635 spin_unlock(&cache->lock);
636 kfree(caching_ctl);
Yan Zheng11833d62009-09-11 16:11:19 -0400637 return 0;
Josef Bacik291c7d22011-11-14 13:52:14 -0500638 }
639 WARN_ON(cache->caching_ctl);
640 cache->caching_ctl = caching_ctl;
641 cache->cached = BTRFS_CACHE_FAST;
642 spin_unlock(&cache->lock);
Yan Zheng11833d62009-09-11 16:11:19 -0400643
Satoru Takeuchid8953d62017-09-12 20:08:08 +0900644 if (btrfs_test_opt(fs_info, SPACE_CACHE)) {
Josef Bacikcb83b7b2014-11-26 11:52:54 -0500645 mutex_lock(&caching_ctl->mutex);
Josef Bacik9d66e232010-08-25 16:54:15 -0400646 ret = load_free_space_cache(fs_info, cache);
647
648 spin_lock(&cache->lock);
649 if (ret == 1) {
Josef Bacik291c7d22011-11-14 13:52:14 -0500650 cache->caching_ctl = NULL;
Josef Bacik9d66e232010-08-25 16:54:15 -0400651 cache->cached = BTRFS_CACHE_FINISHED;
652 cache->last_byte_to_unpin = (u64)-1;
Josef Bacikcb83b7b2014-11-26 11:52:54 -0500653 caching_ctl->progress = (u64)-1;
Josef Bacik9d66e232010-08-25 16:54:15 -0400654 } else {
Josef Bacik291c7d22011-11-14 13:52:14 -0500655 if (load_cache_only) {
656 cache->caching_ctl = NULL;
657 cache->cached = BTRFS_CACHE_NO;
658 } else {
659 cache->cached = BTRFS_CACHE_STARTED;
Filipe Manana4f69cb92014-11-26 15:28:51 +0000660 cache->has_caching_ctl = 1;
Josef Bacik291c7d22011-11-14 13:52:14 -0500661 }
Josef Bacik9d66e232010-08-25 16:54:15 -0400662 }
663 spin_unlock(&cache->lock);
Josef Bacikd0bd4562015-09-23 14:54:14 -0400664#ifdef CONFIG_BTRFS_DEBUG
665 if (ret == 1 &&
Jeff Mahoney2ff7e612016-06-22 18:54:24 -0400666 btrfs_should_fragment_free_space(cache)) {
Josef Bacikd0bd4562015-09-23 14:54:14 -0400667 u64 bytes_used;
668
669 spin_lock(&cache->space_info->lock);
670 spin_lock(&cache->lock);
671 bytes_used = cache->key.offset -
672 btrfs_block_group_used(&cache->item);
673 cache->space_info->bytes_used += bytes_used >> 1;
674 spin_unlock(&cache->lock);
675 spin_unlock(&cache->space_info->lock);
Jeff Mahoney2ff7e612016-06-22 18:54:24 -0400676 fragment_free_space(cache);
Josef Bacikd0bd4562015-09-23 14:54:14 -0400677 }
678#endif
Josef Bacikcb83b7b2014-11-26 11:52:54 -0500679 mutex_unlock(&caching_ctl->mutex);
680
Josef Bacik291c7d22011-11-14 13:52:14 -0500681 wake_up(&caching_ctl->wait);
Josef Bacik3c148742011-02-02 15:53:47 +0000682 if (ret == 1) {
Josef Bacik291c7d22011-11-14 13:52:14 -0500683 put_caching_control(caching_ctl);
Nikolay Borisov9e715da2018-06-20 15:49:08 +0300684 free_excluded_extents(cache);
Josef Bacik9d66e232010-08-25 16:54:15 -0400685 return 0;
Josef Bacik3c148742011-02-02 15:53:47 +0000686 }
Josef Bacik291c7d22011-11-14 13:52:14 -0500687 } else {
688 /*
Omar Sandoval1e144fb2015-09-29 20:50:37 -0700689 * We're either using the free space tree or no caching at all.
690 * Set cached to the appropriate value and wakeup any waiters.
Josef Bacik291c7d22011-11-14 13:52:14 -0500691 */
692 spin_lock(&cache->lock);
693 if (load_cache_only) {
694 cache->caching_ctl = NULL;
695 cache->cached = BTRFS_CACHE_NO;
696 } else {
697 cache->cached = BTRFS_CACHE_STARTED;
Filipe Manana4f69cb92014-11-26 15:28:51 +0000698 cache->has_caching_ctl = 1;
Josef Bacik291c7d22011-11-14 13:52:14 -0500699 }
Josef Bacik817d52f2009-07-13 21:29:25 -0400700 spin_unlock(&cache->lock);
Josef Bacik291c7d22011-11-14 13:52:14 -0500701 wake_up(&caching_ctl->wait);
702 }
703
704 if (load_cache_only) {
705 put_caching_control(caching_ctl);
Yan Zheng11833d62009-09-11 16:11:19 -0400706 return 0;
Josef Bacik817d52f2009-07-13 21:29:25 -0400707 }
Josef Bacik817d52f2009-07-13 21:29:25 -0400708
Josef Bacik9e351cc2014-03-13 15:42:13 -0400709 down_write(&fs_info->commit_root_sem);
Elena Reshetova1e4f4712017-03-03 10:55:14 +0200710 refcount_inc(&caching_ctl->count);
Yan Zheng11833d62009-09-11 16:11:19 -0400711 list_add_tail(&caching_ctl->list, &fs_info->caching_block_groups);
Josef Bacik9e351cc2014-03-13 15:42:13 -0400712 up_write(&fs_info->commit_root_sem);
Yan Zheng11833d62009-09-11 16:11:19 -0400713
Josef Bacik11dfe352009-11-13 20:12:59 +0000714 btrfs_get_block_group(cache);
Yan Zheng11833d62009-09-11 16:11:19 -0400715
Qu Wenruoe66f0bb2014-02-28 10:46:12 +0800716 btrfs_queue_work(fs_info->caching_workers, &caching_ctl->work);
Josef Bacik817d52f2009-07-13 21:29:25 -0400717
Josef Bacikef8bbdf2008-09-23 13:14:11 -0400718 return ret;
Chris Masone37c9e62007-05-09 20:13:14 -0400719}
720
Josef Bacik0f9dd462008-09-23 13:14:11 -0400721/*
722 * return the block group that starts at or after bytenr
723 */
Chris Masond3977122009-01-05 21:25:51 -0500724static struct btrfs_block_group_cache *
725btrfs_lookup_first_block_group(struct btrfs_fs_info *info, u64 bytenr)
Chris Mason0ef3e662008-05-24 14:04:53 -0400726{
Masahiro Yamadae2c89902016-09-13 04:35:52 +0900727 return block_group_cache_tree_search(info, bytenr, 0);
Chris Mason0ef3e662008-05-24 14:04:53 -0400728}
729
Josef Bacik0f9dd462008-09-23 13:14:11 -0400730/*
Sankar P9f556842009-05-14 13:52:22 -0400731 * return the block group that contains the given bytenr
Josef Bacik0f9dd462008-09-23 13:14:11 -0400732 */
Chris Masond3977122009-01-05 21:25:51 -0500733struct btrfs_block_group_cache *btrfs_lookup_block_group(
734 struct btrfs_fs_info *info,
735 u64 bytenr)
Chris Masonbe744172007-05-06 10:15:01 -0400736{
Masahiro Yamadae2c89902016-09-13 04:35:52 +0900737 return block_group_cache_tree_search(info, bytenr, 1);
Chris Masonbe744172007-05-06 10:15:01 -0400738}
Chris Mason0b86a832008-03-24 15:01:56 -0400739
Josef Bacik0f9dd462008-09-23 13:14:11 -0400740static struct btrfs_space_info *__find_space_info(struct btrfs_fs_info *info,
741 u64 flags)
Chris Mason6324fbf2008-03-24 15:01:59 -0400742{
Josef Bacik0f9dd462008-09-23 13:14:11 -0400743 struct list_head *head = &info->space_info;
Josef Bacik0f9dd462008-09-23 13:14:11 -0400744 struct btrfs_space_info *found;
Chris Mason4184ea72009-03-10 12:39:20 -0400745
Ilya Dryomov52ba6922012-01-16 22:04:47 +0200746 flags &= BTRFS_BLOCK_GROUP_TYPE_MASK;
Yan, Zhengb742bb822010-05-16 10:46:24 -0400747
Chris Mason4184ea72009-03-10 12:39:20 -0400748 rcu_read_lock();
749 list_for_each_entry_rcu(found, head, list) {
Josef Bacik67377732010-09-16 16:19:09 -0400750 if (found->flags & flags) {
Chris Mason4184ea72009-03-10 12:39:20 -0400751 rcu_read_unlock();
Josef Bacik0f9dd462008-09-23 13:14:11 -0400752 return found;
Chris Mason4184ea72009-03-10 12:39:20 -0400753 }
Josef Bacik0f9dd462008-09-23 13:14:11 -0400754 }
Chris Mason4184ea72009-03-10 12:39:20 -0400755 rcu_read_unlock();
Josef Bacik0f9dd462008-09-23 13:14:11 -0400756 return NULL;
Chris Mason6324fbf2008-03-24 15:01:59 -0400757}
758
Omar Sandoval0d9f8242017-06-06 16:45:26 -0700759static void add_pinned_bytes(struct btrfs_fs_info *fs_info, s64 num_bytes,
Nikolay Borisov29d2b842018-03-30 12:58:47 +0300760 bool metadata, u64 root_objectid)
Omar Sandoval0d9f8242017-06-06 16:45:26 -0700761{
762 struct btrfs_space_info *space_info;
763 u64 flags;
764
Nikolay Borisov29d2b842018-03-30 12:58:47 +0300765 if (metadata) {
Omar Sandoval0d9f8242017-06-06 16:45:26 -0700766 if (root_objectid == BTRFS_CHUNK_TREE_OBJECTID)
767 flags = BTRFS_BLOCK_GROUP_SYSTEM;
768 else
769 flags = BTRFS_BLOCK_GROUP_METADATA;
770 } else {
771 flags = BTRFS_BLOCK_GROUP_DATA;
772 }
773
774 space_info = __find_space_info(fs_info, flags);
Omar Sandoval55e81962017-06-06 16:45:27 -0700775 ASSERT(space_info);
Ethan Liendec59fa2018-07-13 16:50:42 +0800776 percpu_counter_add_batch(&space_info->total_bytes_pinned, num_bytes,
777 BTRFS_TOTAL_BYTES_PINNED_BATCH);
Omar Sandoval0d9f8242017-06-06 16:45:26 -0700778}
779
Chris Mason4184ea72009-03-10 12:39:20 -0400780/*
781 * after adding space to the filesystem, we need to clear the full flags
782 * on all the space infos.
783 */
784void btrfs_clear_space_info_full(struct btrfs_fs_info *info)
785{
786 struct list_head *head = &info->space_info;
787 struct btrfs_space_info *found;
788
789 rcu_read_lock();
790 list_for_each_entry_rcu(found, head, list)
791 found->full = 0;
792 rcu_read_unlock();
793}
794
Filipe Manana1a4ed8f2014-10-27 10:44:24 +0000795/* simple helper to search for an existing data extent at a given offset */
Jeff Mahoney2ff7e612016-06-22 18:54:24 -0400796int btrfs_lookup_data_extent(struct btrfs_fs_info *fs_info, u64 start, u64 len)
Chris Masone02119d2008-09-05 16:13:11 -0400797{
798 int ret;
799 struct btrfs_key key;
Zheng Yan31840ae2008-09-23 13:14:14 -0400800 struct btrfs_path *path;
Chris Masone02119d2008-09-05 16:13:11 -0400801
Zheng Yan31840ae2008-09-23 13:14:14 -0400802 path = btrfs_alloc_path();
Mark Fashehd8926bb2011-07-13 10:38:47 -0700803 if (!path)
804 return -ENOMEM;
805
Chris Masone02119d2008-09-05 16:13:11 -0400806 key.objectid = start;
807 key.offset = len;
Josef Bacik3173a182013-03-07 14:22:04 -0500808 key.type = BTRFS_EXTENT_ITEM_KEY;
Jeff Mahoney0b246af2016-06-22 18:54:23 -0400809 ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, path, 0, 0);
Zheng Yan31840ae2008-09-23 13:14:14 -0400810 btrfs_free_path(path);
Chris Mason7bb86312007-12-11 09:25:06 -0500811 return ret;
812}
813
Chris Masond8d5f3e2007-12-11 12:42:00 -0500814/*
Josef Bacik3173a182013-03-07 14:22:04 -0500815 * helper function to lookup reference count and flags of a tree block.
Yan, Zhenga22285a2010-05-16 10:48:46 -0400816 *
817 * the head node for delayed ref is used to store the sum of all the
818 * reference count modifications queued up in the rbtree. the head
819 * node may also store the extent flags to set. This way you can check
820 * to see what the reference count and extent flags would be if all of
821 * the delayed refs are not processed.
822 */
823int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans,
Jeff Mahoney2ff7e612016-06-22 18:54:24 -0400824 struct btrfs_fs_info *fs_info, u64 bytenr,
Josef Bacik3173a182013-03-07 14:22:04 -0500825 u64 offset, int metadata, u64 *refs, u64 *flags)
Yan, Zhenga22285a2010-05-16 10:48:46 -0400826{
827 struct btrfs_delayed_ref_head *head;
828 struct btrfs_delayed_ref_root *delayed_refs;
829 struct btrfs_path *path;
830 struct btrfs_extent_item *ei;
831 struct extent_buffer *leaf;
832 struct btrfs_key key;
833 u32 item_size;
834 u64 num_refs;
835 u64 extent_flags;
836 int ret;
837
Josef Bacik3173a182013-03-07 14:22:04 -0500838 /*
839 * If we don't have skinny metadata, don't bother doing anything
840 * different
841 */
Jeff Mahoney0b246af2016-06-22 18:54:23 -0400842 if (metadata && !btrfs_fs_incompat(fs_info, SKINNY_METADATA)) {
843 offset = fs_info->nodesize;
Josef Bacik3173a182013-03-07 14:22:04 -0500844 metadata = 0;
845 }
846
Yan, Zhenga22285a2010-05-16 10:48:46 -0400847 path = btrfs_alloc_path();
848 if (!path)
849 return -ENOMEM;
850
Yan, Zhenga22285a2010-05-16 10:48:46 -0400851 if (!trans) {
852 path->skip_locking = 1;
853 path->search_commit_root = 1;
854 }
Filipe David Borba Manana639eefc2013-12-08 00:26:29 +0000855
856search_again:
857 key.objectid = bytenr;
858 key.offset = offset;
859 if (metadata)
860 key.type = BTRFS_METADATA_ITEM_KEY;
861 else
862 key.type = BTRFS_EXTENT_ITEM_KEY;
863
Jeff Mahoney0b246af2016-06-22 18:54:23 -0400864 ret = btrfs_search_slot(trans, fs_info->extent_root, &key, path, 0, 0);
Yan, Zhenga22285a2010-05-16 10:48:46 -0400865 if (ret < 0)
866 goto out_free;
867
Josef Bacik3173a182013-03-07 14:22:04 -0500868 if (ret > 0 && metadata && key.type == BTRFS_METADATA_ITEM_KEY) {
Filipe David Borba Manana74be9512013-07-05 23:12:06 +0100869 if (path->slots[0]) {
870 path->slots[0]--;
871 btrfs_item_key_to_cpu(path->nodes[0], &key,
872 path->slots[0]);
873 if (key.objectid == bytenr &&
874 key.type == BTRFS_EXTENT_ITEM_KEY &&
Jeff Mahoney0b246af2016-06-22 18:54:23 -0400875 key.offset == fs_info->nodesize)
Filipe David Borba Manana74be9512013-07-05 23:12:06 +0100876 ret = 0;
877 }
Josef Bacik3173a182013-03-07 14:22:04 -0500878 }
879
Yan, Zhenga22285a2010-05-16 10:48:46 -0400880 if (ret == 0) {
881 leaf = path->nodes[0];
882 item_size = btrfs_item_size_nr(leaf, path->slots[0]);
883 if (item_size >= sizeof(*ei)) {
884 ei = btrfs_item_ptr(leaf, path->slots[0],
885 struct btrfs_extent_item);
886 num_refs = btrfs_extent_refs(leaf, ei);
887 extent_flags = btrfs_extent_flags(leaf, ei);
888 } else {
Nikolay Borisovba3c2b12018-06-26 16:57:36 +0300889 ret = -EINVAL;
890 btrfs_print_v0_err(fs_info);
891 if (trans)
892 btrfs_abort_transaction(trans, ret);
893 else
894 btrfs_handle_fs_error(fs_info, ret, NULL);
895
896 goto out_free;
Yan, Zhenga22285a2010-05-16 10:48:46 -0400897 }
Nikolay Borisovba3c2b12018-06-26 16:57:36 +0300898
Yan, Zhenga22285a2010-05-16 10:48:46 -0400899 BUG_ON(num_refs == 0);
900 } else {
901 num_refs = 0;
902 extent_flags = 0;
903 ret = 0;
904 }
905
906 if (!trans)
907 goto out;
908
909 delayed_refs = &trans->transaction->delayed_refs;
910 spin_lock(&delayed_refs->lock);
Liu Bof72ad18e2017-01-30 12:24:37 -0800911 head = btrfs_find_delayed_ref_head(delayed_refs, bytenr);
Yan, Zhenga22285a2010-05-16 10:48:46 -0400912 if (head) {
913 if (!mutex_trylock(&head->mutex)) {
Josef Bacikd2788502017-09-29 15:43:57 -0400914 refcount_inc(&head->refs);
Yan, Zhenga22285a2010-05-16 10:48:46 -0400915 spin_unlock(&delayed_refs->lock);
916
David Sterbab3b4aa72011-04-21 01:20:15 +0200917 btrfs_release_path(path);
Yan, Zhenga22285a2010-05-16 10:48:46 -0400918
David Sterba8cc33e52011-05-02 15:29:25 +0200919 /*
920 * Mutex was contended, block until it's released and try
921 * again
922 */
Yan, Zhenga22285a2010-05-16 10:48:46 -0400923 mutex_lock(&head->mutex);
924 mutex_unlock(&head->mutex);
Josef Bacikd2788502017-09-29 15:43:57 -0400925 btrfs_put_delayed_ref_head(head);
Filipe David Borba Manana639eefc2013-12-08 00:26:29 +0000926 goto search_again;
Yan, Zhenga22285a2010-05-16 10:48:46 -0400927 }
Josef Bacikd7df2c72014-01-23 09:21:38 -0500928 spin_lock(&head->lock);
Yan, Zhenga22285a2010-05-16 10:48:46 -0400929 if (head->extent_op && head->extent_op->update_flags)
930 extent_flags |= head->extent_op->flags_to_set;
931 else
932 BUG_ON(num_refs == 0);
933
Josef Bacikd2788502017-09-29 15:43:57 -0400934 num_refs += head->ref_mod;
Josef Bacikd7df2c72014-01-23 09:21:38 -0500935 spin_unlock(&head->lock);
Yan, Zhenga22285a2010-05-16 10:48:46 -0400936 mutex_unlock(&head->mutex);
937 }
938 spin_unlock(&delayed_refs->lock);
939out:
940 WARN_ON(num_refs == 0);
941 if (refs)
942 *refs = num_refs;
943 if (flags)
944 *flags = extent_flags;
945out_free:
946 btrfs_free_path(path);
947 return ret;
948}
949
950/*
Chris Masond8d5f3e2007-12-11 12:42:00 -0500951 * Back reference rules. Back refs have three main goals:
952 *
953 * 1) differentiate between all holders of references to an extent so that
954 * when a reference is dropped we can make sure it was a valid reference
955 * before freeing the extent.
956 *
957 * 2) Provide enough information to quickly find the holders of an extent
958 * if we notice a given block is corrupted or bad.
959 *
960 * 3) Make it easy to migrate blocks for FS shrinking or storage pool
961 * maintenance. This is actually the same as #2, but with a slightly
962 * different use case.
963 *
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400964 * There are two kinds of back refs. The implicit back refs is optimized
965 * for pointers in non-shared tree blocks. For a given pointer in a block,
966 * back refs of this kind provide information about the block's owner tree
967 * and the pointer's key. These information allow us to find the block by
968 * b-tree searching. The full back refs is for pointers in tree blocks not
969 * referenced by their owner trees. The location of tree block is recorded
970 * in the back refs. Actually the full back refs is generic, and can be
971 * used in all cases the implicit back refs is used. The major shortcoming
972 * of the full back refs is its overhead. Every time a tree block gets
973 * COWed, we have to update back refs entry for all pointers in it.
974 *
975 * For a newly allocated tree block, we use implicit back refs for
976 * pointers in it. This means most tree related operations only involve
977 * implicit back refs. For a tree block created in old transaction, the
978 * only way to drop a reference to it is COW it. So we can detect the
979 * event that tree block loses its owner tree's reference and do the
980 * back refs conversion.
981 *
Nicholas D Steeves01327612016-05-19 21:18:45 -0400982 * When a tree block is COWed through a tree, there are four cases:
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400983 *
984 * The reference count of the block is one and the tree is the block's
985 * owner tree. Nothing to do in this case.
986 *
987 * The reference count of the block is one and the tree is not the
988 * block's owner tree. In this case, full back refs is used for pointers
989 * in the block. Remove these full back refs, add implicit back refs for
990 * every pointers in the new block.
991 *
992 * The reference count of the block is greater than one and the tree is
993 * the block's owner tree. In this case, implicit back refs is used for
994 * pointers in the block. Add full back refs for every pointers in the
995 * block, increase lower level extents' reference counts. The original
996 * implicit back refs are entailed to the new block.
997 *
998 * The reference count of the block is greater than one and the tree is
999 * not the block's owner tree. Add implicit back refs for every pointer in
1000 * the new block, increase lower level extents' reference count.
1001 *
1002 * Back Reference Key composing:
1003 *
1004 * The key objectid corresponds to the first byte in the extent,
1005 * The key type is used to differentiate between types of back refs.
1006 * There are different meanings of the key offset for different types
1007 * of back refs.
1008 *
Chris Masond8d5f3e2007-12-11 12:42:00 -05001009 * File extents can be referenced by:
1010 *
1011 * - multiple snapshots, subvolumes, or different generations in one subvol
Zheng Yan31840ae2008-09-23 13:14:14 -04001012 * - different files inside a single subvolume
Chris Masond8d5f3e2007-12-11 12:42:00 -05001013 * - different offsets inside a file (bookend extents in file.c)
1014 *
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001015 * The extent ref structure for the implicit back refs has fields for:
Chris Masond8d5f3e2007-12-11 12:42:00 -05001016 *
1017 * - Objectid of the subvolume root
Chris Masond8d5f3e2007-12-11 12:42:00 -05001018 * - objectid of the file holding the reference
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001019 * - original offset in the file
1020 * - how many bookend extents
Zheng Yan31840ae2008-09-23 13:14:14 -04001021 *
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001022 * The key offset for the implicit back refs is hash of the first
1023 * three fields.
Chris Masond8d5f3e2007-12-11 12:42:00 -05001024 *
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001025 * The extent ref structure for the full back refs has field for:
Chris Masond8d5f3e2007-12-11 12:42:00 -05001026 *
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001027 * - number of pointers in the tree leaf
Chris Masond8d5f3e2007-12-11 12:42:00 -05001028 *
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001029 * The key offset for the implicit back refs is the first byte of
1030 * the tree leaf
Chris Masond8d5f3e2007-12-11 12:42:00 -05001031 *
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001032 * When a file extent is allocated, The implicit back refs is used.
1033 * the fields are filled in:
Chris Masond8d5f3e2007-12-11 12:42:00 -05001034 *
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001035 * (root_key.objectid, inode objectid, offset in file, 1)
1036 *
1037 * When a file extent is removed file truncation, we find the
1038 * corresponding implicit back refs and check the following fields:
1039 *
1040 * (btrfs_header_owner(leaf), inode objectid, offset in file)
Chris Masond8d5f3e2007-12-11 12:42:00 -05001041 *
1042 * Btree extents can be referenced by:
1043 *
1044 * - Different subvolumes
Chris Masond8d5f3e2007-12-11 12:42:00 -05001045 *
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001046 * Both the implicit back refs and the full back refs for tree blocks
1047 * only consist of key. The key offset for the implicit back refs is
1048 * objectid of block's owner tree. The key offset for the full back refs
1049 * is the first byte of parent block.
Chris Masond8d5f3e2007-12-11 12:42:00 -05001050 *
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001051 * When implicit back refs is used, information about the lowest key and
1052 * level of the tree block are required. These information are stored in
1053 * tree block info structure.
Chris Masond8d5f3e2007-12-11 12:42:00 -05001054 */
Zheng Yan31840ae2008-09-23 13:14:14 -04001055
Liu Bo167ce952017-08-18 15:15:18 -06001056/*
1057 * is_data == BTRFS_REF_TYPE_BLOCK, tree block type is required,
Andrea Gelmini52042d82018-11-28 12:05:13 +01001058 * is_data == BTRFS_REF_TYPE_DATA, data type is requiried,
Liu Bo167ce952017-08-18 15:15:18 -06001059 * is_data == BTRFS_REF_TYPE_ANY, either type is OK.
1060 */
1061int btrfs_get_extent_inline_ref_type(const struct extent_buffer *eb,
1062 struct btrfs_extent_inline_ref *iref,
1063 enum btrfs_inline_ref_type is_data)
1064{
1065 int type = btrfs_extent_inline_ref_type(eb, iref);
Liu Bo64ecdb62017-08-18 15:15:24 -06001066 u64 offset = btrfs_extent_inline_ref_offset(eb, iref);
Liu Bo167ce952017-08-18 15:15:18 -06001067
1068 if (type == BTRFS_TREE_BLOCK_REF_KEY ||
1069 type == BTRFS_SHARED_BLOCK_REF_KEY ||
1070 type == BTRFS_SHARED_DATA_REF_KEY ||
1071 type == BTRFS_EXTENT_DATA_REF_KEY) {
1072 if (is_data == BTRFS_REF_TYPE_BLOCK) {
Liu Bo64ecdb62017-08-18 15:15:24 -06001073 if (type == BTRFS_TREE_BLOCK_REF_KEY)
Liu Bo167ce952017-08-18 15:15:18 -06001074 return type;
Liu Bo64ecdb62017-08-18 15:15:24 -06001075 if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
1076 ASSERT(eb->fs_info);
1077 /*
1078 * Every shared one has parent tree
1079 * block, which must be aligned to
1080 * nodesize.
1081 */
1082 if (offset &&
1083 IS_ALIGNED(offset, eb->fs_info->nodesize))
1084 return type;
1085 }
Liu Bo167ce952017-08-18 15:15:18 -06001086 } else if (is_data == BTRFS_REF_TYPE_DATA) {
Liu Bo64ecdb62017-08-18 15:15:24 -06001087 if (type == BTRFS_EXTENT_DATA_REF_KEY)
Liu Bo167ce952017-08-18 15:15:18 -06001088 return type;
Liu Bo64ecdb62017-08-18 15:15:24 -06001089 if (type == BTRFS_SHARED_DATA_REF_KEY) {
1090 ASSERT(eb->fs_info);
1091 /*
1092 * Every shared one has parent tree
1093 * block, which must be aligned to
1094 * nodesize.
1095 */
1096 if (offset &&
1097 IS_ALIGNED(offset, eb->fs_info->nodesize))
1098 return type;
1099 }
Liu Bo167ce952017-08-18 15:15:18 -06001100 } else {
1101 ASSERT(is_data == BTRFS_REF_TYPE_ANY);
1102 return type;
1103 }
1104 }
1105
1106 btrfs_print_leaf((struct extent_buffer *)eb);
1107 btrfs_err(eb->fs_info, "eb %llu invalid extent inline ref type %d",
1108 eb->start, type);
1109 WARN_ON(1);
1110
1111 return BTRFS_REF_TYPE_INVALID;
1112}
1113
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001114static u64 hash_extent_data_ref(u64 root_objectid, u64 owner, u64 offset)
1115{
1116 u32 high_crc = ~(u32)0;
1117 u32 low_crc = ~(u32)0;
1118 __le64 lenum;
1119
1120 lenum = cpu_to_le64(root_objectid);
Nikolay Borisov9678c542018-01-08 11:45:05 +02001121 high_crc = crc32c(high_crc, &lenum, sizeof(lenum));
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001122 lenum = cpu_to_le64(owner);
Nikolay Borisov9678c542018-01-08 11:45:05 +02001123 low_crc = crc32c(low_crc, &lenum, sizeof(lenum));
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001124 lenum = cpu_to_le64(offset);
Nikolay Borisov9678c542018-01-08 11:45:05 +02001125 low_crc = crc32c(low_crc, &lenum, sizeof(lenum));
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001126
1127 return ((u64)high_crc << 31) ^ (u64)low_crc;
1128}
1129
1130static u64 hash_extent_data_ref_item(struct extent_buffer *leaf,
1131 struct btrfs_extent_data_ref *ref)
1132{
1133 return hash_extent_data_ref(btrfs_extent_data_ref_root(leaf, ref),
1134 btrfs_extent_data_ref_objectid(leaf, ref),
1135 btrfs_extent_data_ref_offset(leaf, ref));
1136}
1137
1138static int match_extent_data_ref(struct extent_buffer *leaf,
1139 struct btrfs_extent_data_ref *ref,
1140 u64 root_objectid, u64 owner, u64 offset)
1141{
1142 if (btrfs_extent_data_ref_root(leaf, ref) != root_objectid ||
1143 btrfs_extent_data_ref_objectid(leaf, ref) != owner ||
1144 btrfs_extent_data_ref_offset(leaf, ref) != offset)
1145 return 0;
1146 return 1;
1147}
1148
1149static noinline int lookup_extent_data_ref(struct btrfs_trans_handle *trans,
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001150 struct btrfs_path *path,
1151 u64 bytenr, u64 parent,
1152 u64 root_objectid,
1153 u64 owner, u64 offset)
1154{
Nikolay Borisovbd1d53e2018-06-20 15:48:51 +03001155 struct btrfs_root *root = trans->fs_info->extent_root;
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001156 struct btrfs_key key;
1157 struct btrfs_extent_data_ref *ref;
1158 struct extent_buffer *leaf;
1159 u32 nritems;
1160 int ret;
1161 int recow;
1162 int err = -ENOENT;
1163
1164 key.objectid = bytenr;
1165 if (parent) {
1166 key.type = BTRFS_SHARED_DATA_REF_KEY;
1167 key.offset = parent;
1168 } else {
1169 key.type = BTRFS_EXTENT_DATA_REF_KEY;
1170 key.offset = hash_extent_data_ref(root_objectid,
1171 owner, offset);
1172 }
1173again:
1174 recow = 0;
1175 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
1176 if (ret < 0) {
1177 err = ret;
1178 goto fail;
1179 }
1180
1181 if (parent) {
1182 if (!ret)
1183 return 0;
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001184 goto fail;
Zheng Yan31840ae2008-09-23 13:14:14 -04001185 }
1186
1187 leaf = path->nodes[0];
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001188 nritems = btrfs_header_nritems(leaf);
1189 while (1) {
1190 if (path->slots[0] >= nritems) {
1191 ret = btrfs_next_leaf(root, path);
1192 if (ret < 0)
1193 err = ret;
1194 if (ret)
1195 goto fail;
1196
1197 leaf = path->nodes[0];
1198 nritems = btrfs_header_nritems(leaf);
1199 recow = 1;
1200 }
1201
1202 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
1203 if (key.objectid != bytenr ||
1204 key.type != BTRFS_EXTENT_DATA_REF_KEY)
1205 goto fail;
1206
1207 ref = btrfs_item_ptr(leaf, path->slots[0],
1208 struct btrfs_extent_data_ref);
1209
1210 if (match_extent_data_ref(leaf, ref, root_objectid,
1211 owner, offset)) {
1212 if (recow) {
David Sterbab3b4aa72011-04-21 01:20:15 +02001213 btrfs_release_path(path);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001214 goto again;
1215 }
1216 err = 0;
1217 break;
1218 }
1219 path->slots[0]++;
Zheng Yan31840ae2008-09-23 13:14:14 -04001220 }
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001221fail:
1222 return err;
Zheng Yan31840ae2008-09-23 13:14:14 -04001223}
1224
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001225static noinline int insert_extent_data_ref(struct btrfs_trans_handle *trans,
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001226 struct btrfs_path *path,
1227 u64 bytenr, u64 parent,
1228 u64 root_objectid, u64 owner,
1229 u64 offset, int refs_to_add)
Zheng Yan31840ae2008-09-23 13:14:14 -04001230{
Nikolay Borisov62b895a2018-06-20 15:48:44 +03001231 struct btrfs_root *root = trans->fs_info->extent_root;
Zheng Yan31840ae2008-09-23 13:14:14 -04001232 struct btrfs_key key;
1233 struct extent_buffer *leaf;
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001234 u32 size;
Zheng Yan31840ae2008-09-23 13:14:14 -04001235 u32 num_refs;
1236 int ret;
1237
1238 key.objectid = bytenr;
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001239 if (parent) {
1240 key.type = BTRFS_SHARED_DATA_REF_KEY;
1241 key.offset = parent;
1242 size = sizeof(struct btrfs_shared_data_ref);
Zheng Yan31840ae2008-09-23 13:14:14 -04001243 } else {
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001244 key.type = BTRFS_EXTENT_DATA_REF_KEY;
1245 key.offset = hash_extent_data_ref(root_objectid,
1246 owner, offset);
1247 size = sizeof(struct btrfs_extent_data_ref);
Zheng Yan31840ae2008-09-23 13:14:14 -04001248 }
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001249
1250 ret = btrfs_insert_empty_item(trans, root, path, &key, size);
1251 if (ret && ret != -EEXIST)
1252 goto fail;
1253
1254 leaf = path->nodes[0];
1255 if (parent) {
1256 struct btrfs_shared_data_ref *ref;
1257 ref = btrfs_item_ptr(leaf, path->slots[0],
1258 struct btrfs_shared_data_ref);
1259 if (ret == 0) {
1260 btrfs_set_shared_data_ref_count(leaf, ref, refs_to_add);
1261 } else {
1262 num_refs = btrfs_shared_data_ref_count(leaf, ref);
1263 num_refs += refs_to_add;
1264 btrfs_set_shared_data_ref_count(leaf, ref, num_refs);
1265 }
1266 } else {
1267 struct btrfs_extent_data_ref *ref;
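 /*
 * -EEXIST means another ref already hashed to this key offset.
 * If it isn't actually ours, probe the next offset until we
 * find our ref or a free slot.
 */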
1268 while (ret == -EEXIST) {
1269 ref = btrfs_item_ptr(leaf, path->slots[0],
1270 struct btrfs_extent_data_ref);
1271 if (match_extent_data_ref(leaf, ref, root_objectid,
1272 owner, offset))
1273 break;
David Sterbab3b4aa72011-04-21 01:20:15 +02001274 btrfs_release_path(path);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001275 key.offset++;
1276 ret = btrfs_insert_empty_item(trans, root, path, &key,
1277 size);
1278 if (ret && ret != -EEXIST)
1279 goto fail;
1280
1281 leaf = path->nodes[0];
1282 }
1283 ref = btrfs_item_ptr(leaf, path->slots[0],
1284 struct btrfs_extent_data_ref);
1285 if (ret == 0) {
1286 btrfs_set_extent_data_ref_root(leaf, ref,
1287 root_objectid);
1288 btrfs_set_extent_data_ref_objectid(leaf, ref, owner);
1289 btrfs_set_extent_data_ref_offset(leaf, ref, offset);
1290 btrfs_set_extent_data_ref_count(leaf, ref, refs_to_add);
1291 } else {
1292 num_refs = btrfs_extent_data_ref_count(leaf, ref);
1293 num_refs += refs_to_add;
1294 btrfs_set_extent_data_ref_count(leaf, ref, num_refs);
1295 }
1296 }
1297 btrfs_mark_buffer_dirty(leaf);
1298 ret = 0;
1299fail:
David Sterbab3b4aa72011-04-21 01:20:15 +02001300 btrfs_release_path(path);
Chris Mason7bb86312007-12-11 09:25:06 -05001301 return ret;
Chris Mason74493f72007-12-11 09:25:06 -05001302}
1303
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001304static noinline int remove_extent_data_ref(struct btrfs_trans_handle *trans,
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001305 struct btrfs_path *path,
Josef Bacikfcebe452014-05-13 17:30:47 -07001306 int refs_to_drop, int *last_ref)
Zheng Yan31840ae2008-09-23 13:14:14 -04001307{
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001308 struct btrfs_key key;
1309 struct btrfs_extent_data_ref *ref1 = NULL;
1310 struct btrfs_shared_data_ref *ref2 = NULL;
Zheng Yan31840ae2008-09-23 13:14:14 -04001311 struct extent_buffer *leaf;
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001312 u32 num_refs = 0;
Zheng Yan31840ae2008-09-23 13:14:14 -04001313 int ret = 0;
1314
1315 leaf = path->nodes[0];
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001316 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
1317
1318 if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
1319 ref1 = btrfs_item_ptr(leaf, path->slots[0],
1320 struct btrfs_extent_data_ref);
1321 num_refs = btrfs_extent_data_ref_count(leaf, ref1);
1322 } else if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
1323 ref2 = btrfs_item_ptr(leaf, path->slots[0],
1324 struct btrfs_shared_data_ref);
1325 num_refs = btrfs_shared_data_ref_count(leaf, ref2);
David Sterba6d8ff4e2018-06-26 16:20:59 +02001326 } else if (unlikely(key.type == BTRFS_EXTENT_REF_V0_KEY)) {
Nikolay Borisovba3c2b12018-06-26 16:57:36 +03001327 btrfs_print_v0_err(trans->fs_info);
1328 btrfs_abort_transaction(trans, -EINVAL);
1329 return -EINVAL;
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001330 } else {
1331 BUG();
1332 }
1333
Chris Mason56bec292009-03-13 10:10:06 -04001334 BUG_ON(num_refs < refs_to_drop);
1335 num_refs -= refs_to_drop;
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001336
Zheng Yan31840ae2008-09-23 13:14:14 -04001337 if (num_refs == 0) {
Nikolay Borisove9f62902018-06-20 15:48:46 +03001338 ret = btrfs_del_item(trans, trans->fs_info->extent_root, path);
Josef Bacikfcebe452014-05-13 17:30:47 -07001339 *last_ref = 1;
Zheng Yan31840ae2008-09-23 13:14:14 -04001340 } else {
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001341 if (key.type == BTRFS_EXTENT_DATA_REF_KEY)
1342 btrfs_set_extent_data_ref_count(leaf, ref1, num_refs);
1343 else if (key.type == BTRFS_SHARED_DATA_REF_KEY)
1344 btrfs_set_shared_data_ref_count(leaf, ref2, num_refs);
Zheng Yan31840ae2008-09-23 13:14:14 -04001345 btrfs_mark_buffer_dirty(leaf);
1346 }
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001347 return ret;
1348}
1349
Zhaolei9ed0dea2015-08-06 22:16:24 +08001350static noinline u32 extent_data_ref_count(struct btrfs_path *path,
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001351 struct btrfs_extent_inline_ref *iref)
1352{
1353 struct btrfs_key key;
1354 struct extent_buffer *leaf;
1355 struct btrfs_extent_data_ref *ref1;
1356 struct btrfs_shared_data_ref *ref2;
1357 u32 num_refs = 0;
Liu Bo3de28d52017-08-18 15:15:19 -06001358 int type;
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001359
1360 leaf = path->nodes[0];
1361 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
Nikolay Borisovba3c2b12018-06-26 16:57:36 +03001362
1363 BUG_ON(key.type == BTRFS_EXTENT_REF_V0_KEY);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001364 if (iref) {
Liu Bo3de28d52017-08-18 15:15:19 -06001365 /*
1366 * If type is invalid, we should have bailed out earlier than
1367 * this call.
1368 */
1369 type = btrfs_get_extent_inline_ref_type(leaf, iref, BTRFS_REF_TYPE_DATA);
1370 ASSERT(type != BTRFS_REF_TYPE_INVALID);
1371 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001372 ref1 = (struct btrfs_extent_data_ref *)(&iref->offset);
1373 num_refs = btrfs_extent_data_ref_count(leaf, ref1);
1374 } else {
1375 ref2 = (struct btrfs_shared_data_ref *)(iref + 1);
1376 num_refs = btrfs_shared_data_ref_count(leaf, ref2);
1377 }
1378 } else if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
1379 ref1 = btrfs_item_ptr(leaf, path->slots[0],
1380 struct btrfs_extent_data_ref);
1381 num_refs = btrfs_extent_data_ref_count(leaf, ref1);
1382 } else if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
1383 ref2 = btrfs_item_ptr(leaf, path->slots[0],
1384 struct btrfs_shared_data_ref);
1385 num_refs = btrfs_shared_data_ref_count(leaf, ref2);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001386 } else {
1387 WARN_ON(1);
1388 }
1389 return num_refs;
1390}
1391
1392static noinline int lookup_tree_block_ref(struct btrfs_trans_handle *trans,
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001393 struct btrfs_path *path,
1394 u64 bytenr, u64 parent,
1395 u64 root_objectid)
1396{
Nikolay Borisovb8582ee2018-06-20 15:48:50 +03001397 struct btrfs_root *root = trans->fs_info->extent_root;
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001398 struct btrfs_key key;
1399 int ret;
1400
1401 key.objectid = bytenr;
1402 if (parent) {
1403 key.type = BTRFS_SHARED_BLOCK_REF_KEY;
1404 key.offset = parent;
1405 } else {
1406 key.type = BTRFS_TREE_BLOCK_REF_KEY;
1407 key.offset = root_objectid;
1408 }
1409
1410 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
1411 if (ret > 0)
1412 ret = -ENOENT;
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001413 return ret;
1414}
1415
1416static noinline int insert_tree_block_ref(struct btrfs_trans_handle *trans,
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001417 struct btrfs_path *path,
1418 u64 bytenr, u64 parent,
1419 u64 root_objectid)
1420{
1421 struct btrfs_key key;
1422 int ret;
1423
1424 key.objectid = bytenr;
1425 if (parent) {
1426 key.type = BTRFS_SHARED_BLOCK_REF_KEY;
1427 key.offset = parent;
1428 } else {
1429 key.type = BTRFS_TREE_BLOCK_REF_KEY;
1430 key.offset = root_objectid;
1431 }
1432
Nikolay Borisov10728402018-06-20 15:48:43 +03001433 ret = btrfs_insert_empty_item(trans, trans->fs_info->extent_root,
Jeff Mahoney87bde3c2017-02-15 16:28:27 -05001434 path, &key, 0);
David Sterbab3b4aa72011-04-21 01:20:15 +02001435 btrfs_release_path(path);
Zheng Yan31840ae2008-09-23 13:14:14 -04001436 return ret;
1437}
1438
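/*
 * Map (parent, owner) to the back ref key type: tree blocks (owner below
 * BTRFS_FIRST_FREE_OBJECTID) get block ref keys, data extents get data
 * ref keys; a nonzero parent selects the shared variant of either.
 */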
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001439static inline int extent_ref_type(u64 parent, u64 owner)
1440{
1441 int type;
1442 if (owner < BTRFS_FIRST_FREE_OBJECTID) {
1443 if (parent > 0)
1444 type = BTRFS_SHARED_BLOCK_REF_KEY;
1445 else
1446 type = BTRFS_TREE_BLOCK_REF_KEY;
1447 } else {
1448 if (parent > 0)
1449 type = BTRFS_SHARED_DATA_REF_KEY;
1450 else
1451 type = BTRFS_EXTENT_DATA_REF_KEY;
1452 }
1453 return type;
1454}
1455
Yan Zheng2c47e6052009-06-27 21:07:35 -04001456static int find_next_key(struct btrfs_path *path, int level,
1457 struct btrfs_key *key)
1459{
Yan Zheng2c47e6052009-06-27 21:07:35 -04001460 for (; level < BTRFS_MAX_LEVEL; level++) {
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001461 if (!path->nodes[level])
1462 break;
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001463 if (path->slots[level] + 1 >=
1464 btrfs_header_nritems(path->nodes[level]))
1465 continue;
1466 if (level == 0)
1467 btrfs_item_key_to_cpu(path->nodes[level], key,
1468 path->slots[level] + 1);
1469 else
1470 btrfs_node_key_to_cpu(path->nodes[level], key,
1471 path->slots[level] + 1);
1472 return 0;
1473 }
1474 return 1;
1475}
1476
1477/*
1478 * Look for an inline back ref. If the back ref is found, *ref_ret is set
1479 * to the address of the inline back ref, and 0 is returned.
1480 *
1481 * If the back ref isn't found, *ref_ret is set to the address where it
1482 * should be inserted, and -ENOENT is returned.
1483 *
1484 * If insert is true and there are too many inline back refs, the path
1485 * points to the extent item, and -EAGAIN is returned.
1486 *
1487 * NOTE: inline back refs are ordered in the same way that back ref
1488 * items in the tree are ordered.
1489 */
1490static noinline_for_stack
1491int lookup_inline_extent_backref(struct btrfs_trans_handle *trans,
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001492 struct btrfs_path *path,
1493 struct btrfs_extent_inline_ref **ref_ret,
1494 u64 bytenr, u64 num_bytes,
1495 u64 parent, u64 root_objectid,
1496 u64 owner, u64 offset, int insert)
1497{
Nikolay Borisov867cc1f2018-06-20 15:48:48 +03001498 struct btrfs_fs_info *fs_info = trans->fs_info;
Jeff Mahoney87bde3c2017-02-15 16:28:27 -05001499 struct btrfs_root *root = fs_info->extent_root;
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001500 struct btrfs_key key;
1501 struct extent_buffer *leaf;
1502 struct btrfs_extent_item *ei;
1503 struct btrfs_extent_inline_ref *iref;
1504 u64 flags;
1505 u64 item_size;
1506 unsigned long ptr;
1507 unsigned long end;
1508 int extra_size;
1509 int type;
1510 int want;
1511 int ret;
1512 int err = 0;
Jeff Mahoney0b246af2016-06-22 18:54:23 -04001513 bool skinny_metadata = btrfs_fs_incompat(fs_info, SKINNY_METADATA);
Liu Bo3de28d52017-08-18 15:15:19 -06001514 int needed;
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001515
1516 key.objectid = bytenr;
1517 key.type = BTRFS_EXTENT_ITEM_KEY;
1518 key.offset = num_bytes;
1519
1520 want = extent_ref_type(parent, owner);
1521 if (insert) {
1522 extra_size = btrfs_extent_inline_ref_size(want);
Yan Zheng85d41982009-06-11 08:51:10 -04001523 path->keep_locks = 1;
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001524 } else
1525 extra_size = -1;
Josef Bacik3173a182013-03-07 14:22:04 -05001526
1527 /*
Nikolay Borisov16d1c062018-06-18 14:59:26 +03001528 * Owner is our level, so we can just add one to get the level for the
1529 * block we are interested in.
Josef Bacik3173a182013-03-07 14:22:04 -05001530 */
1531 if (skinny_metadata && owner < BTRFS_FIRST_FREE_OBJECTID) {
1532 key.type = BTRFS_METADATA_ITEM_KEY;
1533 key.offset = owner;
1534 }
1535
1536again:
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001537 ret = btrfs_search_slot(trans, root, &key, path, extra_size, 1);
1538 if (ret < 0) {
1539 err = ret;
1540 goto out;
1541 }
Josef Bacik3173a182013-03-07 14:22:04 -05001542
1543 /*
1544 * We may be a newly converted file system which still has the old fat
1545 * extent entries for metadata, so try and see if we have one of those.
1546 */
1547 if (ret > 0 && skinny_metadata) {
1548 skinny_metadata = false;
1549 if (path->slots[0]) {
1550 path->slots[0]--;
1551 btrfs_item_key_to_cpu(path->nodes[0], &key,
1552 path->slots[0]);
1553 if (key.objectid == bytenr &&
1554 key.type == BTRFS_EXTENT_ITEM_KEY &&
1555 key.offset == num_bytes)
1556 ret = 0;
1557 }
1558 if (ret) {
Filipe Manana9ce49a02014-04-24 15:15:28 +01001559 key.objectid = bytenr;
Josef Bacik3173a182013-03-07 14:22:04 -05001560 key.type = BTRFS_EXTENT_ITEM_KEY;
1561 key.offset = num_bytes;
1562 btrfs_release_path(path);
1563 goto again;
1564 }
1565 }
1566
Jeff Mahoney79787ea2012-03-12 16:03:00 +01001567 if (ret && !insert) {
1568 err = -ENOENT;
1569 goto out;
Dulshani Gunawardhanafae7f212013-10-31 10:30:08 +05301570 } else if (WARN_ON(ret)) {
Josef Bacik492104c2013-03-08 15:41:02 -05001571 err = -EIO;
Josef Bacik492104c2013-03-08 15:41:02 -05001572 goto out;
Jeff Mahoney79787ea2012-03-12 16:03:00 +01001573 }
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001574
1575 leaf = path->nodes[0];
1576 item_size = btrfs_item_size_nr(leaf, path->slots[0]);
David Sterba6d8ff4e2018-06-26 16:20:59 +02001577 if (unlikely(item_size < sizeof(*ei))) {
Nikolay Borisovba3c2b12018-06-26 16:57:36 +03001578 err = -EINVAL;
1579 btrfs_print_v0_err(fs_info);
1580 btrfs_abort_transaction(trans, err);
1581 goto out;
1582 }
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001583
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001584 ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
1585 flags = btrfs_extent_flags(leaf, ei);
1586
1587 ptr = (unsigned long)(ei + 1);
1588 end = (unsigned long)ei + item_size;
1589
Josef Bacik3173a182013-03-07 14:22:04 -05001590 if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK && !skinny_metadata) {
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001591 ptr += sizeof(struct btrfs_tree_block_info);
1592 BUG_ON(ptr > end);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001593 }
1594
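 /*
 * Restrict the inline ref types we will accept to those valid for
 * this extent: data refs for data extents, block refs for tree
 * blocks.  Anything else is treated as corruption (-EUCLEAN) in
 * the loop below.
 */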
Liu Bo3de28d52017-08-18 15:15:19 -06001595 if (owner >= BTRFS_FIRST_FREE_OBJECTID)
1596 needed = BTRFS_REF_TYPE_DATA;
1597 else
1598 needed = BTRFS_REF_TYPE_BLOCK;
1599
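 /*
 * Inline refs are sorted by type, then by their content (see the
 * NOTE above), so we can stop as soon as we walk past the slot
 * where ours would live; ptr then marks the insertion point that
 * is handed back through ref_ret.
 */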
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001600 err = -ENOENT;
1601 while (1) {
1602 if (ptr >= end) {
1603 WARN_ON(ptr > end);
1604 break;
1605 }
1606 iref = (struct btrfs_extent_inline_ref *)ptr;
Liu Bo3de28d52017-08-18 15:15:19 -06001607 type = btrfs_get_extent_inline_ref_type(leaf, iref, needed);
1608 if (type == BTRFS_REF_TYPE_INVALID) {
Su Yueaf431dc2018-06-22 16:18:01 +08001609 err = -EUCLEAN;
Liu Bo3de28d52017-08-18 15:15:19 -06001610 goto out;
1611 }
1612
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001613 if (want < type)
1614 break;
1615 if (want > type) {
1616 ptr += btrfs_extent_inline_ref_size(type);
1617 continue;
1618 }
1619
1620 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
1621 struct btrfs_extent_data_ref *dref;
1622 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
1623 if (match_extent_data_ref(leaf, dref, root_objectid,
1624 owner, offset)) {
1625 err = 0;
1626 break;
1627 }
1628 if (hash_extent_data_ref_item(leaf, dref) <
1629 hash_extent_data_ref(root_objectid, owner, offset))
1630 break;
1631 } else {
1632 u64 ref_offset;
1633 ref_offset = btrfs_extent_inline_ref_offset(leaf, iref);
1634 if (parent > 0) {
1635 if (parent == ref_offset) {
1636 err = 0;
1637 break;
1638 }
1639 if (ref_offset < parent)
1640 break;
1641 } else {
1642 if (root_objectid == ref_offset) {
1643 err = 0;
1644 break;
1645 }
1646 if (ref_offset < root_objectid)
1647 break;
1648 }
1649 }
1650 ptr += btrfs_extent_inline_ref_size(type);
1651 }
1652 if (err == -ENOENT && insert) {
1653 if (item_size + extra_size >=
1654 BTRFS_MAX_EXTENT_ITEM_SIZE(root)) {
1655 err = -EAGAIN;
1656 goto out;
1657 }
1658 /*
1659 * To add a new inline back ref, we have to make sure
1660 * there is no corresponding back ref item.
1661 * For simplicity, we just do not add a new inline back
1662 * ref if there is any kind of item for this block.
1663 */
Yan Zheng2c47e6052009-06-27 21:07:35 -04001664 if (find_next_key(path, 0, &key) == 0 &&
1665 key.objectid == bytenr &&
Yan Zheng85d41982009-06-11 08:51:10 -04001666 key.type < BTRFS_BLOCK_GROUP_ITEM_KEY) {
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001667 err = -EAGAIN;
1668 goto out;
1669 }
1670 }
1671 *ref_ret = (struct btrfs_extent_inline_ref *)ptr;
1672out:
Yan Zheng85d41982009-06-11 08:51:10 -04001673 if (insert) {
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001674 path->keep_locks = 0;
1675 btrfs_unlock_up_safe(path, 1);
1676 }
1677 return err;
1678}
1679
1680/*
1681 * helper to add new inline back ref
1682 */
1683static noinline_for_stack
Jeff Mahoney87bde3c2017-02-15 16:28:27 -05001684void setup_inline_extent_backref(struct btrfs_fs_info *fs_info,
Jeff Mahoney143bede2012-03-01 14:56:26 +01001685 struct btrfs_path *path,
1686 struct btrfs_extent_inline_ref *iref,
1687 u64 parent, u64 root_objectid,
1688 u64 owner, u64 offset, int refs_to_add,
1689 struct btrfs_delayed_extent_op *extent_op)
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001690{
1691 struct extent_buffer *leaf;
1692 struct btrfs_extent_item *ei;
1693 unsigned long ptr;
1694 unsigned long end;
1695 unsigned long item_offset;
1696 u64 refs;
1697 int size;
1698 int type;
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001699
1700 leaf = path->nodes[0];
1701 ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
1702 item_offset = (unsigned long)iref - (unsigned long)ei;
1703
1704 type = extent_ref_type(parent, owner);
1705 size = btrfs_extent_inline_ref_size(type);
1706
Jeff Mahoney87bde3c2017-02-15 16:28:27 -05001707 btrfs_extend_item(fs_info, path, size);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001708
1709 ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
1710 refs = btrfs_extent_refs(leaf, ei);
1711 refs += refs_to_add;
1712 btrfs_set_extent_refs(leaf, ei, refs);
1713 if (extent_op)
1714 __run_delayed_extent_op(extent_op, leaf, ei);
1715
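 /*
 * btrfs_extend_item() grew the item at its end; shift everything
 * from the insertion point onwards by 'size' bytes to open a gap
 * for the new ref at its sorted position.
 */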
1716 ptr = (unsigned long)ei + item_offset;
1717 end = (unsigned long)ei + btrfs_item_size_nr(leaf, path->slots[0]);
1718 if (ptr < end - size)
1719 memmove_extent_buffer(leaf, ptr + size, ptr,
1720 end - size - ptr);
1721
1722 iref = (struct btrfs_extent_inline_ref *)ptr;
1723 btrfs_set_extent_inline_ref_type(leaf, iref, type);
1724 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
1725 struct btrfs_extent_data_ref *dref;
1726 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
1727 btrfs_set_extent_data_ref_root(leaf, dref, root_objectid);
1728 btrfs_set_extent_data_ref_objectid(leaf, dref, owner);
1729 btrfs_set_extent_data_ref_offset(leaf, dref, offset);
1730 btrfs_set_extent_data_ref_count(leaf, dref, refs_to_add);
1731 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
1732 struct btrfs_shared_data_ref *sref;
1733 sref = (struct btrfs_shared_data_ref *)(iref + 1);
1734 btrfs_set_shared_data_ref_count(leaf, sref, refs_to_add);
1735 btrfs_set_extent_inline_ref_offset(leaf, iref, parent);
1736 } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
1737 btrfs_set_extent_inline_ref_offset(leaf, iref, parent);
1738 } else {
1739 btrfs_set_extent_inline_ref_offset(leaf, iref, root_objectid);
1740 }
1741 btrfs_mark_buffer_dirty(leaf);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001742}
1743
1744static int lookup_extent_backref(struct btrfs_trans_handle *trans,
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001745 struct btrfs_path *path,
1746 struct btrfs_extent_inline_ref **ref_ret,
1747 u64 bytenr, u64 num_bytes, u64 parent,
1748 u64 root_objectid, u64 owner, u64 offset)
1749{
1750 int ret;
1751
Nikolay Borisov867cc1f2018-06-20 15:48:48 +03001752 ret = lookup_inline_extent_backref(trans, path, ref_ret, bytenr,
1753 num_bytes, parent, root_objectid,
1754 owner, offset, 0);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001755 if (ret != -ENOENT)
1756 return ret;
1757
David Sterbab3b4aa72011-04-21 01:20:15 +02001758 btrfs_release_path(path);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001759 *ref_ret = NULL;
1760
1761 if (owner < BTRFS_FIRST_FREE_OBJECTID) {
Nikolay Borisovb8582ee2018-06-20 15:48:50 +03001762 ret = lookup_tree_block_ref(trans, path, bytenr, parent,
1763 root_objectid);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001764 } else {
Nikolay Borisovbd1d53e2018-06-20 15:48:51 +03001765 ret = lookup_extent_data_ref(trans, path, bytenr, parent,
1766 root_objectid, owner, offset);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001767 }
1768 return ret;
1769}
1770
1771/*
1772 * helper to update/remove inline back ref
1773 */
1774static noinline_for_stack
Nikolay Borisov61a18f12018-06-20 15:48:49 +03001775void update_inline_extent_backref(struct btrfs_path *path,
Jeff Mahoney143bede2012-03-01 14:56:26 +01001776 struct btrfs_extent_inline_ref *iref,
1777 int refs_to_mod,
Josef Bacikfcebe452014-05-13 17:30:47 -07001778 struct btrfs_delayed_extent_op *extent_op,
1779 int *last_ref)
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001780{
Nikolay Borisov61a18f12018-06-20 15:48:49 +03001781 struct extent_buffer *leaf = path->nodes[0];
1782 struct btrfs_fs_info *fs_info = leaf->fs_info;
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001783 struct btrfs_extent_item *ei;
1784 struct btrfs_extent_data_ref *dref = NULL;
1785 struct btrfs_shared_data_ref *sref = NULL;
1786 unsigned long ptr;
1787 unsigned long end;
1788 u32 item_size;
1789 int size;
1790 int type;
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001791 u64 refs;
1792
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001793 ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
1794 refs = btrfs_extent_refs(leaf, ei);
1795 WARN_ON(refs_to_mod < 0 && refs + refs_to_mod <= 0);
1796 refs += refs_to_mod;
1797 btrfs_set_extent_refs(leaf, ei, refs);
1798 if (extent_op)
1799 __run_delayed_extent_op(extent_op, leaf, ei);
1800
Liu Bo3de28d52017-08-18 15:15:19 -06001801 /*
1802 * If type is invalid, we should have bailed out after
1803 * lookup_inline_extent_backref().
1804 */
1805 type = btrfs_get_extent_inline_ref_type(leaf, iref, BTRFS_REF_TYPE_ANY);
1806 ASSERT(type != BTRFS_REF_TYPE_INVALID);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001807
1808 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
1809 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
1810 refs = btrfs_extent_data_ref_count(leaf, dref);
1811 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
1812 sref = (struct btrfs_shared_data_ref *)(iref + 1);
1813 refs = btrfs_shared_data_ref_count(leaf, sref);
1814 } else {
1815 refs = 1;
1816 BUG_ON(refs_to_mod != -1);
1817 }
1818
1819 BUG_ON(refs_to_mod < 0 && refs < -refs_to_mod);
1820 refs += refs_to_mod;
1821
1822 if (refs > 0) {
1823 if (type == BTRFS_EXTENT_DATA_REF_KEY)
1824 btrfs_set_extent_data_ref_count(leaf, dref, refs);
1825 else
1826 btrfs_set_shared_data_ref_count(leaf, sref, refs);
1827 } else {
Josef Bacikfcebe452014-05-13 17:30:47 -07001828 *last_ref = 1;
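 /*
 * The ref count dropped to zero: slide the remainder of the
 * item over this ref and shrink the item to remove it.
 */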
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001829 size = btrfs_extent_inline_ref_size(type);
1830 item_size = btrfs_item_size_nr(leaf, path->slots[0]);
1831 ptr = (unsigned long)iref;
1832 end = (unsigned long)ei + item_size;
1833 if (ptr + size < end)
1834 memmove_extent_buffer(leaf, ptr, ptr + size,
1835 end - ptr - size);
1836 item_size -= size;
Jeff Mahoney87bde3c2017-02-15 16:28:27 -05001837 btrfs_truncate_item(fs_info, path, item_size, 1);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001838 }
1839 btrfs_mark_buffer_dirty(leaf);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001840}
1841
1842static noinline_for_stack
1843int insert_inline_extent_backref(struct btrfs_trans_handle *trans,
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001844 struct btrfs_path *path,
1845 u64 bytenr, u64 num_bytes, u64 parent,
1846 u64 root_objectid, u64 owner,
1847 u64 offset, int refs_to_add,
1848 struct btrfs_delayed_extent_op *extent_op)
1849{
1850 struct btrfs_extent_inline_ref *iref;
1851 int ret;
1852
Nikolay Borisov867cc1f2018-06-20 15:48:48 +03001853 ret = lookup_inline_extent_backref(trans, path, &iref, bytenr,
1854 num_bytes, parent, root_objectid,
1855 owner, offset, 1);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001856 if (ret == 0) {
1857 BUG_ON(owner < BTRFS_FIRST_FREE_OBJECTID);
Nikolay Borisov61a18f12018-06-20 15:48:49 +03001858 update_inline_extent_backref(path, iref, refs_to_add,
1859 extent_op, NULL);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001860 } else if (ret == -ENOENT) {
Nikolay Borisova639cde2018-06-20 15:49:10 +03001861 setup_inline_extent_backref(trans->fs_info, path, iref, parent,
Jeff Mahoney143bede2012-03-01 14:56:26 +01001862 root_objectid, owner, offset,
1863 refs_to_add, extent_op);
1864 ret = 0;
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001865 }
1866 return ret;
1867}
1868
1869static int insert_extent_backref(struct btrfs_trans_handle *trans,
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001870 struct btrfs_path *path,
1871 u64 bytenr, u64 parent, u64 root_objectid,
1872 u64 owner, u64 offset, int refs_to_add)
1873{
1874 int ret;
1875 if (owner < BTRFS_FIRST_FREE_OBJECTID) {
1876 BUG_ON(refs_to_add != 1);
Nikolay Borisov10728402018-06-20 15:48:43 +03001877 ret = insert_tree_block_ref(trans, path, bytenr, parent,
1878 root_objectid);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001879 } else {
Nikolay Borisov62b895a2018-06-20 15:48:44 +03001880 ret = insert_extent_data_ref(trans, path, bytenr, parent,
1881 root_objectid, owner, offset,
1882 refs_to_add);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001883 }
1884 return ret;
1885}
1886
1887static int remove_extent_backref(struct btrfs_trans_handle *trans,
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001888 struct btrfs_path *path,
1889 struct btrfs_extent_inline_ref *iref,
Josef Bacikfcebe452014-05-13 17:30:47 -07001890 int refs_to_drop, int is_data, int *last_ref)
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001891{
Jeff Mahoney143bede2012-03-01 14:56:26 +01001892 int ret = 0;
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001893
1894 BUG_ON(!is_data && refs_to_drop != 1);
1895 if (iref) {
Nikolay Borisov61a18f12018-06-20 15:48:49 +03001896 update_inline_extent_backref(path, iref, -refs_to_drop, NULL,
1897 last_ref);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001898 } else if (is_data) {
Nikolay Borisove9f62902018-06-20 15:48:46 +03001899 ret = remove_extent_data_ref(trans, path, refs_to_drop,
Josef Bacikfcebe452014-05-13 17:30:47 -07001900 last_ref);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001901 } else {
Josef Bacikfcebe452014-05-13 17:30:47 -07001902 *last_ref = 1;
Nikolay Borisov87cc7a82018-06-20 15:49:12 +03001903 ret = btrfs_del_item(trans, trans->fs_info->extent_root, path);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001904 }
1905 return ret;
1906}
1907
Jeff Mahoneyd04c6b82015-06-15 09:41:14 -04001908static int btrfs_issue_discard(struct block_device *bdev, u64 start, u64 len,
1909 u64 *discarded_bytes)
Chris Mason15916de2008-11-19 21:17:22 -05001910{
Jeff Mahoney86557862015-06-15 09:41:16 -04001911 int j, ret = 0;
1912 u64 bytes_left, end;
Jeff Mahoney4d89d372015-06-15 09:41:15 -04001913 u64 aligned_start = ALIGN(start, 1 << 9);
1914
1915 if (WARN_ON(start != aligned_start)) {
1916 len -= aligned_start - start;
1917 len = round_down(len, 1 << 9);
1918 start = aligned_start;
1919 }
Jeff Mahoneyd04c6b82015-06-15 09:41:14 -04001920
1921 *discarded_bytes = 0;
Jeff Mahoney86557862015-06-15 09:41:16 -04001922
1923 if (!len)
1924 return 0;
1925
1926 end = start + len;
1927 bytes_left = len;
1928
1929 /* Skip any superblocks on this device. */
1930 for (j = 0; j < BTRFS_SUPER_MIRROR_MAX; j++) {
1931 u64 sb_start = btrfs_sb_offset(j);
1932 u64 sb_end = sb_start + BTRFS_SUPER_INFO_SIZE;
1933 u64 size = sb_start - start;
1934
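 /*
 * The superblock only matters if it overlaps the discard range:
 * one of its endpoints lies inside [start, end), or the range
 * starts inside the superblock.
 */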
1935 if (!in_range(sb_start, start, bytes_left) &&
1936 !in_range(sb_end, start, bytes_left) &&
1937 !in_range(start, sb_start, BTRFS_SUPER_INFO_SIZE))
1938 continue;
1939
1940 /*
1941 * Superblock spans beginning of range. Adjust start and
1942 * try again.
1943 */
1944 if (sb_start <= start) {
1945 start += sb_end - start;
1946 if (start > end) {
1947 bytes_left = 0;
1948 break;
1949 }
1950 bytes_left = end - start;
1951 continue;
1952 }
1953
1954 if (size) {
1955 ret = blkdev_issue_discard(bdev, start >> 9, size >> 9,
1956 GFP_NOFS, 0);
1957 if (!ret)
1958 *discarded_bytes += size;
1959 else if (ret != -EOPNOTSUPP)
1960 return ret;
1961 }
1962
1963 start = sb_end;
1964 if (start > end) {
1965 bytes_left = 0;
1966 break;
1967 }
1968 bytes_left = end - start;
1969 }
1970
1971 if (bytes_left) {
1972 ret = blkdev_issue_discard(bdev, start >> 9, bytes_left >> 9,
Jeff Mahoney4d89d372015-06-15 09:41:15 -04001973 GFP_NOFS, 0);
1974 if (!ret)
Jeff Mahoney86557862015-06-15 09:41:16 -04001975 *discarded_bytes += bytes_left;
Jeff Mahoney4d89d372015-06-15 09:41:15 -04001976 }
Jeff Mahoneyd04c6b82015-06-15 09:41:14 -04001977 return ret;
Chris Mason15916de2008-11-19 21:17:22 -05001978}
Chris Mason15916de2008-11-19 21:17:22 -05001979
Jeff Mahoney2ff7e612016-06-22 18:54:24 -04001980int btrfs_discard_extent(struct btrfs_fs_info *fs_info, u64 bytenr,
Filipe Manana1edb647b2014-12-08 14:01:12 +00001981 u64 num_bytes, u64 *actual_bytes)
Liu Hui1f3c79a2009-01-05 15:57:51 -05001982{
Liu Hui1f3c79a2009-01-05 15:57:51 -05001983 int ret;
Li Dongyang5378e602011-03-24 10:24:27 +00001984 u64 discarded_bytes = 0;
Jan Schmidta1d3c472011-08-04 17:15:33 +02001985 struct btrfs_bio *bbio = NULL;
Liu Hui1f3c79a2009-01-05 15:57:51 -05001986
Filipe Manana29992412016-05-27 17:42:05 +01001988 /*
1989 * Avoid races with device replace and make sure our bbio has devices
1990 * associated with its stripes that don't go away while we are discarding.
1991 */
Jeff Mahoney0b246af2016-06-22 18:54:23 -04001992 btrfs_bio_counter_inc_blocked(fs_info);
Liu Hui1f3c79a2009-01-05 15:57:51 -05001993 /* Tell the block device(s) that the sectors can be discarded */
Jeff Mahoney0b246af2016-06-22 18:54:23 -04001994 ret = btrfs_map_block(fs_info, BTRFS_MAP_DISCARD, bytenr, &num_bytes,
1995 &bbio, 0);
Jeff Mahoney79787ea2012-03-12 16:03:00 +01001996 /* Error condition is -ENOMEM */
Liu Hui1f3c79a2009-01-05 15:57:51 -05001997 if (!ret) {
Jan Schmidta1d3c472011-08-04 17:15:33 +02001998 struct btrfs_bio_stripe *stripe = bbio->stripes;
Liu Hui1f3c79a2009-01-05 15:57:51 -05001999 int i;
2000
Jan Schmidta1d3c472011-08-04 17:15:33 +02002002 for (i = 0; i < bbio->num_stripes; i++, stripe++) {
Jeff Mahoneyd04c6b82015-06-15 09:41:14 -04002003 u64 bytes;
Anand Jain38b5f682017-11-29 18:53:43 +08002004 struct request_queue *req_q;
2005
Filipe Manana627e0872018-01-30 18:40:22 +00002006 if (!stripe->dev->bdev) {
2007 ASSERT(btrfs_test_opt(fs_info, DEGRADED));
2008 continue;
2009 }
Anand Jain38b5f682017-11-29 18:53:43 +08002010 req_q = bdev_get_queue(stripe->dev->bdev);
2011 if (!blk_queue_discard(req_q))
Josef Bacikd5e20032011-08-04 14:52:27 +00002012 continue;
2013
Li Dongyang5378e602011-03-24 10:24:27 +00002014 ret = btrfs_issue_discard(stripe->dev->bdev,
2015 stripe->physical,
Jeff Mahoneyd04c6b82015-06-15 09:41:14 -04002016 stripe->length,
2017 &bytes);
Li Dongyang5378e602011-03-24 10:24:27 +00002018 if (!ret)
Jeff Mahoneyd04c6b82015-06-15 09:41:14 -04002019 discarded_bytes += bytes;
Li Dongyang5378e602011-03-24 10:24:27 +00002020 else if (ret != -EOPNOTSUPP)
Jeff Mahoney79787ea2012-03-12 16:03:00 +01002021 break; /* Logic errors or -ENOMEM, or -EIO but I don't know how that could happen JDM */
Josef Bacikd5e20032011-08-04 14:52:27 +00002022
2023 /*
 2024 * In case we get back EOPNOTSUPP for some reason,
 2025 * just ignore the return value so we don't screw up
2026 * people calling discard_extent.
2027 */
2028 ret = 0;
Liu Hui1f3c79a2009-01-05 15:57:51 -05002029 }
Zhao Lei6e9606d2015-01-20 15:11:34 +08002030 btrfs_put_bbio(bbio);
Liu Hui1f3c79a2009-01-05 15:57:51 -05002031 }
Jeff Mahoney0b246af2016-06-22 18:54:23 -04002032 btrfs_bio_counter_dec(fs_info);
Li Dongyang5378e602011-03-24 10:24:27 +00002033
2034 if (actual_bytes)
2035 *actual_bytes = discarded_bytes;
2036
David Woodhouse53b381b2013-01-29 18:40:14 -05002038 if (ret == -EOPNOTSUPP)
2039 ret = 0;
Liu Hui1f3c79a2009-01-05 15:57:51 -05002040 return ret;
Liu Hui1f3c79a2009-01-05 15:57:51 -05002041}
2042
Jeff Mahoney79787ea2012-03-12 16:03:00 +01002043/* Can return -ENOMEM */
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002044int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
Josef Bacik84f7d8e2017-09-29 15:43:49 -04002045 struct btrfs_root *root,
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002046 u64 bytenr, u64 num_bytes, u64 parent,
Filipe Mananab06c4bf2015-10-23 07:52:54 +01002047 u64 root_objectid, u64 owner, u64 offset)
Zheng Yan31840ae2008-09-23 13:14:14 -04002048{
Josef Bacik84f7d8e2017-09-29 15:43:49 -04002049 struct btrfs_fs_info *fs_info = root->fs_info;
Omar Sandovald7eae342017-06-06 16:45:31 -07002050 int old_ref_mod, new_ref_mod;
Zheng Yan31840ae2008-09-23 13:14:14 -04002051 int ret;
Arne Jansen66d7e7f2011-09-12 15:26:38 +02002052
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002053 BUG_ON(owner < BTRFS_FIRST_FREE_OBJECTID &&
2054 root_objectid == BTRFS_TREE_LOG_OBJECTID);
Zheng Yan31840ae2008-09-23 13:14:14 -04002055
Josef Bacikfd708b82017-09-29 15:43:50 -04002056 btrfs_ref_tree_mod(root, bytenr, num_bytes, parent, root_objectid,
2057 owner, offset, BTRFS_ADD_DELAYED_REF);
2058
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002059 if (owner < BTRFS_FIRST_FREE_OBJECTID) {
Nikolay Borisov44e1c472018-06-20 15:48:53 +03002060 ret = btrfs_add_delayed_tree_ref(trans, bytenr,
Omar Sandoval7be07912017-06-06 16:45:30 -07002061 num_bytes, parent,
2062 root_objectid, (int)owner,
2063 BTRFS_ADD_DELAYED_REF, NULL,
Omar Sandovald7eae342017-06-06 16:45:31 -07002064 &old_ref_mod, &new_ref_mod);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002065 } else {
Nikolay Borisov88a979c2018-06-20 15:48:54 +03002066 ret = btrfs_add_delayed_data_ref(trans, bytenr,
Omar Sandoval7be07912017-06-06 16:45:30 -07002067 num_bytes, parent,
2068 root_objectid, owner, offset,
Omar Sandovald7eae342017-06-06 16:45:31 -07002069 0, BTRFS_ADD_DELAYED_REF,
2070 &old_ref_mod, &new_ref_mod);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002071 }
Omar Sandovald7eae342017-06-06 16:45:31 -07002072
Nikolay Borisov29d2b842018-03-30 12:58:47 +03002073 if (ret == 0 && old_ref_mod < 0 && new_ref_mod >= 0) {
2074 bool metadata = owner < BTRFS_FIRST_FREE_OBJECTID;
2075
2076 add_pinned_bytes(fs_info, -num_bytes, metadata, root_objectid);
2077 }
Omar Sandovald7eae342017-06-06 16:45:31 -07002078
Zheng Yan31840ae2008-09-23 13:14:14 -04002079 return ret;
2080}
2081
Nikolay Borisovbd3c6852018-06-18 14:59:25 +03002082/*
2083 * __btrfs_inc_extent_ref - insert backreference for a given extent
2084 *
2085 * @trans: Handle of transaction
2086 *
2087 * @node: The delayed ref node used to get the bytenr/length for
 2088 * the extent whose references are incremented.
2089 *
2090 * @parent: If this is a shared extent (BTRFS_SHARED_DATA_REF_KEY/
2091 * BTRFS_SHARED_BLOCK_REF_KEY) then it holds the logical
2092 * bytenr of the parent block. Since new extents are always
2093 * created with indirect references, this will only be the case
2094 * when relocating a shared extent. In that case, root_objectid
 2095 * will be BTRFS_TREE_RELOC_OBJECTID. Otherwise, parent must
2096 * be 0
2097 *
2098 * @root_objectid: The id of the root where this modification has originated,
2099 * this can be either one of the well-known metadata trees or
2100 * the subvolume id which references this extent.
2101 *
2102 * @owner: For data extents it is the inode number of the owning file.
2103 * For metadata extents this parameter holds the level in the
2104 * tree of the extent.
2105 *
2106 * @offset: For metadata extents the offset is ignored and is currently
 2107 * always passed as 0. For data extents it is the file offset
 2108 * this extent belongs to.
 2109 *
 2110 * @refs_to_add: Number of references to add
 2111 *
 2112 * @extent_op: Pointer to a structure holding information necessary when
 2113 * updating a tree block's flags
2114 *
2115 */
Chris Mason925baed2008-06-25 16:01:30 -04002116static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
Qu Wenruoc682f9b2015-03-17 16:59:47 +08002117 struct btrfs_delayed_ref_node *node,
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002118 u64 parent, u64 root_objectid,
2119 u64 owner, u64 offset, int refs_to_add,
2120 struct btrfs_delayed_extent_op *extent_op)
Chris Mason56bec292009-03-13 10:10:06 -04002121{
Chris Mason5caf2a02007-04-02 11:20:42 -04002122 struct btrfs_path *path;
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002123 struct extent_buffer *leaf;
Chris Mason234b63a2007-03-13 10:46:10 -04002124 struct btrfs_extent_item *item;
Josef Bacikfcebe452014-05-13 17:30:47 -07002125 struct btrfs_key key;
Qu Wenruoc682f9b2015-03-17 16:59:47 +08002126 u64 bytenr = node->bytenr;
2127 u64 num_bytes = node->num_bytes;
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002128 u64 refs;
2129 int ret;
Chris Mason037e6392007-03-07 11:50:24 -05002130
Chris Mason5caf2a02007-04-02 11:20:42 -04002131 path = btrfs_alloc_path();
Chris Mason54aa1f42007-06-22 14:16:25 -04002132 if (!path)
2133 return -ENOMEM;
Chris Mason26b80032007-08-08 20:17:12 -04002134
David Sterbae4058b52015-11-27 16:31:35 +01002135 path->reada = READA_FORWARD;
Chris Masonb9473432009-03-13 11:00:37 -04002136 path->leave_spinning = 1;
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002137 /* this will setup the path even if it fails to insert the back ref */
Nikolay Borisova639cde2018-06-20 15:49:10 +03002138 ret = insert_inline_extent_backref(trans, path, bytenr, num_bytes,
2139 parent, root_objectid, owner,
2140 offset, refs_to_add, extent_op);
Qu Wenruo0ed47922015-04-16 16:55:08 +08002141 if ((ret < 0 && ret != -EAGAIN) || !ret)
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002142 goto out;
Josef Bacikfcebe452014-05-13 17:30:47 -07002143
2144 /*
 2145 * Ok we had -EAGAIN which means we didn't have space to insert an
2146 * inline extent ref, so just update the reference count and add a
2147 * normal backref.
2148 */
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002149 leaf = path->nodes[0];
Josef Bacikfcebe452014-05-13 17:30:47 -07002150 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002151 item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
2152 refs = btrfs_extent_refs(leaf, item);
2153 btrfs_set_extent_refs(leaf, item, refs + refs_to_add);
2154 if (extent_op)
2155 __run_delayed_extent_op(extent_op, leaf, item);
Zheng Yan31840ae2008-09-23 13:14:14 -04002156
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002157 btrfs_mark_buffer_dirty(leaf);
David Sterbab3b4aa72011-04-21 01:20:15 +02002158 btrfs_release_path(path);
Chris Mason7bb86312007-12-11 09:25:06 -05002159
David Sterbae4058b52015-11-27 16:31:35 +01002160 path->reada = READA_FORWARD;
Chris Masonb9473432009-03-13 11:00:37 -04002161 path->leave_spinning = 1;
Chris Mason56bec292009-03-13 10:10:06 -04002162 /* now insert the actual backref */
Nikolay Borisov37593412018-06-20 15:48:45 +03002163 ret = insert_extent_backref(trans, path, bytenr, parent, root_objectid,
2164 owner, offset, refs_to_add);
Jeff Mahoney79787ea2012-03-12 16:03:00 +01002165 if (ret)
Jeff Mahoney66642832016-06-10 18:19:25 -04002166 btrfs_abort_transaction(trans, ret);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002167out:
Chris Mason74493f72007-12-11 09:25:06 -05002168 btrfs_free_path(path);
Liu Bo30d133f2013-10-11 16:30:23 +08002169 return ret;
Chris Mason02217ed2007-03-02 16:08:05 -05002170}
2171
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002172static int run_delayed_data_ref(struct btrfs_trans_handle *trans,
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002173 struct btrfs_delayed_ref_node *node,
2174 struct btrfs_delayed_extent_op *extent_op,
2175 int insert_reserved)
Chris Masone9d0b132007-08-10 14:06:19 -04002176{
Chris Mason56bec292009-03-13 10:10:06 -04002177 int ret = 0;
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002178 struct btrfs_delayed_data_ref *ref;
2179 struct btrfs_key ins;
2180 u64 parent = 0;
2181 u64 ref_root = 0;
2182 u64 flags = 0;
Chris Mason56bec292009-03-13 10:10:06 -04002183
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002184 ins.objectid = node->bytenr;
2185 ins.offset = node->num_bytes;
2186 ins.type = BTRFS_EXTENT_ITEM_KEY;
Chris Mason56bec292009-03-13 10:10:06 -04002187
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002188 ref = btrfs_delayed_node_to_data_ref(node);
Nikolay Borisov2bf98ef2018-06-20 15:49:00 +03002189 trace_run_delayed_data_ref(trans->fs_info, node, ref, node->action);
Liu Bo599c75e2013-07-16 19:03:36 +08002190
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002191 if (node->type == BTRFS_SHARED_DATA_REF_KEY)
2192 parent = ref->parent;
Josef Bacikfcebe452014-05-13 17:30:47 -07002193 ref_root = ref->root;
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002194
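 /*
 * Three cases: turn a reservation into a real extent item for a
 * newly allocated extent, add a reference to an existing extent,
 * or drop a reference.
 */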
2195 if (node->action == BTRFS_ADD_DELAYED_REF && insert_reserved) {
Josef Bacik3173a182013-03-07 14:22:04 -05002196 if (extent_op)
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002197 flags |= extent_op->flags_to_set;
Nikolay Borisovef89b822018-06-20 15:48:58 +03002198 ret = alloc_reserved_file_extent(trans, parent, ref_root,
2199 flags, ref->objectid,
2200 ref->offset, &ins,
2201 node->ref_mod);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002202 } else if (node->action == BTRFS_ADD_DELAYED_REF) {
Nikolay Borisov2590d0f2018-06-20 15:48:59 +03002203 ret = __btrfs_inc_extent_ref(trans, node, parent, ref_root,
2204 ref->objectid, ref->offset,
2205 node->ref_mod, extent_op);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002206 } else if (node->action == BTRFS_DROP_DELAYED_REF) {
Nikolay Borisove72cb922018-06-20 15:48:57 +03002207 ret = __btrfs_free_extent(trans, node, parent,
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002208 ref_root, ref->objectid,
2209 ref->offset, node->ref_mod,
Qu Wenruoc682f9b2015-03-17 16:59:47 +08002210 extent_op);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002211 } else {
2212 BUG();
2213 }
Chris Mason56bec292009-03-13 10:10:06 -04002214 return ret;
2215}
2216
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002217static void __run_delayed_extent_op(struct btrfs_delayed_extent_op *extent_op,
2218 struct extent_buffer *leaf,
2219 struct btrfs_extent_item *ei)
2220{
2221 u64 flags = btrfs_extent_flags(leaf, ei);
2222 if (extent_op->update_flags) {
2223 flags |= extent_op->flags_to_set;
2224 btrfs_set_extent_flags(leaf, ei, flags);
2225 }
2226
2227 if (extent_op->update_key) {
2228 struct btrfs_tree_block_info *bi;
2229 BUG_ON(!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK));
2230 bi = (struct btrfs_tree_block_info *)(ei + 1);
2231 btrfs_set_tree_block_key(leaf, bi, &extent_op->key);
2232 }
2233}
2234
2235static int run_delayed_extent_op(struct btrfs_trans_handle *trans,
Josef Bacikd2788502017-09-29 15:43:57 -04002236 struct btrfs_delayed_ref_head *head,
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002237 struct btrfs_delayed_extent_op *extent_op)
2238{
Nikolay Borisov20b9a2d2018-06-20 15:49:01 +03002239 struct btrfs_fs_info *fs_info = trans->fs_info;
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002240 struct btrfs_key key;
2241 struct btrfs_path *path;
2242 struct btrfs_extent_item *ei;
2243 struct extent_buffer *leaf;
2244 u32 item_size;
2245 int ret;
2246 int err = 0;
Josef Bacikb1c79e02013-05-09 13:49:30 -04002247 int metadata = !extent_op->is_data;
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002248
Jeff Mahoney79787ea2012-03-12 16:03:00 +01002249 if (trans->aborted)
2250 return 0;
2251
Jeff Mahoney0b246af2016-06-22 18:54:23 -04002252 if (metadata && !btrfs_fs_incompat(fs_info, SKINNY_METADATA))
Josef Bacik3173a182013-03-07 14:22:04 -05002253 metadata = 0;
2254
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002255 path = btrfs_alloc_path();
2256 if (!path)
2257 return -ENOMEM;
2258
Josef Bacikd2788502017-09-29 15:43:57 -04002259 key.objectid = head->bytenr;
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002260
Josef Bacik3173a182013-03-07 14:22:04 -05002261 if (metadata) {
Josef Bacik3173a182013-03-07 14:22:04 -05002262 key.type = BTRFS_METADATA_ITEM_KEY;
Josef Bacikb1c79e02013-05-09 13:49:30 -04002263 key.offset = extent_op->level;
Josef Bacik3173a182013-03-07 14:22:04 -05002264 } else {
2265 key.type = BTRFS_EXTENT_ITEM_KEY;
Josef Bacikd2788502017-09-29 15:43:57 -04002266 key.offset = head->num_bytes;
Josef Bacik3173a182013-03-07 14:22:04 -05002267 }
2268
2269again:
David Sterbae4058b52015-11-27 16:31:35 +01002270 path->reada = READA_FORWARD;
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002271 path->leave_spinning = 1;
Jeff Mahoney0b246af2016-06-22 18:54:23 -04002272 ret = btrfs_search_slot(trans, fs_info->extent_root, &key, path, 0, 1);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002273 if (ret < 0) {
2274 err = ret;
2275 goto out;
2276 }
2277 if (ret > 0) {
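 /*
 * A skinny METADATA_ITEM may be missing if the fs was converted
 * while still carrying old-style EXTENT_ITEMs for tree blocks:
 * check the previous slot, then fall back to retrying the search
 * with the fat key.
 */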
Josef Bacik3173a182013-03-07 14:22:04 -05002278 if (metadata) {
Filipe David Borba Manana55994882013-10-18 15:42:56 +01002279 if (path->slots[0] > 0) {
2280 path->slots[0]--;
2281 btrfs_item_key_to_cpu(path->nodes[0], &key,
2282 path->slots[0]);
Josef Bacikd2788502017-09-29 15:43:57 -04002283 if (key.objectid == head->bytenr &&
Filipe David Borba Manana55994882013-10-18 15:42:56 +01002284 key.type == BTRFS_EXTENT_ITEM_KEY &&
Josef Bacikd2788502017-09-29 15:43:57 -04002285 key.offset == head->num_bytes)
Filipe David Borba Manana55994882013-10-18 15:42:56 +01002286 ret = 0;
2287 }
2288 if (ret > 0) {
2289 btrfs_release_path(path);
2290 metadata = 0;
Josef Bacik3173a182013-03-07 14:22:04 -05002291
Josef Bacikd2788502017-09-29 15:43:57 -04002292 key.objectid = head->bytenr;
2293 key.offset = head->num_bytes;
Filipe David Borba Manana55994882013-10-18 15:42:56 +01002294 key.type = BTRFS_EXTENT_ITEM_KEY;
2295 goto again;
2296 }
2297 } else {
2298 err = -EIO;
2299 goto out;
Josef Bacik3173a182013-03-07 14:22:04 -05002300 }
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002301 }
2302
2303 leaf = path->nodes[0];
2304 item_size = btrfs_item_size_nr(leaf, path->slots[0]);
Nikolay Borisovba3c2b12018-06-26 16:57:36 +03002305
David Sterba6d8ff4e2018-06-26 16:20:59 +02002306 if (unlikely(item_size < sizeof(*ei))) {
Nikolay Borisovba3c2b12018-06-26 16:57:36 +03002307 err = -EINVAL;
2308 btrfs_print_v0_err(fs_info);
2309 btrfs_abort_transaction(trans, err);
2310 goto out;
2311 }
2312
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002313 ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
2314 __run_delayed_extent_op(extent_op, leaf, ei);
2315
2316 btrfs_mark_buffer_dirty(leaf);
2317out:
2318 btrfs_free_path(path);
2319 return err;
2320}
2321
2322static int run_delayed_tree_ref(struct btrfs_trans_handle *trans,
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002323 struct btrfs_delayed_ref_node *node,
2324 struct btrfs_delayed_extent_op *extent_op,
2325 int insert_reserved)
2326{
2327 int ret = 0;
2328 struct btrfs_delayed_tree_ref *ref;
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002329 u64 parent = 0;
2330 u64 ref_root = 0;
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002331
2332 ref = btrfs_delayed_node_to_tree_ref(node);
Nikolay Borisovf97806f2018-06-20 15:49:04 +03002333 trace_run_delayed_tree_ref(trans->fs_info, node, ref, node->action);
Liu Bo599c75e2013-07-16 19:03:36 +08002334
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002335 if (node->type == BTRFS_SHARED_BLOCK_REF_KEY)
2336 parent = ref->parent;
Josef Bacikfcebe452014-05-13 17:30:47 -07002337 ref_root = ref->root;
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002338
Liu Bo02794222016-09-14 19:19:05 -07002339 if (node->ref_mod != 1) {
Nikolay Borisovf97806f2018-06-20 15:49:04 +03002340 btrfs_err(trans->fs_info,
Liu Bo02794222016-09-14 19:19:05 -07002341 "btree block(%llu) has %d references rather than 1: action %d ref_root %llu parent %llu",
2342 node->bytenr, node->ref_mod, node->action, ref_root,
2343 parent);
2344 return -EIO;
2345 }
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002346 if (node->action == BTRFS_ADD_DELAYED_REF && insert_reserved) {
Josef Bacik3173a182013-03-07 14:22:04 -05002347 BUG_ON(!extent_op || !extent_op->update_flags);
Nikolay Borisov21ebfbe2018-05-21 12:27:22 +03002348 ret = alloc_reserved_tree_block(trans, node, extent_op);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002349 } else if (node->action == BTRFS_ADD_DELAYED_REF) {
Nikolay Borisov2590d0f2018-06-20 15:48:59 +03002350 ret = __btrfs_inc_extent_ref(trans, node, parent, ref_root,
2351 ref->level, 0, 1, extent_op);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002352 } else if (node->action == BTRFS_DROP_DELAYED_REF) {
Nikolay Borisove72cb922018-06-20 15:48:57 +03002353 ret = __btrfs_free_extent(trans, node, parent, ref_root,
Qu Wenruoc682f9b2015-03-17 16:59:47 +08002354 ref->level, 0, 1, extent_op);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002355 } else {
2356 BUG();
2357 }
2358 return ret;
2359}
2360
Chris Mason56bec292009-03-13 10:10:06 -04002361/* helper function to actually process a single delayed ref entry */
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002362static int run_one_delayed_ref(struct btrfs_trans_handle *trans,
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002363 struct btrfs_delayed_ref_node *node,
2364 struct btrfs_delayed_extent_op *extent_op,
2365 int insert_reserved)
Chris Mason56bec292009-03-13 10:10:06 -04002366{
Jeff Mahoney79787ea2012-03-12 16:03:00 +01002367 int ret = 0;
2368
Josef Bacik857cc2f2013-10-07 15:21:08 -04002369 if (trans->aborted) {
2370 if (insert_reserved)
Nikolay Borisov5fac7f92018-06-20 15:49:11 +03002371 btrfs_pin_extent(trans->fs_info, node->bytenr,
Josef Bacik857cc2f2013-10-07 15:21:08 -04002372 node->num_bytes, 1);
Jeff Mahoney79787ea2012-03-12 16:03:00 +01002373 return 0;
Josef Bacik857cc2f2013-10-07 15:21:08 -04002374 }
Jeff Mahoney79787ea2012-03-12 16:03:00 +01002375
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002376 if (node->type == BTRFS_TREE_BLOCK_REF_KEY ||
2377 node->type == BTRFS_SHARED_BLOCK_REF_KEY)
Nikolay Borisovf97806f2018-06-20 15:49:04 +03002378 ret = run_delayed_tree_ref(trans, node, extent_op,
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002379 insert_reserved);
2380 else if (node->type == BTRFS_EXTENT_DATA_REF_KEY ||
2381 node->type == BTRFS_SHARED_DATA_REF_KEY)
Nikolay Borisov2bf98ef2018-06-20 15:49:00 +03002382 ret = run_delayed_data_ref(trans, node, extent_op,
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002383 insert_reserved);
2384 else
2385 BUG();
Josef Bacik80ee54b2018-10-11 15:54:22 -04002386 if (ret && insert_reserved)
2387 btrfs_pin_extent(trans->fs_info, node->bytenr,
2388 node->num_bytes, 1);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002389 return ret;
Chris Masone9d0b132007-08-10 14:06:19 -04002390}
2391
Qu Wenruoc6fc2452015-03-30 17:03:00 +08002392static inline struct btrfs_delayed_ref_node *
Chris Mason56bec292009-03-13 10:10:06 -04002393select_delayed_ref(struct btrfs_delayed_ref_head *head)
Chris Masona28ec192007-03-06 20:08:01 -05002394{
Filipe Mananacffc3372015-07-09 13:13:44 +01002395 struct btrfs_delayed_ref_node *ref;
2396
Liu Boe3d03962018-08-23 03:51:50 +08002397 if (RB_EMPTY_ROOT(&head->ref_tree.rb_root))
Qu Wenruoc6fc2452015-03-30 17:03:00 +08002398 return NULL;
Josef Bacikd7df2c72014-01-23 09:21:38 -05002399
Filipe Mananacffc3372015-07-09 13:13:44 +01002400 /*
2401 * Select a delayed ref of type BTRFS_ADD_DELAYED_REF first.
 2402 * This is to prevent a ref count from going down to zero, which deletes
 2403 * the extent item from the extent tree, while there still are references
 2404 * to add; those adds would fail because they would not find the extent item.
2405 */
Wang Xiaoguang1d57ee92016-10-26 18:07:33 +08002406 if (!list_empty(&head->ref_add_list))
2407 return list_first_entry(&head->ref_add_list,
2408 struct btrfs_delayed_ref_node, add_list);
Filipe Mananacffc3372015-07-09 13:13:44 +01002409
Liu Boe3d03962018-08-23 03:51:50 +08002410 ref = rb_entry(rb_first_cached(&head->ref_tree),
Josef Bacik0e0adbc2017-10-19 14:16:00 -04002411 struct btrfs_delayed_ref_node, ref_node);
Wang Xiaoguang1d57ee92016-10-26 18:07:33 +08002412 ASSERT(list_empty(&ref->add_list));
2413 return ref;
Chris Mason56bec292009-03-13 10:10:06 -04002414}
2415
Josef Bacik2eadaa22017-09-29 15:43:52 -04002416static void unselect_delayed_ref_head(struct btrfs_delayed_ref_root *delayed_refs,
2417 struct btrfs_delayed_ref_head *head)
2418{
2419 spin_lock(&delayed_refs->lock);
2420 head->processing = 0;
2421 delayed_refs->num_heads_ready++;
2422 spin_unlock(&delayed_refs->lock);
2423 btrfs_delayed_ref_unlock(head);
2424}
2425
Josef Bacikbedc66172018-12-03 10:20:31 -05002426static struct btrfs_delayed_extent_op *cleanup_extent_op(
2427 struct btrfs_delayed_ref_head *head)
Josef Bacikb00e6252017-09-29 15:43:53 -04002428{
2429 struct btrfs_delayed_extent_op *extent_op = head->extent_op;
Josef Bacikbedc66172018-12-03 10:20:31 -05002430
2431 if (!extent_op)
2432 return NULL;
2433
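 /*
 * When the extent for this head still has to be inserted, the
 * extent op's flag/key update is applied as part of that insert
 * (see alloc_reserved_tree_block()), so the op can be dropped
 * here instead of being run against a not-yet-existing item.
 */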
2434 if (head->must_insert_reserved) {
2435 head->extent_op = NULL;
2436 btrfs_free_delayed_extent_op(extent_op);
2437 return NULL;
2438 }
2439 return extent_op;
2440}
2441
2442static int run_and_cleanup_extent_op(struct btrfs_trans_handle *trans,
2443 struct btrfs_delayed_ref_head *head)
2444{
2445 struct btrfs_delayed_extent_op *extent_op;
Josef Bacikb00e6252017-09-29 15:43:53 -04002446 int ret;
2447
Josef Bacikbedc66172018-12-03 10:20:31 -05002448 extent_op = cleanup_extent_op(head);
Josef Bacikb00e6252017-09-29 15:43:53 -04002449 if (!extent_op)
2450 return 0;
2451 head->extent_op = NULL;
Josef Bacikb00e6252017-09-29 15:43:53 -04002452 spin_unlock(&head->lock);
Nikolay Borisov20b9a2d2018-06-20 15:49:01 +03002453 ret = run_delayed_extent_op(trans, head, extent_op);
Josef Bacikb00e6252017-09-29 15:43:53 -04002454 btrfs_free_delayed_extent_op(extent_op);
2455 return ret ? ret : 1;
2456}
2457
Josef Bacik31890da2018-11-21 14:05:41 -05002458void btrfs_cleanup_ref_head_accounting(struct btrfs_fs_info *fs_info,
2459 struct btrfs_delayed_ref_root *delayed_refs,
2460 struct btrfs_delayed_ref_head *head)
Josef Bacik07c47772018-12-03 10:20:30 -05002461{
Josef Bacikba2c4d42018-12-03 10:20:33 -05002462 int nr_items = 1; /* Dropping this ref head update. */
Josef Bacik07c47772018-12-03 10:20:30 -05002463
2464 if (head->total_ref_mod < 0) {
2465 struct btrfs_space_info *space_info;
2466 u64 flags;
2467
2468 if (head->is_data)
2469 flags = BTRFS_BLOCK_GROUP_DATA;
2470 else if (head->is_system)
2471 flags = BTRFS_BLOCK_GROUP_SYSTEM;
2472 else
2473 flags = BTRFS_BLOCK_GROUP_METADATA;
2474 space_info = __find_space_info(fs_info, flags);
2475 ASSERT(space_info);
2476 percpu_counter_add_batch(&space_info->total_bytes_pinned,
2477 -head->num_bytes,
2478 BTRFS_TOTAL_BYTES_PINNED_BATCH);
2479
Josef Bacikba2c4d42018-12-03 10:20:33 -05002480 /*
2481 * We had csum deletions accounted for in our delayed refs rsv,
2482 * we need to drop the csum leaves for this update from our
2483 * delayed_refs_rsv.
2484 */
Josef Bacik07c47772018-12-03 10:20:30 -05002485 if (head->is_data) {
2486 spin_lock(&delayed_refs->lock);
2487 delayed_refs->pending_csums -= head->num_bytes;
2488 spin_unlock(&delayed_refs->lock);
Josef Bacikba2c4d42018-12-03 10:20:33 -05002489 nr_items += btrfs_csum_bytes_to_leaves(fs_info,
2490 head->num_bytes);
Josef Bacik07c47772018-12-03 10:20:30 -05002491 }
2492 }
2493
Josef Bacikba2c4d42018-12-03 10:20:33 -05002494 btrfs_delayed_refs_rsv_release(fs_info, nr_items);
Josef Bacik07c47772018-12-03 10:20:30 -05002495}
2496
Josef Bacik194ab0b2017-09-29 15:43:54 -04002497static int cleanup_ref_head(struct btrfs_trans_handle *trans,
Josef Bacik194ab0b2017-09-29 15:43:54 -04002498 struct btrfs_delayed_ref_head *head)
2499{
2501 struct btrfs_fs_info *fs_info = trans->fs_info;
Josef Bacik194ab0b2017-09-29 15:43:54 -04002502 struct btrfs_delayed_ref_root *delayed_refs;
2503 int ret;
2504
2505 delayed_refs = &trans->transaction->delayed_refs;
2506
Josef Bacikbedc66172018-12-03 10:20:31 -05002507 ret = run_and_cleanup_extent_op(trans, head);
Josef Bacik194ab0b2017-09-29 15:43:54 -04002508 if (ret < 0) {
2509 unselect_delayed_ref_head(delayed_refs, head);
2510 btrfs_debug(fs_info, "run_delayed_extent_op returned %d", ret);
2511 return ret;
2512 } else if (ret) {
2513 return ret;
2514 }
2515
2516 /*
2517 * Need to drop our head ref lock and re-acquire the delayed ref lock
2518 * and then re-check to make sure nobody got added.
2519 */
2520 spin_unlock(&head->lock);
2521 spin_lock(&delayed_refs->lock);
2522 spin_lock(&head->lock);
Liu Boe3d03962018-08-23 03:51:50 +08002523 if (!RB_EMPTY_ROOT(&head->ref_tree.rb_root) || head->extent_op) {
Josef Bacik194ab0b2017-09-29 15:43:54 -04002524 spin_unlock(&head->lock);
2525 spin_unlock(&delayed_refs->lock);
2526 return 1;
2527 }
Josef Bacikd7baffd2018-12-03 10:20:29 -05002528 btrfs_delete_ref_head(delayed_refs, head);
Josef Bacikc1103f72017-09-29 15:43:56 -04002529 spin_unlock(&head->lock);
Nikolay Borisov1e7a1422018-04-11 11:21:18 +03002530 spin_unlock(&delayed_refs->lock);
Josef Bacikc1103f72017-09-29 15:43:56 -04002531
Josef Bacikc1103f72017-09-29 15:43:56 -04002532 if (head->must_insert_reserved) {
Josef Bacikd2788502017-09-29 15:43:57 -04002533 btrfs_pin_extent(fs_info, head->bytenr,
2534 head->num_bytes, 1);
Josef Bacikc1103f72017-09-29 15:43:56 -04002535 if (head->is_data) {
Josef Bacikd2788502017-09-29 15:43:57 -04002536 ret = btrfs_del_csums(trans, fs_info, head->bytenr,
2537 head->num_bytes);
Josef Bacikc1103f72017-09-29 15:43:56 -04002538 }
2539 }
2540
Josef Bacik31890da2018-11-21 14:05:41 -05002541 btrfs_cleanup_ref_head_accounting(fs_info, delayed_refs, head);
Josef Bacik07c47772018-12-03 10:20:30 -05002542
2543 trace_run_delayed_ref_head(fs_info, head, 0);
Josef Bacikc1103f72017-09-29 15:43:56 -04002544 btrfs_delayed_ref_unlock(head);
Josef Bacikd2788502017-09-29 15:43:57 -04002545 btrfs_put_delayed_ref_head(head);
Josef Bacik194ab0b2017-09-29 15:43:54 -04002546 return 0;
2547}
2548
Nikolay Borisovb1cdbcb2018-08-15 10:39:54 +03002549static struct btrfs_delayed_ref_head *btrfs_obtain_ref_head(
2550 struct btrfs_trans_handle *trans)
2551{
2552 struct btrfs_delayed_ref_root *delayed_refs =
2553 &trans->transaction->delayed_refs;
2554 struct btrfs_delayed_ref_head *head = NULL;
2555 int ret;
2556
2557 spin_lock(&delayed_refs->lock);
Lu Fengqi5637c742018-10-11 13:40:33 +08002558 head = btrfs_select_ref_head(delayed_refs);
Nikolay Borisovb1cdbcb2018-08-15 10:39:54 +03002559 if (!head) {
2560 spin_unlock(&delayed_refs->lock);
2561 return head;
2562 }
2563
2564 /*
2565 * Grab the lock that says we are going to process all the refs for
2566 * this head
2567 */
Lu Fengqi9e920a62018-10-11 13:40:34 +08002568 ret = btrfs_delayed_ref_lock(delayed_refs, head);
Nikolay Borisovb1cdbcb2018-08-15 10:39:54 +03002569 spin_unlock(&delayed_refs->lock);
2570
2571 /*
2572 * We may have dropped the spin lock to get the head mutex lock, and
2573 * that might have given someone else time to free the head. If that's
2574 * true, it has been removed from our list and we can move on.
2575 */
2576 if (ret == -EAGAIN)
2577 head = ERR_PTR(-EAGAIN);
2578
2579 return head;
2580}
2581
Nikolay Borisove7261382018-08-15 10:39:55 +03002582static int btrfs_run_delayed_refs_for_head(struct btrfs_trans_handle *trans,
2583 struct btrfs_delayed_ref_head *locked_ref,
2584 unsigned long *run_refs)
2585{
2586 struct btrfs_fs_info *fs_info = trans->fs_info;
2587 struct btrfs_delayed_ref_root *delayed_refs;
2588 struct btrfs_delayed_extent_op *extent_op;
2589 struct btrfs_delayed_ref_node *ref;
2590 int must_insert_reserved = 0;
2591 int ret;
2592
2593 delayed_refs = &trans->transaction->delayed_refs;
2594
Nikolay Borisov0110a4c2018-08-15 10:39:56 +03002595 lockdep_assert_held(&locked_ref->mutex);
2596 lockdep_assert_held(&locked_ref->lock);
2597
Nikolay Borisove7261382018-08-15 10:39:55 +03002598 while ((ref = select_delayed_ref(locked_ref))) {
2599 if (ref->seq &&
2600 btrfs_check_delayed_seq(fs_info, ref->seq)) {
2601 spin_unlock(&locked_ref->lock);
2602 unselect_delayed_ref_head(delayed_refs, locked_ref);
2603 return -EAGAIN;
2604 }
2605
2606 (*run_refs)++;
2607 ref->in_tree = 0;
2608 rb_erase_cached(&ref->ref_node, &locked_ref->ref_tree);
2609 RB_CLEAR_NODE(&ref->ref_node);
2610 if (!list_empty(&ref->add_list))
2611 list_del(&ref->add_list);
2612 /*
2613 * When we play the delayed ref, also correct the ref_mod on
2614 * head
2615 */
2616 switch (ref->action) {
2617 case BTRFS_ADD_DELAYED_REF:
2618 case BTRFS_ADD_DELAYED_EXTENT:
2619 locked_ref->ref_mod -= ref->ref_mod;
2620 break;
2621 case BTRFS_DROP_DELAYED_REF:
2622 locked_ref->ref_mod += ref->ref_mod;
2623 break;
2624 default:
2625 WARN_ON(1);
2626 }
2627 atomic_dec(&delayed_refs->num_entries);
2628
2629 /*
2630 * Record the must_insert_reserved flag before we drop the
2631 * spin lock.
2632 */
2633 must_insert_reserved = locked_ref->must_insert_reserved;
2634 locked_ref->must_insert_reserved = 0;
2635
2636 extent_op = locked_ref->extent_op;
2637 locked_ref->extent_op = NULL;
2638 spin_unlock(&locked_ref->lock);
2639
2640 ret = run_one_delayed_ref(trans, ref, extent_op,
2641 must_insert_reserved);
2642
2643 btrfs_free_delayed_extent_op(extent_op);
2644 if (ret) {
2645 unselect_delayed_ref_head(delayed_refs, locked_ref);
2646 btrfs_put_delayed_ref(ref);
2647 btrfs_debug(fs_info, "run_one_delayed_ref returned %d",
2648 ret);
2649 return ret;
2650 }
2651
2652 btrfs_put_delayed_ref(ref);
2653 cond_resched();
2654
2655 spin_lock(&locked_ref->lock);
2656 btrfs_merge_delayed_refs(trans, delayed_refs, locked_ref);
2657 }
2658
2659 return 0;
2660}
2661
Jeff Mahoney79787ea2012-03-12 16:03:00 +01002662/*
2663 * Returns 0 on success or if called with an already aborted transaction.
2664 * Returns -ENOMEM or -EIO on failure and will abort the transaction.
2665 */
Josef Bacikd7df2c72014-01-23 09:21:38 -05002666static noinline int __btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
Josef Bacikd7df2c72014-01-23 09:21:38 -05002667 unsigned long nr)
Chris Mason56bec292009-03-13 10:10:06 -04002668{
Nikolay Borisov0a1e4582018-03-15 16:00:27 +02002669 struct btrfs_fs_info *fs_info = trans->fs_info;
Chris Mason56bec292009-03-13 10:10:06 -04002670 struct btrfs_delayed_ref_root *delayed_refs;
Chris Mason56bec292009-03-13 10:10:06 -04002671 struct btrfs_delayed_ref_head *locked_ref = NULL;
Josef Bacik0a2b2a82014-01-23 10:54:11 -05002672 ktime_t start = ktime_get();
Chris Mason56bec292009-03-13 10:10:06 -04002673 int ret;
Josef Bacikd7df2c72014-01-23 09:21:38 -05002674 unsigned long count = 0;
Josef Bacik0a2b2a82014-01-23 10:54:11 -05002675 unsigned long actual_count = 0;
Chris Mason56bec292009-03-13 10:10:06 -04002676
2677 delayed_refs = &trans->transaction->delayed_refs;
Nikolay Borisov0110a4c2018-08-15 10:39:56 +03002678 do {
Chris Mason56bec292009-03-13 10:10:06 -04002679 if (!locked_ref) {
Nikolay Borisovb1cdbcb2018-08-15 10:39:54 +03002680 locked_ref = btrfs_obtain_ref_head(trans);
Nikolay Borisov0110a4c2018-08-15 10:39:56 +03002681 if (IS_ERR_OR_NULL(locked_ref)) {
2682 if (PTR_ERR(locked_ref) == -EAGAIN) {
2683 continue;
2684 } else {
2685 break;
2686 }
Chris Mason56bec292009-03-13 10:10:06 -04002687 }
Nikolay Borisov0110a4c2018-08-15 10:39:56 +03002688 count++;
Chris Mason56bec292009-03-13 10:10:06 -04002689 }
Filipe Manana2c3cf7d2015-10-22 09:47:34 +01002690 /*
2691 * We need to try and merge add/drops of the same ref since we
2692 * can run into issues with relocate dropping the implicit ref
2693 * and then it being added back again before the drop can
2694 * finish. If we merged anything we need to re-loop so we can
2695 * get a good ref.
2696 * Or we can get node references of the same type that weren't
2697 * merged when created due to bumps in the tree mod seq, and
2698 * we need to merge them to prevent adding an inline extent
2699 * backref before dropping it (triggering a BUG_ON at
2700 * insert_inline_extent_backref()).
2701 */
Josef Bacikd7df2c72014-01-23 09:21:38 -05002702 spin_lock(&locked_ref->lock);
Nikolay Borisovbe97f132018-04-19 11:06:39 +03002703 btrfs_merge_delayed_refs(trans, delayed_refs, locked_ref);
Josef Bacikae1e2062012-08-07 16:00:32 -04002704
Nikolay Borisov0110a4c2018-08-15 10:39:56 +03002705 ret = btrfs_run_delayed_refs_for_head(trans, locked_ref,
2706 &actual_count);
2707 if (ret < 0 && ret != -EAGAIN) {
2708 /*
2709 * Error, btrfs_run_delayed_refs_for_head already
2710 * unlocked everything so just bail out
2711 */
2712 return ret;
2713 } else if (!ret) {
2714 /*
2715 * Success, perform the usual cleanup of a processed
2716 * head
2717 */
Nikolay Borisovf9871ed2018-06-20 15:49:03 +03002718 ret = cleanup_ref_head(trans, locked_ref);
Josef Bacik194ab0b2017-09-29 15:43:54 -04002719 if (ret > 0 ) {
Josef Bacikb00e6252017-09-29 15:43:53 -04002720 /* We dropped our lock, we need to loop. */
2721 ret = 0;
Josef Bacikd7df2c72014-01-23 09:21:38 -05002722 continue;
Josef Bacik194ab0b2017-09-29 15:43:54 -04002723 } else if (ret) {
2724 return ret;
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002725 }
Josef Bacikc1103f72017-09-29 15:43:56 -04002726 }
Chris Mason56bec292009-03-13 10:10:06 -04002727
Josef Bacikb00e6252017-09-29 15:43:53 -04002728 /*
Nikolay Borisov0110a4c2018-08-15 10:39:56 +03002729 * Either success case or btrfs_run_delayed_refs_for_head
2730 * returned -EAGAIN, meaning we need to select another head
Josef Bacikb00e6252017-09-29 15:43:53 -04002731 */
Josef Bacikb00e6252017-09-29 15:43:53 -04002732
Nikolay Borisov0110a4c2018-08-15 10:39:56 +03002733 locked_ref = NULL;
Chris Mason1887be62009-03-13 10:11:24 -04002734 cond_resched();
Nikolay Borisov0110a4c2018-08-15 10:39:56 +03002735 } while ((nr != -1 && count < nr) || locked_ref);
Josef Bacik0a2b2a82014-01-23 10:54:11 -05002736
2737 /*
2738 * We don't want to include ref heads since we can have empty ref heads
2739 * and those will drastically skew our runtime down since we just do
2740 * accounting, no actual extent tree updates.
2741 */
2742 if (actual_count > 0) {
2743 u64 runtime = ktime_to_ns(ktime_sub(ktime_get(), start));
2744 u64 avg;
2745
2746 /*
2747 * We weigh the current average higher than our current runtime
2748 * to avoid large swings in the average.
2749 */
2750 spin_lock(&delayed_refs->lock);
2751 avg = fs_info->avg_delayed_ref_runtime * 3 + runtime;
David Sterbaf8c269d2015-01-16 17:21:12 +01002752 fs_info->avg_delayed_ref_runtime = avg >> 2; /* div by 4 */
Josef Bacik0a2b2a82014-01-23 10:54:11 -05002753 spin_unlock(&delayed_refs->lock);
2754 }
Josef Bacikd7df2c72014-01-23 09:21:38 -05002755 return 0;
Chris Masonc3e69d52009-03-13 10:17:05 -04002756}
2757
Arne Jansen709c0482011-09-12 12:22:57 +02002758#ifdef SCRAMBLE_DELAYED_REFS
2759/*
2760 * Normally delayed refs get processed in ascending bytenr order. This
2761 * correlates in most cases to the order added. To expose dependencies on this
2762 * order, we start to process the tree in the middle instead of the beginning
2763 */
2764static u64 find_middle(struct rb_root *root)
2765{
2766 struct rb_node *n = root->rb_node;
2767 struct btrfs_delayed_ref_node *entry;
2768 int alt = 1;
2769 u64 middle;
2770 u64 first = 0, last = 0;
2771
2772 n = rb_first(root);
2773 if (n) {
2774 entry = rb_entry(n, struct btrfs_delayed_ref_node, rb_node);
2775 first = entry->bytenr;
2776 }
2777 n = rb_last(root);
2778 if (n) {
2779 entry = rb_entry(n, struct btrfs_delayed_ref_node, rb_node);
2780 last = entry->bytenr;
2781 }
2782 n = root->rb_node;
2783
2784 while (n) {
2785 entry = rb_entry(n, struct btrfs_delayed_ref_node, rb_node);
2786 WARN_ON(!entry->in_tree);
2787
2788 middle = entry->bytenr;
2789
2790 if (alt)
2791 n = n->rb_left;
2792 else
2793 n = n->rb_right;
2794
2795 alt = 1 - alt;
2796 }
2797 return middle;
2798}
2799#endif
2800
Jeff Mahoney2ff7e612016-06-22 18:54:24 -04002801static inline u64 heads_to_leaves(struct btrfs_fs_info *fs_info, u64 heads)
Josef Bacik1be41b72013-06-12 13:56:06 -04002802{
2803 u64 num_bytes;
2804
2805 num_bytes = heads * (sizeof(struct btrfs_extent_item) +
2806 sizeof(struct btrfs_extent_inline_ref));
Jeff Mahoney0b246af2016-06-22 18:54:23 -04002807 if (!btrfs_fs_incompat(fs_info, SKINNY_METADATA))
Josef Bacik1be41b72013-06-12 13:56:06 -04002808 num_bytes += heads * sizeof(struct btrfs_tree_block_info);
2809
2810 /*
2811 * We don't ever fill up leaves all the way so multiply by 2 just to be
Nicholas D Steeves01327612016-05-19 21:18:45 -04002812 * closer to what we're really going to want to use.
Josef Bacik1be41b72013-06-12 13:56:06 -04002813 */
Jeff Mahoney0b246af2016-06-22 18:54:23 -04002814 return div_u64(num_bytes, BTRFS_LEAF_DATA_SIZE(fs_info));
Josef Bacik1be41b72013-06-12 13:56:06 -04002815}
2816
Josef Bacik12621332015-02-03 07:50:16 -08002817/*
2818 * Takes the number of bytes to be csumm'ed and figures out how many leaves it
2819 * would require to store the csums for that many bytes.
2820 */
Jeff Mahoney2ff7e612016-06-22 18:54:24 -04002821u64 btrfs_csum_bytes_to_leaves(struct btrfs_fs_info *fs_info, u64 csum_bytes)
Josef Bacik12621332015-02-03 07:50:16 -08002822{
2823 u64 csum_size;
2824 u64 num_csums_per_leaf;
2825 u64 num_csums;
2826
Jeff Mahoney0b246af2016-06-22 18:54:23 -04002827 csum_size = BTRFS_MAX_ITEM_SIZE(fs_info);
Josef Bacik12621332015-02-03 07:50:16 -08002828 num_csums_per_leaf = div64_u64(csum_size,
Jeff Mahoney0b246af2016-06-22 18:54:23 -04002829 (u64)btrfs_super_csum_size(fs_info->super_copy));
2830 num_csums = div64_u64(csum_bytes, fs_info->sectorsize);
Josef Bacik12621332015-02-03 07:50:16 -08002831 num_csums += num_csums_per_leaf - 1;
2832 num_csums = div64_u64(num_csums, num_csums_per_leaf);
2833 return num_csums;
2834}
2835
Josef Bacik644036122018-12-03 10:20:36 -05002836bool btrfs_check_space_for_delayed_refs(struct btrfs_fs_info *fs_info)
Josef Bacik1be41b72013-06-12 13:56:06 -04002837{
Josef Bacik644036122018-12-03 10:20:36 -05002838 struct btrfs_block_rsv *delayed_refs_rsv = &fs_info->delayed_refs_rsv;
2839 struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
2840 bool ret = false;
2841 u64 reserved;
Josef Bacik1be41b72013-06-12 13:56:06 -04002842
2843 spin_lock(&global_rsv->lock);
Josef Bacik644036122018-12-03 10:20:36 -05002844 reserved = global_rsv->reserved;
Josef Bacik1be41b72013-06-12 13:56:06 -04002845 spin_unlock(&global_rsv->lock);
Josef Bacik644036122018-12-03 10:20:36 -05002846
2847 /*
2848 * Since the global reserve is just kind of magic we don't really want
2849 * to rely on it to save our bacon, so if our size is more than the
2850 * delayed_refs_rsv and the global rsv then it's time to think about
2851 * bailing.
2852 */
2853 spin_lock(&delayed_refs_rsv->lock);
2854 reserved += delayed_refs_rsv->reserved;
2855 if (delayed_refs_rsv->size >= reserved)
2856 ret = true;
2857 spin_unlock(&delayed_refs_rsv->lock);
Josef Bacik1be41b72013-06-12 13:56:06 -04002858 return ret;
2859}
2860
Lu Fengqi7c861622018-10-11 13:40:36 +08002861int btrfs_should_throttle_delayed_refs(struct btrfs_trans_handle *trans)
Josef Bacik0a2b2a82014-01-23 10:54:11 -05002862{
Josef Bacik0a2b2a82014-01-23 10:54:11 -05002863 u64 num_entries =
2864 atomic_read(&trans->transaction->delayed_refs.num_entries);
2865 u64 avg_runtime;
Chris Masona79b7d42014-05-22 16:18:52 -07002866 u64 val;
Josef Bacik0a2b2a82014-01-23 10:54:11 -05002867
2868 smp_mb();
Lu Fengqi7c861622018-10-11 13:40:36 +08002869 avg_runtime = trans->fs_info->avg_delayed_ref_runtime;
Chris Masona79b7d42014-05-22 16:18:52 -07002870 val = num_entries * avg_runtime;
Wang Xiaoguangdc1a90c2016-10-26 15:23:01 +08002871 if (val >= NSEC_PER_SEC)
Josef Bacik0a2b2a82014-01-23 10:54:11 -05002872 return 1;
Chris Masona79b7d42014-05-22 16:18:52 -07002873 if (val >= NSEC_PER_SEC / 2)
2874 return 2;
Josef Bacik0a2b2a82014-01-23 10:54:11 -05002875
Josef Bacik644036122018-12-03 10:20:36 -05002876 return btrfs_check_space_for_delayed_refs(trans->fs_info);
Josef Bacik0a2b2a82014-01-23 10:54:11 -05002877}
2878
Chris Masona79b7d42014-05-22 16:18:52 -07002879struct async_delayed_refs {
2880 struct btrfs_root *root;
Josef Bacik31b96552016-04-11 17:37:40 -04002881 u64 transid;
Chris Masona79b7d42014-05-22 16:18:52 -07002882 int count;
2883 int error;
2884 int sync;
2885 struct completion wait;
2886 struct btrfs_work work;
2887};
2888
Jeff Mahoney2ff7e612016-06-22 18:54:24 -04002889static inline struct async_delayed_refs *
2890to_async_delayed_refs(struct btrfs_work *work)
2891{
2892 return container_of(work, struct async_delayed_refs, work);
2893}
2894
Chris Masona79b7d42014-05-22 16:18:52 -07002895static void delayed_ref_async_start(struct btrfs_work *work)
2896{
Jeff Mahoney2ff7e612016-06-22 18:54:24 -04002897 struct async_delayed_refs *async = to_async_delayed_refs(work);
Chris Masona79b7d42014-05-22 16:18:52 -07002898 struct btrfs_trans_handle *trans;
Jeff Mahoney2ff7e612016-06-22 18:54:24 -04002899 struct btrfs_fs_info *fs_info = async->root->fs_info;
Chris Masona79b7d42014-05-22 16:18:52 -07002900 int ret;
2901
Chris Mason0f873ec2016-04-27 09:59:38 -04002902 /* if the commit is already started, we don't need to wait here */
Jeff Mahoney2ff7e612016-06-22 18:54:24 -04002903 if (btrfs_transaction_blocked(fs_info))
Josef Bacik31b96552016-04-11 17:37:40 -04002904 goto done;
Josef Bacik31b96552016-04-11 17:37:40 -04002905
Chris Mason0f873ec2016-04-27 09:59:38 -04002906 trans = btrfs_join_transaction(async->root);
2907 if (IS_ERR(trans)) {
2908 async->error = PTR_ERR(trans);
Chris Masona79b7d42014-05-22 16:18:52 -07002909 goto done;
2910 }
2911
Chris Mason0f873ec2016-04-27 09:59:38 -04002912 /* Don't bother flushing if we got into a different transaction */
2913 if (trans->transid > async->transid)
2914 goto end;
2915
Nikolay Borisovc79a70b2018-03-15 17:27:37 +02002916 ret = btrfs_run_delayed_refs(trans, async->count);
Chris Masona79b7d42014-05-22 16:18:52 -07002917 if (ret)
2918 async->error = ret;
Chris Mason0f873ec2016-04-27 09:59:38 -04002919end:
Jeff Mahoney3a45bb22016-09-09 21:39:03 -04002920 ret = btrfs_end_transaction(trans);
Chris Masona79b7d42014-05-22 16:18:52 -07002921 if (ret && !async->error)
2922 async->error = ret;
2923done:
2924 if (async->sync)
2925 complete(&async->wait);
2926 else
2927 kfree(async);
2928}
2929
Jeff Mahoney2ff7e612016-06-22 18:54:24 -04002930int btrfs_async_run_delayed_refs(struct btrfs_fs_info *fs_info,
Josef Bacik31b96552016-04-11 17:37:40 -04002931 unsigned long count, u64 transid, int wait)
Chris Masona79b7d42014-05-22 16:18:52 -07002932{
2933 struct async_delayed_refs *async;
2934 int ret;
2935
2936 async = kmalloc(sizeof(*async), GFP_NOFS);
2937 if (!async)
2938 return -ENOMEM;
2939
Jeff Mahoney0b246af2016-06-22 18:54:23 -04002940 async->root = fs_info->tree_root;
Chris Masona79b7d42014-05-22 16:18:52 -07002941 async->count = count;
2942 async->error = 0;
Josef Bacik31b96552016-04-11 17:37:40 -04002943 async->transid = transid;
Chris Masona79b7d42014-05-22 16:18:52 -07002944 if (wait)
2945 async->sync = 1;
2946 else
2947 async->sync = 0;
2948 init_completion(&async->wait);
2949
Liu Bo9e0af232014-08-15 23:36:53 +08002950 btrfs_init_work(&async->work, btrfs_extent_refs_helper,
2951 delayed_ref_async_start, NULL, NULL);
Chris Masona79b7d42014-05-22 16:18:52 -07002952
Jeff Mahoney0b246af2016-06-22 18:54:23 -04002953 btrfs_queue_work(fs_info->extent_workers, &async->work);
Chris Masona79b7d42014-05-22 16:18:52 -07002954
2955 if (wait) {
2956 wait_for_completion(&async->wait);
2957 ret = async->error;
2958 kfree(async);
2959 return ret;
2960 }
2961 return 0;
2962}
2963
Chris Masonc3e69d52009-03-13 10:17:05 -04002964/*
2965 * this starts processing the delayed reference count updates and
2966 * extent insertions we have queued up so far. count can be
2967 * 0, which means to process everything in the tree at the start
2968 * of the run (but not newly added entries), or it can be some target
2969 * number you'd like to process.
Jeff Mahoney79787ea2012-03-12 16:03:00 +01002970 *
2971 * Returns 0 on success or if called with an aborted transaction
2972 * Returns <0 on error and aborts the transaction
Chris Masonc3e69d52009-03-13 10:17:05 -04002973 */
2974int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
Nikolay Borisovc79a70b2018-03-15 17:27:37 +02002975 unsigned long count)
Chris Masonc3e69d52009-03-13 10:17:05 -04002976{
Nikolay Borisovc79a70b2018-03-15 17:27:37 +02002977 struct btrfs_fs_info *fs_info = trans->fs_info;
Chris Masonc3e69d52009-03-13 10:17:05 -04002978 struct rb_node *node;
2979 struct btrfs_delayed_ref_root *delayed_refs;
Liu Boc46effa2013-10-14 12:59:45 +08002980 struct btrfs_delayed_ref_head *head;
Chris Masonc3e69d52009-03-13 10:17:05 -04002981 int ret;
2982 int run_all = count == (unsigned long)-1;
Chris Masonc3e69d52009-03-13 10:17:05 -04002983
Jeff Mahoney79787ea2012-03-12 16:03:00 +01002984 /* We'll clean this up in btrfs_cleanup_transaction */
2985 if (trans->aborted)
2986 return 0;
2987
Jeff Mahoney0b246af2016-06-22 18:54:23 -04002988 if (test_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE, &fs_info->flags))
Chris Mason511711a2015-12-30 07:52:35 -08002989 return 0;
2990
Chris Masonc3e69d52009-03-13 10:17:05 -04002991 delayed_refs = &trans->transaction->delayed_refs;
Liu Bo26455d32014-12-17 16:14:09 +08002992 if (count == 0)
Josef Bacikd7df2c72014-01-23 09:21:38 -05002993 count = atomic_read(&delayed_refs->num_entries) * 2;
Chris Masonbb721702013-01-29 18:44:12 -05002994
Chris Masonc3e69d52009-03-13 10:17:05 -04002995again:
Arne Jansen709c0482011-09-12 12:22:57 +02002996#ifdef SCRAMBLE_DELAYED_REFS
2997 delayed_refs->run_delayed_start = find_middle(&delayed_refs->root);
2998#endif
Nikolay Borisov0a1e4582018-03-15 16:00:27 +02002999 ret = __btrfs_run_delayed_refs(trans, count);
Josef Bacikd7df2c72014-01-23 09:21:38 -05003000 if (ret < 0) {
Jeff Mahoney66642832016-06-10 18:19:25 -04003001 btrfs_abort_transaction(trans, ret);
Josef Bacikd7df2c72014-01-23 09:21:38 -05003002 return ret;
Chris Masonc3e69d52009-03-13 10:17:05 -04003003 }
3004
Chris Mason56bec292009-03-13 10:10:06 -04003005 if (run_all) {
Josef Bacik119e80d2018-11-21 14:05:42 -05003006 btrfs_create_pending_block_groups(trans);
Josef Bacikea658ba2012-09-11 16:57:25 -04003007
Josef Bacikd7df2c72014-01-23 09:21:38 -05003008 spin_lock(&delayed_refs->lock);
Liu Bo5c9d0282018-08-23 03:51:49 +08003009 node = rb_first_cached(&delayed_refs->href_root);
Josef Bacikd7df2c72014-01-23 09:21:38 -05003010 if (!node) {
3011 spin_unlock(&delayed_refs->lock);
Chris Mason56bec292009-03-13 10:10:06 -04003012 goto out;
Josef Bacikd7df2c72014-01-23 09:21:38 -05003013 }
Josef Bacikd2788502017-09-29 15:43:57 -04003014 head = rb_entry(node, struct btrfs_delayed_ref_head,
3015 href_node);
3016 refcount_inc(&head->refs);
Chris Mason56bec292009-03-13 10:10:06 -04003017 spin_unlock(&delayed_refs->lock);
Josef Bacikd2788502017-09-29 15:43:57 -04003018
3019 /* Mutex was contended, block until it's released and retry. */
3020 mutex_lock(&head->mutex);
3021 mutex_unlock(&head->mutex);
3022
3023 btrfs_put_delayed_ref_head(head);
Josef Bacikd7df2c72014-01-23 09:21:38 -05003024 cond_resched();
Chris Mason56bec292009-03-13 10:10:06 -04003025 goto again;
3026 }
Chris Mason54aa1f42007-06-22 14:16:25 -04003027out:
Chris Masona28ec192007-03-06 20:08:01 -05003028 return 0;
3029}
3030
Yan Zheng5d4f98a2009-06-10 10:45:14 -04003031int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans,
Jeff Mahoney2ff7e612016-06-22 18:54:24 -04003032 struct btrfs_fs_info *fs_info,
Yan Zheng5d4f98a2009-06-10 10:45:14 -04003033 u64 bytenr, u64 num_bytes, u64 flags,
Josef Bacikb1c79e02013-05-09 13:49:30 -04003034 int level, int is_data)
Yan Zheng5d4f98a2009-06-10 10:45:14 -04003035{
3036 struct btrfs_delayed_extent_op *extent_op;
3037 int ret;
3038
Miao Xie78a61842012-11-21 02:21:28 +00003039 extent_op = btrfs_alloc_delayed_extent_op();
Yan Zheng5d4f98a2009-06-10 10:45:14 -04003040 if (!extent_op)
3041 return -ENOMEM;
3042
3043 extent_op->flags_to_set = flags;
David Sterba35b3ad52015-11-30 16:51:29 +01003044 extent_op->update_flags = true;
3045 extent_op->update_key = false;
3046 extent_op->is_data = is_data ? true : false;
Josef Bacikb1c79e02013-05-09 13:49:30 -04003047 extent_op->level = level;
Yan Zheng5d4f98a2009-06-10 10:45:14 -04003048
Jeff Mahoney0b246af2016-06-22 18:54:23 -04003049 ret = btrfs_add_delayed_extent_op(fs_info, trans, bytenr,
Arne Jansen66d7e7f2011-09-12 15:26:38 +02003050 num_bytes, extent_op);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04003051 if (ret)
Miao Xie78a61842012-11-21 02:21:28 +00003052 btrfs_free_delayed_extent_op(extent_op);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04003053 return ret;
3054}
3055
Liu Boe4c3b2d2017-01-30 12:25:28 -08003056static noinline int check_delayed_ref(struct btrfs_root *root,
Yan Zheng5d4f98a2009-06-10 10:45:14 -04003057 struct btrfs_path *path,
3058 u64 objectid, u64 offset, u64 bytenr)
3059{
3060 struct btrfs_delayed_ref_head *head;
3061 struct btrfs_delayed_ref_node *ref;
3062 struct btrfs_delayed_data_ref *data_ref;
3063 struct btrfs_delayed_ref_root *delayed_refs;
Liu Boe4c3b2d2017-01-30 12:25:28 -08003064 struct btrfs_transaction *cur_trans;
Josef Bacik0e0adbc2017-10-19 14:16:00 -04003065 struct rb_node *node;
Yan Zheng5d4f98a2009-06-10 10:45:14 -04003066 int ret = 0;
3067
ethanwu998ac6d2018-04-29 15:59:42 +08003068 spin_lock(&root->fs_info->trans_lock);
Liu Boe4c3b2d2017-01-30 12:25:28 -08003069 cur_trans = root->fs_info->running_transaction;
ethanwu998ac6d2018-04-29 15:59:42 +08003070 if (cur_trans)
3071 refcount_inc(&cur_trans->use_count);
3072 spin_unlock(&root->fs_info->trans_lock);
Liu Boe4c3b2d2017-01-30 12:25:28 -08003073 if (!cur_trans)
3074 return 0;
3075
3076 delayed_refs = &cur_trans->delayed_refs;
Yan Zheng5d4f98a2009-06-10 10:45:14 -04003077 spin_lock(&delayed_refs->lock);
Liu Bof72ad18e2017-01-30 12:24:37 -08003078 head = btrfs_find_delayed_ref_head(delayed_refs, bytenr);
Josef Bacikd7df2c72014-01-23 09:21:38 -05003079 if (!head) {
3080 spin_unlock(&delayed_refs->lock);
ethanwu998ac6d2018-04-29 15:59:42 +08003081 btrfs_put_transaction(cur_trans);
Josef Bacikd7df2c72014-01-23 09:21:38 -05003082 return 0;
3083 }
Yan Zheng5d4f98a2009-06-10 10:45:14 -04003084
3085 if (!mutex_trylock(&head->mutex)) {
Josef Bacikd2788502017-09-29 15:43:57 -04003086 refcount_inc(&head->refs);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04003087 spin_unlock(&delayed_refs->lock);
3088
David Sterbab3b4aa72011-04-21 01:20:15 +02003089 btrfs_release_path(path);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04003090
David Sterba8cc33e52011-05-02 15:29:25 +02003091 /*
3092 * Mutex was contended, block until it's released and let
3093 * caller try again
3094 */
Yan Zheng5d4f98a2009-06-10 10:45:14 -04003095 mutex_lock(&head->mutex);
3096 mutex_unlock(&head->mutex);
Josef Bacikd2788502017-09-29 15:43:57 -04003097 btrfs_put_delayed_ref_head(head);
ethanwu998ac6d2018-04-29 15:59:42 +08003098 btrfs_put_transaction(cur_trans);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04003099 return -EAGAIN;
3100 }
Yan Zheng5d4f98a2009-06-10 10:45:14 -04003101 spin_unlock(&delayed_refs->lock);
Josef Bacikd7df2c72014-01-23 09:21:38 -05003102
3103 spin_lock(&head->lock);
Josef Bacik0e0adbc2017-10-19 14:16:00 -04003104 /*
3105 * XXX: We should replace this with a proper search function in the
3106 * future.
3107 */
Liu Boe3d03962018-08-23 03:51:50 +08003108 for (node = rb_first_cached(&head->ref_tree); node;
3109 node = rb_next(node)) {
Josef Bacik0e0adbc2017-10-19 14:16:00 -04003110 ref = rb_entry(node, struct btrfs_delayed_ref_node, ref_node);
Josef Bacikd7df2c72014-01-23 09:21:38 -05003111 /* If it's a shared ref we know a cross reference exists */
3112 if (ref->type != BTRFS_EXTENT_DATA_REF_KEY) {
3113 ret = 1;
3114 break;
3115 }
3116
3117 data_ref = btrfs_delayed_node_to_data_ref(ref);
3118
3119 /*
3120 * If our ref doesn't match the one we're currently looking at
3121 * then we have a cross reference.
3122 */
3123 if (data_ref->root != root->root_key.objectid ||
3124 data_ref->objectid != objectid ||
3125 data_ref->offset != offset) {
3126 ret = 1;
3127 break;
3128 }
3129 }
3130 spin_unlock(&head->lock);
3131 mutex_unlock(&head->mutex);
ethanwu998ac6d2018-04-29 15:59:42 +08003132 btrfs_put_transaction(cur_trans);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04003133 return ret;
3134}
3135
Liu Boe4c3b2d2017-01-30 12:25:28 -08003136static noinline int check_committed_ref(struct btrfs_root *root,
Yan Zheng5d4f98a2009-06-10 10:45:14 -04003137 struct btrfs_path *path,
3138 u64 objectid, u64 offset, u64 bytenr)
Chris Masonbe20aa92007-12-17 20:14:01 -05003139{
Jeff Mahoney0b246af2016-06-22 18:54:23 -04003140 struct btrfs_fs_info *fs_info = root->fs_info;
3141 struct btrfs_root *extent_root = fs_info->extent_root;
Yan Zhengf321e492008-07-30 09:26:11 -04003142 struct extent_buffer *leaf;
Yan Zheng5d4f98a2009-06-10 10:45:14 -04003143 struct btrfs_extent_data_ref *ref;
3144 struct btrfs_extent_inline_ref *iref;
3145 struct btrfs_extent_item *ei;
Chris Masonbe20aa92007-12-17 20:14:01 -05003146 struct btrfs_key key;
Yan Zheng5d4f98a2009-06-10 10:45:14 -04003147 u32 item_size;
Liu Bo3de28d52017-08-18 15:15:19 -06003148 int type;
Yan Zhengf321e492008-07-30 09:26:11 -04003149 int ret;
Chris Masonbe20aa92007-12-17 20:14:01 -05003150
Chris Masonbe20aa92007-12-17 20:14:01 -05003151 key.objectid = bytenr;
Zheng Yan31840ae2008-09-23 13:14:14 -04003152 key.offset = (u64)-1;
Yan Zhengf321e492008-07-30 09:26:11 -04003153 key.type = BTRFS_EXTENT_ITEM_KEY;
Chris Masonbe20aa92007-12-17 20:14:01 -05003154
Chris Masonbe20aa92007-12-17 20:14:01 -05003155 ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
3156 if (ret < 0)
3157 goto out;
Jeff Mahoney79787ea2012-03-12 16:03:00 +01003158 BUG_ON(ret == 0); /* Corruption */
Yan Zheng80ff3852008-10-30 14:20:02 -04003159
3160 ret = -ENOENT;
3161 if (path->slots[0] == 0)
Zheng Yan31840ae2008-09-23 13:14:14 -04003162 goto out;
Chris Masonbe20aa92007-12-17 20:14:01 -05003163
Zheng Yan31840ae2008-09-23 13:14:14 -04003164 path->slots[0]--;
Yan Zhengf321e492008-07-30 09:26:11 -04003165 leaf = path->nodes[0];
Yan Zheng5d4f98a2009-06-10 10:45:14 -04003166 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
Chris Masonbe20aa92007-12-17 20:14:01 -05003167
Yan Zheng5d4f98a2009-06-10 10:45:14 -04003168 if (key.objectid != bytenr || key.type != BTRFS_EXTENT_ITEM_KEY)
Chris Masonbe20aa92007-12-17 20:14:01 -05003169 goto out;
Chris Masonbe20aa92007-12-17 20:14:01 -05003170
Yan Zheng5d4f98a2009-06-10 10:45:14 -04003171 ret = 1;
3172 item_size = btrfs_item_size_nr(leaf, path->slots[0]);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04003173 ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
3174
3175 if (item_size != sizeof(*ei) +
3176 btrfs_extent_inline_ref_size(BTRFS_EXTENT_DATA_REF_KEY))
3177 goto out;
3178
3179 if (btrfs_extent_generation(leaf, ei) <=
3180 btrfs_root_last_snapshot(&root->root_item))
3181 goto out;
3182
3183 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
Liu Bo3de28d52017-08-18 15:15:19 -06003184
3185 type = btrfs_get_extent_inline_ref_type(leaf, iref, BTRFS_REF_TYPE_DATA);
3186 if (type != BTRFS_EXTENT_DATA_REF_KEY)
Yan Zheng5d4f98a2009-06-10 10:45:14 -04003187 goto out;
3188
3189 ref = (struct btrfs_extent_data_ref *)(&iref->offset);
3190 if (btrfs_extent_refs(leaf, ei) !=
3191 btrfs_extent_data_ref_count(leaf, ref) ||
3192 btrfs_extent_data_ref_root(leaf, ref) !=
3193 root->root_key.objectid ||
3194 btrfs_extent_data_ref_objectid(leaf, ref) != objectid ||
3195 btrfs_extent_data_ref_offset(leaf, ref) != offset)
3196 goto out;
3197
Yan Zhengf321e492008-07-30 09:26:11 -04003198 ret = 0;
Chris Masonbe20aa92007-12-17 20:14:01 -05003199out:
Yan Zheng5d4f98a2009-06-10 10:45:14 -04003200 return ret;
3201}
3202
Liu Boe4c3b2d2017-01-30 12:25:28 -08003203int btrfs_cross_ref_exist(struct btrfs_root *root, u64 objectid, u64 offset,
3204 u64 bytenr)
Yan Zheng5d4f98a2009-06-10 10:45:14 -04003205{
3206 struct btrfs_path *path;
3207 int ret;
Yan Zheng5d4f98a2009-06-10 10:45:14 -04003208
3209 path = btrfs_alloc_path();
3210 if (!path)
Su Yue9132c4f2018-05-30 14:49:10 +08003211 return -ENOMEM;
Yan Zheng5d4f98a2009-06-10 10:45:14 -04003212
3213 do {
Liu Boe4c3b2d2017-01-30 12:25:28 -08003214 ret = check_committed_ref(root, path, objectid,
Yan Zheng5d4f98a2009-06-10 10:45:14 -04003215 offset, bytenr);
3216 if (ret && ret != -ENOENT)
3217 goto out;
3218
Misono Tomohiro380fd062018-08-30 10:59:16 +09003219 ret = check_delayed_ref(root, path, objectid, offset, bytenr);
3220 } while (ret == -EAGAIN);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04003221
Yan Zheng5d4f98a2009-06-10 10:45:14 -04003222out:
Yan Zhengf321e492008-07-30 09:26:11 -04003223 btrfs_free_path(path);
Yan, Zhengf0486c62010-05-16 10:46:25 -04003224 if (root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
3225 WARN_ON(ret > 0);
Yan Zhengf321e492008-07-30 09:26:11 -04003226 return ret;
3227}
3228
Yan Zheng5d4f98a2009-06-10 10:45:14 -04003229static int __btrfs_mod_ref(struct btrfs_trans_handle *trans,
Chris Masonb7a9f292009-02-04 09:23:45 -05003230 struct btrfs_root *root,
Yan Zheng5d4f98a2009-06-10 10:45:14 -04003231 struct extent_buffer *buf,
Josef Bacike339a6b2014-07-02 10:54:25 -07003232 int full_backref, int inc)
Zheng Yan31840ae2008-09-23 13:14:14 -04003233{
Jeff Mahoney0b246af2016-06-22 18:54:23 -04003234 struct btrfs_fs_info *fs_info = root->fs_info;
Zheng Yan31840ae2008-09-23 13:14:14 -04003235 u64 bytenr;
Yan Zheng5d4f98a2009-06-10 10:45:14 -04003236 u64 num_bytes;
3237 u64 parent;
Zheng Yan31840ae2008-09-23 13:14:14 -04003238 u64 ref_root;
Zheng Yan31840ae2008-09-23 13:14:14 -04003239 u32 nritems;
Zheng Yan31840ae2008-09-23 13:14:14 -04003240 struct btrfs_key key;
3241 struct btrfs_file_extent_item *fi;
3242 int i;
3243 int level;
3244 int ret = 0;
Jeff Mahoney2ff7e612016-06-22 18:54:24 -04003245 int (*process_func)(struct btrfs_trans_handle *,
Josef Bacik84f7d8e2017-09-29 15:43:49 -04003246 struct btrfs_root *,
Filipe Mananab06c4bf2015-10-23 07:52:54 +01003247 u64, u64, u64, u64, u64, u64);
Zheng Yan31840ae2008-09-23 13:14:14 -04003248
David Sterbafccb84c2014-09-29 23:53:21 +02003249
Jeff Mahoney0b246af2016-06-22 18:54:23 -04003250 if (btrfs_is_testing(fs_info))
Josef Bacikfaa2dbf2014-05-07 17:06:09 -04003251 return 0;
David Sterbafccb84c2014-09-29 23:53:21 +02003252
Zheng Yan31840ae2008-09-23 13:14:14 -04003253 ref_root = btrfs_header_owner(buf);
Zheng Yan31840ae2008-09-23 13:14:14 -04003254 nritems = btrfs_header_nritems(buf);
3255 level = btrfs_header_level(buf);
3256
Miao Xie27cdeb72014-04-02 19:51:05 +08003257 if (!test_bit(BTRFS_ROOT_REF_COWS, &root->state) && level == 0)
Yan Zheng5d4f98a2009-06-10 10:45:14 -04003258 return 0;
Chris Masonb7a9f292009-02-04 09:23:45 -05003259
Yan Zheng5d4f98a2009-06-10 10:45:14 -04003260 if (inc)
3261 process_func = btrfs_inc_extent_ref;
3262 else
3263 process_func = btrfs_free_extent;
Zheng Yan31840ae2008-09-23 13:14:14 -04003264
Yan Zheng5d4f98a2009-06-10 10:45:14 -04003265 if (full_backref)
3266 parent = buf->start;
3267 else
3268 parent = 0;
3269
Zheng Yan31840ae2008-09-23 13:14:14 -04003270 for (i = 0; i < nritems; i++) {
Chris Masondb945352007-10-15 16:15:53 -04003271 if (level == 0) {
Chris Mason5f39d392007-10-15 16:14:19 -04003272 btrfs_item_key_to_cpu(buf, &key, i);
David Sterba962a2982014-06-04 18:41:45 +02003273 if (key.type != BTRFS_EXTENT_DATA_KEY)
Chris Mason54aa1f42007-06-22 14:16:25 -04003274 continue;
Chris Mason5f39d392007-10-15 16:14:19 -04003275 fi = btrfs_item_ptr(buf, i,
Chris Mason54aa1f42007-06-22 14:16:25 -04003276 struct btrfs_file_extent_item);
Chris Mason5f39d392007-10-15 16:14:19 -04003277 if (btrfs_file_extent_type(buf, fi) ==
Chris Mason54aa1f42007-06-22 14:16:25 -04003278 BTRFS_FILE_EXTENT_INLINE)
3279 continue;
Zheng Yan31840ae2008-09-23 13:14:14 -04003280 bytenr = btrfs_file_extent_disk_bytenr(buf, fi);
3281 if (bytenr == 0)
Chris Mason54aa1f42007-06-22 14:16:25 -04003282 continue;
Zheng Yan31840ae2008-09-23 13:14:14 -04003283
Yan Zheng5d4f98a2009-06-10 10:45:14 -04003284 num_bytes = btrfs_file_extent_disk_num_bytes(buf, fi);
3285 key.offset -= btrfs_file_extent_offset(buf, fi);
Josef Bacik84f7d8e2017-09-29 15:43:49 -04003286 ret = process_func(trans, root, bytenr, num_bytes,
Yan Zheng5d4f98a2009-06-10 10:45:14 -04003287 parent, ref_root, key.objectid,
Filipe Mananab06c4bf2015-10-23 07:52:54 +01003288 key.offset);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04003289 if (ret)
3290 goto fail;
Chris Masonb7a9f292009-02-04 09:23:45 -05003291 } else {
3292 bytenr = btrfs_node_blockptr(buf, i);
Jeff Mahoney0b246af2016-06-22 18:54:23 -04003293 num_bytes = fs_info->nodesize;
Josef Bacik84f7d8e2017-09-29 15:43:49 -04003294 ret = process_func(trans, root, bytenr, num_bytes,
Filipe Mananab06c4bf2015-10-23 07:52:54 +01003295 parent, ref_root, level - 1, 0);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04003296 if (ret)
Zheng Yan31840ae2008-09-23 13:14:14 -04003297 goto fail;
Chris Mason54aa1f42007-06-22 14:16:25 -04003298 }
3299 }
Zheng Yan31840ae2008-09-23 13:14:14 -04003300 return 0;
3301fail:
Chris Mason54aa1f42007-06-22 14:16:25 -04003302 return ret;
Chris Mason02217ed2007-03-02 16:08:05 -05003303}
3304
Yan Zheng5d4f98a2009-06-10 10:45:14 -04003305int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
Josef Bacike339a6b2014-07-02 10:54:25 -07003306 struct extent_buffer *buf, int full_backref)
Zheng Yan31840ae2008-09-23 13:14:14 -04003307{
Josef Bacike339a6b2014-07-02 10:54:25 -07003308 return __btrfs_mod_ref(trans, root, buf, full_backref, 1);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04003309}
Zheng Yan31840ae2008-09-23 13:14:14 -04003310
Yan Zheng5d4f98a2009-06-10 10:45:14 -04003311int btrfs_dec_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
Josef Bacike339a6b2014-07-02 10:54:25 -07003312 struct extent_buffer *buf, int full_backref)
Yan Zheng5d4f98a2009-06-10 10:45:14 -04003313{
Josef Bacike339a6b2014-07-02 10:54:25 -07003314 return __btrfs_mod_ref(trans, root, buf, full_backref, 0);
Zheng Yan31840ae2008-09-23 13:14:14 -04003315}
3316
Chris Mason9078a3e2007-04-26 16:46:15 -04003317static int write_one_cache_group(struct btrfs_trans_handle *trans,
Chris Mason9078a3e2007-04-26 16:46:15 -04003318 struct btrfs_path *path,
3319 struct btrfs_block_group_cache *cache)
3320{
David Sterba39db2322019-03-20 11:57:46 +01003321 struct btrfs_fs_info *fs_info = trans->fs_info;
Chris Mason9078a3e2007-04-26 16:46:15 -04003322 int ret;
Jeff Mahoney0b246af2016-06-22 18:54:23 -04003323 struct btrfs_root *extent_root = fs_info->extent_root;
Chris Mason5f39d392007-10-15 16:14:19 -04003324 unsigned long bi;
3325 struct extent_buffer *leaf;
Chris Mason9078a3e2007-04-26 16:46:15 -04003326
Chris Mason9078a3e2007-04-26 16:46:15 -04003327 ret = btrfs_search_slot(trans, extent_root, &cache->key, path, 0, 1);
Josef Bacikdf95e7f2014-12-12 16:02:20 -05003328 if (ret) {
3329 if (ret > 0)
3330 ret = -ENOENT;
Chris Mason54aa1f42007-06-22 14:16:25 -04003331 goto fail;
Josef Bacikdf95e7f2014-12-12 16:02:20 -05003332 }
Chris Mason5f39d392007-10-15 16:14:19 -04003333
3334 leaf = path->nodes[0];
3335 bi = btrfs_item_ptr_offset(leaf, path->slots[0]);
3336 write_extent_buffer(leaf, &cache->item, bi, sizeof(cache->item));
3337 btrfs_mark_buffer_dirty(leaf);
Chris Mason54aa1f42007-06-22 14:16:25 -04003338fail:
Filipe Manana24b89d02015-04-25 18:31:05 +01003339 btrfs_release_path(path);
Josef Bacikdf95e7f2014-12-12 16:02:20 -05003340 return ret;
Chris Mason9078a3e2007-04-26 16:46:15 -04003341
3342}
3343
Yan Zheng4a8c9a62009-07-22 10:07:05 -04003344static struct btrfs_block_group_cache *
Jeff Mahoney2ff7e612016-06-22 18:54:24 -04003345next_block_group(struct btrfs_fs_info *fs_info,
Yan Zheng4a8c9a62009-07-22 10:07:05 -04003346 struct btrfs_block_group_cache *cache)
3347{
3348 struct rb_node *node;
Filipe Manana292cbd52014-11-26 15:28:50 +00003349
Jeff Mahoney0b246af2016-06-22 18:54:23 -04003350 spin_lock(&fs_info->block_group_cache_lock);
Filipe Manana292cbd52014-11-26 15:28:50 +00003351
3352 /* If our block group was removed, we need a full search. */
3353 if (RB_EMPTY_NODE(&cache->cache_node)) {
3354 const u64 next_bytenr = cache->key.objectid + cache->key.offset;
3355
Jeff Mahoney0b246af2016-06-22 18:54:23 -04003356 spin_unlock(&fs_info->block_group_cache_lock);
Filipe Manana292cbd52014-11-26 15:28:50 +00003357 btrfs_put_block_group(cache);
Jeff Mahoney0b246af2016-06-22 18:54:23 -04003358 cache = btrfs_lookup_first_block_group(fs_info, next_bytenr); return cache;
Filipe Manana292cbd52014-11-26 15:28:50 +00003359 }
Yan Zheng4a8c9a62009-07-22 10:07:05 -04003360 node = rb_next(&cache->cache_node);
3361 btrfs_put_block_group(cache);
3362 if (node) {
3363 cache = rb_entry(node, struct btrfs_block_group_cache,
3364 cache_node);
Josef Bacik11dfe352009-11-13 20:12:59 +00003365 btrfs_get_block_group(cache);
Yan Zheng4a8c9a62009-07-22 10:07:05 -04003366 } else
3367 cache = NULL;
Jeff Mahoney0b246af2016-06-22 18:54:23 -04003368 spin_unlock(&fs_info->block_group_cache_lock);
Yan Zheng4a8c9a62009-07-22 10:07:05 -04003369 return cache;
3370}
3371
Josef Bacik0af3d002010-06-21 14:48:16 -04003372static int cache_save_setup(struct btrfs_block_group_cache *block_group,
3373 struct btrfs_trans_handle *trans,
3374 struct btrfs_path *path)
3375{
Jeff Mahoney0b246af2016-06-22 18:54:23 -04003376 struct btrfs_fs_info *fs_info = block_group->fs_info;
3377 struct btrfs_root *root = fs_info->tree_root;
Josef Bacik0af3d002010-06-21 14:48:16 -04003378 struct inode *inode = NULL;
Qu Wenruo364ecf32017-02-27 15:10:38 +08003379 struct extent_changeset *data_reserved = NULL;
Josef Bacik0af3d002010-06-21 14:48:16 -04003380 u64 alloc_hint = 0;
Josef Bacik2b209822010-12-03 13:17:53 -05003381 int dcs = BTRFS_DC_ERROR;
David Sterbaf8c269d2015-01-16 17:21:12 +01003382 u64 num_pages = 0;
Josef Bacik0af3d002010-06-21 14:48:16 -04003383 int retries = 0;
3384 int ret = 0;
3385
3386 /*
3387 * If this block group is smaller than 100 megs don't bother caching the
3388 * block group.
3389 */
Byongho Leeee221842015-12-15 01:42:10 +09003390 if (block_group->key.offset < (100 * SZ_1M)) {
Josef Bacik0af3d002010-06-21 14:48:16 -04003391 spin_lock(&block_group->lock);
3392 block_group->disk_cache_state = BTRFS_DC_WRITTEN;
3393 spin_unlock(&block_group->lock);
3394 return 0;
3395 }
3396
Josef Bacik0c0ef4b2015-02-12 09:43:51 -05003397 if (trans->aborted)
3398 return 0;
Josef Bacik0af3d002010-06-21 14:48:16 -04003399again:
Jeff Mahoney77ab86b2017-02-15 16:28:30 -05003400 inode = lookup_free_space_inode(fs_info, block_group, path);
Josef Bacik0af3d002010-06-21 14:48:16 -04003401 if (IS_ERR(inode) && PTR_ERR(inode) != -ENOENT) {
3402 ret = PTR_ERR(inode);
David Sterbab3b4aa72011-04-21 01:20:15 +02003403 btrfs_release_path(path);
Josef Bacik0af3d002010-06-21 14:48:16 -04003404 goto out;
3405 }
3406
3407 if (IS_ERR(inode)) {
3408 BUG_ON(retries);
3409 retries++;
3410
3411 if (block_group->ro)
3412 goto out_free;
3413
Jeff Mahoney77ab86b2017-02-15 16:28:30 -05003414 ret = create_free_space_inode(fs_info, trans, block_group,
3415 path);
Josef Bacik0af3d002010-06-21 14:48:16 -04003416 if (ret)
3417 goto out_free;
3418 goto again;
3419 }
3420
3421 /*
3422 * We want to set the generation to 0, that way if anything goes wrong
3423 * from here on out we know not to trust this cache when we load up next
3424 * time.
3425 */
3426 BTRFS_I(inode)->generation = 0;
3427 ret = btrfs_update_inode(trans, root, inode);
Josef Bacik0c0ef4b2015-02-12 09:43:51 -05003428 if (ret) {
3429 /*
3430 * So theoretically we could recover from this, simply set the
3431 * super cache generation to 0 so we know to invalidate the
3432 * cache, but then we'd have to keep track of the block groups
3433 * that fail this way so we know we _have_ to reset this cache
3434 * before the next commit or risk reading stale cache. So to
3435 * limit our exposure to horrible edge cases lets just abort the
3436 * transaction, this only happens in really bad situations
3437 * anyway.
3438 */
Jeff Mahoney66642832016-06-10 18:19:25 -04003439 btrfs_abort_transaction(trans, ret);
Josef Bacik0c0ef4b2015-02-12 09:43:51 -05003440 goto out_put;
3441 }
Josef Bacik0af3d002010-06-21 14:48:16 -04003442 WARN_ON(ret);
3443
Josef Bacik8e138e02017-11-17 14:50:46 -05003444 /* We've already setup this transaction, go ahead and exit */
3445 if (block_group->cache_generation == trans->transid &&
3446 i_size_read(inode)) {
3447 dcs = BTRFS_DC_SETUP;
3448 goto out_put;
3449 }
3450
Josef Bacik0af3d002010-06-21 14:48:16 -04003451 if (i_size_read(inode) > 0) {
Jeff Mahoney2ff7e612016-06-22 18:54:24 -04003452 ret = btrfs_check_trunc_cache_free_space(fs_info,
Jeff Mahoney0b246af2016-06-22 18:54:23 -04003453 &fs_info->global_block_rsv);
Miao Xie7b61cd92013-05-13 13:55:09 +00003454 if (ret)
3455 goto out_put;
3456
Jeff Mahoney77ab86b2017-02-15 16:28:30 -05003457 ret = btrfs_truncate_free_space_cache(trans, NULL, inode);
Josef Bacik0af3d002010-06-21 14:48:16 -04003458 if (ret)
3459 goto out_put;
3460 }
3461
3462 spin_lock(&block_group->lock);
Liu Bocf7c1ef2012-07-06 03:31:34 -06003463 if (block_group->cached != BTRFS_CACHE_FINISHED ||
Jeff Mahoney0b246af2016-06-22 18:54:23 -04003464 !btrfs_test_opt(fs_info, SPACE_CACHE)) {
Liu Bocf7c1ef2012-07-06 03:31:34 -06003465 /*
3466 * don't bother trying to write stuff out _if_
3467 * a) we're not cached,
Liu Bo1a79c1f2017-03-06 13:49:02 -08003468 * b) we're with nospace_cache mount option,
3469 * c) we're with v2 space_cache (FREE_SPACE_TREE).
Liu Bocf7c1ef2012-07-06 03:31:34 -06003470 */
Josef Bacik2b209822010-12-03 13:17:53 -05003471 dcs = BTRFS_DC_WRITTEN;
Josef Bacik0af3d002010-06-21 14:48:16 -04003472 spin_unlock(&block_group->lock);
3473 goto out_put;
3474 }
3475 spin_unlock(&block_group->lock);
3476
Josef Bacik6fc823b2012-08-06 13:46:38 -06003477 /*
Josef Bacik2968b1f2015-10-01 12:55:18 -04003478 * We hit an ENOSPC when setting up the cache in this transaction, just
3479 * skip doing the setup, we've already cleared the cache so we're safe.
3480 */
3481 if (test_bit(BTRFS_TRANS_CACHE_ENOSPC, &trans->transaction->flags)) {
3482 ret = -ENOSPC;
3483 goto out_put;
3484 }
3485
3486 /*
Josef Bacik6fc823b2012-08-06 13:46:38 -06003487 * Try to preallocate enough space based on how big the block group is.
3488 * Keep in mind this has to include any pinned space which could end up
3489 * taking up quite a bit since it's not folded into the other space
3490 * cache.
3491 */
Byongho Leeee221842015-12-15 01:42:10 +09003492 num_pages = div_u64(block_group->key.offset, SZ_256M);
Josef Bacik0af3d002010-06-21 14:48:16 -04003493 if (!num_pages)
3494 num_pages = 1;
3495
Josef Bacik0af3d002010-06-21 14:48:16 -04003496 num_pages *= 16;
Kirill A. Shutemov09cbfea2016-04-01 15:29:47 +03003497 num_pages *= PAGE_SIZE;
Josef Bacik0af3d002010-06-21 14:48:16 -04003498
Qu Wenruo364ecf32017-02-27 15:10:38 +08003499 ret = btrfs_check_data_free_space(inode, &data_reserved, 0, num_pages);
Josef Bacik0af3d002010-06-21 14:48:16 -04003500 if (ret)
3501 goto out_put;
3502
3503 ret = btrfs_prealloc_file_range_trans(inode, trans, 0, 0, num_pages,
3504 num_pages, num_pages,
3505 &alloc_hint);
Josef Bacik2968b1f2015-10-01 12:55:18 -04003506 /*
3507 * Our cache requires contiguous chunks so that we don't modify a bunch
3508 * of metadata or split extents when writing the cache out, which means
3509 * we can enospc if we are heavily fragmented in addition to just normal
3510 * out of space conditions. So if we hit this just skip setting up any
3511 * other block groups for this transaction, maybe we'll unpin enough
3512 * space the next time around.
3513 */
Josef Bacik2b209822010-12-03 13:17:53 -05003514 if (!ret)
3515 dcs = BTRFS_DC_SETUP;
Josef Bacik2968b1f2015-10-01 12:55:18 -04003516 else if (ret == -ENOSPC)
3517 set_bit(BTRFS_TRANS_CACHE_ENOSPC, &trans->transaction->flags);
Josef Bacikc09544e2011-08-30 10:19:10 -04003518
Josef Bacik0af3d002010-06-21 14:48:16 -04003519out_put:
3520 iput(inode);
3521out_free:
David Sterbab3b4aa72011-04-21 01:20:15 +02003522 btrfs_release_path(path);
Josef Bacik0af3d002010-06-21 14:48:16 -04003523out:
3524 spin_lock(&block_group->lock);
Josef Bacike65cbb92011-12-13 16:04:54 -05003525 if (!ret && dcs == BTRFS_DC_SETUP)
Josef Bacik5b0e95b2011-10-06 08:58:24 -04003526 block_group->cache_generation = trans->transid;
Josef Bacik2b209822010-12-03 13:17:53 -05003527 block_group->disk_cache_state = dcs;
Josef Bacik0af3d002010-06-21 14:48:16 -04003528 spin_unlock(&block_group->lock);
3529
Qu Wenruo364ecf32017-02-27 15:10:38 +08003530 extent_changeset_free(data_reserved);
Josef Bacik0af3d002010-06-21 14:48:16 -04003531 return ret;
3532}
3533
David Sterbabbebb3e2019-03-20 12:02:55 +01003534int btrfs_setup_space_cache(struct btrfs_trans_handle *trans)
Josef Bacikdcdf7f62015-03-02 16:37:31 -05003535{
David Sterbabbebb3e2019-03-20 12:02:55 +01003536 struct btrfs_fs_info *fs_info = trans->fs_info;
Josef Bacikdcdf7f62015-03-02 16:37:31 -05003537 struct btrfs_block_group_cache *cache, *tmp;
3538 struct btrfs_transaction *cur_trans = trans->transaction;
3539 struct btrfs_path *path;
3540
3541 if (list_empty(&cur_trans->dirty_bgs) ||
Jeff Mahoney0b246af2016-06-22 18:54:23 -04003542 !btrfs_test_opt(fs_info, SPACE_CACHE))
Josef Bacikdcdf7f62015-03-02 16:37:31 -05003543 return 0;
3544
3545 path = btrfs_alloc_path();
3546 if (!path)
3547 return -ENOMEM;
3548
3549 /* Could add new block groups, use _safe just in case */
3550 list_for_each_entry_safe(cache, tmp, &cur_trans->dirty_bgs,
3551 dirty_list) {
3552 if (cache->disk_cache_state == BTRFS_DC_CLEAR)
3553 cache_save_setup(cache, trans, path);
3554 }
3555
3556 btrfs_free_path(path);
3557 return 0;
3558}
3559
Chris Mason1bbc6212015-04-06 12:46:08 -07003560/*
3561 * transaction commit does final block group cache writeback during a
3562 * critical section where nothing is allowed to change the FS. This is
3563 * required in order for the cache to actually match the block group,
3564 * but can introduce a lot of latency into the commit.
3565 *
3566 * So, btrfs_start_dirty_block_groups is here to kick off block group
3567 * cache IO. There's a chance we'll have to redo some of it if the
3568 * block group changes again during the commit, but it greatly reduces
3569 * the commit latency by getting rid of the easy block groups while
3570 * we're still allowing others to join the commit.
3571 */
Nikolay Borisov21217052018-02-07 17:55:41 +02003572int btrfs_start_dirty_block_groups(struct btrfs_trans_handle *trans)
Chris Mason1bbc6212015-04-06 12:46:08 -07003573{
Nikolay Borisov21217052018-02-07 17:55:41 +02003574 struct btrfs_fs_info *fs_info = trans->fs_info;
Chris Mason1bbc6212015-04-06 12:46:08 -07003575 struct btrfs_block_group_cache *cache;
3576 struct btrfs_transaction *cur_trans = trans->transaction;
3577 int ret = 0;
3578 int should_put;
3579 struct btrfs_path *path = NULL;
3580 LIST_HEAD(dirty);
3581 struct list_head *io = &cur_trans->io_bgs;
3582 int num_started = 0;
3583 int loops = 0;
3584
3585 spin_lock(&cur_trans->dirty_bgs_lock);
Filipe Mananab58d1a92015-04-25 18:29:16 +01003586 if (list_empty(&cur_trans->dirty_bgs)) {
3587 spin_unlock(&cur_trans->dirty_bgs_lock);
3588 return 0;
Chris Mason1bbc6212015-04-06 12:46:08 -07003589 }
Filipe Mananab58d1a92015-04-25 18:29:16 +01003590 list_splice_init(&cur_trans->dirty_bgs, &dirty);
Chris Mason1bbc6212015-04-06 12:46:08 -07003591 spin_unlock(&cur_trans->dirty_bgs_lock);
3592
3593again:
Chris Mason1bbc6212015-04-06 12:46:08 -07003594 /*
3595 * make sure all the block groups on our dirty list actually
3596 * exist
3597 */
Nikolay Borisov6c686b32018-02-07 17:55:40 +02003598 btrfs_create_pending_block_groups(trans);
Chris Mason1bbc6212015-04-06 12:46:08 -07003599
3600 if (!path) {
3601 path = btrfs_alloc_path();
3602 if (!path)
3603 return -ENOMEM;
3604 }
3605
Filipe Mananab58d1a92015-04-25 18:29:16 +01003606 /*
3607 * cache_write_mutex is here only to save us from balance or automatic
3608 * removal of empty block groups deleting this block group while we are
3609 * writing out the cache
3610 */
3611 mutex_lock(&trans->transaction->cache_write_mutex);
Chris Mason1bbc6212015-04-06 12:46:08 -07003612 while (!list_empty(&dirty)) {
Josef Bacikba2c4d42018-12-03 10:20:33 -05003613 bool drop_reserve = true;
3614
Chris Mason1bbc6212015-04-06 12:46:08 -07003615 cache = list_first_entry(&dirty,
3616 struct btrfs_block_group_cache,
3617 dirty_list);
Chris Mason1bbc6212015-04-06 12:46:08 -07003618 /*
3619 * this can happen if something re-dirties a block
3620 * group that is already under IO. Just wait for it to
3621 * finish and then do it all again
3622 */
3623 if (!list_empty(&cache->io_list)) {
3624 list_del_init(&cache->io_list);
Jeff Mahoneyafdb5712016-09-09 12:09:35 -04003625 btrfs_wait_cache_io(trans, cache, path);
Chris Mason1bbc6212015-04-06 12:46:08 -07003626 btrfs_put_block_group(cache);
3627 }
3628
3629
3630 /*
3631 * btrfs_wait_cache_io uses the cache->dirty_list to decide
3632 * if it should update the cache_state. Don't delete
3633 * until after we wait.
3634 *
3635 * Since we're not running in the commit critical section
3636 * we need the dirty_bgs_lock to protect from update_block_group
3637 */
3638 spin_lock(&cur_trans->dirty_bgs_lock);
3639 list_del_init(&cache->dirty_list);
3640 spin_unlock(&cur_trans->dirty_bgs_lock);
3641
3642 should_put = 1;
3643
3644 cache_save_setup(cache, trans, path);
3645
3646 if (cache->disk_cache_state == BTRFS_DC_SETUP) {
3647 cache->io_ctl.inode = NULL;
Jeff Mahoney0b246af2016-06-22 18:54:23 -04003648 ret = btrfs_write_out_cache(fs_info, trans,
Jeff Mahoney5b4aace2016-06-21 10:40:19 -04003649 cache, path);
Chris Mason1bbc6212015-04-06 12:46:08 -07003650 if (ret == 0 && cache->io_ctl.inode) {
3651 num_started++;
3652 should_put = 0;
3653
3654 /*
Nikolay Borisov45ae2c12018-02-08 18:25:18 +02003655 * The cache_write_mutex is protecting the
3656 * io_list, also refer to the definition of
3657 * btrfs_transaction::io_bgs for more details
Chris Mason1bbc6212015-04-06 12:46:08 -07003658 */
3659 list_add_tail(&cache->io_list, io);
3660 } else {
3661 /*
3662 * if we failed to write the cache, the
3663 * generation will be bad and life goes on
3664 */
3665 ret = 0;
3666 }
3667 }
Filipe Mananaff1f8252015-05-06 16:15:09 +01003668 if (!ret) {
David Sterba39db2322019-03-20 11:57:46 +01003669 ret = write_one_cache_group(trans, path, cache);
Filipe Mananaff1f8252015-05-06 16:15:09 +01003670 /*
3671 * Our block group might still be attached to the list
3672 * of new block groups in the transaction handle of some
3673 * other task (struct btrfs_trans_handle->new_bgs). This
3674 * means its block group item isn't yet in the extent
3675 * tree. If this happens ignore the error, as we will
3676 * try again later in the critical section of the
3677 * transaction commit.
3678 */
3679 if (ret == -ENOENT) {
3680 ret = 0;
3681 spin_lock(&cur_trans->dirty_bgs_lock);
3682 if (list_empty(&cache->dirty_list)) {
3683 list_add_tail(&cache->dirty_list,
3684 &cur_trans->dirty_bgs);
3685 btrfs_get_block_group(cache);
Josef Bacikba2c4d42018-12-03 10:20:33 -05003686 drop_reserve = false;
Filipe Mananaff1f8252015-05-06 16:15:09 +01003687 }
3688 spin_unlock(&cur_trans->dirty_bgs_lock);
3689 } else if (ret) {
Jeff Mahoney66642832016-06-10 18:19:25 -04003690 btrfs_abort_transaction(trans, ret);
Filipe Mananaff1f8252015-05-06 16:15:09 +01003691 }
3692 }
Chris Mason1bbc6212015-04-06 12:46:08 -07003693
Andrea Gelmini52042d82018-11-28 12:05:13 +01003694 /* if it's not on the io list, we need to put the block group */
Chris Mason1bbc6212015-04-06 12:46:08 -07003695 if (should_put)
3696 btrfs_put_block_group(cache);
Josef Bacikba2c4d42018-12-03 10:20:33 -05003697 if (drop_reserve)
3698 btrfs_delayed_refs_rsv_release(fs_info, 1);
Chris Mason1bbc6212015-04-06 12:46:08 -07003699
3700 if (ret)
3701 break;
Filipe Mananab58d1a92015-04-25 18:29:16 +01003702
3703 /*
3704 * Avoid blocking other tasks for too long. It might even save
3705 * us from writing caches for block groups that are going to be
3706 * removed.
3707 */
3708 mutex_unlock(&trans->transaction->cache_write_mutex);
3709 mutex_lock(&trans->transaction->cache_write_mutex);
Chris Mason1bbc6212015-04-06 12:46:08 -07003710 }
Filipe Mananab58d1a92015-04-25 18:29:16 +01003711 mutex_unlock(&trans->transaction->cache_write_mutex);
Chris Mason1bbc6212015-04-06 12:46:08 -07003712
3713 /*
3714 * go through delayed refs for all the stuff we've just kicked off
3715 * and then loop back (just once)
3716 */
Nikolay Borisovc79a70b2018-03-15 17:27:37 +02003717 ret = btrfs_run_delayed_refs(trans, 0);
Chris Mason1bbc6212015-04-06 12:46:08 -07003718 if (!ret && loops == 0) {
3719 loops++;
3720 spin_lock(&cur_trans->dirty_bgs_lock);
3721 list_splice_init(&cur_trans->dirty_bgs, &dirty);
Filipe Mananab58d1a92015-04-25 18:29:16 +01003722 /*
3723 * dirty_bgs_lock protects us from concurrent block group
3724 * deletes too (not just cache_write_mutex).
3725 */
3726 if (!list_empty(&dirty)) {
3727 spin_unlock(&cur_trans->dirty_bgs_lock);
3728 goto again;
3729 }
Chris Mason1bbc6212015-04-06 12:46:08 -07003730 spin_unlock(&cur_trans->dirty_bgs_lock);
Liu Boc79a1752016-07-20 17:44:12 -07003731 } else if (ret < 0) {
Jeff Mahoney2ff7e612016-06-22 18:54:24 -04003732 btrfs_cleanup_dirty_bgs(cur_trans, fs_info);
Chris Mason1bbc6212015-04-06 12:46:08 -07003733 }
3734
3735 btrfs_free_path(path);
3736 return ret;
3737}
3738
David Sterba5742d152019-03-20 12:04:08 +01003739int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans)
Chris Mason9078a3e2007-04-26 16:46:15 -04003740{
David Sterba5742d152019-03-20 12:04:08 +01003741 struct btrfs_fs_info *fs_info = trans->fs_info;
Yan Zheng4a8c9a62009-07-22 10:07:05 -04003742 struct btrfs_block_group_cache *cache;
Josef Bacikce93ec52014-11-17 15:45:48 -05003743 struct btrfs_transaction *cur_trans = trans->transaction;
3744 int ret = 0;
Chris Masonc9dc4c62015-04-04 17:14:42 -07003745 int should_put;
Chris Mason9078a3e2007-04-26 16:46:15 -04003746 struct btrfs_path *path;
Chris Mason1bbc6212015-04-06 12:46:08 -07003747 struct list_head *io = &cur_trans->io_bgs;
Chris Masonc9dc4c62015-04-04 17:14:42 -07003748 int num_started = 0;
Chris Mason9078a3e2007-04-26 16:46:15 -04003749
3750 path = btrfs_alloc_path();
3751 if (!path)
3752 return -ENOMEM;
3753
Josef Bacikce93ec52014-11-17 15:45:48 -05003754 /*
Filipe Mananae44081e2015-12-18 03:02:48 +00003755 * Even though we are in the critical section of the transaction commit,
3756 * we can still have concurrent tasks adding elements to this
3757 * transaction's list of dirty block groups. These tasks correspond to
3758 * endio free space workers started when writeback finishes for a
3759 * space cache, which run inode.c:btrfs_finish_ordered_io(), and can
3760 * allocate new block groups as a result of COWing nodes of the root
3761 * tree when updating the free space inode. The writeback for the space
3762 * caches is triggered by an earlier call to
3763 * btrfs_start_dirty_block_groups() and iterations of the following
3764 * loop.
3765 * Also we want to do the cache_save_setup first and then run the
Josef Bacikce93ec52014-11-17 15:45:48 -05003766 * delayed refs to make sure we have the best chance at doing this all
3767 * in one shot.
3768 */
Filipe Mananae44081e2015-12-18 03:02:48 +00003769 spin_lock(&cur_trans->dirty_bgs_lock);
Josef Bacikce93ec52014-11-17 15:45:48 -05003770 while (!list_empty(&cur_trans->dirty_bgs)) {
3771 cache = list_first_entry(&cur_trans->dirty_bgs,
3772 struct btrfs_block_group_cache,
3773 dirty_list);
Chris Masonc9dc4c62015-04-04 17:14:42 -07003774
3775 /*
3776 * this can happen if cache_save_setup re-dirties a block
3777 * group that is already under IO. Just wait for it to
3778 * finish and then do it all again
3779 */
3780 if (!list_empty(&cache->io_list)) {
Filipe Mananae44081e2015-12-18 03:02:48 +00003781 spin_unlock(&cur_trans->dirty_bgs_lock);
Chris Masonc9dc4c62015-04-04 17:14:42 -07003782 list_del_init(&cache->io_list);
Jeff Mahoneyafdb5712016-09-09 12:09:35 -04003783 btrfs_wait_cache_io(trans, cache, path);
Chris Masonc9dc4c62015-04-04 17:14:42 -07003784 btrfs_put_block_group(cache);
Filipe Mananae44081e2015-12-18 03:02:48 +00003785 spin_lock(&cur_trans->dirty_bgs_lock);
Chris Masonc9dc4c62015-04-04 17:14:42 -07003786 }
3787
Chris Mason1bbc6212015-04-06 12:46:08 -07003788 /*
3789 * don't remove from the dirty list until after we've waited
3790 * on any pending IO
3791 */
Josef Bacikce93ec52014-11-17 15:45:48 -05003792 list_del_init(&cache->dirty_list);
Filipe Mananae44081e2015-12-18 03:02:48 +00003793 spin_unlock(&cur_trans->dirty_bgs_lock);
Chris Masonc9dc4c62015-04-04 17:14:42 -07003794 should_put = 1;
3795
Chris Mason1bbc6212015-04-06 12:46:08 -07003796 cache_save_setup(cache, trans, path);
Chris Masonc9dc4c62015-04-04 17:14:42 -07003797
Josef Bacikce93ec52014-11-17 15:45:48 -05003798 if (!ret)
Nikolay Borisovc79a70b2018-03-15 17:27:37 +02003799 ret = btrfs_run_delayed_refs(trans,
Jeff Mahoney2ff7e612016-06-22 18:54:24 -04003800 (unsigned long) -1);
Chris Masonc9dc4c62015-04-04 17:14:42 -07003801
3802 if (!ret && cache->disk_cache_state == BTRFS_DC_SETUP) {
3803 cache->io_ctl.inode = NULL;
Jeff Mahoney0b246af2016-06-22 18:54:23 -04003804 ret = btrfs_write_out_cache(fs_info, trans,
Jeff Mahoney5b4aace2016-06-21 10:40:19 -04003805 cache, path);
Chris Masonc9dc4c62015-04-04 17:14:42 -07003806 if (ret == 0 && cache->io_ctl.inode) {
3807 num_started++;
3808 should_put = 0;
Chris Mason1bbc6212015-04-06 12:46:08 -07003809 list_add_tail(&cache->io_list, io);
Chris Masonc9dc4c62015-04-04 17:14:42 -07003810 } else {
3811 /*
3812 * if we failed to write the cache, the
3813 * generation will be bad and life goes on
3814 */
3815 ret = 0;
3816 }
3817 }
Filipe Mananaff1f8252015-05-06 16:15:09 +01003818 if (!ret) {
David Sterba39db2322019-03-20 11:57:46 +01003819 ret = write_one_cache_group(trans, path, cache);
Filipe Manana2bc0bb52015-12-30 02:42:30 +00003820 /*
3821 * One of the free space endio workers might have
3822 * created a new block group while updating a free space
3823 * cache's inode (at inode.c:btrfs_finish_ordered_io())
3824 * and hasn't released its transaction handle yet, in
3825 * which case the new block group is still attached to
3826 * its transaction handle and its creation has not
3827 * finished yet (no block group item in the extent tree
3828 * yet, etc). If this is the case, wait for all free
3829 * space endio workers to finish and retry. This is a
3830 * a very rare case so no need for a more efficient and
3831 * complex approach.
3832 */
3833 if (ret == -ENOENT) {
3834 wait_event(cur_trans->writer_wait,
3835 atomic_read(&cur_trans->num_writers) == 1);
David Sterba39db2322019-03-20 11:57:46 +01003836 ret = write_one_cache_group(trans, path, cache);
Filipe Manana2bc0bb52015-12-30 02:42:30 +00003837 }
Filipe Mananaff1f8252015-05-06 16:15:09 +01003838 if (ret)
Jeff Mahoney66642832016-06-10 18:19:25 -04003839 btrfs_abort_transaction(trans, ret);
Filipe Mananaff1f8252015-05-06 16:15:09 +01003840 }
Chris Masonc9dc4c62015-04-04 17:14:42 -07003841
3842 /* if its not on the io list, we need to put the block group */
3843 if (should_put)
3844 btrfs_put_block_group(cache);
Josef Bacikba2c4d42018-12-03 10:20:33 -05003845 btrfs_delayed_refs_rsv_release(fs_info, 1);
Filipe Mananae44081e2015-12-18 03:02:48 +00003846 spin_lock(&cur_trans->dirty_bgs_lock);
Chris Masonc9dc4c62015-04-04 17:14:42 -07003847 }
Filipe Mananae44081e2015-12-18 03:02:48 +00003848 spin_unlock(&cur_trans->dirty_bgs_lock);
Chris Masonc9dc4c62015-04-04 17:14:42 -07003849
Nikolay Borisov45ae2c12018-02-08 18:25:18 +02003850 /*
3851 * Refer to the definition of io_bgs member for details why it's safe
3852 * to use it without any locking
3853 */
Chris Mason1bbc6212015-04-06 12:46:08 -07003854 while (!list_empty(io)) {
3855 cache = list_first_entry(io, struct btrfs_block_group_cache,
Chris Masonc9dc4c62015-04-04 17:14:42 -07003856 io_list);
3857 list_del_init(&cache->io_list);
Jeff Mahoneyafdb5712016-09-09 12:09:35 -04003858 btrfs_wait_cache_io(trans, cache, path);
Josef Bacik0af3d002010-06-21 14:48:16 -04003859 btrfs_put_block_group(cache);
3860 }
3861
Chris Mason9078a3e2007-04-26 16:46:15 -04003862 btrfs_free_path(path);
Josef Bacikce93ec52014-11-17 15:45:48 -05003863 return ret;
Chris Mason9078a3e2007-04-26 16:46:15 -04003864}
3865
Jeff Mahoney2ff7e612016-06-22 18:54:24 -04003866int btrfs_extent_readonly(struct btrfs_fs_info *fs_info, u64 bytenr)
Yan Zhengd2fb3432008-12-11 16:30:39 -05003867{
3868 struct btrfs_block_group_cache *block_group;
3869 int readonly = 0;
3870
Jeff Mahoney0b246af2016-06-22 18:54:23 -04003871 block_group = btrfs_lookup_block_group(fs_info, bytenr);
Yan Zhengd2fb3432008-12-11 16:30:39 -05003872 if (!block_group || block_group->ro)
3873 readonly = 1;
3874 if (block_group)
Chris Masonfa9c0d792009-04-03 09:47:43 -04003875 btrfs_put_block_group(block_group);
Yan Zhengd2fb3432008-12-11 16:30:39 -05003876 return readonly;
3877}
3878
Filipe Mananaf78c4362016-05-09 13:15:41 +01003879bool btrfs_inc_nocow_writers(struct btrfs_fs_info *fs_info, u64 bytenr)
3880{
3881 struct btrfs_block_group_cache *bg;
3882 bool ret = true;
3883
3884 bg = btrfs_lookup_block_group(fs_info, bytenr);
3885 if (!bg)
3886 return false;
3887
3888 spin_lock(&bg->lock);
3889 if (bg->ro)
3890 ret = false;
3891 else
3892 atomic_inc(&bg->nocow_writers);
3893 spin_unlock(&bg->lock);
3894
3895 /* no put on block group, done by btrfs_dec_nocow_writers */
3896 if (!ret)
3897 btrfs_put_block_group(bg);
3898
3899 return ret;
3900
3901}
3902
3903void btrfs_dec_nocow_writers(struct btrfs_fs_info *fs_info, u64 bytenr)
3904{
3905 struct btrfs_block_group_cache *bg;
3906
3907 bg = btrfs_lookup_block_group(fs_info, bytenr);
3908 ASSERT(bg);
3909 if (atomic_dec_and_test(&bg->nocow_writers))
Peter Zijlstra46259562018-03-15 11:43:08 +01003910 wake_up_var(&bg->nocow_writers);
Filipe Mananaf78c4362016-05-09 13:15:41 +01003911 /*
3912 * Once for our lookup and once for the lookup done by a previous call
3913 * to btrfs_inc_nocow_writers()
3914 */
3915 btrfs_put_block_group(bg);
3916 btrfs_put_block_group(bg);
3917}
3918
Filipe Mananaf78c4362016-05-09 13:15:41 +01003919void btrfs_wait_nocow_writers(struct btrfs_block_group_cache *bg)
3920{
Peter Zijlstra46259562018-03-15 11:43:08 +01003921 wait_var_event(&bg->nocow_writers, !atomic_read(&bg->nocow_writers));
Filipe Mananaf78c4362016-05-09 13:15:41 +01003922}
3923
Jeff Mahoney6ab0a202013-11-01 13:07:04 -04003924static const char *alloc_name(u64 flags)
3925{
3926 switch (flags) {
3927 case BTRFS_BLOCK_GROUP_METADATA|BTRFS_BLOCK_GROUP_DATA:
3928 return "mixed";
3929 case BTRFS_BLOCK_GROUP_METADATA:
3930 return "metadata";
3931 case BTRFS_BLOCK_GROUP_DATA:
3932 return "data";
3933 case BTRFS_BLOCK_GROUP_SYSTEM:
3934 return "system";
3935 default:
3936 WARN_ON(1);
3937 return "invalid-combination";
3938 };
3939}
3940
Lu Fengqi4ca61682018-05-28 14:30:27 +08003941static int create_space_info(struct btrfs_fs_info *info, u64 flags)
Nikolay Borisov2be12ef2017-05-22 09:35:49 +03003942{
3943
3944 struct btrfs_space_info *space_info;
3945 int i;
3946 int ret;
3947
3948 space_info = kzalloc(sizeof(*space_info), GFP_NOFS);
3949 if (!space_info)
3950 return -ENOMEM;
3951
3952 ret = percpu_counter_init(&space_info->total_bytes_pinned, 0,
3953 GFP_KERNEL);
3954 if (ret) {
3955 kfree(space_info);
3956 return ret;
3957 }
3958
3959 for (i = 0; i < BTRFS_NR_RAID_TYPES; i++)
3960 INIT_LIST_HEAD(&space_info->block_groups[i]);
3961 init_rwsem(&space_info->groups_sem);
3962 spin_lock_init(&space_info->lock);
3963 space_info->flags = flags & BTRFS_BLOCK_GROUP_TYPE_MASK;
3964 space_info->force_alloc = CHUNK_ALLOC_NO_FORCE;
3965 init_waitqueue_head(&space_info->wait);
3966 INIT_LIST_HEAD(&space_info->ro_bgs);
3967 INIT_LIST_HEAD(&space_info->tickets);
3968 INIT_LIST_HEAD(&space_info->priority_tickets);
3969
3970 ret = kobject_init_and_add(&space_info->kobj, &space_info_ktype,
3971 info->space_info_kobj, "%s",
3972 alloc_name(space_info->flags));
3973 if (ret) {
3974 percpu_counter_destroy(&space_info->total_bytes_pinned);
3975 kfree(space_info);
3976 return ret;
3977 }
3978
Nikolay Borisov2be12ef2017-05-22 09:35:49 +03003979 list_add_rcu(&space_info->list, &info->space_info);
3980 if (flags & BTRFS_BLOCK_GROUP_DATA)
3981 info->data_sinfo = space_info;
3982
3983 return ret;
3984}
3985
Nikolay Borisovd2006e62017-05-22 09:35:50 +03003986static void update_space_info(struct btrfs_fs_info *info, u64 flags,
Chris Mason593060d2008-03-25 16:50:33 -04003987 u64 total_bytes, u64 bytes_used,
Josef Bacike40edf22016-03-25 13:25:47 -04003988 u64 bytes_readonly,
Chris Mason593060d2008-03-25 16:50:33 -04003989 struct btrfs_space_info **space_info)
3990{
3991 struct btrfs_space_info *found;
Yan, Zhengb742bb822010-05-16 10:46:24 -04003992 int factor;
3993
David Sterba46df06b2018-07-13 20:46:30 +02003994 factor = btrfs_bg_type_to_factor(flags);
Chris Mason593060d2008-03-25 16:50:33 -04003995
3996 found = __find_space_info(info, flags);
Nikolay Borisovd2006e62017-05-22 09:35:50 +03003997 ASSERT(found);
3998 spin_lock(&found->lock);
3999 found->total_bytes += total_bytes;
4000 found->disk_total += total_bytes * factor;
4001 found->bytes_used += bytes_used;
4002 found->disk_used += bytes_used * factor;
4003 found->bytes_readonly += bytes_readonly;
4004 if (total_bytes > 0)
4005 found->full = 0;
4006 space_info_add_new_bytes(info, found, total_bytes -
4007 bytes_used - bytes_readonly);
4008 spin_unlock(&found->lock);
4009 *space_info = found;
Chris Mason593060d2008-03-25 16:50:33 -04004010}
4011
Chris Mason8790d502008-04-03 16:29:03 -04004012static void set_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags)
4013{
Ilya Dryomov899c81e2012-03-27 17:09:16 +03004014 u64 extra_flags = chunk_to_extended(flags) &
4015 BTRFS_EXTENDED_PROFILE_MASK;
Ilya Dryomova46d11a2012-01-16 22:04:47 +02004016
Miao Xiede98ced2013-01-29 10:13:12 +00004017 write_seqlock(&fs_info->profiles_lock);
Ilya Dryomova46d11a2012-01-16 22:04:47 +02004018 if (flags & BTRFS_BLOCK_GROUP_DATA)
4019 fs_info->avail_data_alloc_bits |= extra_flags;
4020 if (flags & BTRFS_BLOCK_GROUP_METADATA)
4021 fs_info->avail_metadata_alloc_bits |= extra_flags;
4022 if (flags & BTRFS_BLOCK_GROUP_SYSTEM)
4023 fs_info->avail_system_alloc_bits |= extra_flags;
Miao Xiede98ced2013-01-29 10:13:12 +00004024 write_sequnlock(&fs_info->profiles_lock);
Chris Mason8790d502008-04-03 16:29:03 -04004025}
Chris Mason593060d2008-03-25 16:50:33 -04004026
Ilya Dryomova46d11a2012-01-16 22:04:47 +02004027/*
Ilya Dryomovfc67c452012-03-27 17:09:17 +03004028 * returns target flags in extended format or 0 if restripe for this
4029 * chunk_type is not in progress
Ilya Dryomovc6664b42012-04-12 16:03:56 -04004030 *
David Sterbadccdb072018-03-21 00:20:05 +01004031 * should be called with balance_lock held
Ilya Dryomovfc67c452012-03-27 17:09:17 +03004032 */
4033static u64 get_restripe_target(struct btrfs_fs_info *fs_info, u64 flags)
4034{
4035 struct btrfs_balance_control *bctl = fs_info->balance_ctl;
4036 u64 target = 0;
4037
Ilya Dryomovfc67c452012-03-27 17:09:17 +03004038 if (!bctl)
4039 return 0;
4040
4041 if (flags & BTRFS_BLOCK_GROUP_DATA &&
4042 bctl->data.flags & BTRFS_BALANCE_ARGS_CONVERT) {
4043 target = BTRFS_BLOCK_GROUP_DATA | bctl->data.target;
4044 } else if (flags & BTRFS_BLOCK_GROUP_SYSTEM &&
4045 bctl->sys.flags & BTRFS_BALANCE_ARGS_CONVERT) {
4046 target = BTRFS_BLOCK_GROUP_SYSTEM | bctl->sys.target;
4047 } else if (flags & BTRFS_BLOCK_GROUP_METADATA &&
4048 bctl->meta.flags & BTRFS_BALANCE_ARGS_CONVERT) {
4049 target = BTRFS_BLOCK_GROUP_METADATA | bctl->meta.target;
4050 }
4051
4052 return target;
4053}
4054
4055/*
Ilya Dryomova46d11a2012-01-16 22:04:47 +02004056 * @flags: available profiles in extended format (see ctree.h)
4057 *
Ilya Dryomove4d8ec02012-01-16 22:04:48 +02004058 * Returns reduced profile in chunk format. If profile changing is in
4059 * progress (either running or paused) picks the target profile (if it's
4060 * already available), otherwise falls back to plain reducing.
Ilya Dryomova46d11a2012-01-16 22:04:47 +02004061 */
Jeff Mahoney2ff7e612016-06-22 18:54:24 -04004062static u64 btrfs_reduce_alloc_profile(struct btrfs_fs_info *fs_info, u64 flags)
Chris Masonec44a352008-04-28 15:29:52 -04004063{
Jeff Mahoney0b246af2016-06-22 18:54:23 -04004064 u64 num_devices = fs_info->fs_devices->rw_devices;
Ilya Dryomovfc67c452012-03-27 17:09:17 +03004065 u64 target;
Zhao Lei9c170b22015-09-15 21:08:08 +08004066 u64 raid_type;
4067 u64 allowed = 0;
Chris Masona061fc82008-05-07 11:43:44 -04004068
Ilya Dryomovfc67c452012-03-27 17:09:17 +03004069 /*
4070 * see if restripe for this chunk_type is in progress, if so
4071 * try to reduce to the target profile
4072 */
Jeff Mahoney0b246af2016-06-22 18:54:23 -04004073 spin_lock(&fs_info->balance_lock);
4074 target = get_restripe_target(fs_info, flags);
Ilya Dryomovfc67c452012-03-27 17:09:17 +03004075 if (target) {
4076 /* pick target profile only if it's already available */
4077 if ((flags & target) & BTRFS_EXTENDED_PROFILE_MASK) {
Jeff Mahoney0b246af2016-06-22 18:54:23 -04004078 spin_unlock(&fs_info->balance_lock);
Ilya Dryomovfc67c452012-03-27 17:09:17 +03004079 return extended_to_chunk(target);
Ilya Dryomove4d8ec02012-01-16 22:04:48 +02004080 }
4081 }
Jeff Mahoney0b246af2016-06-22 18:54:23 -04004082 spin_unlock(&fs_info->balance_lock);
Ilya Dryomove4d8ec02012-01-16 22:04:48 +02004083
David Woodhouse53b381b2013-01-29 18:40:14 -05004084 /* First, mask out the RAID levels which aren't possible */
Zhao Lei9c170b22015-09-15 21:08:08 +08004085 for (raid_type = 0; raid_type < BTRFS_NR_RAID_TYPES; raid_type++) {
4086 if (num_devices >= btrfs_raid_array[raid_type].devs_min)
Anand Jain41a6e892018-04-25 19:01:43 +08004087 allowed |= btrfs_raid_array[raid_type].bg_flag;
Zhao Lei9c170b22015-09-15 21:08:08 +08004088 }
4089 allowed &= flags;
Chris Masona061fc82008-05-07 11:43:44 -04004090
Zhao Lei9c170b22015-09-15 21:08:08 +08004091 if (allowed & BTRFS_BLOCK_GROUP_RAID6)
4092 allowed = BTRFS_BLOCK_GROUP_RAID6;
4093 else if (allowed & BTRFS_BLOCK_GROUP_RAID5)
4094 allowed = BTRFS_BLOCK_GROUP_RAID5;
4095 else if (allowed & BTRFS_BLOCK_GROUP_RAID10)
4096 allowed = BTRFS_BLOCK_GROUP_RAID10;
4097 else if (allowed & BTRFS_BLOCK_GROUP_RAID1)
4098 allowed = BTRFS_BLOCK_GROUP_RAID1;
4099 else if (allowed & BTRFS_BLOCK_GROUP_RAID0)
4100 allowed = BTRFS_BLOCK_GROUP_RAID0;
Chris Masonec44a352008-04-28 15:29:52 -04004101
Zhao Lei9c170b22015-09-15 21:08:08 +08004102 flags &= ~BTRFS_BLOCK_GROUP_PROFILE_MASK;
Chris Masonec44a352008-04-28 15:29:52 -04004103
Zhao Lei9c170b22015-09-15 21:08:08 +08004104 return extended_to_chunk(flags | allowed);
Chris Masonec44a352008-04-28 15:29:52 -04004105}
4106
Jeff Mahoney2ff7e612016-06-22 18:54:24 -04004107static u64 get_alloc_profile(struct btrfs_fs_info *fs_info, u64 orig_flags)
Josef Bacik6a632092009-02-20 11:00:09 -05004108{
Miao Xiede98ced2013-01-29 10:13:12 +00004109 unsigned seq;
Filipe Mananaf8213bd2014-04-24 15:15:29 +01004110 u64 flags;
Miao Xiede98ced2013-01-29 10:13:12 +00004111
4112 do {
Filipe Mananaf8213bd2014-04-24 15:15:29 +01004113 flags = orig_flags;
Jeff Mahoney0b246af2016-06-22 18:54:23 -04004114 seq = read_seqbegin(&fs_info->profiles_lock);
Miao Xiede98ced2013-01-29 10:13:12 +00004115
4116 if (flags & BTRFS_BLOCK_GROUP_DATA)
Jeff Mahoney0b246af2016-06-22 18:54:23 -04004117 flags |= fs_info->avail_data_alloc_bits;
Miao Xiede98ced2013-01-29 10:13:12 +00004118 else if (flags & BTRFS_BLOCK_GROUP_SYSTEM)
Jeff Mahoney0b246af2016-06-22 18:54:23 -04004119 flags |= fs_info->avail_system_alloc_bits;
Miao Xiede98ced2013-01-29 10:13:12 +00004120 else if (flags & BTRFS_BLOCK_GROUP_METADATA)
Jeff Mahoney0b246af2016-06-22 18:54:23 -04004121 flags |= fs_info->avail_metadata_alloc_bits;
4122 } while (read_seqretry(&fs_info->profiles_lock, seq));
Ilya Dryomov6fef8df2012-01-16 22:04:47 +02004123
Jeff Mahoney2ff7e612016-06-22 18:54:24 -04004124 return btrfs_reduce_alloc_profile(fs_info, flags);
Yan, Zhengb742bb822010-05-16 10:46:24 -04004125}
Josef Bacik6a632092009-02-20 11:00:09 -05004126
Jeff Mahoney1b868262017-05-17 11:38:35 -04004127static u64 get_alloc_profile_by_root(struct btrfs_root *root, int data)
Yan, Zhengb742bb822010-05-16 10:46:24 -04004128{
Jeff Mahoney0b246af2016-06-22 18:54:23 -04004129 struct btrfs_fs_info *fs_info = root->fs_info;
Yan, Zhengb742bb822010-05-16 10:46:24 -04004130 u64 flags;
David Woodhouse53b381b2013-01-29 18:40:14 -05004131 u64 ret;
Josef Bacik6a632092009-02-20 11:00:09 -05004132
Yan, Zhengb742bb822010-05-16 10:46:24 -04004133 if (data)
4134 flags = BTRFS_BLOCK_GROUP_DATA;
Jeff Mahoney0b246af2016-06-22 18:54:23 -04004135 else if (root == fs_info->chunk_root)
Yan, Zhengb742bb822010-05-16 10:46:24 -04004136 flags = BTRFS_BLOCK_GROUP_SYSTEM;
4137 else
4138 flags = BTRFS_BLOCK_GROUP_METADATA;
4139
Jeff Mahoney2ff7e612016-06-22 18:54:24 -04004140 ret = get_alloc_profile(fs_info, flags);
David Woodhouse53b381b2013-01-29 18:40:14 -05004141 return ret;
Josef Bacik6a632092009-02-20 11:00:09 -05004142}
4143
Jeff Mahoney1b868262017-05-17 11:38:35 -04004144u64 btrfs_data_alloc_profile(struct btrfs_fs_info *fs_info)
4145{
4146 return get_alloc_profile(fs_info, BTRFS_BLOCK_GROUP_DATA);
4147}
4148
4149u64 btrfs_metadata_alloc_profile(struct btrfs_fs_info *fs_info)
4150{
4151 return get_alloc_profile(fs_info, BTRFS_BLOCK_GROUP_METADATA);
4152}
4153
4154u64 btrfs_system_alloc_profile(struct btrfs_fs_info *fs_info)
4155{
4156 return get_alloc_profile(fs_info, BTRFS_BLOCK_GROUP_SYSTEM);
4157}
4158
Liu Bo41361352017-02-13 15:42:21 -08004159static u64 btrfs_space_info_used(struct btrfs_space_info *s_info,
4160 bool may_use_included)
4161{
4162 ASSERT(s_info);
4163 return s_info->bytes_used + s_info->bytes_reserved +
4164 s_info->bytes_pinned + s_info->bytes_readonly +
4165 (may_use_included ? s_info->bytes_may_use : 0);
4166}
4167
Nikolay Borisov04f4f912017-02-20 13:50:36 +02004168int btrfs_alloc_data_chunk_ondemand(struct btrfs_inode *inode, u64 bytes)
Josef Bacik6a632092009-02-20 11:00:09 -05004169{
Nikolay Borisov04f4f912017-02-20 13:50:36 +02004170 struct btrfs_root *root = inode->root;
Li Zefanb4d7c3c2012-07-09 20:21:07 -06004171 struct btrfs_fs_info *fs_info = root->fs_info;
Nikolay Borisov1174cad2017-07-11 13:47:50 +03004172 struct btrfs_space_info *data_sinfo = fs_info->data_sinfo;
Josef Bacikab6e24102010-03-19 14:38:13 +00004173 u64 used;
Zhao Lei94b947b2015-02-14 13:23:45 +08004174 int ret = 0;
Zhao Leic99f1b02015-03-02 19:32:20 +08004175 int need_commit = 2;
4176 int have_pinned_space;
Josef Bacik6a632092009-02-20 11:00:09 -05004177
4178 /* make sure bytes are sectorsize aligned */
Jeff Mahoney0b246af2016-06-22 18:54:23 -04004179 bytes = ALIGN(bytes, fs_info->sectorsize);
Josef Bacik6a632092009-02-20 11:00:09 -05004180
Miao Xie9dced182013-10-25 17:33:36 +08004181 if (btrfs_is_free_space_inode(inode)) {
Zhao Leic99f1b02015-03-02 19:32:20 +08004182 need_commit = 0;
Miao Xie9dced182013-10-25 17:33:36 +08004183 ASSERT(current->journal_info);
Josef Bacik0af3d002010-06-21 14:48:16 -04004184 }
4185
Josef Bacik6a632092009-02-20 11:00:09 -05004186again:
4187 /* make sure we have enough space to handle the data first */
4188 spin_lock(&data_sinfo->lock);
Liu Bo41361352017-02-13 15:42:21 -08004189 used = btrfs_space_info_used(data_sinfo, true);
Josef Bacikab6e24102010-03-19 14:38:13 +00004190
4191 if (used + bytes > data_sinfo->total_bytes) {
Josef Bacik4e06bdd2009-02-20 10:59:53 -05004192 struct btrfs_trans_handle *trans;
4193
Josef Bacik6a632092009-02-20 11:00:09 -05004194 /*
4195 * if we don't have enough free bytes in this space then we need
4196 * to alloc a new chunk.
4197 */
Zhao Leib9fd47c2015-02-09 14:40:20 +08004198 if (!data_sinfo->full) {
Josef Bacik6a632092009-02-20 11:00:09 -05004199 u64 alloc_target;
Josef Bacik6a632092009-02-20 11:00:09 -05004200
Chris Mason0e4f8f82011-04-15 16:05:44 -04004201 data_sinfo->force_alloc = CHUNK_ALLOC_FORCE;
Josef Bacik6a632092009-02-20 11:00:09 -05004202 spin_unlock(&data_sinfo->lock);
Nikolay Borisov1174cad2017-07-11 13:47:50 +03004203
Jeff Mahoney1b868262017-05-17 11:38:35 -04004204 alloc_target = btrfs_data_alloc_profile(fs_info);
Miao Xie9dced182013-10-25 17:33:36 +08004205 /*
4206 * It is ugly that we don't call nolock join
4207 * transaction for the free space inode case here.
4208 * But it is safe because we only do the data space
4209 * reservation for the free space cache in the
4210 * transaction context, the common join transaction
4211 * just increase the counter of the current transaction
4212 * handler, doesn't try to acquire the trans_lock of
4213 * the fs.
4214 */
Josef Bacik7a7eaa42011-04-13 12:54:33 -04004215 trans = btrfs_join_transaction(root);
Yan, Zhenga22285a2010-05-16 10:48:46 -04004216 if (IS_ERR(trans))
4217 return PTR_ERR(trans);
Josef Bacik6a632092009-02-20 11:00:09 -05004218
Nikolay Borisov01458822018-06-20 15:49:05 +03004219 ret = do_chunk_alloc(trans, alloc_target,
Chris Mason0e4f8f82011-04-15 16:05:44 -04004220 CHUNK_ALLOC_NO_FORCE);
Jeff Mahoney3a45bb22016-09-09 21:39:03 -04004221 btrfs_end_transaction(trans);
Miao Xied52a5b52011-01-05 10:07:18 +00004222 if (ret < 0) {
4223 if (ret != -ENOSPC)
4224 return ret;
Zhao Leic99f1b02015-03-02 19:32:20 +08004225 else {
4226 have_pinned_space = 1;
Miao Xied52a5b52011-01-05 10:07:18 +00004227 goto commit_trans;
Zhao Leic99f1b02015-03-02 19:32:20 +08004228 }
Miao Xied52a5b52011-01-05 10:07:18 +00004229 }
Chris Mason33b4d472009-09-22 14:45:50 -04004230
Josef Bacik6a632092009-02-20 11:00:09 -05004231 goto again;
4232 }
Josef Bacikf2bb8f52011-05-25 13:10:16 -04004233
4234 /*
Josef Bacikb150a4f2013-06-19 15:00:04 -04004235 * If we don't have enough pinned space to deal with this
Zhao Lei94b947b2015-02-14 13:23:45 +08004236 * allocation, and no removed chunk in current transaction,
4237 * don't bother committing the transaction.
Josef Bacikf2bb8f52011-05-25 13:10:16 -04004238 */
Ethan Liendec59fa2018-07-13 16:50:42 +08004239 have_pinned_space = __percpu_counter_compare(
Zhao Leic99f1b02015-03-02 19:32:20 +08004240 &data_sinfo->total_bytes_pinned,
Ethan Liendec59fa2018-07-13 16:50:42 +08004241 used + bytes - data_sinfo->total_bytes,
4242 BTRFS_TOTAL_BYTES_PINNED_BATCH);
Josef Bacik6a632092009-02-20 11:00:09 -05004243 spin_unlock(&data_sinfo->lock);
Josef Bacik4e06bdd2009-02-20 10:59:53 -05004244
4245 /* commit the current transaction and try again */
Miao Xied52a5b52011-01-05 10:07:18 +00004246commit_trans:
Nikolay Borisov92e2f7e2018-02-05 10:41:16 +02004247 if (need_commit) {
Zhao Leic99f1b02015-03-02 19:32:20 +08004248 need_commit--;
Josef Bacikb150a4f2013-06-19 15:00:04 -04004249
Zhao Leie1746e82015-12-01 18:39:40 +08004250 if (need_commit > 0) {
Nikolay Borisov82b3e532018-04-23 10:54:13 +03004251 btrfs_start_delalloc_roots(fs_info, -1);
Chris Mason6374e57a2017-06-23 09:48:21 -07004252 btrfs_wait_ordered_roots(fs_info, U64_MAX, 0,
Jeff Mahoney0b246af2016-06-22 18:54:23 -04004253 (u64)-1);
Zhao Leie1746e82015-12-01 18:39:40 +08004254 }
Zhao Lei9a4e7272015-04-09 12:34:43 +08004255
Josef Bacik7a7eaa42011-04-13 12:54:33 -04004256 trans = btrfs_join_transaction(root);
Yan, Zhenga22285a2010-05-16 10:48:46 -04004257 if (IS_ERR(trans))
4258 return PTR_ERR(trans);
Zhao Leic99f1b02015-03-02 19:32:20 +08004259 if (have_pinned_space >= 0 ||
Josef Bacik3204d332015-09-24 10:46:10 -04004260 test_bit(BTRFS_TRANS_HAVE_FREE_BGS,
4261 &trans->transaction->flags) ||
Zhao Leic99f1b02015-03-02 19:32:20 +08004262 need_commit > 0) {
Jeff Mahoney3a45bb22016-09-09 21:39:03 -04004263 ret = btrfs_commit_transaction(trans);
Zhao Lei94b947b2015-02-14 13:23:45 +08004264 if (ret)
4265 return ret;
Zhao Leid7c15172015-02-26 10:49:20 +08004266 /*
Filipe Mananac2d6cb12016-01-15 11:05:12 +00004267 * The cleaner kthread might still be doing iput
4268 * operations. Wait for it to finish so that
Josef Bacik034f7842018-12-03 11:06:52 -05004269 * more space is released. We don't need to
4270 * explicitly run the delayed iputs here because
4271 * the commit_transaction would have woken up
4272 * the cleaner.
Zhao Leid7c15172015-02-26 10:49:20 +08004273 */
Josef Bacik034f7842018-12-03 11:06:52 -05004274 ret = btrfs_wait_on_delayed_iputs(fs_info);
4275 if (ret)
4276 return ret;
Zhao Lei94b947b2015-02-14 13:23:45 +08004277 goto again;
4278 } else {
Jeff Mahoney3a45bb22016-09-09 21:39:03 -04004279 btrfs_end_transaction(trans);
Zhao Lei94b947b2015-02-14 13:23:45 +08004280 }
Josef Bacik4e06bdd2009-02-20 10:59:53 -05004281 }
4282
Jeff Mahoney0b246af2016-06-22 18:54:23 -04004283 trace_btrfs_space_reservation(fs_info,
Jeff Mahoneycab45e22013-10-16 16:27:01 -04004284 "space_info:enospc",
4285 data_sinfo->flags, bytes, 1);
Josef Bacik6a632092009-02-20 11:00:09 -05004286 return -ENOSPC;
4287 }
Qu Wenruo9f9b8e82018-10-24 20:24:01 +08004288 update_bytes_may_use(data_sinfo, bytes);
Jeff Mahoney0b246af2016-06-22 18:54:23 -04004289 trace_btrfs_space_reservation(fs_info, "space_info",
Liu Bo2bcc0322012-03-29 09:57:44 -04004290 data_sinfo->flags, bytes, 1);
Josef Bacik6a632092009-02-20 11:00:09 -05004291 spin_unlock(&data_sinfo->lock);
4292
Josef Bacik4559b0a72018-07-19 10:49:51 -04004293 return 0;
Josef Bacik6a632092009-02-20 11:00:09 -05004294}
4295
Qu Wenruo364ecf32017-02-27 15:10:38 +08004296int btrfs_check_data_free_space(struct inode *inode,
4297 struct extent_changeset **reserved, u64 start, u64 len)
Qu Wenruo4ceff072015-09-08 17:22:42 +08004298{
Jeff Mahoney0b246af2016-06-22 18:54:23 -04004299 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
Qu Wenruo4ceff072015-09-08 17:22:42 +08004300 int ret;
4301
4302 /* align the range */
Jeff Mahoney0b246af2016-06-22 18:54:23 -04004303 len = round_up(start + len, fs_info->sectorsize) -
4304 round_down(start, fs_info->sectorsize);
4305 start = round_down(start, fs_info->sectorsize);
Qu Wenruo4ceff072015-09-08 17:22:42 +08004306
Nikolay Borisov04f4f912017-02-20 13:50:36 +02004307 ret = btrfs_alloc_data_chunk_ondemand(BTRFS_I(inode), len);
Qu Wenruo4ceff072015-09-08 17:22:42 +08004308 if (ret < 0)
4309 return ret;
4310
Josef Bacik1e5ec2e2016-09-15 14:57:48 -04004311 /* Use new btrfs_qgroup_reserve_data to reserve precious data space. */
Qu Wenruo364ecf32017-02-27 15:10:38 +08004312 ret = btrfs_qgroup_reserve_data(inode, reserved, start, len);
Qu Wenruo7bc329c2017-02-27 15:10:36 +08004313 if (ret < 0)
Josef Bacik1e5ec2e2016-09-15 14:57:48 -04004314 btrfs_free_reserved_data_space_noquota(inode, start, len);
Qu Wenruo364ecf32017-02-27 15:10:38 +08004315 else
4316 ret = 0;
Qu Wenruo4ceff072015-09-08 17:22:42 +08004317 return ret;
4318}
4319
4320/*
Qu Wenruo4ceff072015-09-08 17:22:42 +08004321 * Called if we need to clear a data reservation for this inode
4322 * Normally in a error case.
4323 *
Qu Wenruo51773be2015-10-08 18:19:37 +08004324 * This one will *NOT* use accurate qgroup reserved space API, just for case
4325 * which we can't sleep and is sure it won't affect qgroup reserved space.
4326 * Like clear_bit_hook().
Qu Wenruo4ceff072015-09-08 17:22:42 +08004327 */
Qu Wenruo51773be2015-10-08 18:19:37 +08004328void btrfs_free_reserved_data_space_noquota(struct inode *inode, u64 start,
4329 u64 len)
Qu Wenruo4ceff072015-09-08 17:22:42 +08004330{
Jeff Mahoney0b246af2016-06-22 18:54:23 -04004331 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
Qu Wenruo4ceff072015-09-08 17:22:42 +08004332 struct btrfs_space_info *data_sinfo;
4333
4334 /* Make sure the range is aligned to sectorsize */
Jeff Mahoney0b246af2016-06-22 18:54:23 -04004335 len = round_up(start + len, fs_info->sectorsize) -
4336 round_down(start, fs_info->sectorsize);
4337 start = round_down(start, fs_info->sectorsize);
Qu Wenruo4ceff072015-09-08 17:22:42 +08004338
Jeff Mahoney0b246af2016-06-22 18:54:23 -04004339 data_sinfo = fs_info->data_sinfo;
Qu Wenruo4ceff072015-09-08 17:22:42 +08004340 spin_lock(&data_sinfo->lock);
Qu Wenruo9f9b8e82018-10-24 20:24:01 +08004341 update_bytes_may_use(data_sinfo, -len);
Jeff Mahoney0b246af2016-06-22 18:54:23 -04004342 trace_btrfs_space_reservation(fs_info, "space_info",
Qu Wenruo4ceff072015-09-08 17:22:42 +08004343 data_sinfo->flags, len, 0);
4344 spin_unlock(&data_sinfo->lock);
4345}
4346
Qu Wenruo51773be2015-10-08 18:19:37 +08004347/*
4348 * Called if we need to clear a data reservation for this inode
4349 * Normally in a error case.
4350 *
Nicholas D Steeves01327612016-05-19 21:18:45 -04004351 * This one will handle the per-inode data rsv map for accurate reserved
Qu Wenruo51773be2015-10-08 18:19:37 +08004352 * space framework.
4353 */
Qu Wenruobc42bda2017-02-27 15:10:39 +08004354void btrfs_free_reserved_data_space(struct inode *inode,
4355 struct extent_changeset *reserved, u64 start, u64 len)
Qu Wenruo51773be2015-10-08 18:19:37 +08004356{
Jeff Mahoney0c476a52016-11-18 21:52:40 -05004357 struct btrfs_root *root = BTRFS_I(inode)->root;
4358
4359 /* Make sure the range is aligned to sectorsize */
Jeff Mahoneyda170662016-06-15 09:22:56 -04004360 len = round_up(start + len, root->fs_info->sectorsize) -
4361 round_down(start, root->fs_info->sectorsize);
4362 start = round_down(start, root->fs_info->sectorsize);
Jeff Mahoney0c476a52016-11-18 21:52:40 -05004363
Qu Wenruo51773be2015-10-08 18:19:37 +08004364 btrfs_free_reserved_data_space_noquota(inode, start, len);
Qu Wenruobc42bda2017-02-27 15:10:39 +08004365 btrfs_qgroup_free_data(inode, reserved, start, len);
Qu Wenruo51773be2015-10-08 18:19:37 +08004366}
4367
Josef Bacik97e728d2009-04-21 17:40:57 -04004368static void force_metadata_allocation(struct btrfs_fs_info *info)
4369{
4370 struct list_head *head = &info->space_info;
4371 struct btrfs_space_info *found;
4372
4373 rcu_read_lock();
4374 list_for_each_entry_rcu(found, head, list) {
4375 if (found->flags & BTRFS_BLOCK_GROUP_METADATA)
Chris Mason0e4f8f82011-04-15 16:05:44 -04004376 found->force_alloc = CHUNK_ALLOC_FORCE;
Josef Bacik97e728d2009-04-21 17:40:57 -04004377 }
4378 rcu_read_unlock();
4379}
4380
Miao Xie3c76cd82013-04-25 10:12:38 +00004381static inline u64 calc_global_rsv_need_space(struct btrfs_block_rsv *global)
4382{
4383 return (global->size << 1);
4384}
4385
Jeff Mahoney2ff7e612016-06-22 18:54:24 -04004386static int should_alloc_chunk(struct btrfs_fs_info *fs_info,
Josef Bacik698d0082012-09-12 14:08:47 -04004387 struct btrfs_space_info *sinfo, int force)
Yan, Zheng424499d2010-05-16 10:46:25 -04004388{
Nikolay Borisov8d8aafe2017-06-22 09:51:48 -04004389 u64 bytes_used = btrfs_space_info_used(sinfo, false);
Chris Masone5bc2452010-10-26 13:37:56 -04004390 u64 thresh;
Yan, Zheng424499d2010-05-16 10:46:25 -04004391
Chris Mason0e4f8f82011-04-15 16:05:44 -04004392 if (force == CHUNK_ALLOC_FORCE)
4393 return 1;
4394
4395 /*
4396 * in limited mode, we want to have some free space up to
4397 * about 1% of the FS size.
4398 */
4399 if (force == CHUNK_ALLOC_LIMITED) {
Jeff Mahoney0b246af2016-06-22 18:54:23 -04004400 thresh = btrfs_super_total_bytes(fs_info->super_copy);
Byongho Leeee221842015-12-15 01:42:10 +09004401 thresh = max_t(u64, SZ_64M, div_factor_fine(thresh, 1));
Chris Mason0e4f8f82011-04-15 16:05:44 -04004402
Nikolay Borisov8d8aafe2017-06-22 09:51:48 -04004403 if (sinfo->total_bytes - bytes_used < thresh)
Chris Mason0e4f8f82011-04-15 16:05:44 -04004404 return 1;
4405 }
Chris Mason0e4f8f82011-04-15 16:05:44 -04004406
Nikolay Borisov8d8aafe2017-06-22 09:51:48 -04004407 if (bytes_used + SZ_2M < div_factor(sinfo->total_bytes, 8))
Josef Bacik14ed0ca2010-10-15 15:23:48 -04004408 return 0;
Yan, Zheng424499d2010-05-16 10:46:25 -04004409 return 1;
4410}
4411
Jeff Mahoney2ff7e612016-06-22 18:54:24 -04004412static u64 get_profile_num_devs(struct btrfs_fs_info *fs_info, u64 type)
Liu Bo15d1ff82012-03-29 09:57:44 -04004413{
4414 u64 num_dev;
4415
David Woodhouse53b381b2013-01-29 18:40:14 -05004416 if (type & (BTRFS_BLOCK_GROUP_RAID10 |
4417 BTRFS_BLOCK_GROUP_RAID0 |
4418 BTRFS_BLOCK_GROUP_RAID5 |
4419 BTRFS_BLOCK_GROUP_RAID6))
Jeff Mahoney0b246af2016-06-22 18:54:23 -04004420 num_dev = fs_info->fs_devices->rw_devices;
Liu Bo15d1ff82012-03-29 09:57:44 -04004421 else if (type & BTRFS_BLOCK_GROUP_RAID1)
4422 num_dev = 2;
4423 else
4424 num_dev = 1; /* DUP or single */
4425
Filipe Manana39c2d7f2015-05-20 14:01:55 +01004426 return num_dev;
Liu Bo15d1ff82012-03-29 09:57:44 -04004427}
4428
Filipe Manana39c2d7f2015-05-20 14:01:55 +01004429/*
4430 * If @is_allocation is true, reserve space in the system space info necessary
4431 * for allocating a chunk, otherwise if it's false, reserve space necessary for
4432 * removing a chunk.
4433 */
Nikolay Borisov451a2c12018-06-20 15:49:07 +03004434void check_system_chunk(struct btrfs_trans_handle *trans, u64 type)
Liu Bo15d1ff82012-03-29 09:57:44 -04004435{
Nikolay Borisov451a2c12018-06-20 15:49:07 +03004436 struct btrfs_fs_info *fs_info = trans->fs_info;
Liu Bo15d1ff82012-03-29 09:57:44 -04004437 struct btrfs_space_info *info;
4438 u64 left;
4439 u64 thresh;
Filipe Manana4fbcdf62015-05-20 14:01:54 +01004440 int ret = 0;
Filipe Manana39c2d7f2015-05-20 14:01:55 +01004441 u64 num_devs;
Filipe Manana4fbcdf62015-05-20 14:01:54 +01004442
4443 /*
4444 * Needed because we can end up allocating a system chunk and for an
4445 * atomic and race free space reservation in the chunk block reserve.
4446 */
David Sterbaa32bf9a2018-03-16 02:21:22 +01004447 lockdep_assert_held(&fs_info->chunk_mutex);
Liu Bo15d1ff82012-03-29 09:57:44 -04004448
Jeff Mahoney0b246af2016-06-22 18:54:23 -04004449 info = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_SYSTEM);
Liu Bo15d1ff82012-03-29 09:57:44 -04004450 spin_lock(&info->lock);
Liu Bo41361352017-02-13 15:42:21 -08004451 left = info->total_bytes - btrfs_space_info_used(info, true);
Liu Bo15d1ff82012-03-29 09:57:44 -04004452 spin_unlock(&info->lock);
4453
Jeff Mahoney2ff7e612016-06-22 18:54:24 -04004454 num_devs = get_profile_num_devs(fs_info, type);
Filipe Manana39c2d7f2015-05-20 14:01:55 +01004455
4456 /* num_devs device items to update and 1 chunk item to add or remove */
Jeff Mahoney0b246af2016-06-22 18:54:23 -04004457 thresh = btrfs_calc_trunc_metadata_size(fs_info, num_devs) +
4458 btrfs_calc_trans_metadata_size(fs_info, 1);
Filipe Manana39c2d7f2015-05-20 14:01:55 +01004459
Jeff Mahoney0b246af2016-06-22 18:54:23 -04004460 if (left < thresh && btrfs_test_opt(fs_info, ENOSPC_DEBUG)) {
4461 btrfs_info(fs_info, "left=%llu, need=%llu, flags=%llu",
4462 left, thresh, type);
4463 dump_space_info(fs_info, info, 0, 0);
Liu Bo15d1ff82012-03-29 09:57:44 -04004464 }
4465
4466 if (left < thresh) {
Jeff Mahoney1b868262017-05-17 11:38:35 -04004467 u64 flags = btrfs_system_alloc_profile(fs_info);
Liu Bo15d1ff82012-03-29 09:57:44 -04004468
Filipe Manana4fbcdf62015-05-20 14:01:54 +01004469 /*
4470 * Ignore failure to create system chunk. We might end up not
4471 * needing it, as we might not need to COW all nodes/leafs from
4472 * the paths we visit in the chunk tree (they were already COWed
4473 * or created in the current transaction for example).
4474 */
Nikolay Borisovc216b202018-06-20 15:49:06 +03004475 ret = btrfs_alloc_chunk(trans, flags);
Filipe Manana4fbcdf62015-05-20 14:01:54 +01004476 }
4477
4478 if (!ret) {
Jeff Mahoney0b246af2016-06-22 18:54:23 -04004479 ret = btrfs_block_rsv_add(fs_info->chunk_root,
4480 &fs_info->chunk_block_rsv,
Filipe Manana4fbcdf62015-05-20 14:01:54 +01004481 thresh, BTRFS_RESERVE_NO_FLUSH);
4482 if (!ret)
4483 trans->chunk_bytes_reserved += thresh;
Liu Bo15d1ff82012-03-29 09:57:44 -04004484 }
4485}
4486
Liu Bo28b737f2016-07-29 11:09:50 -07004487/*
4488 * If force is CHUNK_ALLOC_FORCE:
4489 * - return 1 if it successfully allocates a chunk,
4490 * - return errors including -ENOSPC otherwise.
4491 * If force is NOT CHUNK_ALLOC_FORCE:
4492 * - return 0 if it doesn't need to allocate a new chunk,
4493 * - return 1 if it successfully allocates a chunk,
4494 * - return errors including -ENOSPC otherwise.
4495 */
Nikolay Borisov01458822018-06-20 15:49:05 +03004496static int do_chunk_alloc(struct btrfs_trans_handle *trans, u64 flags,
4497 int force)
Chris Mason6324fbf2008-03-24 15:01:59 -04004498{
Nikolay Borisov01458822018-06-20 15:49:05 +03004499 struct btrfs_fs_info *fs_info = trans->fs_info;
Chris Mason6324fbf2008-03-24 15:01:59 -04004500 struct btrfs_space_info *space_info;
Nikolay Borisov2556fbb2018-04-18 10:27:57 +03004501 bool wait_for_alloc = false;
4502 bool should_alloc = false;
Yan Zhengc146afa2008-11-12 14:34:12 -05004503 int ret = 0;
4504
Josef Bacikc6b305a2012-12-18 09:16:16 -05004505 /* Don't re-enter if we're already allocating a chunk */
4506 if (trans->allocating_chunk)
4507 return -ENOSPC;
4508
Jeff Mahoney0b246af2016-06-22 18:54:23 -04004509 space_info = __find_space_info(fs_info, flags);
Jeff Mahoneydc2d3002018-03-20 15:25:25 -04004510 ASSERT(space_info);
Chris Mason6324fbf2008-03-24 15:01:59 -04004511
Nikolay Borisov2556fbb2018-04-18 10:27:57 +03004512 do {
4513 spin_lock(&space_info->lock);
4514 if (force < space_info->force_alloc)
4515 force = space_info->force_alloc;
4516 should_alloc = should_alloc_chunk(fs_info, space_info, force);
4517 if (space_info->full) {
4518 /* No more free physical space */
4519 if (should_alloc)
4520 ret = -ENOSPC;
4521 else
4522 ret = 0;
4523 spin_unlock(&space_info->lock);
4524 return ret;
4525 } else if (!should_alloc) {
4526 spin_unlock(&space_info->lock);
4527 return 0;
4528 } else if (space_info->chunk_alloc) {
4529 /*
4530 * Someone is already allocating, so we need to block
4531 * until this someone is finished and then loop to
4532 * recheck if we should continue with our allocation
4533 * attempt.
4534 */
4535 wait_for_alloc = true;
4536 spin_unlock(&space_info->lock);
4537 mutex_lock(&fs_info->chunk_mutex);
4538 mutex_unlock(&fs_info->chunk_mutex);
4539 } else {
4540 /* Proceed with allocation */
4541 space_info->chunk_alloc = 1;
4542 wait_for_alloc = false;
4543 spin_unlock(&space_info->lock);
4544 }
Chris Mason6324fbf2008-03-24 15:01:59 -04004545
Nikolay Borisov2556fbb2018-04-18 10:27:57 +03004546 cond_resched();
4547 } while (wait_for_alloc);
Josef Bacik25179202008-10-29 14:49:05 -04004548
Josef Bacik6d741192011-04-11 20:20:11 -04004549 mutex_lock(&fs_info->chunk_mutex);
Josef Bacikc6b305a2012-12-18 09:16:16 -05004550 trans->allocating_chunk = true;
4551
Josef Bacik97e728d2009-04-21 17:40:57 -04004552 /*
Josef Bacik67377732010-09-16 16:19:09 -04004553 * If we have mixed data/metadata chunks we want to make sure we keep
4554 * allocating mixed chunks instead of individual chunks.
4555 */
4556 if (btrfs_mixed_space_info(space_info))
4557 flags |= (BTRFS_BLOCK_GROUP_DATA | BTRFS_BLOCK_GROUP_METADATA);
4558
4559 /*
Josef Bacik97e728d2009-04-21 17:40:57 -04004560 * if we're doing a data chunk, go ahead and make sure that
4561 * we keep a reasonable number of metadata chunks allocated in the
4562 * FS as well.
4563 */
Josef Bacik9ed74f22009-09-11 16:12:44 -04004564 if (flags & BTRFS_BLOCK_GROUP_DATA && fs_info->metadata_ratio) {
Josef Bacik97e728d2009-04-21 17:40:57 -04004565 fs_info->data_chunk_allocations++;
4566 if (!(fs_info->data_chunk_allocations %
4567 fs_info->metadata_ratio))
4568 force_metadata_allocation(fs_info);
4569 }
4570
Liu Bo15d1ff82012-03-29 09:57:44 -04004571 /*
4572 * Check if we have enough space in SYSTEM chunk because we may need
4573 * to update devices.
4574 */
Nikolay Borisov451a2c12018-06-20 15:49:07 +03004575 check_system_chunk(trans, flags);
Liu Bo15d1ff82012-03-29 09:57:44 -04004576
Nikolay Borisovc216b202018-06-20 15:49:06 +03004577 ret = btrfs_alloc_chunk(trans, flags);
Josef Bacikc6b305a2012-12-18 09:16:16 -05004578 trans->allocating_chunk = false;
Mark Fasheh92b8e8972011-07-12 10:57:59 -07004579
Josef Bacik9ed74f22009-09-11 16:12:44 -04004580 spin_lock(&space_info->lock);
Nikolay Borisov57f16422018-04-11 11:21:19 +03004581 if (ret < 0) {
4582 if (ret == -ENOSPC)
4583 space_info->full = 1;
4584 else
4585 goto out;
4586 } else {
Yan, Zheng424499d2010-05-16 10:46:25 -04004587 ret = 1;
Josef Bacik21a94f72018-10-11 15:54:03 -04004588 space_info->max_extent_size = 0;
Nikolay Borisov57f16422018-04-11 11:21:19 +03004589 }
Josef Bacik6d741192011-04-11 20:20:11 -04004590
Chris Mason0e4f8f82011-04-15 16:05:44 -04004591 space_info->force_alloc = CHUNK_ALLOC_NO_FORCE;
Alexandre Olivaa81cb9a2013-02-21 21:15:14 +00004592out:
Josef Bacik6d741192011-04-11 20:20:11 -04004593 space_info->chunk_alloc = 0;
Josef Bacik9ed74f22009-09-11 16:12:44 -04004594 spin_unlock(&space_info->lock);
Dan Carpentera25c75d2012-04-18 09:59:29 +03004595 mutex_unlock(&fs_info->chunk_mutex);
Filipe Manana00d80e32015-07-20 14:56:20 +01004596 /*
4597 * When we allocate a new chunk we reserve space in the chunk block
4598 * reserve to make sure we can COW nodes/leafs in the chunk tree or
4599 * add new nodes/leafs to it if we end up needing to do it when
4600 * inserting the chunk item and updating device items as part of the
4601 * second phase of chunk allocation, performed by
4602 * btrfs_finish_chunk_alloc(). So make sure we don't accumulate a
4603 * large number of new block groups to create in our transaction
4604 * handle's new_bgs list to avoid exhausting the chunk block reserve
4605 * in extreme cases - like having a single transaction create many new
4606 * block groups when starting to write out the free space caches of all
4607 * the block groups that were made dirty during the lifetime of the
4608 * transaction.
4609 */
Filipe Manana5ce55552018-10-12 10:03:55 +01004610 if (trans->chunk_bytes_reserved >= (u64)SZ_2M)
Nikolay Borisov6c686b32018-02-07 17:55:40 +02004611 btrfs_create_pending_block_groups(trans);
Filipe Manana5ce55552018-10-12 10:03:55 +01004612
Josef Bacik0f9dd462008-09-23 13:14:11 -04004613 return ret;
Chris Mason6324fbf2008-03-24 15:01:59 -04004614}
4615
Jeff Mahoneyc1c49192017-05-17 11:38:36 -04004616static int can_overcommit(struct btrfs_fs_info *fs_info,
Josef Bacika80c8dcf2012-09-06 16:59:33 -04004617 struct btrfs_space_info *space_info, u64 bytes,
Jeff Mahoneyc1c49192017-05-17 11:38:36 -04004618 enum btrfs_reserve_flush_enum flush,
4619 bool system_chunk)
Josef Bacika80c8dcf2012-09-06 16:59:33 -04004620{
Jeff Mahoney0b246af2016-06-22 18:54:23 -04004621 struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
Josef Bacik957780e2016-05-17 13:30:55 -04004622 u64 profile;
Miao Xie3c76cd82013-04-25 10:12:38 +00004623 u64 space_size;
Josef Bacika80c8dcf2012-09-06 16:59:33 -04004624 u64 avail;
4625 u64 used;
David Sterba46df06b2018-07-13 20:46:30 +02004626 int factor;
Josef Bacika80c8dcf2012-09-06 16:59:33 -04004627
Josef Bacik957780e2016-05-17 13:30:55 -04004628 /* Don't overcommit when in mixed mode. */
4629 if (space_info->flags & BTRFS_BLOCK_GROUP_DATA)
4630 return 0;
4631
Jeff Mahoneyc1c49192017-05-17 11:38:36 -04004632 if (system_chunk)
4633 profile = btrfs_system_alloc_profile(fs_info);
4634 else
4635 profile = btrfs_metadata_alloc_profile(fs_info);
4636
Liu Bo41361352017-02-13 15:42:21 -08004637 used = btrfs_space_info_used(space_info, false);
Josef Bacik96f1bb52013-01-30 17:02:51 -05004638
Josef Bacik96f1bb52013-01-30 17:02:51 -05004639 /*
4640 * We only want to allow over committing if we have lots of actual space
4641 * free, but if we don't have enough space to handle the global reserve
4642 * space then we could end up having a real enospc problem when trying
4643 * to allocate a chunk or some other such important allocation.
4644 */
Miao Xie3c76cd82013-04-25 10:12:38 +00004645 spin_lock(&global_rsv->lock);
4646 space_size = calc_global_rsv_need_space(global_rsv);
4647 spin_unlock(&global_rsv->lock);
4648 if (used + space_size >= space_info->total_bytes)
Josef Bacik96f1bb52013-01-30 17:02:51 -05004649 return 0;
4650
4651 used += space_info->bytes_may_use;
Josef Bacika80c8dcf2012-09-06 16:59:33 -04004652
Nikolay Borisova5ed45f2017-05-11 09:17:46 +03004653 avail = atomic64_read(&fs_info->free_chunk_space);
Josef Bacika80c8dcf2012-09-06 16:59:33 -04004654
4655 /*
4656 * If we have dup, raid1 or raid10 then only half of the free
Andrea Gelmini52042d82018-11-28 12:05:13 +01004657 * space is actually usable. For raid56, the space info used
David Woodhouse53b381b2013-01-29 18:40:14 -05004658 * doesn't include the parity drive, so we don't have to
4659 * change the math
Josef Bacika80c8dcf2012-09-06 16:59:33 -04004660 */
David Sterba46df06b2018-07-13 20:46:30 +02004661 factor = btrfs_bg_type_to_factor(profile);
4662 avail = div_u64(avail, factor);
Josef Bacika80c8dcf2012-09-06 16:59:33 -04004663
4664 /*
Miao Xie561c2942012-10-16 11:32:18 +00004665 * If we aren't flushing all things, let us overcommit up to
4666 * 1/2th of the space. If we can flush, don't let us overcommit
4667 * too much, let it overcommit up to 1/8 of the space.
Josef Bacika80c8dcf2012-09-06 16:59:33 -04004668 */
Miao Xie08e007d2012-10-16 11:33:38 +00004669 if (flush == BTRFS_RESERVE_FLUSH_ALL)
Josef Bacik14575ae2013-09-17 10:48:00 -04004670 avail >>= 3;
Josef Bacika80c8dcf2012-09-06 16:59:33 -04004671 else
Josef Bacik14575ae2013-09-17 10:48:00 -04004672 avail >>= 1;
Josef Bacika80c8dcf2012-09-06 16:59:33 -04004673
Josef Bacik14575ae2013-09-17 10:48:00 -04004674 if (used + bytes < space_info->total_bytes + avail)
Josef Bacika80c8dcf2012-09-06 16:59:33 -04004675 return 1;
4676 return 0;
4677}
4678
Jeff Mahoney2ff7e612016-06-22 18:54:24 -04004679static void btrfs_writeback_inodes_sb_nr(struct btrfs_fs_info *fs_info,
Miao Xie6c255e62014-03-06 13:55:01 +08004680 unsigned long nr_pages, int nr_items)
Miao Xieda633a42012-12-20 11:19:09 +00004681{
Jeff Mahoney0b246af2016-06-22 18:54:23 -04004682 struct super_block *sb = fs_info->sb;
Miao Xieda633a42012-12-20 11:19:09 +00004683
Josef Bacik925a6ef2013-06-20 12:31:27 -04004684 if (down_read_trylock(&sb->s_umount)) {
4685 writeback_inodes_sb_nr(sb, nr_pages, WB_REASON_FS_FREE_SPACE);
4686 up_read(&sb->s_umount);
4687 } else {
Miao Xieda633a42012-12-20 11:19:09 +00004688 /*
4689 * We needn't worry the filesystem going from r/w to r/o though
4690 * we don't acquire ->s_umount mutex, because the filesystem
4691 * should guarantee the delalloc inodes list be empty after
4692 * the filesystem is readonly(all dirty pages are written to
4693 * the disk).
4694 */
Nikolay Borisov82b3e532018-04-23 10:54:13 +03004695 btrfs_start_delalloc_roots(fs_info, nr_items);
Josef Bacik98ad69c2013-04-04 11:55:49 -04004696 if (!current->journal_info)
Jeff Mahoney0b246af2016-06-22 18:54:23 -04004697 btrfs_wait_ordered_roots(fs_info, nr_items, 0, (u64)-1);
Miao Xieda633a42012-12-20 11:19:09 +00004698 }
4699}
4700
Chris Mason6374e57a2017-06-23 09:48:21 -07004701static inline u64 calc_reclaim_items_nr(struct btrfs_fs_info *fs_info,
Jeff Mahoney2ff7e612016-06-22 18:54:24 -04004702 u64 to_reclaim)
Miao Xie18cd8ea2013-11-04 23:13:22 +08004703{
4704 u64 bytes;
Chris Mason6374e57a2017-06-23 09:48:21 -07004705 u64 nr;
Miao Xie18cd8ea2013-11-04 23:13:22 +08004706
Jeff Mahoney2ff7e612016-06-22 18:54:24 -04004707 bytes = btrfs_calc_trans_metadata_size(fs_info, 1);
Chris Mason6374e57a2017-06-23 09:48:21 -07004708 nr = div64_u64(to_reclaim, bytes);
Miao Xie18cd8ea2013-11-04 23:13:22 +08004709 if (!nr)
4710 nr = 1;
4711 return nr;
4712}
4713
Byongho Leeee221842015-12-15 01:42:10 +09004714#define EXTENT_SIZE_PER_ITEM SZ_256K
Miao Xiec61a16a2013-11-04 23:13:23 +08004715
Yan, Zheng5da9d012010-05-16 10:46:25 -04004716/*
4717 * shrink metadata reservation for delalloc
4718 */
Jeff Mahoneyc1c49192017-05-17 11:38:36 -04004719static void shrink_delalloc(struct btrfs_fs_info *fs_info, u64 to_reclaim,
4720 u64 orig, bool wait_ordered)
Yan, Zheng5da9d012010-05-16 10:46:25 -04004721{
Josef Bacik0019f102010-10-15 15:18:40 -04004722 struct btrfs_space_info *space_info;
Josef Bacik663350a2011-11-03 22:54:25 -04004723 struct btrfs_trans_handle *trans;
Josef Bacikf4c738c2012-07-02 17:10:51 -04004724 u64 delalloc_bytes;
Nikolay Borisov420829d2019-01-03 10:50:05 +02004725 u64 async_pages;
Chris Mason6374e57a2017-06-23 09:48:21 -07004726 u64 items;
Josef Bacikb1953bc2011-01-21 21:10:01 +00004727 long time_left;
Miao Xied3ee29e32013-11-04 23:13:20 +08004728 unsigned long nr_pages;
4729 int loops;
Yan, Zheng5da9d012010-05-16 10:46:25 -04004730
Miao Xiec61a16a2013-11-04 23:13:23 +08004731 /* Calc the number of the pages we need flush for space reservation */
Jeff Mahoney2ff7e612016-06-22 18:54:24 -04004732 items = calc_reclaim_items_nr(fs_info, to_reclaim);
Chris Mason6374e57a2017-06-23 09:48:21 -07004733 to_reclaim = items * EXTENT_SIZE_PER_ITEM;
Miao Xiec61a16a2013-11-04 23:13:23 +08004734
Josef Bacik663350a2011-11-03 22:54:25 -04004735 trans = (struct btrfs_trans_handle *)current->journal_info;
Josef Bacik69fe2d72017-10-19 14:15:57 -04004736 space_info = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA);
Chris Masonbf9022e2010-10-26 13:40:45 -04004737
Miao Xie963d6782013-01-29 10:10:51 +00004738 delalloc_bytes = percpu_counter_sum_positive(
Jeff Mahoney0b246af2016-06-22 18:54:23 -04004739 &fs_info->delalloc_bytes);
Josef Bacikf4c738c2012-07-02 17:10:51 -04004740 if (delalloc_bytes == 0) {
Josef Bacikfdb5eff2011-06-07 16:07:44 -04004741 if (trans)
Josef Bacikf4c738c2012-07-02 17:10:51 -04004742 return;
Miao Xie38c135a2013-11-04 23:13:21 +08004743 if (wait_ordered)
Jeff Mahoney0b246af2016-06-22 18:54:23 -04004744 btrfs_wait_ordered_roots(fs_info, items, 0, (u64)-1);
Josef Bacikf4c738c2012-07-02 17:10:51 -04004745 return;
Josef Bacikfdb5eff2011-06-07 16:07:44 -04004746 }
4747
Miao Xied3ee29e32013-11-04 23:13:20 +08004748 loops = 0;
Josef Bacikf4c738c2012-07-02 17:10:51 -04004749 while (delalloc_bytes && loops < 3) {
Nikolay Borisov420829d2019-01-03 10:50:05 +02004750 nr_pages = min(delalloc_bytes, to_reclaim) >> PAGE_SHIFT;
4751
Josef Bacikdea31f52012-09-06 16:47:00 -04004752 /*
Nikolay Borisov420829d2019-01-03 10:50:05 +02004753 * Triggers inode writeback for up to nr_pages. This will invoke
4754 * ->writepages callback and trigger delalloc filling
4755 * (btrfs_run_delalloc_range()).
Josef Bacikdea31f52012-09-06 16:47:00 -04004756 */
Nikolay Borisov420829d2019-01-03 10:50:05 +02004757 btrfs_writeback_inodes_sb_nr(fs_info, nr_pages, items);
4758
4759 /*
4760 * We need to wait for the compressed pages to start before
4761 * we continue.
4762 */
4763 async_pages = atomic_read(&fs_info->async_delalloc_pages);
4764 if (!async_pages)
Miao Xie9f3a0742013-11-04 23:13:24 +08004765 goto skip_async;
Josef Bacikdea31f52012-09-06 16:47:00 -04004766
Nikolay Borisov420829d2019-01-03 10:50:05 +02004767 /*
4768 * Calculate how many compressed pages we want to be written
4769 * before we continue. I.e if there are more async pages than we
4770 * require wait_event will wait until nr_pages are written.
4771 */
4772 if (async_pages <= nr_pages)
4773 async_pages = 0;
Miao Xie9f3a0742013-11-04 23:13:24 +08004774 else
Nikolay Borisov420829d2019-01-03 10:50:05 +02004775 async_pages -= nr_pages;
Miao Xie9f3a0742013-11-04 23:13:24 +08004776
Jeff Mahoney0b246af2016-06-22 18:54:23 -04004777 wait_event(fs_info->async_submit_wait,
4778 atomic_read(&fs_info->async_delalloc_pages) <=
Nikolay Borisov420829d2019-01-03 10:50:05 +02004779 (int)async_pages);
Miao Xie9f3a0742013-11-04 23:13:24 +08004780skip_async:
Josef Bacik0019f102010-10-15 15:18:40 -04004781 spin_lock(&space_info->lock);
Josef Bacik957780e2016-05-17 13:30:55 -04004782 if (list_empty(&space_info->tickets) &&
4783 list_empty(&space_info->priority_tickets)) {
4784 spin_unlock(&space_info->lock);
4785 break;
4786 }
Josef Bacik0019f102010-10-15 15:18:40 -04004787 spin_unlock(&space_info->lock);
Yan, Zheng5da9d012010-05-16 10:46:25 -04004788
Chris Mason36e39c42011-03-12 07:08:42 -05004789 loops++;
Josef Bacikf104d042011-10-14 13:56:58 -04004790 if (wait_ordered && !trans) {
Jeff Mahoney0b246af2016-06-22 18:54:23 -04004791 btrfs_wait_ordered_roots(fs_info, items, 0, (u64)-1);
Josef Bacikf104d042011-10-14 13:56:58 -04004792 } else {
Josef Bacikf4c738c2012-07-02 17:10:51 -04004793 time_left = schedule_timeout_killable(1);
Josef Bacikf104d042011-10-14 13:56:58 -04004794 if (time_left)
4795 break;
4796 }
Miao Xie963d6782013-01-29 10:10:51 +00004797 delalloc_bytes = percpu_counter_sum_positive(
Jeff Mahoney0b246af2016-06-22 18:54:23 -04004798 &fs_info->delalloc_bytes);
Yan, Zheng5da9d012010-05-16 10:46:25 -04004799 }
Yan, Zheng5da9d012010-05-16 10:46:25 -04004800}
4801
Josef Bacik996478c2017-08-22 16:00:39 -04004802struct reserve_ticket {
Josef Bacikf91587e2018-11-21 14:03:10 -05004803 u64 orig_bytes;
Josef Bacik996478c2017-08-22 16:00:39 -04004804 u64 bytes;
4805 int error;
4806 struct list_head list;
4807 wait_queue_head_t wait;
4808};
4809
Josef Bacik4a92b1b2011-08-30 12:34:28 -04004810/**
Josef Bacik663350a2011-11-03 22:54:25 -04004811 * maybe_commit_transaction - possibly commit the transaction if its ok to
4812 * @root - the root we're allocating for
4813 * @bytes - the number of bytes we want to reserve
4814 * @force - force the commit
Josef Bacik8bb8ab22010-10-15 16:52:49 -04004815 *
Josef Bacik663350a2011-11-03 22:54:25 -04004816 * This will check to make sure that committing the transaction will actually
4817 * get us somewhere and then commit the transaction if it does. Otherwise it
4818 * will return -ENOSPC.
Josef Bacik8bb8ab22010-10-15 16:52:49 -04004819 */
Jeff Mahoney0c9ab342017-02-15 16:28:28 -05004820static int may_commit_transaction(struct btrfs_fs_info *fs_info,
Josef Bacik996478c2017-08-22 16:00:39 -04004821 struct btrfs_space_info *space_info)
Josef Bacik663350a2011-11-03 22:54:25 -04004822{
Josef Bacik996478c2017-08-22 16:00:39 -04004823 struct reserve_ticket *ticket = NULL;
Jeff Mahoney0b246af2016-06-22 18:54:23 -04004824 struct btrfs_block_rsv *delayed_rsv = &fs_info->delayed_block_rsv;
Josef Bacik4c8edbc2018-12-03 10:20:34 -05004825 struct btrfs_block_rsv *delayed_refs_rsv = &fs_info->delayed_refs_rsv;
Josef Bacik663350a2011-11-03 22:54:25 -04004826 struct btrfs_trans_handle *trans;
Josef Bacik4c8edbc2018-12-03 10:20:34 -05004827 u64 bytes_needed;
4828 u64 reclaim_bytes = 0;
Josef Bacik663350a2011-11-03 22:54:25 -04004829
4830 trans = (struct btrfs_trans_handle *)current->journal_info;
4831 if (trans)
4832 return -EAGAIN;
4833
Josef Bacik996478c2017-08-22 16:00:39 -04004834 spin_lock(&space_info->lock);
4835 if (!list_empty(&space_info->priority_tickets))
4836 ticket = list_first_entry(&space_info->priority_tickets,
4837 struct reserve_ticket, list);
4838 else if (!list_empty(&space_info->tickets))
4839 ticket = list_first_entry(&space_info->tickets,
4840 struct reserve_ticket, list);
Josef Bacik4c8edbc2018-12-03 10:20:34 -05004841 bytes_needed = (ticket) ? ticket->bytes : 0;
Josef Bacik996478c2017-08-22 16:00:39 -04004842 spin_unlock(&space_info->lock);
4843
Josef Bacik4c8edbc2018-12-03 10:20:34 -05004844 if (!bytes_needed)
Josef Bacik996478c2017-08-22 16:00:39 -04004845 return 0;
Josef Bacik663350a2011-11-03 22:54:25 -04004846
Josef Bacikd89dbef2018-11-21 14:03:06 -05004847 trans = btrfs_join_transaction(fs_info->extent_root);
4848 if (IS_ERR(trans))
4849 return PTR_ERR(trans);
4850
4851 /*
4852 * See if there is enough pinned space to make this reservation, or if
4853 * we have block groups that are going to be freed, allowing us to
4854 * possibly do a chunk allocation the next loop through.
4855 */
4856 if (test_bit(BTRFS_TRANS_HAVE_FREE_BGS, &trans->transaction->flags) ||
4857 __percpu_counter_compare(&space_info->total_bytes_pinned,
4858 bytes_needed,
4859 BTRFS_TOTAL_BYTES_PINNED_BATCH) >= 0)
Josef Bacik663350a2011-11-03 22:54:25 -04004860 goto commit;
Josef Bacik663350a2011-11-03 22:54:25 -04004861
4862 /*
4863 * See if there is some space in the delayed insertion reservation for
4864 * this reservation.
4865 */
4866 if (space_info != delayed_rsv->space_info)
Josef Bacikd89dbef2018-11-21 14:03:06 -05004867 goto enospc;
Josef Bacik663350a2011-11-03 22:54:25 -04004868
4869 spin_lock(&delayed_rsv->lock);
Josef Bacik4c8edbc2018-12-03 10:20:34 -05004870 reclaim_bytes += delayed_rsv->reserved;
Nikolay Borisov057aac32017-11-07 11:22:54 +02004871 spin_unlock(&delayed_rsv->lock);
4872
Josef Bacik4c8edbc2018-12-03 10:20:34 -05004873 spin_lock(&delayed_refs_rsv->lock);
4874 reclaim_bytes += delayed_refs_rsv->reserved;
4875 spin_unlock(&delayed_refs_rsv->lock);
4876 if (reclaim_bytes >= bytes_needed)
4877 goto commit;
4878 bytes_needed -= reclaim_bytes;
4879
Ethan Liendec59fa2018-07-13 16:50:42 +08004880 if (__percpu_counter_compare(&space_info->total_bytes_pinned,
Josef Bacik4c8edbc2018-12-03 10:20:34 -05004881 bytes_needed,
Josef Bacikd89dbef2018-11-21 14:03:06 -05004882 BTRFS_TOTAL_BYTES_PINNED_BATCH) < 0)
4883 goto enospc;
Josef Bacik663350a2011-11-03 22:54:25 -04004884
4885commit:
Jeff Mahoney3a45bb22016-09-09 21:39:03 -04004886 return btrfs_commit_transaction(trans);
Josef Bacikd89dbef2018-11-21 14:03:06 -05004887enospc:
4888 btrfs_end_transaction(trans);
4889 return -ENOSPC;
Josef Bacik663350a2011-11-03 22:54:25 -04004890}
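
/*
 * Illustrative sketch (not actual code): the decision above boils down to
 * roughly the following for a metadata reservation:
 *
 *	reclaimable = total_bytes_pinned + delayed_rsv->reserved +
 *		      delayed_refs_rsv->reserved;
 *	if (reclaimable >= ticket->bytes)
 *		commit the transaction;		// unpinning covers the ticket
 *	else
 *		return -ENOSPC;			// a commit would not help
 */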
4891
Nikolay Borisove38ae7a2017-07-25 17:48:28 +03004892/*
4893 * Try to flush some data based on policy set by @state. This is only advisory
4894 * and may fail for various reasons. The caller is supposed to examine the
4895 * state of @space_info to detect the outcome.
4896 */
4897static void flush_space(struct btrfs_fs_info *fs_info,
Josef Bacik96c3f432012-06-21 14:05:49 -04004898 struct btrfs_space_info *space_info, u64 num_bytes,
Nikolay Borisov7bdd6272017-07-11 13:25:13 +03004899 int state)
Josef Bacik96c3f432012-06-21 14:05:49 -04004900{
Jeff Mahoneya9b33112017-05-17 11:38:34 -04004901 struct btrfs_root *root = fs_info->extent_root;
Josef Bacik96c3f432012-06-21 14:05:49 -04004902 struct btrfs_trans_handle *trans;
4903 int nr;
Josef Bacikf4c738c2012-07-02 17:10:51 -04004904 int ret = 0;
Josef Bacik96c3f432012-06-21 14:05:49 -04004905
4906 switch (state) {
Josef Bacik96c3f432012-06-21 14:05:49 -04004907 case FLUSH_DELAYED_ITEMS_NR:
4908 case FLUSH_DELAYED_ITEMS:
Miao Xie18cd8ea2013-11-04 23:13:22 +08004909 if (state == FLUSH_DELAYED_ITEMS_NR)
Jeff Mahoney2ff7e612016-06-22 18:54:24 -04004910 nr = calc_reclaim_items_nr(fs_info, num_bytes) * 2;
Miao Xie18cd8ea2013-11-04 23:13:22 +08004911 else
Josef Bacik96c3f432012-06-21 14:05:49 -04004912 nr = -1;
Miao Xie18cd8ea2013-11-04 23:13:22 +08004913
Josef Bacik96c3f432012-06-21 14:05:49 -04004914 trans = btrfs_join_transaction(root);
4915 if (IS_ERR(trans)) {
4916 ret = PTR_ERR(trans);
4917 break;
4918 }
Nikolay Borisove5c304e62018-02-07 17:55:43 +02004919 ret = btrfs_run_delayed_items_nr(trans, nr);
Jeff Mahoney3a45bb22016-09-09 21:39:03 -04004920 btrfs_end_transaction(trans);
Josef Bacik96c3f432012-06-21 14:05:49 -04004921 break;
Josef Bacik67b0fd62012-09-24 13:42:00 -04004922 case FLUSH_DELALLOC:
4923 case FLUSH_DELALLOC_WAIT:
Nikolay Borisov7bdd6272017-07-11 13:25:13 +03004924 shrink_delalloc(fs_info, num_bytes * 2, num_bytes,
Josef Bacik67b0fd62012-09-24 13:42:00 -04004925 state == FLUSH_DELALLOC_WAIT);
4926 break;
Josef Bacik413df722018-12-03 10:20:35 -05004927 case FLUSH_DELAYED_REFS_NR:
4928 case FLUSH_DELAYED_REFS:
4929 trans = btrfs_join_transaction(root);
4930 if (IS_ERR(trans)) {
4931 ret = PTR_ERR(trans);
4932 break;
4933 }
4934 if (state == FLUSH_DELAYED_REFS_NR)
4935 nr = calc_reclaim_items_nr(fs_info, num_bytes);
4936 else
4937 nr = 0;
4938 btrfs_run_delayed_refs(trans, nr);
4939 btrfs_end_transaction(trans);
4940 break;
Josef Bacikea658ba2012-09-11 16:57:25 -04004941 case ALLOC_CHUNK:
Josef Bacik450114f2018-11-21 14:03:08 -05004942 case ALLOC_CHUNK_FORCE:
Josef Bacikea658ba2012-09-11 16:57:25 -04004943 trans = btrfs_join_transaction(root);
4944 if (IS_ERR(trans)) {
4945 ret = PTR_ERR(trans);
4946 break;
4947 }
Nikolay Borisov01458822018-06-20 15:49:05 +03004948 ret = do_chunk_alloc(trans,
Jeff Mahoney1b868262017-05-17 11:38:35 -04004949 btrfs_metadata_alloc_profile(fs_info),
Josef Bacik450114f2018-11-21 14:03:08 -05004950 (state == ALLOC_CHUNK) ?
4951 CHUNK_ALLOC_NO_FORCE : CHUNK_ALLOC_FORCE);
Jeff Mahoney3a45bb22016-09-09 21:39:03 -04004952 btrfs_end_transaction(trans);
Alex Lyakaseecba892015-12-06 12:32:31 +02004953 if (ret > 0 || ret == -ENOSPC)
Josef Bacikea658ba2012-09-11 16:57:25 -04004954 ret = 0;
4955 break;
Josef Bacik96c3f432012-06-21 14:05:49 -04004956 case COMMIT_TRANS:
Josef Bacik3ec9a4c2019-01-11 10:21:01 -05004957 /*
4958 * If we have pending delayed iputs then we could free up a
4959 * bunch of pinned space, so make sure we run the iputs before
4960 * we do our pinned bytes check below.
4961 */
Josef Bacik3ec9a4c2019-01-11 10:21:01 -05004962 btrfs_run_delayed_iputs(fs_info);
Josef Bacik034f7842018-12-03 11:06:52 -05004963 btrfs_wait_on_delayed_iputs(fs_info);
Josef Bacik3ec9a4c2019-01-11 10:21:01 -05004964
Josef Bacik996478c2017-08-22 16:00:39 -04004965 ret = may_commit_transaction(fs_info, space_info);
Josef Bacik96c3f432012-06-21 14:05:49 -04004966 break;
4967 default:
4968 ret = -ENOSPC;
4969 break;
4970 }
4971
Nikolay Borisov7bdd6272017-07-11 13:25:13 +03004972 trace_btrfs_flush_space(fs_info, space_info->flags, num_bytes, state,
4973 ret);
Nikolay Borisove38ae7a2017-07-25 17:48:28 +03004974 return;
Josef Bacik96c3f432012-06-21 14:05:49 -04004975}
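
/*
 * For reference, the states above are walked in increasing order by the
 * async flusher; at this point in the code's history the ladder is
 * (roughly, see the btrfs_flush_state enum):
 *
 *	FLUSH_DELAYED_ITEMS_NR -> FLUSH_DELAYED_ITEMS ->
 *	FLUSH_DELALLOC -> FLUSH_DELALLOC_WAIT ->
 *	FLUSH_DELAYED_REFS_NR -> FLUSH_DELAYED_REFS ->
 *	ALLOC_CHUNK -> ALLOC_CHUNK_FORCE -> COMMIT_TRANS
 *
 * i.e. cheap, targeted flushing first, a full transaction commit last.
 */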
Miao Xie21c7e752014-05-13 17:29:04 -07004976
4977static inline u64
Jeff Mahoneyc1c49192017-05-17 11:38:36 -04004978btrfs_calc_reclaim_metadata_size(struct btrfs_fs_info *fs_info,
4979 struct btrfs_space_info *space_info,
4980 bool system_chunk)
Miao Xie21c7e752014-05-13 17:29:04 -07004981{
Josef Bacik957780e2016-05-17 13:30:55 -04004982 struct reserve_ticket *ticket;
Miao Xie21c7e752014-05-13 17:29:04 -07004983 u64 used;
4984 u64 expected;
Josef Bacik957780e2016-05-17 13:30:55 -04004985 u64 to_reclaim = 0;
Miao Xie21c7e752014-05-13 17:29:04 -07004986
Josef Bacik957780e2016-05-17 13:30:55 -04004987 list_for_each_entry(ticket, &space_info->tickets, list)
4988 to_reclaim += ticket->bytes;
4989 list_for_each_entry(ticket, &space_info->priority_tickets, list)
4990 to_reclaim += ticket->bytes;
4991 if (to_reclaim)
4992 return to_reclaim;
Miao Xie21c7e752014-05-13 17:29:04 -07004993
Wang Xiaoguange0af2482016-08-31 19:46:16 +08004994 to_reclaim = min_t(u64, num_online_cpus() * SZ_1M, SZ_16M);
Jeff Mahoneyc1c49192017-05-17 11:38:36 -04004995 if (can_overcommit(fs_info, space_info, to_reclaim,
4996 BTRFS_RESERVE_FLUSH_ALL, system_chunk))
Wang Xiaoguange0af2482016-08-31 19:46:16 +08004997 return 0;
4998
Nikolay Borisov0eee8a42017-06-14 11:35:34 +03004999 used = btrfs_space_info_used(space_info, true);
5000
Jeff Mahoneyc1c49192017-05-17 11:38:36 -04005001 if (can_overcommit(fs_info, space_info, SZ_1M,
5002 BTRFS_RESERVE_FLUSH_ALL, system_chunk))
Miao Xie21c7e752014-05-13 17:29:04 -07005003 expected = div_factor_fine(space_info->total_bytes, 95);
5004 else
5005 expected = div_factor_fine(space_info->total_bytes, 90);
5006
5007 if (used > expected)
5008 to_reclaim = used - expected;
5009 else
5010 to_reclaim = 0;
5011 to_reclaim = min(to_reclaim, space_info->bytes_may_use +
5012 space_info->bytes_reserved);
Miao Xie21c7e752014-05-13 17:29:04 -07005013 return to_reclaim;
5014}
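
/*
 * Worked example with made-up numbers: no tickets are queued,
 * total_bytes = 10GiB, used = 9.8GiB, and we cannot overcommit even 1MiB.
 * Then expected = 90% of 10GiB = 9GiB and to_reclaim = 9.8GiB - 9GiB =
 * 0.8GiB, clamped to bytes_may_use + bytes_reserved.
 */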
5015
Jeff Mahoneyc1c49192017-05-17 11:38:36 -04005016static inline int need_do_async_reclaim(struct btrfs_fs_info *fs_info,
5017 struct btrfs_space_info *space_info,
5018 u64 used, bool system_chunk)
Miao Xie21c7e752014-05-13 17:29:04 -07005019{
Josef Bacik365c5312015-02-18 13:58:15 -08005020 u64 thresh = div_factor_fine(space_info->total_bytes, 98);
5021
5022 /* If we're just plain full then async reclaim just slows us down. */
Josef Bacikbaee8792016-01-26 09:35:38 -05005023 if ((space_info->bytes_used + space_info->bytes_reserved) >= thresh)
Josef Bacik365c5312015-02-18 13:58:15 -08005024 return 0;
5025
Jeff Mahoneyc1c49192017-05-17 11:38:36 -04005026 if (!btrfs_calc_reclaim_metadata_size(fs_info, space_info,
5027 system_chunk))
Liu Bo25ce4592014-09-10 12:58:50 +08005028 return 0;
Liu Bo25ce4592014-09-10 12:58:50 +08005029
Jeff Mahoney0b246af2016-06-22 18:54:23 -04005030 return (used >= thresh && !btrfs_fs_closing(fs_info) &&
5031 !test_bit(BTRFS_FS_STATE_REMOUNTING, &fs_info->fs_state));
Miao Xie21c7e752014-05-13 17:29:04 -07005032}
5033
Josef Bacikf91587e2018-11-21 14:03:10 -05005034static bool wake_all_tickets(struct list_head *head)
Miao Xie21c7e752014-05-13 17:29:04 -07005035{
Josef Bacik957780e2016-05-17 13:30:55 -04005036 struct reserve_ticket *ticket;
Miao Xie21c7e752014-05-13 17:29:04 -07005037
Josef Bacik957780e2016-05-17 13:30:55 -04005038 while (!list_empty(head)) {
5039 ticket = list_first_entry(head, struct reserve_ticket, list);
5040 list_del_init(&ticket->list);
5041 ticket->error = -ENOSPC;
5042 wake_up(&ticket->wait);
Josef Bacikf91587e2018-11-21 14:03:10 -05005043 if (ticket->bytes != ticket->orig_bytes)
5044 return true;
Miao Xie21c7e752014-05-13 17:29:04 -07005045 }
Josef Bacikf91587e2018-11-21 14:03:10 -05005046 return false;
Miao Xie21c7e752014-05-13 17:29:04 -07005047}
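
/*
 * Note: a true return above means some ticket was partially satisfied
 * (bytes != orig_bytes), i.e. flushing made forward progress; the async
 * flusher uses this to restart its loop instead of giving up entirely.
 */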
5048
Josef Bacik957780e2016-05-17 13:30:55 -04005049/*
5050 * This is for normal flushers, we can wait all goddamned day if we want to. We
5051 * will loop and continuously try to flush as long as we are making progress.
5052 * We count progress as clearing off tickets each time we have to loop.
5053 */
Miao Xie21c7e752014-05-13 17:29:04 -07005054static void btrfs_async_reclaim_metadata_space(struct work_struct *work)
5055{
5056 struct btrfs_fs_info *fs_info;
5057 struct btrfs_space_info *space_info;
5058 u64 to_reclaim;
5059 int flush_state;
Josef Bacik957780e2016-05-17 13:30:55 -04005060 int commit_cycles = 0;
Wang Xiaoguangce129652016-09-02 10:58:46 +08005061 u64 last_tickets_id;
Miao Xie21c7e752014-05-13 17:29:04 -07005062
5063 fs_info = container_of(work, struct btrfs_fs_info, async_reclaim_work);
5064 space_info = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA);
5065
Josef Bacik957780e2016-05-17 13:30:55 -04005066 spin_lock(&space_info->lock);
Jeff Mahoneyc1c49192017-05-17 11:38:36 -04005067 to_reclaim = btrfs_calc_reclaim_metadata_size(fs_info, space_info,
5068 false);
Josef Bacik957780e2016-05-17 13:30:55 -04005069 if (!to_reclaim) {
5070 space_info->flush = 0;
5071 spin_unlock(&space_info->lock);
Miao Xie21c7e752014-05-13 17:29:04 -07005072 return;
Josef Bacik957780e2016-05-17 13:30:55 -04005073 }
Wang Xiaoguangce129652016-09-02 10:58:46 +08005074 last_tickets_id = space_info->tickets_id;
Josef Bacik957780e2016-05-17 13:30:55 -04005075 spin_unlock(&space_info->lock);
Miao Xie21c7e752014-05-13 17:29:04 -07005076
5077 flush_state = FLUSH_DELAYED_ITEMS_NR;
5078 do {
Nikolay Borisove38ae7a2017-07-25 17:48:28 +03005079 flush_space(fs_info, space_info, to_reclaim, flush_state);
Josef Bacik957780e2016-05-17 13:30:55 -04005080 spin_lock(&space_info->lock);
5081 if (list_empty(&space_info->tickets)) {
5082 space_info->flush = 0;
5083 spin_unlock(&space_info->lock);
Miao Xie21c7e752014-05-13 17:29:04 -07005084 return;
Josef Bacik957780e2016-05-17 13:30:55 -04005085 }
Jeff Mahoneyc1c49192017-05-17 11:38:36 -04005086 to_reclaim = btrfs_calc_reclaim_metadata_size(fs_info,
5087 space_info,
5088 false);
Wang Xiaoguangce129652016-09-02 10:58:46 +08005089 if (last_tickets_id == space_info->tickets_id) {
Josef Bacik957780e2016-05-17 13:30:55 -04005090 flush_state++;
5091 } else {
Wang Xiaoguangce129652016-09-02 10:58:46 +08005092 last_tickets_id = space_info->tickets_id;
Josef Bacik957780e2016-05-17 13:30:55 -04005093 flush_state = FLUSH_DELAYED_ITEMS_NR;
5094 if (commit_cycles)
5095 commit_cycles--;
5096 }
5097
Josef Bacik450114f2018-11-21 14:03:08 -05005098 /*
5099 * We don't want to force a chunk allocation until we've tried
5100 * pretty hard to reclaim space. Think of the case where we
5101 * freed up a bunch of space and so have a lot of pinned space
5102 * to reclaim. We would rather use that than possibly create a
5103 * underutilized metadata chunk. So if this is our first run
5104 * through the flushing state machine skip ALLOC_CHUNK_FORCE and
5105 * commit the transaction. If nothing has changed the next go
5106 * around then we can force a chunk allocation.
5107 */
5108 if (flush_state == ALLOC_CHUNK_FORCE && !commit_cycles)
5109 flush_state++;
5110
Josef Bacik957780e2016-05-17 13:30:55 -04005111 if (flush_state > COMMIT_TRANS) {
5112 commit_cycles++;
5113 if (commit_cycles > 2) {
Josef Bacikf91587e2018-11-21 14:03:10 -05005114 if (wake_all_tickets(&space_info->tickets)) {
5115 flush_state = FLUSH_DELAYED_ITEMS_NR;
5116 commit_cycles--;
5117 } else {
5118 space_info->flush = 0;
5119 }
Josef Bacik957780e2016-05-17 13:30:55 -04005120 } else {
5121 flush_state = FLUSH_DELAYED_ITEMS_NR;
5122 }
5123 }
5124 spin_unlock(&space_info->lock);
5125 } while (flush_state <= COMMIT_TRANS);
Miao Xie21c7e752014-05-13 17:29:04 -07005126}
5127
5128void btrfs_init_async_reclaim_work(struct work_struct *work)
5129{
5130 INIT_WORK(work, btrfs_async_reclaim_metadata_space);
5131}
5132
Josef Bacik8a1bbe12018-11-21 14:03:12 -05005133static const enum btrfs_flush_state priority_flush_states[] = {
5134 FLUSH_DELAYED_ITEMS_NR,
5135 FLUSH_DELAYED_ITEMS,
5136 ALLOC_CHUNK,
5137};
5138
Josef Bacik957780e2016-05-17 13:30:55 -04005139static void priority_reclaim_metadata_space(struct btrfs_fs_info *fs_info,
5140 struct btrfs_space_info *space_info,
5141 struct reserve_ticket *ticket)
5142{
5143 u64 to_reclaim;
Josef Bacik8a1bbe12018-11-21 14:03:12 -05005144 int flush_state;
Josef Bacik957780e2016-05-17 13:30:55 -04005145
5146 spin_lock(&space_info->lock);
Jeff Mahoneyc1c49192017-05-17 11:38:36 -04005147 to_reclaim = btrfs_calc_reclaim_metadata_size(fs_info, space_info,
5148 false);
Josef Bacik957780e2016-05-17 13:30:55 -04005149 if (!to_reclaim) {
5150 spin_unlock(&space_info->lock);
5151 return;
5152 }
5153 spin_unlock(&space_info->lock);
5154
Josef Bacik8a1bbe12018-11-21 14:03:12 -05005155 flush_state = 0;
Josef Bacik957780e2016-05-17 13:30:55 -04005156 do {
Josef Bacik8a1bbe12018-11-21 14:03:12 -05005157 flush_space(fs_info, space_info, to_reclaim,
5158 priority_flush_states[flush_state]);
Josef Bacik957780e2016-05-17 13:30:55 -04005159 flush_state++;
5160 spin_lock(&space_info->lock);
5161 if (ticket->bytes == 0) {
5162 spin_unlock(&space_info->lock);
5163 return;
5164 }
5165 spin_unlock(&space_info->lock);
Josef Bacik8a1bbe12018-11-21 14:03:12 -05005166 } while (flush_state < ARRAY_SIZE(priority_flush_states));
Josef Bacik957780e2016-05-17 13:30:55 -04005167}
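
/*
 * Unlike the async flusher above, priority flushers run this shorter
 * ladder (delayed items plus a best-effort chunk allocation, never a
 * transaction commit) synchronously in their own context, instead of
 * sleeping while a worker makes progress on their behalf.
 */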
5168
5169static int wait_reserve_ticket(struct btrfs_fs_info *fs_info,
5170 struct btrfs_space_info *space_info,
Josef Bacikf91587e2018-11-21 14:03:10 -05005171 struct reserve_ticket *ticket)
Josef Bacik957780e2016-05-17 13:30:55 -04005172
5173{
5174 DEFINE_WAIT(wait);
Josef Bacikf91587e2018-11-21 14:03:10 -05005175 u64 reclaim_bytes = 0;
Josef Bacik957780e2016-05-17 13:30:55 -04005176 int ret = 0;
5177
5178 spin_lock(&space_info->lock);
5179 while (ticket->bytes > 0 && ticket->error == 0) {
5180 ret = prepare_to_wait_event(&ticket->wait, &wait, TASK_KILLABLE);
5181 if (ret) {
5182 ret = -EINTR;
5183 break;
5184 }
5185 spin_unlock(&space_info->lock);
5186
5187 schedule();
5188
5189 finish_wait(&ticket->wait, &wait);
5190 spin_lock(&space_info->lock);
5191 }
5192 if (!ret)
5193 ret = ticket->error;
5194 if (!list_empty(&ticket->list))
5195 list_del_init(&ticket->list);
Josef Bacikf91587e2018-11-21 14:03:10 -05005196 if (ticket->bytes && ticket->bytes < ticket->orig_bytes)
5197 reclaim_bytes = ticket->orig_bytes - ticket->bytes;
Josef Bacik957780e2016-05-17 13:30:55 -04005198 spin_unlock(&space_info->lock);
5199
Josef Bacikf91587e2018-11-21 14:03:10 -05005200 if (reclaim_bytes)
5201 space_info_add_old_bytes(fs_info, space_info, reclaim_bytes);
Josef Bacik957780e2016-05-17 13:30:55 -04005202 return ret;
5203}
5204
Josef Bacik663350a2011-11-03 22:54:25 -04005205/**
Josef Bacik4a92b1b2011-08-30 12:34:28 -04005206 * __reserve_metadata_bytes - try to reserve bytes from the space_info's space
 5207 * @fs_info - the filesystem we're allocating for
Josef Bacik957780e2016-05-17 13:30:55 -04005208 * @space_info - the space info we want to allocate from
Josef Bacik4a92b1b2011-08-30 12:34:28 -04005209 * @orig_bytes - the number of bytes we want
Adam Buchbinder48fc7f72012-09-19 21:48:00 -04005210 * @flush - whether or not we can flush to make our reservation
 * @system_chunk - whether this reservation is for system chunk space
Josef Bacik8bb8ab22010-10-15 16:52:49 -04005211 *
Nicholas D Steeves01327612016-05-19 21:18:45 -04005212 * This will reserve orig_bytes number of bytes from the given space_info. If
Josef Bacik4a92b1b2011-08-30 12:34:28 -04005213 * there is not enough space it will make an attempt to flush out space to
 5214 * make room. It will do this by flushing delalloc if possible or committing
 5215 * the transaction. If flush is BTRFS_RESERVE_NO_FLUSH then no attempts to
 5216 * regain reservations will be made and this will fail if there is not
 5217 * enough space already.
Josef Bacik8bb8ab22010-10-15 16:52:49 -04005218 */
Jeff Mahoneyc1c49192017-05-17 11:38:36 -04005219static int __reserve_metadata_bytes(struct btrfs_fs_info *fs_info,
Josef Bacik957780e2016-05-17 13:30:55 -04005220 struct btrfs_space_info *space_info,
5221 u64 orig_bytes,
Jeff Mahoneyc1c49192017-05-17 11:38:36 -04005222 enum btrfs_reserve_flush_enum flush,
5223 bool system_chunk)
Yan, Zhengf0486c62010-05-16 10:46:25 -04005224{
Josef Bacik957780e2016-05-17 13:30:55 -04005225 struct reserve_ticket ticket;
Josef Bacik2bf64752011-09-26 17:12:22 -04005226 u64 used;
Josef Bacikf91587e2018-11-21 14:03:10 -05005227 u64 reclaim_bytes = 0;
Josef Bacik8bb8ab22010-10-15 16:52:49 -04005228 int ret = 0;
Josef Bacik8bb8ab22010-10-15 16:52:49 -04005229
Josef Bacik957780e2016-05-17 13:30:55 -04005230 ASSERT(orig_bytes);
Josef Bacik8ca17f02016-05-27 13:24:13 -04005231 ASSERT(!current->journal_info || flush != BTRFS_RESERVE_FLUSH_ALL);
5232
Yan, Zhengf0486c62010-05-16 10:46:25 -04005233 spin_lock(&space_info->lock);
Josef Bacikfdb5eff2011-06-07 16:07:44 -04005234 ret = -ENOSPC;
Liu Bo41361352017-02-13 15:42:21 -08005235 used = btrfs_space_info_used(space_info, true);
Yan, Zhengf0486c62010-05-16 10:46:25 -04005236
Josef Bacik8bb8ab22010-10-15 16:52:49 -04005237 /*
Josef Bacik957780e2016-05-17 13:30:55 -04005238 * If we have enough space then hooray, make our reservation and carry
 5239 * on. If not, see if we can overcommit, and if we can, hooray, carry on.
 5240 * If not, things get more complicated.
Josef Bacik8bb8ab22010-10-15 16:52:49 -04005241 */
Josef Bacik957780e2016-05-17 13:30:55 -04005242 if (used + orig_bytes <= space_info->total_bytes) {
Qu Wenruo9f9b8e82018-10-24 20:24:01 +08005243 update_bytes_may_use(space_info, orig_bytes);
Jeff Mahoney0b246af2016-06-22 18:54:23 -04005244 trace_btrfs_space_reservation(fs_info, "space_info",
5245 space_info->flags, orig_bytes, 1);
Josef Bacik957780e2016-05-17 13:30:55 -04005246 ret = 0;
Jeff Mahoneyc1c49192017-05-17 11:38:36 -04005247 } else if (can_overcommit(fs_info, space_info, orig_bytes, flush,
5248 system_chunk)) {
Qu Wenruo9f9b8e82018-10-24 20:24:01 +08005249 update_bytes_may_use(space_info, orig_bytes);
Jeff Mahoney0b246af2016-06-22 18:54:23 -04005250 trace_btrfs_space_reservation(fs_info, "space_info",
5251 space_info->flags, orig_bytes, 1);
Josef Bacik44734ed2012-09-28 16:04:19 -04005252 ret = 0;
Josef Bacik2bf64752011-09-26 17:12:22 -04005253 }
5254
Josef Bacik8bb8ab22010-10-15 16:52:49 -04005255 /*
Josef Bacik957780e2016-05-17 13:30:55 -04005256 * If we couldn't make a reservation then setup our reservation ticket
5257 * and kick the async worker if it's not already running.
Miao Xie08e007d2012-10-16 11:33:38 +00005258 *
Josef Bacik957780e2016-05-17 13:30:55 -04005259 * If we are a priority flusher then we just need to add our ticket to
5260 * the list and we will do our own flushing further down.
Josef Bacik8bb8ab22010-10-15 16:52:49 -04005261 */
Josef Bacik72bcd992012-12-18 15:16:34 -05005262 if (ret && flush != BTRFS_RESERVE_NO_FLUSH) {
Josef Bacikf91587e2018-11-21 14:03:10 -05005263 ticket.orig_bytes = orig_bytes;
Josef Bacik957780e2016-05-17 13:30:55 -04005264 ticket.bytes = orig_bytes;
5265 ticket.error = 0;
5266 init_waitqueue_head(&ticket.wait);
5267 if (flush == BTRFS_RESERVE_FLUSH_ALL) {
5268 list_add_tail(&ticket.list, &space_info->tickets);
5269 if (!space_info->flush) {
5270 space_info->flush = 1;
Jeff Mahoney0b246af2016-06-22 18:54:23 -04005271 trace_btrfs_trigger_flush(fs_info,
Josef Bacikf376df22016-03-25 13:25:56 -04005272 space_info->flags,
5273 orig_bytes, flush,
5274 "enospc");
Josef Bacik957780e2016-05-17 13:30:55 -04005275 queue_work(system_unbound_wq,
Jeff Mahoneyc1c49192017-05-17 11:38:36 -04005276 &fs_info->async_reclaim_work);
Josef Bacik957780e2016-05-17 13:30:55 -04005277 }
5278 } else {
5279 list_add_tail(&ticket.list,
5280 &space_info->priority_tickets);
5281 }
Miao Xie21c7e752014-05-13 17:29:04 -07005282 } else if (!ret && space_info->flags & BTRFS_BLOCK_GROUP_METADATA) {
5283 used += orig_bytes;
Josef Bacikf6acfd52014-09-18 11:27:17 -04005284 /*
5285 * We will do the space reservation dance during log replay,
5286 * which means we won't have fs_info->fs_root set, so don't do
5287 * the async reclaim as we will panic.
5288 */
Jeff Mahoney0b246af2016-06-22 18:54:23 -04005289 if (!test_bit(BTRFS_FS_LOG_RECOVERING, &fs_info->flags) &&
Jeff Mahoneyc1c49192017-05-17 11:38:36 -04005290 need_do_async_reclaim(fs_info, space_info,
5291 used, system_chunk) &&
Jeff Mahoney0b246af2016-06-22 18:54:23 -04005292 !work_busy(&fs_info->async_reclaim_work)) {
5293 trace_btrfs_trigger_flush(fs_info, space_info->flags,
5294 orig_bytes, flush, "preempt");
Miao Xie21c7e752014-05-13 17:29:04 -07005295 queue_work(system_unbound_wq,
Jeff Mahoney0b246af2016-06-22 18:54:23 -04005296 &fs_info->async_reclaim_work);
Josef Bacikf376df22016-03-25 13:25:56 -04005297 }
Josef Bacik8bb8ab22010-10-15 16:52:49 -04005298 }
Yan, Zhengf0486c62010-05-16 10:46:25 -04005299 spin_unlock(&space_info->lock);
Miao Xie08e007d2012-10-16 11:33:38 +00005300 if (!ret || flush == BTRFS_RESERVE_NO_FLUSH)
Josef Bacik957780e2016-05-17 13:30:55 -04005301 return ret;
Josef Bacik8bb8ab22010-10-15 16:52:49 -04005302
Josef Bacik957780e2016-05-17 13:30:55 -04005303 if (flush == BTRFS_RESERVE_FLUSH_ALL)
Josef Bacikf91587e2018-11-21 14:03:10 -05005304 return wait_reserve_ticket(fs_info, space_info, &ticket);
Miao Xie08e007d2012-10-16 11:33:38 +00005305
Josef Bacik957780e2016-05-17 13:30:55 -04005306 ret = 0;
Jeff Mahoney0b246af2016-06-22 18:54:23 -04005307 priority_reclaim_metadata_space(fs_info, space_info, &ticket);
Josef Bacik957780e2016-05-17 13:30:55 -04005308 spin_lock(&space_info->lock);
5309 if (ticket.bytes) {
Josef Bacikf91587e2018-11-21 14:03:10 -05005310 if (ticket.bytes < orig_bytes)
5311 reclaim_bytes = orig_bytes - ticket.bytes;
Josef Bacik957780e2016-05-17 13:30:55 -04005312 list_del_init(&ticket.list);
5313 ret = -ENOSPC;
5314 }
5315 spin_unlock(&space_info->lock);
Josef Bacikf91587e2018-11-21 14:03:10 -05005316
5317 if (reclaim_bytes)
5318 space_info_add_old_bytes(fs_info, space_info, reclaim_bytes);
Josef Bacik957780e2016-05-17 13:30:55 -04005319 ASSERT(list_empty(&ticket.list));
5320 return ret;
5321}
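
/*
 * Rough control-flow sketch of the function above (illustrative):
 *
 *	if (used + orig_bytes <= total_bytes || can_overcommit())
 *		bytes_may_use += orig_bytes;	// fast path, success
 *	else if (flush == BTRFS_RESERVE_FLUSH_ALL)
 *		queue a ticket, kick the async flusher, and sleep in
 *		wait_reserve_ticket();		// slow path
 *	else if (flush != BTRFS_RESERVE_NO_FLUSH)
 *		queue a priority ticket and flush inline via
 *		priority_reclaim_metadata_space();
 *	else
 *		return -ENOSPC;			// no flushing allowed
 */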
Josef Bacik8bb8ab22010-10-15 16:52:49 -04005322
Josef Bacik8bb8ab22010-10-15 16:52:49 -04005323/**
5324 * reserve_metadata_bytes - try to reserve bytes from the block_rsv's space
5325 * @root - the root we're allocating for
5326 * @block_rsv - the block_rsv we're allocating for
5327 * @orig_bytes - the number of bytes we want
5328 * @flush - whether or not we can flush to make our reservation
5329 *
Andrea Gelmini52042d82018-11-28 12:05:13 +01005330 * This will reserve orig_bytes number of bytes from the space info associated
Josef Bacik8bb8ab22010-10-15 16:52:49 -04005331 * with the block_rsv. If there is not enough space it will make an attempt to
5332 * flush out space to make room. It will do this by flushing delalloc if
 5333 * possible or committing the transaction. If flush is BTRFS_RESERVE_NO_FLUSH
 5334 * then no attempts to regain reservations will be made and this will fail
 5335 * if there is not enough space already.
5336 */
5337static int reserve_metadata_bytes(struct btrfs_root *root,
5338 struct btrfs_block_rsv *block_rsv,
5339 u64 orig_bytes,
5340 enum btrfs_reserve_flush_enum flush)
5341{
Jeff Mahoney0b246af2016-06-22 18:54:23 -04005342 struct btrfs_fs_info *fs_info = root->fs_info;
5343 struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
Josef Bacik957780e2016-05-17 13:30:55 -04005344 int ret;
Jeff Mahoneyc1c49192017-05-17 11:38:36 -04005345 bool system_chunk = (root == fs_info->chunk_root);
Yan, Zhengf0486c62010-05-16 10:46:25 -04005346
Jeff Mahoneyc1c49192017-05-17 11:38:36 -04005347 ret = __reserve_metadata_bytes(fs_info, block_rsv->space_info,
5348 orig_bytes, flush, system_chunk);
Josef Bacik5d803662013-02-07 16:06:02 -05005349 if (ret == -ENOSPC &&
5350 unlikely(root->orphan_cleanup_state == ORPHAN_CLEANUP_STARTED)) {
Josef Bacik5d803662013-02-07 16:06:02 -05005351 if (block_rsv != global_rsv &&
5352 !block_rsv_use_bytes(global_rsv, orig_bytes))
5353 ret = 0;
5354 }
Nikolay Borisov9a3daff2017-12-15 12:05:37 +02005355 if (ret == -ENOSPC) {
Jeff Mahoney0b246af2016-06-22 18:54:23 -04005356 trace_btrfs_space_reservation(fs_info, "space_info:enospc",
Josef Bacik957780e2016-05-17 13:30:55 -04005357 block_rsv->space_info->flags,
5358 orig_bytes, 1);
Nikolay Borisov9a3daff2017-12-15 12:05:37 +02005359
5360 if (btrfs_test_opt(fs_info, ENOSPC_DEBUG))
5361 dump_space_info(fs_info, block_rsv->space_info,
5362 orig_bytes, 0);
5363 }
Yan, Zhengf0486c62010-05-16 10:46:25 -04005364 return ret;
5365}
5366
Jeff Mahoney79787ea2012-03-12 16:03:00 +01005367static struct btrfs_block_rsv *get_block_rsv(
5368 const struct btrfs_trans_handle *trans,
5369 const struct btrfs_root *root)
Yan, Zhengf0486c62010-05-16 10:46:25 -04005370{
Jeff Mahoney0b246af2016-06-22 18:54:23 -04005371 struct btrfs_fs_info *fs_info = root->fs_info;
Josef Bacik4c13d752011-08-30 11:31:29 -04005372 struct btrfs_block_rsv *block_rsv = NULL;
5373
Alexandru Moisee9cf4392015-09-09 00:18:50 +00005374 if (test_bit(BTRFS_ROOT_REF_COWS, &root->state) ||
Jeff Mahoney0b246af2016-06-22 18:54:23 -04005375 (root == fs_info->csum_root && trans->adding_csums) ||
5376 (root == fs_info->uuid_root))
Stefan Behrensf7a81ea2013-08-15 17:11:19 +02005377 block_rsv = trans->block_rsv;
5378
Josef Bacik4c13d752011-08-30 11:31:29 -04005379 if (!block_rsv)
Yan, Zhengf0486c62010-05-16 10:46:25 -04005380 block_rsv = root->block_rsv;
5381
5382 if (!block_rsv)
Jeff Mahoney0b246af2016-06-22 18:54:23 -04005383 block_rsv = &fs_info->empty_block_rsv;
Yan, Zhengf0486c62010-05-16 10:46:25 -04005384
5385 return block_rsv;
5386}
5387
5388static int block_rsv_use_bytes(struct btrfs_block_rsv *block_rsv,
5389 u64 num_bytes)
5390{
5391 int ret = -ENOSPC;
5392 spin_lock(&block_rsv->lock);
5393 if (block_rsv->reserved >= num_bytes) {
5394 block_rsv->reserved -= num_bytes;
5395 if (block_rsv->reserved < block_rsv->size)
5396 block_rsv->full = 0;
5397 ret = 0;
5398 }
5399 spin_unlock(&block_rsv->lock);
5400 return ret;
5401}
5402
5403static void block_rsv_add_bytes(struct btrfs_block_rsv *block_rsv,
Lu Fengqi3a584172018-08-04 21:10:55 +08005404 u64 num_bytes, bool update_size)
Yan, Zhengf0486c62010-05-16 10:46:25 -04005405{
5406 spin_lock(&block_rsv->lock);
5407 block_rsv->reserved += num_bytes;
5408 if (update_size)
5409 block_rsv->size += num_bytes;
5410 else if (block_rsv->reserved >= block_rsv->size)
5411 block_rsv->full = 1;
5412 spin_unlock(&block_rsv->lock);
5413}
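
/*
 * The update_size flag above distinguishes two cases: growing an rsv with a
 * brand new reservation (true, so size tracks the new bytes) versus
 * refilling an rsv back toward an already-established size (false, e.g.
 * btrfs_block_rsv_refill() below).
 */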
5414
Josef Bacikd52be812013-05-29 14:54:47 -04005415int btrfs_cond_migrate_bytes(struct btrfs_fs_info *fs_info,
5416 struct btrfs_block_rsv *dest, u64 num_bytes,
5417 int min_factor)
5418{
5419 struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
5420 u64 min_bytes;
5421
5422 if (global_rsv->space_info != dest->space_info)
5423 return -ENOSPC;
5424
5425 spin_lock(&global_rsv->lock);
5426 min_bytes = div_factor(global_rsv->size, min_factor);
5427 if (global_rsv->reserved < min_bytes + num_bytes) {
5428 spin_unlock(&global_rsv->lock);
5429 return -ENOSPC;
5430 }
5431 global_rsv->reserved -= num_bytes;
5432 if (global_rsv->reserved < global_rsv->size)
5433 global_rsv->full = 0;
5434 spin_unlock(&global_rsv->lock);
5435
Lu Fengqi3a584172018-08-04 21:10:55 +08005436 block_rsv_add_bytes(dest, num_bytes, true);
Josef Bacikd52be812013-05-29 14:54:47 -04005437 return 0;
5438}
5439
Josef Bacikba2c4d42018-12-03 10:20:33 -05005440/**
5441 * btrfs_migrate_to_delayed_refs_rsv - transfer bytes to our delayed refs rsv.
5442 * @fs_info - the fs info for our fs.
5443 * @src - the source block rsv to transfer from.
5444 * @num_bytes - the number of bytes to transfer.
5445 *
 5446 * This transfers up to num_bytes from the src rsv to the delayed_refs_rsv;
 5447 * any excess bytes are returned to the space info.
5448 */
5449void btrfs_migrate_to_delayed_refs_rsv(struct btrfs_fs_info *fs_info,
5450 struct btrfs_block_rsv *src,
5451 u64 num_bytes)
5452{
5453 struct btrfs_block_rsv *delayed_refs_rsv = &fs_info->delayed_refs_rsv;
5454 u64 to_free = 0;
5455
5456 spin_lock(&src->lock);
5457 src->reserved -= num_bytes;
5458 src->size -= num_bytes;
5459 spin_unlock(&src->lock);
5460
5461 spin_lock(&delayed_refs_rsv->lock);
5462 if (delayed_refs_rsv->size > delayed_refs_rsv->reserved) {
5463 u64 delta = delayed_refs_rsv->size -
5464 delayed_refs_rsv->reserved;
5465 if (num_bytes > delta) {
5466 to_free = num_bytes - delta;
5467 num_bytes = delta;
5468 }
5469 } else {
5470 to_free = num_bytes;
5471 num_bytes = 0;
5472 }
5473
5474 if (num_bytes)
5475 delayed_refs_rsv->reserved += num_bytes;
5476 if (delayed_refs_rsv->reserved >= delayed_refs_rsv->size)
5477 delayed_refs_rsv->full = 1;
5478 spin_unlock(&delayed_refs_rsv->lock);
5479
5480 if (num_bytes)
5481 trace_btrfs_space_reservation(fs_info, "delayed_refs_rsv",
5482 0, num_bytes, 1);
5483 if (to_free)
5484 space_info_add_old_bytes(fs_info, delayed_refs_rsv->space_info,
5485 to_free);
5486}
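
/*
 * Worked example with made-up numbers: delayed_refs_rsv has size = 4M and
 * reserved = 3M, so delta = 1M. Migrating num_bytes = 1.5M from src moves
 * 1M into the rsv and returns to_free = 0.5M to the space_info.
 */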
5487
5488/**
5489 * btrfs_delayed_refs_rsv_refill - refill based on our delayed refs usage.
5490 * @fs_info - the fs_info for our fs.
5491 * @flush - control how we can flush for this reservation.
5492 *
 5493 * This will refill the delayed refs block_rsv with up to one item's worth
 5494 * of space and will return -ENOSPC if we can't make the reservation.
5495 */
5496int btrfs_delayed_refs_rsv_refill(struct btrfs_fs_info *fs_info,
5497 enum btrfs_reserve_flush_enum flush)
5498{
5499 struct btrfs_block_rsv *block_rsv = &fs_info->delayed_refs_rsv;
5500 u64 limit = btrfs_calc_trans_metadata_size(fs_info, 1);
5501 u64 num_bytes = 0;
5502 int ret = -ENOSPC;
5503
5504 spin_lock(&block_rsv->lock);
5505 if (block_rsv->reserved < block_rsv->size) {
5506 num_bytes = block_rsv->size - block_rsv->reserved;
5507 num_bytes = min(num_bytes, limit);
5508 }
5509 spin_unlock(&block_rsv->lock);
5510
5511 if (!num_bytes)
5512 return 0;
5513
5514 ret = reserve_metadata_bytes(fs_info->extent_root, block_rsv,
5515 num_bytes, flush);
5516 if (ret)
5517 return ret;
 5518	block_rsv_add_bytes(block_rsv, num_bytes, false);
5519 trace_btrfs_space_reservation(fs_info, "delayed_refs_rsv",
5520 0, num_bytes, 1);
5521 return 0;
5522}
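
/*
 * Illustrative use (hypothetical caller): code that is about to generate
 * delayed refs can top the rsv up by one item's worth first:
 *
 *	ret = btrfs_delayed_refs_rsv_refill(fs_info, BTRFS_RESERVE_FLUSH_ALL);
 *	if (ret)
 *		return ret;	// couldn't reserve even one item's worth
 */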
5523
Josef Bacik957780e2016-05-17 13:30:55 -04005524/*
5525 * This is for space we already have accounted in space_info->bytes_may_use, so
5526 * basically when we're returning space from block_rsv's.
5527 */
5528static void space_info_add_old_bytes(struct btrfs_fs_info *fs_info,
5529 struct btrfs_space_info *space_info,
5530 u64 num_bytes)
5531{
5532 struct reserve_ticket *ticket;
5533 struct list_head *head;
5534 u64 used;
5535 enum btrfs_reserve_flush_enum flush = BTRFS_RESERVE_NO_FLUSH;
5536 bool check_overcommit = false;
5537
5538 spin_lock(&space_info->lock);
5539 head = &space_info->priority_tickets;
5540
5541 /*
5542 * If we are over our limit then we need to check and see if we can
5543 * overcommit, and if we can't then we just need to free up our space
5544 * and not satisfy any requests.
5545 */
Nikolay Borisov0eee8a42017-06-14 11:35:34 +03005546 used = btrfs_space_info_used(space_info, true);
Josef Bacik957780e2016-05-17 13:30:55 -04005547 if (used - num_bytes >= space_info->total_bytes)
5548 check_overcommit = true;
5549again:
5550 while (!list_empty(head) && num_bytes) {
5551 ticket = list_first_entry(head, struct reserve_ticket,
5552 list);
5553 /*
5554 * We use 0 bytes because this space is already reserved, so
5555 * adding the ticket space would be a double count.
5556 */
5557 if (check_overcommit &&
Jeff Mahoneyc1c49192017-05-17 11:38:36 -04005558 !can_overcommit(fs_info, space_info, 0, flush, false))
Josef Bacik957780e2016-05-17 13:30:55 -04005559 break;
5560 if (num_bytes >= ticket->bytes) {
5561 list_del_init(&ticket->list);
5562 num_bytes -= ticket->bytes;
5563 ticket->bytes = 0;
Wang Xiaoguangce129652016-09-02 10:58:46 +08005564 space_info->tickets_id++;
Josef Bacik957780e2016-05-17 13:30:55 -04005565 wake_up(&ticket->wait);
5566 } else {
5567 ticket->bytes -= num_bytes;
5568 num_bytes = 0;
5569 }
5570 }
5571
5572 if (num_bytes && head == &space_info->priority_tickets) {
5573 head = &space_info->tickets;
5574 flush = BTRFS_RESERVE_FLUSH_ALL;
5575 goto again;
5576 }
Qu Wenruo9f9b8e82018-10-24 20:24:01 +08005577 update_bytes_may_use(space_info, -num_bytes);
Josef Bacik957780e2016-05-17 13:30:55 -04005578 trace_btrfs_space_reservation(fs_info, "space_info",
5579 space_info->flags, num_bytes, 0);
5580 spin_unlock(&space_info->lock);
5581}
5582
5583/*
5584 * This is for newly allocated space that isn't accounted in
5585 * space_info->bytes_may_use yet. So if we allocate a chunk or unpin an extent
5586 * we use this helper.
5587 */
5588static void space_info_add_new_bytes(struct btrfs_fs_info *fs_info,
5589 struct btrfs_space_info *space_info,
5590 u64 num_bytes)
5591{
5592 struct reserve_ticket *ticket;
5593 struct list_head *head = &space_info->priority_tickets;
5594
5595again:
5596 while (!list_empty(head) && num_bytes) {
5597 ticket = list_first_entry(head, struct reserve_ticket,
5598 list);
5599 if (num_bytes >= ticket->bytes) {
5600 trace_btrfs_space_reservation(fs_info, "space_info",
5601 space_info->flags,
5602 ticket->bytes, 1);
5603 list_del_init(&ticket->list);
5604 num_bytes -= ticket->bytes;
Qu Wenruo9f9b8e82018-10-24 20:24:01 +08005605 update_bytes_may_use(space_info, ticket->bytes);
Josef Bacik957780e2016-05-17 13:30:55 -04005606 ticket->bytes = 0;
Wang Xiaoguangce129652016-09-02 10:58:46 +08005607 space_info->tickets_id++;
Josef Bacik957780e2016-05-17 13:30:55 -04005608 wake_up(&ticket->wait);
5609 } else {
5610 trace_btrfs_space_reservation(fs_info, "space_info",
5611 space_info->flags,
5612 num_bytes, 1);
Qu Wenruo9f9b8e82018-10-24 20:24:01 +08005613 update_bytes_may_use(space_info, num_bytes);
Josef Bacik957780e2016-05-17 13:30:55 -04005614 ticket->bytes -= num_bytes;
5615 num_bytes = 0;
5616 }
5617 }
5618
5619 if (num_bytes && head == &space_info->priority_tickets) {
5620 head = &space_info->tickets;
5621 goto again;
5622 }
5623}
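
/*
 * Contrast with space_info_add_old_bytes() above: "old" bytes were already
 * counted in bytes_may_use, so handing them to a ticket must not add them
 * again; "new" bytes (a fresh chunk, an unpinned extent) were not, so every
 * byte given to a ticket here is added to bytes_may_use.
 */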
5624
Josef Bacik69fe2d72017-10-19 14:15:57 -04005625static u64 block_rsv_release_bytes(struct btrfs_fs_info *fs_info,
Josef Bacik8c2a3ca2012-01-10 10:31:31 -05005626 struct btrfs_block_rsv *block_rsv,
Qu Wenruoff6bc372017-12-21 13:42:04 +08005627 struct btrfs_block_rsv *dest, u64 num_bytes,
5628 u64 *qgroup_to_release_ret)
Yan, Zhengf0486c62010-05-16 10:46:25 -04005629{
5630 struct btrfs_space_info *space_info = block_rsv->space_info;
Qu Wenruoff6bc372017-12-21 13:42:04 +08005631 u64 qgroup_to_release = 0;
Josef Bacik69fe2d72017-10-19 14:15:57 -04005632 u64 ret;
Yan, Zhengf0486c62010-05-16 10:46:25 -04005633
5634 spin_lock(&block_rsv->lock);
Qu Wenruoff6bc372017-12-21 13:42:04 +08005635 if (num_bytes == (u64)-1) {
Yan, Zhengf0486c62010-05-16 10:46:25 -04005636 num_bytes = block_rsv->size;
Qu Wenruoff6bc372017-12-21 13:42:04 +08005637 qgroup_to_release = block_rsv->qgroup_rsv_size;
5638 }
Yan, Zhengf0486c62010-05-16 10:46:25 -04005639 block_rsv->size -= num_bytes;
5640 if (block_rsv->reserved >= block_rsv->size) {
5641 num_bytes = block_rsv->reserved - block_rsv->size;
5642 block_rsv->reserved = block_rsv->size;
5643 block_rsv->full = 1;
5644 } else {
5645 num_bytes = 0;
5646 }
Qu Wenruoff6bc372017-12-21 13:42:04 +08005647 if (block_rsv->qgroup_rsv_reserved >= block_rsv->qgroup_rsv_size) {
5648 qgroup_to_release = block_rsv->qgroup_rsv_reserved -
5649 block_rsv->qgroup_rsv_size;
5650 block_rsv->qgroup_rsv_reserved = block_rsv->qgroup_rsv_size;
5651 } else {
5652 qgroup_to_release = 0;
5653 }
Yan, Zhengf0486c62010-05-16 10:46:25 -04005654 spin_unlock(&block_rsv->lock);
5655
Josef Bacik69fe2d72017-10-19 14:15:57 -04005656 ret = num_bytes;
Yan, Zhengf0486c62010-05-16 10:46:25 -04005657 if (num_bytes > 0) {
5658 if (dest) {
Josef Bacike9e22892011-01-24 21:43:19 +00005659 spin_lock(&dest->lock);
5660 if (!dest->full) {
5661 u64 bytes_to_add;
5662
5663 bytes_to_add = dest->size - dest->reserved;
5664 bytes_to_add = min(num_bytes, bytes_to_add);
5665 dest->reserved += bytes_to_add;
5666 if (dest->reserved >= dest->size)
5667 dest->full = 1;
5668 num_bytes -= bytes_to_add;
5669 }
5670 spin_unlock(&dest->lock);
5671 }
Josef Bacik957780e2016-05-17 13:30:55 -04005672 if (num_bytes)
5673 space_info_add_old_bytes(fs_info, space_info,
5674 num_bytes);
Yan, Zhengf0486c62010-05-16 10:46:25 -04005675 }
Qu Wenruoff6bc372017-12-21 13:42:04 +08005676 if (qgroup_to_release_ret)
5677 *qgroup_to_release_ret = qgroup_to_release;
Josef Bacik69fe2d72017-10-19 14:15:57 -04005678 return ret;
Yan, Zhengf0486c62010-05-16 10:46:25 -04005679}
5680
Josef Bacik25d609f2016-03-25 13:25:48 -04005681int btrfs_block_rsv_migrate(struct btrfs_block_rsv *src,
5682 struct btrfs_block_rsv *dst, u64 num_bytes,
Lu Fengqi3a584172018-08-04 21:10:55 +08005683 bool update_size)
Yan, Zhengf0486c62010-05-16 10:46:25 -04005684{
5685 int ret;
5686
5687 ret = block_rsv_use_bytes(src, num_bytes);
5688 if (ret)
5689 return ret;
5690
Josef Bacik25d609f2016-03-25 13:25:48 -04005691 block_rsv_add_bytes(dst, num_bytes, update_size);
Yan, Zhengf0486c62010-05-16 10:46:25 -04005692 return 0;
5693}
5694
Miao Xie66d8f3d2012-09-06 04:02:28 -06005695void btrfs_init_block_rsv(struct btrfs_block_rsv *rsv, unsigned short type)
Yan, Zhengf0486c62010-05-16 10:46:25 -04005696{
5697 memset(rsv, 0, sizeof(*rsv));
5698 spin_lock_init(&rsv->lock);
Miao Xie66d8f3d2012-09-06 04:02:28 -06005699 rsv->type = type;
Yan, Zhengf0486c62010-05-16 10:46:25 -04005700}
5701
Josef Bacik69fe2d72017-10-19 14:15:57 -04005702void btrfs_init_metadata_block_rsv(struct btrfs_fs_info *fs_info,
5703 struct btrfs_block_rsv *rsv,
5704 unsigned short type)
5705{
5706 btrfs_init_block_rsv(rsv, type);
5707 rsv->space_info = __find_space_info(fs_info,
5708 BTRFS_BLOCK_GROUP_METADATA);
5709}
5710
Jeff Mahoney2ff7e612016-06-22 18:54:24 -04005711struct btrfs_block_rsv *btrfs_alloc_block_rsv(struct btrfs_fs_info *fs_info,
Miao Xie66d8f3d2012-09-06 04:02:28 -06005712 unsigned short type)
Yan, Zhengf0486c62010-05-16 10:46:25 -04005713{
5714 struct btrfs_block_rsv *block_rsv;
Yan, Zhengf0486c62010-05-16 10:46:25 -04005715
5716 block_rsv = kmalloc(sizeof(*block_rsv), GFP_NOFS);
5717 if (!block_rsv)
5718 return NULL;
5719
Josef Bacik69fe2d72017-10-19 14:15:57 -04005720 btrfs_init_metadata_block_rsv(fs_info, block_rsv, type);
Yan, Zhengf0486c62010-05-16 10:46:25 -04005721 return block_rsv;
5722}
5723
Jeff Mahoney2ff7e612016-06-22 18:54:24 -04005724void btrfs_free_block_rsv(struct btrfs_fs_info *fs_info,
Yan, Zhengf0486c62010-05-16 10:46:25 -04005725 struct btrfs_block_rsv *rsv)
5726{
Josef Bacik2aaa6652012-08-29 14:27:18 -04005727 if (!rsv)
5728 return;
Jeff Mahoney2ff7e612016-06-22 18:54:24 -04005729 btrfs_block_rsv_release(fs_info, rsv, (u64)-1);
Josef Bacikdabdb642011-08-08 12:50:18 -04005730 kfree(rsv);
Yan, Zhengf0486c62010-05-16 10:46:25 -04005731}
5732
Miao Xie08e007d2012-10-16 11:33:38 +00005733int btrfs_block_rsv_add(struct btrfs_root *root,
5734 struct btrfs_block_rsv *block_rsv, u64 num_bytes,
5735 enum btrfs_reserve_flush_enum flush)
Yan, Zhengf0486c62010-05-16 10:46:25 -04005736{
5737 int ret;
5738
5739 if (num_bytes == 0)
5740 return 0;
Josef Bacik8bb8ab22010-10-15 16:52:49 -04005741
Miao Xie61b520a2011-11-10 20:45:05 -05005742 ret = reserve_metadata_bytes(root, block_rsv, num_bytes, flush);
Lu Fengqi5a2cb252018-08-04 21:10:56 +08005743 if (!ret)
Lu Fengqi3a584172018-08-04 21:10:55 +08005744 block_rsv_add_bytes(block_rsv, num_bytes, true);
Yan, Zhengf0486c62010-05-16 10:46:25 -04005745
Yan, Zhengf0486c62010-05-16 10:46:25 -04005746 return ret;
5747}
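
/*
 * Example usage (illustrative; nr_items is hypothetical): reserving space
 * for a few metadata items in a private rsv and releasing it afterwards:
 *
 *	u64 bytes = btrfs_calc_trans_metadata_size(fs_info, nr_items);
 *	ret = btrfs_block_rsv_add(root, rsv, bytes, BTRFS_RESERVE_FLUSH_ALL);
 *	if (ret)
 *		return ret;
 *	... consume the reservation ...
 *	btrfs_block_rsv_release(fs_info, rsv, bytes);
 */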
5748
Jeff Mahoney2ff7e612016-06-22 18:54:24 -04005749int btrfs_block_rsv_check(struct btrfs_block_rsv *block_rsv, int min_factor)
Yan, Zhengf0486c62010-05-16 10:46:25 -04005750{
5751 u64 num_bytes = 0;
Yan, Zhengf0486c62010-05-16 10:46:25 -04005752 int ret = -ENOSPC;
5753
5754 if (!block_rsv)
5755 return 0;
5756
5757 spin_lock(&block_rsv->lock);
Josef Bacik36ba0222011-10-18 12:15:48 -04005758 num_bytes = div_factor(block_rsv->size, min_factor);
5759 if (block_rsv->reserved >= num_bytes)
Yan, Zhengf0486c62010-05-16 10:46:25 -04005760 ret = 0;
Yan, Zhengf0486c62010-05-16 10:46:25 -04005761 spin_unlock(&block_rsv->lock);
Yan, Zhengf0486c62010-05-16 10:46:25 -04005762
Josef Bacik36ba0222011-10-18 12:15:48 -04005763 return ret;
5764}
5765
Miao Xie08e007d2012-10-16 11:33:38 +00005766int btrfs_block_rsv_refill(struct btrfs_root *root,
5767 struct btrfs_block_rsv *block_rsv, u64 min_reserved,
5768 enum btrfs_reserve_flush_enum flush)
Josef Bacik36ba0222011-10-18 12:15:48 -04005769{
5770 u64 num_bytes = 0;
5771 int ret = -ENOSPC;
5772
5773 if (!block_rsv)
5774 return 0;
5775
5776 spin_lock(&block_rsv->lock);
5777 num_bytes = min_reserved;
Josef Bacik13553e52011-08-08 13:33:21 -04005778 if (block_rsv->reserved >= num_bytes)
Yan, Zhengf0486c62010-05-16 10:46:25 -04005779 ret = 0;
Josef Bacik13553e52011-08-08 13:33:21 -04005780 else
Yan, Zhengf0486c62010-05-16 10:46:25 -04005781 num_bytes -= block_rsv->reserved;
Yan, Zhengf0486c62010-05-16 10:46:25 -04005782 spin_unlock(&block_rsv->lock);
Josef Bacik13553e52011-08-08 13:33:21 -04005783
Yan, Zhengf0486c62010-05-16 10:46:25 -04005784 if (!ret)
5785 return 0;
5786
Miao Xieaa38a712011-11-18 17:43:00 +08005787 ret = reserve_metadata_bytes(root, block_rsv, num_bytes, flush);
Josef Bacikdabdb642011-08-08 12:50:18 -04005788 if (!ret) {
Lu Fengqi3a584172018-08-04 21:10:55 +08005789 block_rsv_add_bytes(block_rsv, num_bytes, false);
Yan, Zhengf0486c62010-05-16 10:46:25 -04005790 return 0;
5791 }
5792
Josef Bacik13553e52011-08-08 13:33:21 -04005793 return ret;
Yan, Zhengf0486c62010-05-16 10:46:25 -04005794}
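
/*
 * Note the contrast with btrfs_block_rsv_add() above: add grows the rsv's
 * size along with its reserved bytes, while refill only tops reserved back
 * up to min_reserved (block_rsv_add_bytes() is called with update_size ==
 * false), leaving the target size untouched.
 */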
5795
Josef Bacik5df11362018-11-21 14:03:11 -05005796static void calc_refill_bytes(struct btrfs_block_rsv *block_rsv,
5797 u64 *metadata_bytes, u64 *qgroup_bytes)
5798{
5799 *metadata_bytes = 0;
5800 *qgroup_bytes = 0;
5801
5802 spin_lock(&block_rsv->lock);
5803 if (block_rsv->reserved < block_rsv->size)
5804 *metadata_bytes = block_rsv->size - block_rsv->reserved;
5805 if (block_rsv->qgroup_rsv_reserved < block_rsv->qgroup_rsv_size)
5806 *qgroup_bytes = block_rsv->qgroup_rsv_size -
5807 block_rsv->qgroup_rsv_reserved;
5808 spin_unlock(&block_rsv->lock);
5809}
5810
Josef Bacik69fe2d72017-10-19 14:15:57 -04005811/**
5812 * btrfs_inode_rsv_refill - refill the inode block rsv.
5813 * @inode - the inode we are refilling.
Andrea Gelmini52042d82018-11-28 12:05:13 +01005814 * @flush - the flushing restriction.
Josef Bacik69fe2d72017-10-19 14:15:57 -04005815 *
5816 * Essentially the same as btrfs_block_rsv_refill, except it uses the
5817 * block_rsv->size as the minimum size. We'll either refill the missing amount
Andrea Gelmini52042d82018-11-28 12:05:13 +01005818 * or return if we already have enough space. This will also handle the reserve
Josef Bacik69fe2d72017-10-19 14:15:57 -04005819 * tracepoint for the reserved amount.
5820 */
Qu Wenruo3f2dd7a2017-11-17 15:14:19 +08005821static int btrfs_inode_rsv_refill(struct btrfs_inode *inode,
5822 enum btrfs_reserve_flush_enum flush)
Josef Bacik69fe2d72017-10-19 14:15:57 -04005823{
5824 struct btrfs_root *root = inode->root;
5825 struct btrfs_block_rsv *block_rsv = &inode->block_rsv;
Josef Bacik5df11362018-11-21 14:03:11 -05005826 u64 num_bytes, last = 0;
5827 u64 qgroup_num_bytes;
Josef Bacik69fe2d72017-10-19 14:15:57 -04005828 int ret = -ENOSPC;
5829
Josef Bacik5df11362018-11-21 14:03:11 -05005830 calc_refill_bytes(block_rsv, &num_bytes, &qgroup_num_bytes);
Josef Bacik69fe2d72017-10-19 14:15:57 -04005831 if (num_bytes == 0)
5832 return 0;
5833
Josef Bacik5df11362018-11-21 14:03:11 -05005834 do {
5835 ret = btrfs_qgroup_reserve_meta_prealloc(root, qgroup_num_bytes,
5836 true);
5837 if (ret)
5838 return ret;
5839 ret = reserve_metadata_bytes(root, block_rsv, num_bytes, flush);
5840 if (ret) {
5841 btrfs_qgroup_free_meta_prealloc(root, qgroup_num_bytes);
5842 last = num_bytes;
5843 /*
5844 * If we are fragmented we can end up with a lot of
5845 * outstanding extents which will make our size be much
5846 * larger than our reserved amount.
5847 *
 5848 * The reservation computed here may therefore be much
 5849 * bigger than what is actually needed once delalloc
 5850 * flushing completes.
5851 *
5852 * If this is the case try and do the reserve again.
5853 */
5854 if (flush == BTRFS_RESERVE_FLUSH_ALL)
5855 calc_refill_bytes(block_rsv, &num_bytes,
5856 &qgroup_num_bytes);
5857 if (num_bytes == 0)
5858 return 0;
5859 }
5860 } while (ret && last != num_bytes);
5861
Josef Bacik69fe2d72017-10-19 14:15:57 -04005862 if (!ret) {
Lu Fengqi3a584172018-08-04 21:10:55 +08005863 block_rsv_add_bytes(block_rsv, num_bytes, false);
Josef Bacik69fe2d72017-10-19 14:15:57 -04005864 trace_btrfs_space_reservation(root->fs_info, "delalloc",
5865 btrfs_ino(inode), num_bytes, 1);
Qu Wenruoff6bc372017-12-21 13:42:04 +08005866
5867 /* Don't forget to increase qgroup_rsv_reserved */
5868 spin_lock(&block_rsv->lock);
5869 block_rsv->qgroup_rsv_reserved += qgroup_num_bytes;
5870 spin_unlock(&block_rsv->lock);
Josef Bacik5df11362018-11-21 14:03:11 -05005871 }
Josef Bacik69fe2d72017-10-19 14:15:57 -04005872 return ret;
5873}
5874
Josef Bacikba2c4d42018-12-03 10:20:33 -05005875static u64 __btrfs_block_rsv_release(struct btrfs_fs_info *fs_info,
5876 struct btrfs_block_rsv *block_rsv,
5877 u64 num_bytes, u64 *qgroup_to_release)
5878{
5879 struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
5880 struct btrfs_block_rsv *delayed_rsv = &fs_info->delayed_refs_rsv;
5881 struct btrfs_block_rsv *target = delayed_rsv;
5882
5883 if (target->full || target == block_rsv)
5884 target = global_rsv;
5885
5886 if (block_rsv->space_info != target->space_info)
5887 target = NULL;
5888
5889 return block_rsv_release_bytes(fs_info, block_rsv, target, num_bytes,
5890 qgroup_to_release);
5891}
5892
5893void btrfs_block_rsv_release(struct btrfs_fs_info *fs_info,
5894 struct btrfs_block_rsv *block_rsv,
5895 u64 num_bytes)
5896{
5897 __btrfs_block_rsv_release(fs_info, block_rsv, num_bytes, NULL);
5898}
5899
Josef Bacik69fe2d72017-10-19 14:15:57 -04005900/**
5901 * btrfs_inode_rsv_release - release any excessive reservation.
5902 * @inode - the inode we need to release from.
Qu Wenruo43b18592017-12-12 15:34:32 +08005903 * @qgroup_free - free or convert qgroup meta.
5904 * Unlike normal operation, qgroup meta reservation needs to know if we are
5905 * freeing qgroup reservation or just converting it into per-trans. Normally
5906 * @qgroup_free is true for error handling, and false for normal release.
Josef Bacik69fe2d72017-10-19 14:15:57 -04005907 *
5908 * This is the same as btrfs_block_rsv_release, except that it handles the
5909 * tracepoint for the reservation.
5910 */
Qu Wenruo43b18592017-12-12 15:34:32 +08005911static void btrfs_inode_rsv_release(struct btrfs_inode *inode, bool qgroup_free)
Josef Bacik69fe2d72017-10-19 14:15:57 -04005912{
5913 struct btrfs_fs_info *fs_info = inode->root->fs_info;
Josef Bacik69fe2d72017-10-19 14:15:57 -04005914 struct btrfs_block_rsv *block_rsv = &inode->block_rsv;
5915 u64 released = 0;
Qu Wenruoff6bc372017-12-21 13:42:04 +08005916 u64 qgroup_to_release = 0;
Josef Bacik69fe2d72017-10-19 14:15:57 -04005917
5918 /*
5919 * Since we statically set the block_rsv->size we just want to say we
5920 * are releasing 0 bytes, and then we'll just get the reservation over
 5921 * the size freed.
5922 */
Josef Bacikba2c4d42018-12-03 10:20:33 -05005923 released = __btrfs_block_rsv_release(fs_info, block_rsv, 0,
5924 &qgroup_to_release);
Josef Bacik69fe2d72017-10-19 14:15:57 -04005925 if (released > 0)
5926 trace_btrfs_space_reservation(fs_info, "delalloc",
5927 btrfs_ino(inode), released, 0);
Qu Wenruo43b18592017-12-12 15:34:32 +08005928 if (qgroup_free)
Qu Wenruoff6bc372017-12-21 13:42:04 +08005929 btrfs_qgroup_free_meta_prealloc(inode->root, qgroup_to_release);
Qu Wenruo43b18592017-12-12 15:34:32 +08005930 else
Qu Wenruoff6bc372017-12-21 13:42:04 +08005931 btrfs_qgroup_convert_reserved_meta(inode->root,
5932 qgroup_to_release);
Josef Bacik69fe2d72017-10-19 14:15:57 -04005933}
5934
Josef Bacikba2c4d42018-12-03 10:20:33 -05005935/**
5936 * btrfs_delayed_refs_rsv_release - release a ref head's reservation.
5937 * @fs_info - the fs_info for our fs.
5938 * @nr - the number of items to drop.
5939 *
5940 * This drops the delayed ref head's count from the delayed refs rsv and frees
5941 * any excess reservation we had.
5942 */
5943void btrfs_delayed_refs_rsv_release(struct btrfs_fs_info *fs_info, int nr)
Yan, Zhengf0486c62010-05-16 10:46:25 -04005944{
Josef Bacikba2c4d42018-12-03 10:20:33 -05005945 struct btrfs_block_rsv *block_rsv = &fs_info->delayed_refs_rsv;
Jeff Mahoney0b246af2016-06-22 18:54:23 -04005946 struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
Josef Bacikba2c4d42018-12-03 10:20:33 -05005947 u64 num_bytes = btrfs_calc_trans_metadata_size(fs_info, nr);
5948 u64 released = 0;
Jeff Mahoney0b246af2016-06-22 18:54:23 -04005949
Josef Bacikba2c4d42018-12-03 10:20:33 -05005950 released = block_rsv_release_bytes(fs_info, block_rsv, global_rsv,
5951 num_bytes, NULL);
5952 if (released)
5953 trace_btrfs_space_reservation(fs_info, "delayed_refs_rsv",
5954 0, released, 0);
Yan, Zhengf0486c62010-05-16 10:46:25 -04005955}
5956
Yan, Zheng8929ecfa2010-05-16 10:49:58 -04005957static void update_global_block_rsv(struct btrfs_fs_info *fs_info)
5958{
5959 struct btrfs_block_rsv *block_rsv = &fs_info->global_block_rsv;
5960 struct btrfs_space_info *sinfo = block_rsv->space_info;
5961 u64 num_bytes;
5962
Josef Bacikae2e4722016-05-27 12:58:35 -04005963 /*
5964 * The global block rsv is based on the size of the extent tree, the
5965 * checksum tree and the root tree. If the fs is empty we want to set
5966 * it to a minimal amount for safety.
5967 */
5968 num_bytes = btrfs_root_used(&fs_info->extent_root->root_item) +
5969 btrfs_root_used(&fs_info->csum_root->root_item) +
5970 btrfs_root_used(&fs_info->tree_root->root_item);
5971 num_bytes = max_t(u64, num_bytes, SZ_16M);
Yan, Zheng8929ecfa2010-05-16 10:49:58 -04005972
Yan, Zheng8929ecfa2010-05-16 10:49:58 -04005973 spin_lock(&sinfo->lock);
Stefan Behrens1f699d32012-04-27 12:41:46 -04005974 spin_lock(&block_rsv->lock);
Yan, Zheng8929ecfa2010-05-16 10:49:58 -04005975
Byongho Leeee221842015-12-15 01:42:10 +09005976 block_rsv->size = min_t(u64, num_bytes, SZ_512M);
Yan, Zheng8929ecfa2010-05-16 10:49:58 -04005977
Josef Bacikfb4b10e2016-01-11 17:28:38 -05005978 if (block_rsv->reserved < block_rsv->size) {
Liu Bo41361352017-02-13 15:42:21 -08005979 num_bytes = btrfs_space_info_used(sinfo, true);
Josef Bacikfb4b10e2016-01-11 17:28:38 -05005980 if (sinfo->total_bytes > num_bytes) {
5981 num_bytes = sinfo->total_bytes - num_bytes;
5982 num_bytes = min(num_bytes,
5983 block_rsv->size - block_rsv->reserved);
5984 block_rsv->reserved += num_bytes;
Qu Wenruo9f9b8e82018-10-24 20:24:01 +08005985 update_bytes_may_use(sinfo, num_bytes);
Josef Bacikfb4b10e2016-01-11 17:28:38 -05005986 trace_btrfs_space_reservation(fs_info, "space_info",
5987 sinfo->flags, num_bytes,
5988 1);
5989 }
5990 } else if (block_rsv->reserved > block_rsv->size) {
Yan, Zheng8929ecfa2010-05-16 10:49:58 -04005991 num_bytes = block_rsv->reserved - block_rsv->size;
Qu Wenruo9f9b8e82018-10-24 20:24:01 +08005992 update_bytes_may_use(sinfo, -num_bytes);
Josef Bacik8c2a3ca2012-01-10 10:31:31 -05005993 trace_btrfs_space_reservation(fs_info, "space_info",
Liu Bo2bcc0322012-03-29 09:57:44 -04005994 sinfo->flags, num_bytes, 0);
Yan, Zheng8929ecfa2010-05-16 10:49:58 -04005995 block_rsv->reserved = block_rsv->size;
Yan, Zheng8929ecfa2010-05-16 10:49:58 -04005996 }
David Sterba182608c2011-05-05 13:13:16 +02005997
Josef Bacikfb4b10e2016-01-11 17:28:38 -05005998 if (block_rsv->reserved == block_rsv->size)
5999 block_rsv->full = 1;
6000 else
6001 block_rsv->full = 0;
6002
Yan, Zheng8929ecfa2010-05-16 10:49:58 -04006003 spin_unlock(&block_rsv->lock);
Stefan Behrens1f699d32012-04-27 12:41:46 -04006004 spin_unlock(&sinfo->lock);
Yan, Zheng8929ecfa2010-05-16 10:49:58 -04006005}
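
/*
 * Sizing example with made-up numbers: if the extent, csum and tree roots
 * use 200M, 80M and 1M respectively, the global rsv size becomes
 * min(281M, 512M) = 281M; a nearly empty fs is instead clamped up to the
 * 16M floor.
 */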
6006
Yan, Zhengf0486c62010-05-16 10:46:25 -04006007static void init_global_block_rsv(struct btrfs_fs_info *fs_info)
6008{
6009 struct btrfs_space_info *space_info;
6010
6011 space_info = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_SYSTEM);
6012 fs_info->chunk_block_rsv.space_info = space_info;
Yan, Zhengf0486c62010-05-16 10:46:25 -04006013
6014 space_info = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA);
Yan, Zheng8929ecfa2010-05-16 10:49:58 -04006015 fs_info->global_block_rsv.space_info = space_info;
Yan, Zhengf0486c62010-05-16 10:46:25 -04006016 fs_info->trans_block_rsv.space_info = space_info;
6017 fs_info->empty_block_rsv.space_info = space_info;
Josef Bacik6d668dd2011-11-03 22:54:25 -04006018 fs_info->delayed_block_rsv.space_info = space_info;
Josef Bacikba2c4d42018-12-03 10:20:33 -05006019 fs_info->delayed_refs_rsv.space_info = space_info;
Yan, Zhengf0486c62010-05-16 10:46:25 -04006020
Josef Bacikba2c4d42018-12-03 10:20:33 -05006021 fs_info->extent_root->block_rsv = &fs_info->delayed_refs_rsv;
6022 fs_info->csum_root->block_rsv = &fs_info->delayed_refs_rsv;
Yan, Zheng8929ecfa2010-05-16 10:49:58 -04006023 fs_info->dev_root->block_rsv = &fs_info->global_block_rsv;
6024 fs_info->tree_root->block_rsv = &fs_info->global_block_rsv;
Stefan Behrens3a6cad92013-05-16 14:48:19 +00006025 if (fs_info->quota_root)
6026 fs_info->quota_root->block_rsv = &fs_info->global_block_rsv;
Yan, Zhengf0486c62010-05-16 10:46:25 -04006027 fs_info->chunk_root->block_rsv = &fs_info->chunk_block_rsv;
Yan, Zheng8929ecfa2010-05-16 10:49:58 -04006028
Yan, Zheng8929ecfa2010-05-16 10:49:58 -04006029 update_global_block_rsv(fs_info);
6030}
6031
6032static void release_global_block_rsv(struct btrfs_fs_info *fs_info)
6033{
Josef Bacik8c2a3ca2012-01-10 10:31:31 -05006034 block_rsv_release_bytes(fs_info, &fs_info->global_block_rsv, NULL,
Qu Wenruoff6bc372017-12-21 13:42:04 +08006035 (u64)-1, NULL);
Yan, Zheng8929ecfa2010-05-16 10:49:58 -04006036 WARN_ON(fs_info->trans_block_rsv.size > 0);
6037 WARN_ON(fs_info->trans_block_rsv.reserved > 0);
6038 WARN_ON(fs_info->chunk_block_rsv.size > 0);
6039 WARN_ON(fs_info->chunk_block_rsv.reserved > 0);
Josef Bacik6d668dd2011-11-03 22:54:25 -04006040 WARN_ON(fs_info->delayed_block_rsv.size > 0);
6041 WARN_ON(fs_info->delayed_block_rsv.reserved > 0);
Josef Bacikba2c4d42018-12-03 10:20:33 -05006042 WARN_ON(fs_info->delayed_refs_rsv.reserved > 0);
6043 WARN_ON(fs_info->delayed_refs_rsv.size > 0);
Josef Bacikfcb80c22011-05-03 10:40:22 -04006044}
6045
Josef Bacikba2c4d42018-12-03 10:20:33 -05006046/*
6047 * btrfs_update_delayed_refs_rsv - adjust the size of the delayed refs rsv
6048 * @trans - the trans that may have generated delayed refs
6049 *
 6050 * This is to be called any time we may have adjusted trans->delayed_ref_updates;
 6051 * it'll calculate the additional size and add it to the delayed_refs_rsv.
6052 */
6053void btrfs_update_delayed_refs_rsv(struct btrfs_trans_handle *trans)
6054{
6055 struct btrfs_fs_info *fs_info = trans->fs_info;
6056 struct btrfs_block_rsv *delayed_rsv = &fs_info->delayed_refs_rsv;
6057 u64 num_bytes;
6058
6059 if (!trans->delayed_ref_updates)
6060 return;
6061
6062 num_bytes = btrfs_calc_trans_metadata_size(fs_info,
6063 trans->delayed_ref_updates);
6064 spin_lock(&delayed_rsv->lock);
6065 delayed_rsv->size += num_bytes;
6066 delayed_rsv->full = 0;
6067 spin_unlock(&delayed_rsv->lock);
6068 trans->delayed_ref_updates = 0;
6069}
Yan, Zhenga22285a2010-05-16 10:48:46 -04006070
Filipe Manana4fbcdf62015-05-20 14:01:54 +01006071/*
6072 * To be called after all the new block groups attached to the transaction
6073 * handle have been created (btrfs_create_pending_block_groups()).
6074 */
6075void btrfs_trans_release_chunk_metadata(struct btrfs_trans_handle *trans)
6076{
Jeff Mahoney64b63582016-06-20 17:23:41 -04006077 struct btrfs_fs_info *fs_info = trans->fs_info;
Filipe Manana4fbcdf62015-05-20 14:01:54 +01006078
6079 if (!trans->chunk_bytes_reserved)
6080 return;
6081
6082 WARN_ON_ONCE(!list_empty(&trans->new_bgs));
6083
6084 block_rsv_release_bytes(fs_info, &fs_info->chunk_block_rsv, NULL,
Qu Wenruoff6bc372017-12-21 13:42:04 +08006085 trans->chunk_bytes_reserved, NULL);
Filipe Manana4fbcdf62015-05-20 14:01:54 +01006086 trans->chunk_bytes_reserved = 0;
6087}
6088
Miao Xied5c12072013-02-28 10:04:33 +00006089/*
6090 * btrfs_subvolume_reserve_metadata() - reserve space for subvolume operation
6091 * root: the root of the parent directory
6092 * rsv: block reservation
6093 * items: the number of items that we need do reservation
Lu Fengqia5b7f422018-08-09 09:46:04 +08006094 * use_global_rsv: allow fallback to the global block reservation
Miao Xied5c12072013-02-28 10:04:33 +00006095 *
6096 * This function is used to reserve the space for snapshot/subvolume
6097 * creation and deletion. Those operations are different with the
6098 * common file/directory operations, they change two fs/file trees
6099 * and root tree, the number of items that the qgroup reserves is
6100 * different with the free space reservation. So we can not use
Nicholas D Steeves01327612016-05-19 21:18:45 -04006101 * the space reservation mechanism in start_transaction().
Miao Xied5c12072013-02-28 10:04:33 +00006102 */
6103int btrfs_subvolume_reserve_metadata(struct btrfs_root *root,
Lu Fengqia5b7f422018-08-09 09:46:04 +08006104 struct btrfs_block_rsv *rsv, int items,
Jeff Mahoneyee3441b2013-07-09 16:37:21 -04006105 bool use_global_rsv)
Yan, Zhenga22285a2010-05-16 10:48:46 -04006106{
Lu Fengqia5b7f422018-08-09 09:46:04 +08006107 u64 qgroup_num_bytes = 0;
Miao Xied5c12072013-02-28 10:04:33 +00006108 u64 num_bytes;
6109 int ret;
Jeff Mahoney0b246af2016-06-22 18:54:23 -04006110 struct btrfs_fs_info *fs_info = root->fs_info;
6111 struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
Miao Xied5c12072013-02-28 10:04:33 +00006112
Jeff Mahoney0b246af2016-06-22 18:54:23 -04006113 if (test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags)) {
Miao Xied5c12072013-02-28 10:04:33 +00006114 /* One for parent inode, two for dir entries */
Lu Fengqia5b7f422018-08-09 09:46:04 +08006115 qgroup_num_bytes = 3 * fs_info->nodesize;
6116 ret = btrfs_qgroup_reserve_meta_prealloc(root,
6117 qgroup_num_bytes, true);
Miao Xied5c12072013-02-28 10:04:33 +00006118 if (ret)
6119 return ret;
Miao Xied5c12072013-02-28 10:04:33 +00006120 }
6121
Jeff Mahoney0b246af2016-06-22 18:54:23 -04006122 num_bytes = btrfs_calc_trans_metadata_size(fs_info, items);
6123 rsv->space_info = __find_space_info(fs_info,
Miao Xied5c12072013-02-28 10:04:33 +00006124 BTRFS_BLOCK_GROUP_METADATA);
6125 ret = btrfs_block_rsv_add(root, rsv, num_bytes,
6126 BTRFS_RESERVE_FLUSH_ALL);
Jeff Mahoneyee3441b2013-07-09 16:37:21 -04006127
6128 if (ret == -ENOSPC && use_global_rsv)
Lu Fengqi3a584172018-08-04 21:10:55 +08006129 ret = btrfs_block_rsv_migrate(global_rsv, rsv, num_bytes, true);
Jeff Mahoneyee3441b2013-07-09 16:37:21 -04006130
Lu Fengqia5b7f422018-08-09 09:46:04 +08006131 if (ret && qgroup_num_bytes)
6132 btrfs_qgroup_free_meta_prealloc(root, qgroup_num_bytes);
Miao Xied5c12072013-02-28 10:04:33 +00006133
6134 return ret;
6135}
6136
Jeff Mahoney2ff7e612016-06-22 18:54:24 -04006137void btrfs_subvolume_release_metadata(struct btrfs_fs_info *fs_info,
David Sterba7775c812017-02-10 19:18:18 +01006138 struct btrfs_block_rsv *rsv)
Miao Xied5c12072013-02-28 10:04:33 +00006139{
Jeff Mahoney2ff7e612016-06-22 18:54:24 -04006140 btrfs_block_rsv_release(fs_info, rsv, (u64)-1);
Yan, Zhenga22285a2010-05-16 10:48:46 -04006141}
6142
Josef Bacik69fe2d72017-10-19 14:15:57 -04006143static void btrfs_calculate_inode_block_rsv_size(struct btrfs_fs_info *fs_info,
6144 struct btrfs_inode *inode)
Josef Bacik9e0baf62011-07-15 15:16:44 +00006145{
Josef Bacik69fe2d72017-10-19 14:15:57 -04006146 struct btrfs_block_rsv *block_rsv = &inode->block_rsv;
6147 u64 reserve_size = 0;
Qu Wenruoff6bc372017-12-21 13:42:04 +08006148 u64 qgroup_rsv_size = 0;
Josef Bacik69fe2d72017-10-19 14:15:57 -04006149 u64 csum_leaves;
6150 unsigned outstanding_extents;
Josef Bacik9e0baf62011-07-15 15:16:44 +00006151
Josef Bacik69fe2d72017-10-19 14:15:57 -04006152 lockdep_assert_held(&inode->lock);
6153 outstanding_extents = inode->outstanding_extents;
6154 if (outstanding_extents)
6155 reserve_size = btrfs_calc_trans_metadata_size(fs_info,
6156 outstanding_extents + 1);
6157 csum_leaves = btrfs_csum_bytes_to_leaves(fs_info,
6158 inode->csum_bytes);
6159 reserve_size += btrfs_calc_trans_metadata_size(fs_info,
6160 csum_leaves);
Qu Wenruoff6bc372017-12-21 13:42:04 +08006161 /*
6162 * For qgroup rsv, the calculation is very simple:
6163 * account one nodesize for each outstanding extent
6164 *
6165 * This is overestimating in most cases.
6166 */
Nikolay Borisov139a5612019-03-18 17:45:20 +02006167 qgroup_rsv_size = (u64)outstanding_extents * fs_info->nodesize;
Josef Bacik9e0baf62011-07-15 15:16:44 +00006168
Josef Bacik69fe2d72017-10-19 14:15:57 -04006169 spin_lock(&block_rsv->lock);
6170 block_rsv->size = reserve_size;
Qu Wenruoff6bc372017-12-21 13:42:04 +08006171 block_rsv->qgroup_rsv_size = qgroup_rsv_size;
Josef Bacik69fe2d72017-10-19 14:15:57 -04006172 spin_unlock(&block_rsv->lock);
Yan, Zheng0ca1f7c2010-05-16 10:48:47 -04006173}
6174
Nikolay Borisov9f3db422017-02-20 13:50:41 +02006175int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes)
Yan, Zheng0ca1f7c2010-05-16 10:48:47 -04006176{
David Sterba3ffbd682018-06-29 10:56:42 +02006177 struct btrfs_fs_info *fs_info = inode->root->fs_info;
Josef Bacik69fe2d72017-10-19 14:15:57 -04006178 unsigned nr_extents;
Miao Xie08e007d2012-10-16 11:33:38 +00006179 enum btrfs_reserve_flush_enum flush = BTRFS_RESERVE_FLUSH_ALL;
Jan Schmidteb6b88d2013-01-27 23:26:00 -07006180 int ret = 0;
Josef Bacikc64c2bd2012-12-14 13:48:14 -05006181 bool delalloc_lock = true;
Yan, Zheng0ca1f7c2010-05-16 10:48:47 -04006182
Josef Bacikc64c2bd2012-12-14 13:48:14 -05006183 /* If we are a free space inode we need to not flush since we will be in
6184 * the middle of a transaction commit. We also don't need the delalloc
6185 * mutex since we won't race with anybody. We need this mostly to make
6186 * lockdep shut its filthy mouth.
Josef Bacikbac357dc2016-07-20 16:48:45 -07006187 *
6188 * If we have a transaction open (can happen if we call truncate_block
6189 * from truncate), then we need FLUSH_LIMIT so we don't deadlock.
Josef Bacikc64c2bd2012-12-14 13:48:14 -05006190 */
6191 if (btrfs_is_free_space_inode(inode)) {
Miao Xie08e007d2012-10-16 11:33:38 +00006192 flush = BTRFS_RESERVE_NO_FLUSH;
Josef Bacikc64c2bd2012-12-14 13:48:14 -05006193 delalloc_lock = false;
Nikolay Borisovda07d4a2018-01-12 16:21:05 +02006194 } else {
6195 if (current->journal_info)
6196 flush = BTRFS_RESERVE_FLUSH_LIMIT;
Josef Bacikc09544e2011-08-30 10:19:10 -04006197
Nikolay Borisovda07d4a2018-01-12 16:21:05 +02006198 if (btrfs_transaction_in_commit(fs_info))
6199 schedule_timeout(1);
6200 }
Yan, Zheng0ca1f7c2010-05-16 10:48:47 -04006201
Josef Bacikc64c2bd2012-12-14 13:48:14 -05006202 if (delalloc_lock)
Nikolay Borisov9f3db422017-02-20 13:50:41 +02006203 mutex_lock(&inode->delalloc_mutex);
Josef Bacikc64c2bd2012-12-14 13:48:14 -05006204
Jeff Mahoney0b246af2016-06-22 18:54:23 -04006205 num_bytes = ALIGN(num_bytes, fs_info->sectorsize);
Josef Bacik69fe2d72017-10-19 14:15:57 -04006206
6207 /* Add our new extents and calculate the new rsv size. */
Nikolay Borisov9f3db422017-02-20 13:50:41 +02006208 spin_lock(&inode->lock);
Josef Bacik69fe2d72017-10-19 14:15:57 -04006209 nr_extents = count_max_extents(num_bytes);
Josef Bacik8b62f872017-10-19 14:15:55 -04006210 btrfs_mod_outstanding_extents(inode, nr_extents);
Josef Bacik69fe2d72017-10-19 14:15:57 -04006211 inode->csum_bytes += num_bytes;
6212 btrfs_calculate_inode_block_rsv_size(fs_info, inode);
Nikolay Borisov9f3db422017-02-20 13:50:41 +02006213 spin_unlock(&inode->lock);
Josef Bacik57a45ced2011-01-25 16:30:38 -05006214
Josef Bacik69fe2d72017-10-19 14:15:57 -04006215 ret = btrfs_inode_rsv_refill(inode, flush);
Qu Wenruo43b18592017-12-12 15:34:32 +08006216 if (unlikely(ret))
Wang Shilong88e081bf2013-03-01 11:36:01 +00006217 goto out_fail;
Yan, Zheng0ca1f7c2010-05-16 10:48:47 -04006218
Josef Bacikc64c2bd2012-12-14 13:48:14 -05006219 if (delalloc_lock)
Nikolay Borisov9f3db422017-02-20 13:50:41 +02006220 mutex_unlock(&inode->delalloc_mutex);
Yan, Zheng0ca1f7c2010-05-16 10:48:47 -04006221 return 0;
Wang Shilong88e081bf2013-03-01 11:36:01 +00006222
6223out_fail:
Nikolay Borisov9f3db422017-02-20 13:50:41 +02006224 spin_lock(&inode->lock);
Josef Bacik8b62f872017-10-19 14:15:55 -04006225 nr_extents = count_max_extents(num_bytes);
6226 btrfs_mod_outstanding_extents(inode, -nr_extents);
Josef Bacik69fe2d72017-10-19 14:15:57 -04006227 inode->csum_bytes -= num_bytes;
6228 btrfs_calculate_inode_block_rsv_size(fs_info, inode);
Nikolay Borisov9f3db422017-02-20 13:50:41 +02006229 spin_unlock(&inode->lock);
Wang Shilong88e081bf2013-03-01 11:36:01 +00006230
Qu Wenruo43b18592017-12-12 15:34:32 +08006231 btrfs_inode_rsv_release(inode, true);
Wang Shilong88e081bf2013-03-01 11:36:01 +00006232 if (delalloc_lock)
Nikolay Borisov9f3db422017-02-20 13:50:41 +02006233 mutex_unlock(&inode->delalloc_mutex);
Wang Shilong88e081bf2013-03-01 11:36:01 +00006234 return ret;
Yan, Zheng0ca1f7c2010-05-16 10:48:47 -04006235}
6236
Josef Bacik7709cde2011-08-04 10:25:02 -04006237/**
6238 * btrfs_delalloc_release_metadata - release a metadata reservation for an inode
Josef Bacik8b62f872017-10-19 14:15:55 -04006239 * @inode: the inode to release the reservation for.
6240 * @num_bytes: the number of bytes we are releasing.
Qu Wenruo43b18592017-12-12 15:34:32 +08006241 * @qgroup_free: free qgroup reservation or convert it to per-trans reservation
Josef Bacik7709cde2011-08-04 10:25:02 -04006242 *
6243 * This will release the metadata reservation for an inode. This can be called
6244 * once we complete IO for a given set of bytes to release their metadata
Josef Bacik8b62f872017-10-19 14:15:55 -04006245 * reservations, or on error for the same reason.
Josef Bacik7709cde2011-08-04 10:25:02 -04006246 */
Qu Wenruo43b18592017-12-12 15:34:32 +08006247void btrfs_delalloc_release_metadata(struct btrfs_inode *inode, u64 num_bytes,
6248 bool qgroup_free)
Yan, Zheng0ca1f7c2010-05-16 10:48:47 -04006249{
David Sterba3ffbd682018-06-29 10:56:42 +02006250 struct btrfs_fs_info *fs_info = inode->root->fs_info;
Yan, Zheng0ca1f7c2010-05-16 10:48:47 -04006251
Jeff Mahoney0b246af2016-06-22 18:54:23 -04006252 num_bytes = ALIGN(num_bytes, fs_info->sectorsize);
Nikolay Borisov691fa052017-02-20 13:50:42 +02006253 spin_lock(&inode->lock);
Josef Bacik69fe2d72017-10-19 14:15:57 -04006254 inode->csum_bytes -= num_bytes;
6255 btrfs_calculate_inode_block_rsv_size(fs_info, inode);
Nikolay Borisov691fa052017-02-20 13:50:42 +02006256 spin_unlock(&inode->lock);
Yan, Zheng0ca1f7c2010-05-16 10:48:47 -04006257
Jeff Mahoney0b246af2016-06-22 18:54:23 -04006258 if (btrfs_is_testing(fs_info))
Josef Bacik6a3891c2015-03-16 17:38:52 -04006259 return;
6260
Qu Wenruo43b18592017-12-12 15:34:32 +08006261 btrfs_inode_rsv_release(inode, qgroup_free);
Yan, Zheng0ca1f7c2010-05-16 10:48:47 -04006262}
6263
Josef Bacik7709cde2011-08-04 10:25:02 -04006264/**
Josef Bacik8b62f872017-10-19 14:15:55 -04006265 * btrfs_delalloc_release_extents - release our outstanding_extents
6266 * @inode: the inode to balance the reservation for.
6267 * @num_bytes: the number of bytes we originally reserved with
Qu Wenruo43b18592017-12-12 15:34:32 +08006268 * @qgroup_free: do we need to free qgroup meta reservation or convert them.
Josef Bacik8b62f872017-10-19 14:15:55 -04006269 *
6270 * When we reserve space we increase outstanding_extents for the extents we may
6271 * add. Once we've set the range as delalloc or created our ordered extents we
6272 * have outstanding_extents to track the real usage, so we use this to free our
6273 * temporarily tracked outstanding_extents. This _must_ be used in conjunction
6274 * with btrfs_delalloc_reserve_metadata.
6275 */
Qu Wenruo43b18592017-12-12 15:34:32 +08006276void btrfs_delalloc_release_extents(struct btrfs_inode *inode, u64 num_bytes,
6277 bool qgroup_free)
Josef Bacik8b62f872017-10-19 14:15:55 -04006278{
David Sterba3ffbd682018-06-29 10:56:42 +02006279 struct btrfs_fs_info *fs_info = inode->root->fs_info;
Josef Bacik8b62f872017-10-19 14:15:55 -04006280 unsigned num_extents;
Josef Bacik8b62f872017-10-19 14:15:55 -04006281
6282 spin_lock(&inode->lock);
6283 num_extents = count_max_extents(num_bytes);
6284 btrfs_mod_outstanding_extents(inode, -num_extents);
Josef Bacik69fe2d72017-10-19 14:15:57 -04006285 btrfs_calculate_inode_block_rsv_size(fs_info, inode);
Josef Bacik8b62f872017-10-19 14:15:55 -04006286 spin_unlock(&inode->lock);
6287
Josef Bacik8b62f872017-10-19 14:15:55 -04006288 if (btrfs_is_testing(fs_info))
6289 return;
6290
Qu Wenruo43b18592017-12-12 15:34:32 +08006291 btrfs_inode_rsv_release(inode, qgroup_free);
Josef Bacik8b62f872017-10-19 14:15:55 -04006292}
6293
6294/**
Qu Wenruo7cf5b972015-09-08 17:25:55 +08006295 * btrfs_delalloc_reserve_space - reserve data and metadata space for
Qu Wenruo1ada3a62015-09-08 17:25:53 +08006296 * delalloc
6297 * @inode: inode we're writing to
6298 * @start: start range we are writing to
6299 * @len: how long the range we are writing to
Qu Wenruo364ecf32017-02-27 15:10:38 +08006300 * @reserved: mandatory parameter, record actually reserved qgroup ranges of
6301 * current reservation.
Qu Wenruo1ada3a62015-09-08 17:25:53 +08006302 *
Qu Wenruo1ada3a62015-09-08 17:25:53 +08006303 * This will do the following things
6304 *
6305 * o reserve space in data space info for num bytes
6306 * and reserve precious corresponding qgroup space
6307 * (Done in check_data_free_space)
6308 *
6309 * o reserve space for metadata space, based on the number of outstanding
6310 * extents and how much csums will be needed
6311 * also reserve metadata space in a per root over-reserve method.
6312 * o add to the inodes->delalloc_bytes
6313 * o add it to the fs_info's delalloc inodes list.
6314 * (Above 3 all done in delalloc_reserve_metadata)
6315 *
6316 * Return 0 for success
6317 * Return <0 for error(-ENOSPC or -EQUOT)
6318 */
Qu Wenruo364ecf32017-02-27 15:10:38 +08006319int btrfs_delalloc_reserve_space(struct inode *inode,
6320 struct extent_changeset **reserved, u64 start, u64 len)
Qu Wenruo1ada3a62015-09-08 17:25:53 +08006321{
6322 int ret;
6323
Qu Wenruo364ecf32017-02-27 15:10:38 +08006324 ret = btrfs_check_data_free_space(inode, reserved, start, len);
Qu Wenruo1ada3a62015-09-08 17:25:53 +08006325 if (ret < 0)
6326 return ret;
Nikolay Borisov9f3db422017-02-20 13:50:41 +02006327 ret = btrfs_delalloc_reserve_metadata(BTRFS_I(inode), len);
Qu Wenruo1ada3a62015-09-08 17:25:53 +08006328 if (ret < 0)
Qu Wenruobc42bda2017-02-27 15:10:39 +08006329 btrfs_free_reserved_data_space(inode, *reserved, start, len);
Qu Wenruo1ada3a62015-09-08 17:25:53 +08006330 return ret;
6331}
6332
6333/**
Qu Wenruo7cf5b972015-09-08 17:25:55 +08006334 * btrfs_delalloc_release_space - release data and metadata space for delalloc
Qu Wenruo1ada3a62015-09-08 17:25:53 +08006335 * @inode: inode we're releasing space for
6336 * @start: start position of the space already reserved
6337 * @len: the len of the space already reserved
Josef Bacik8b62f872017-10-19 14:15:55 -04006338 * @release_bytes: the len of the space we consumed or didn't use
Qu Wenruo1ada3a62015-09-08 17:25:53 +08006339 *
6340 * This function will release the metadata space that was not used and will
6341 * decrement ->delalloc_bytes and remove it from the fs_info delalloc_inodes
6342 * list if there are no delalloc bytes left.
6343 * Also it will handle the qgroup reserved space.
6344 */
Qu Wenruobc42bda2017-02-27 15:10:39 +08006345void btrfs_delalloc_release_space(struct inode *inode,
Josef Bacik8b62f872017-10-19 14:15:55 -04006346 struct extent_changeset *reserved,
Qu Wenruo43b18592017-12-12 15:34:32 +08006347 u64 start, u64 len, bool qgroup_free)
Qu Wenruo1ada3a62015-09-08 17:25:53 +08006348{
Qu Wenruo43b18592017-12-12 15:34:32 +08006349 btrfs_delalloc_release_metadata(BTRFS_I(inode), len, qgroup_free);
Qu Wenruobc42bda2017-02-27 15:10:39 +08006350 btrfs_free_reserved_data_space(inode, reserved, start, len);
Yan, Zheng0ca1f7c2010-05-16 10:48:47 -04006351}
6352
Josef Bacikce93ec52014-11-17 15:45:48 -05006353static int update_block_group(struct btrfs_trans_handle *trans,
David Sterba6b279402019-03-20 12:10:15 +01006354 u64 bytenr, u64 num_bytes, int alloc)
Chris Mason9078a3e2007-04-26 16:46:15 -04006355{
David Sterba6b279402019-03-20 12:10:15 +01006356 struct btrfs_fs_info *info = trans->fs_info;
Josef Bacik0af3d002010-06-21 14:48:16 -04006357 struct btrfs_block_group_cache *cache = NULL;
Chris Masondb945352007-10-15 16:15:53 -04006358 u64 total = num_bytes;
Chris Mason9078a3e2007-04-26 16:46:15 -04006359 u64 old_val;
Chris Masondb945352007-10-15 16:15:53 -04006360 u64 byte_in_group;
Josef Bacik0af3d002010-06-21 14:48:16 -04006361 int factor;
Josef Bacikba2c4d42018-12-03 10:20:33 -05006362 int ret = 0;
Chris Mason3e1ad542007-05-07 20:03:49 -04006363
Yan Zheng5d4f98a2009-06-10 10:45:14 -04006364 /* block accounting for super block */
Miao Xieeb73c1b2013-05-15 07:48:22 +00006365 spin_lock(&info->delalloc_root_lock);
David Sterba6c417612011-04-13 15:41:04 +02006366 old_val = btrfs_super_bytes_used(info->super_copy);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04006367 if (alloc)
6368 old_val += num_bytes;
6369 else
6370 old_val -= num_bytes;
David Sterba6c417612011-04-13 15:41:04 +02006371 btrfs_set_super_bytes_used(info->super_copy, old_val);
Miao Xieeb73c1b2013-05-15 07:48:22 +00006372 spin_unlock(&info->delalloc_root_lock);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04006373
Chris Masond3977122009-01-05 21:25:51 -05006374 while (total) {
Chris Masondb945352007-10-15 16:15:53 -04006375 cache = btrfs_lookup_block_group(info, bytenr);
Josef Bacikba2c4d42018-12-03 10:20:33 -05006376 if (!cache) {
6377 ret = -ENOENT;
6378 break;
6379 }
David Sterba46df06b2018-07-13 20:46:30 +02006380 factor = btrfs_bg_type_to_factor(cache->flags);
6381
Josef Bacik9d66e232010-08-25 16:54:15 -04006382 /*
6383 * If this block group has free space cache written out, we
6384 * need to make sure to load it if we are removing space. This
6385 * is because we need the unpinning stage to actually add the
6386 * space back to the block group, otherwise we will leak space.
6387 */
6388 if (!alloc && cache->cached == BTRFS_CACHE_NO)
Liu Bof6373bf2012-12-27 09:01:18 +00006389 cache_block_group(cache, 1);
Josef Bacik0af3d002010-06-21 14:48:16 -04006390
Chris Masondb945352007-10-15 16:15:53 -04006391 byte_in_group = bytenr - cache->key.objectid;
6392 WARN_ON(byte_in_group > cache->key.offset);
Chris Mason9078a3e2007-04-26 16:46:15 -04006393
Josef Bacik25179202008-10-29 14:49:05 -04006394 spin_lock(&cache->space_info->lock);
Chris Masonc286ac42008-07-22 23:06:41 -04006395 spin_lock(&cache->lock);
Josef Bacik0af3d002010-06-21 14:48:16 -04006396
Jeff Mahoney6202df62016-06-22 18:54:22 -04006397 if (btrfs_test_opt(info, SPACE_CACHE) &&
Josef Bacik0af3d002010-06-21 14:48:16 -04006398 cache->disk_cache_state < BTRFS_DC_CLEAR)
6399 cache->disk_cache_state = BTRFS_DC_CLEAR;
6400
Chris Mason9078a3e2007-04-26 16:46:15 -04006401 old_val = btrfs_block_group_used(&cache->item);
Chris Masondb945352007-10-15 16:15:53 -04006402 num_bytes = min(total, cache->key.offset - byte_in_group);
Chris Masoncd1bc462007-04-27 10:08:34 -04006403 if (alloc) {
Chris Masondb945352007-10-15 16:15:53 -04006404 old_val += num_bytes;
Yan Zheng11833d62009-09-11 16:11:19 -04006405 btrfs_set_block_group_used(&cache->item, old_val);
6406 cache->reserved -= num_bytes;
Yan Zheng11833d62009-09-11 16:11:19 -04006407 cache->space_info->bytes_reserved -= num_bytes;
Yan, Zhengb742bb822010-05-16 10:46:24 -04006408 cache->space_info->bytes_used += num_bytes;
6409 cache->space_info->disk_used += num_bytes * factor;
Chris Masonc286ac42008-07-22 23:06:41 -04006410 spin_unlock(&cache->lock);
Josef Bacik25179202008-10-29 14:49:05 -04006411 spin_unlock(&cache->space_info->lock);
Chris Masoncd1bc462007-04-27 10:08:34 -04006412 } else {
Chris Masondb945352007-10-15 16:15:53 -04006413 old_val -= num_bytes;
Filipe Mananaae0ab002014-11-26 15:28:52 +00006414 btrfs_set_block_group_used(&cache->item, old_val);
6415 cache->pinned += num_bytes;
Lu Fengqie2907c12018-10-24 20:24:02 +08006416 update_bytes_pinned(cache->space_info, num_bytes);
Filipe Mananaae0ab002014-11-26 15:28:52 +00006417 cache->space_info->bytes_used -= num_bytes;
6418 cache->space_info->disk_used -= num_bytes * factor;
6419 spin_unlock(&cache->lock);
6420 spin_unlock(&cache->space_info->lock);
Josef Bacik47ab2a62014-09-18 11:20:02 -04006421
Jeff Mahoney0b246af2016-06-22 18:54:23 -04006422 trace_btrfs_space_reservation(info, "pinned",
Josef Bacikc51e7bb2016-03-25 13:25:54 -04006423 cache->space_info->flags,
6424 num_bytes, 1);
Ethan Liendec59fa2018-07-13 16:50:42 +08006425 percpu_counter_add_batch(&cache->space_info->total_bytes_pinned,
6426 num_bytes,
6427 BTRFS_TOTAL_BYTES_PINNED_BATCH);
Filipe Mananaae0ab002014-11-26 15:28:52 +00006428 set_extent_dirty(info->pinned_extents,
6429 bytenr, bytenr + num_bytes - 1,
6430 GFP_NOFS | __GFP_NOFAIL);
Chris Masoncd1bc462007-04-27 10:08:34 -04006431 }
Chris Mason1bbc6212015-04-06 12:46:08 -07006432
6433 spin_lock(&trans->transaction->dirty_bgs_lock);
6434 if (list_empty(&cache->dirty_list)) {
6435 list_add_tail(&cache->dirty_list,
6436 &trans->transaction->dirty_bgs);
Bart Van Asschebece2e82018-06-20 10:03:31 -07006437 trans->transaction->num_dirty_bgs++;
Josef Bacikba2c4d42018-12-03 10:20:33 -05006438 trans->delayed_ref_updates++;
Chris Mason1bbc6212015-04-06 12:46:08 -07006439 btrfs_get_block_group(cache);
6440 }
6441 spin_unlock(&trans->transaction->dirty_bgs_lock);
6442
Filipe Manana036a9342015-11-23 15:25:16 +00006443 /*
6444 * No longer have used bytes in this block group, queue it for
6445 * deletion. We do this after adding the block group to the
6446 * dirty list to avoid races between cleaner kthread and space
6447 * cache writeout.
6448 */
Qu Wenruo031f24d2018-05-22 16:43:47 +08006449 if (!alloc && old_val == 0)
6450 btrfs_mark_bg_unused(cache);
Filipe Manana036a9342015-11-23 15:25:16 +00006451
Chris Masonfa9c0d792009-04-03 09:47:43 -04006452 btrfs_put_block_group(cache);
Chris Masondb945352007-10-15 16:15:53 -04006453 total -= num_bytes;
6454 bytenr += num_bytes;
Chris Mason9078a3e2007-04-26 16:46:15 -04006455 }
Josef Bacikba2c4d42018-12-03 10:20:33 -05006456
6457 /* Modified block groups are accounted for in the delayed_refs_rsv. */
6458 btrfs_update_delayed_refs_rsv(trans);
6459 return ret;
Chris Mason9078a3e2007-04-26 16:46:15 -04006460}
Chris Mason6324fbf2008-03-24 15:01:59 -04006461
Jeff Mahoney2ff7e612016-06-22 18:54:24 -04006462static u64 first_logical_byte(struct btrfs_fs_info *fs_info, u64 search_start)
Chris Masona061fc82008-05-07 11:43:44 -04006463{
Josef Bacik0f9dd462008-09-23 13:14:11 -04006464 struct btrfs_block_group_cache *cache;
Yan Zhengd2fb3432008-12-11 16:30:39 -05006465 u64 bytenr;
Josef Bacik0f9dd462008-09-23 13:14:11 -04006466
Jeff Mahoney0b246af2016-06-22 18:54:23 -04006467 spin_lock(&fs_info->block_group_cache_lock);
6468 bytenr = fs_info->first_logical_byte;
6469 spin_unlock(&fs_info->block_group_cache_lock);
Liu Boa1897fd2012-12-27 09:01:23 +00006470
6471 if (bytenr < (u64)-1)
6472 return bytenr;
6473
Jeff Mahoney0b246af2016-06-22 18:54:23 -04006474 cache = btrfs_lookup_first_block_group(fs_info, search_start);
Josef Bacik0f9dd462008-09-23 13:14:11 -04006475 if (!cache)
Chris Masona061fc82008-05-07 11:43:44 -04006476 return 0;
Josef Bacik0f9dd462008-09-23 13:14:11 -04006477
Yan Zhengd2fb3432008-12-11 16:30:39 -05006478 bytenr = cache->key.objectid;
Chris Masonfa9c0d792009-04-03 09:47:43 -04006479 btrfs_put_block_group(cache);
Yan Zhengd2fb3432008-12-11 16:30:39 -05006480
6481 return bytenr;
Chris Masona061fc82008-05-07 11:43:44 -04006482}
6483
Jeff Mahoney2ff7e612016-06-22 18:54:24 -04006484static int pin_down_extent(struct btrfs_fs_info *fs_info,
Yan, Zhengf0486c62010-05-16 10:46:25 -04006485 struct btrfs_block_group_cache *cache,
6486 u64 bytenr, u64 num_bytes, int reserved)
Yan324ae4d2007-11-16 14:57:08 -05006487{
Yan Zheng11833d62009-09-11 16:11:19 -04006488 spin_lock(&cache->space_info->lock);
6489 spin_lock(&cache->lock);
6490 cache->pinned += num_bytes;
Lu Fengqie2907c12018-10-24 20:24:02 +08006491 update_bytes_pinned(cache->space_info, num_bytes);
Yan Zheng11833d62009-09-11 16:11:19 -04006492 if (reserved) {
6493 cache->reserved -= num_bytes;
6494 cache->space_info->bytes_reserved -= num_bytes;
Yan324ae4d2007-11-16 14:57:08 -05006495 }
Yan Zheng11833d62009-09-11 16:11:19 -04006496 spin_unlock(&cache->lock);
6497 spin_unlock(&cache->space_info->lock);
6498
Jeff Mahoney0b246af2016-06-22 18:54:23 -04006499 trace_btrfs_space_reservation(fs_info, "pinned",
Josef Bacikc51e7bb2016-03-25 13:25:54 -04006500 cache->space_info->flags, num_bytes, 1);
Ethan Liendec59fa2018-07-13 16:50:42 +08006501 percpu_counter_add_batch(&cache->space_info->total_bytes_pinned,
6502 num_bytes, BTRFS_TOTAL_BYTES_PINNED_BATCH);
Jeff Mahoney0b246af2016-06-22 18:54:23 -04006503 set_extent_dirty(fs_info->pinned_extents, bytenr,
Yan, Zhengf0486c62010-05-16 10:46:25 -04006504 bytenr + num_bytes - 1, GFP_NOFS | __GFP_NOFAIL);
Yan324ae4d2007-11-16 14:57:08 -05006505 return 0;
6506}
Chris Mason9078a3e2007-04-26 16:46:15 -04006507
Yan, Zhengf0486c62010-05-16 10:46:25 -04006508/*
6509 * this function must be called within transaction
6510 */
Jeff Mahoney2ff7e612016-06-22 18:54:24 -04006511int btrfs_pin_extent(struct btrfs_fs_info *fs_info,
Yan, Zhengf0486c62010-05-16 10:46:25 -04006512 u64 bytenr, u64 num_bytes, int reserved)
Zheng Yane8569812008-09-26 10:05:48 -04006513{
Yan, Zhengf0486c62010-05-16 10:46:25 -04006514 struct btrfs_block_group_cache *cache;
6515
Jeff Mahoney0b246af2016-06-22 18:54:23 -04006516 cache = btrfs_lookup_block_group(fs_info, bytenr);
Jeff Mahoney79787ea2012-03-12 16:03:00 +01006517 BUG_ON(!cache); /* Logic error */
Yan, Zhengf0486c62010-05-16 10:46:25 -04006518
Jeff Mahoney2ff7e612016-06-22 18:54:24 -04006519 pin_down_extent(fs_info, cache, bytenr, num_bytes, reserved);
Yan, Zhengf0486c62010-05-16 10:46:25 -04006520
6521 btrfs_put_block_group(cache);
Yan Zheng11833d62009-09-11 16:11:19 -04006522 return 0;
6523}
Zheng Yane8569812008-09-26 10:05:48 -04006524
Yan, Zhengf0486c62010-05-16 10:46:25 -04006525/*
Chris Masone688b7252011-10-31 20:52:39 -04006526 * this function must be called within transaction
Yan, Zhengf0486c62010-05-16 10:46:25 -04006527 */
Jeff Mahoney2ff7e612016-06-22 18:54:24 -04006528int btrfs_pin_extent_for_log_replay(struct btrfs_fs_info *fs_info,
Chris Masone688b7252011-10-31 20:52:39 -04006529 u64 bytenr, u64 num_bytes)
Yan, Zhengf0486c62010-05-16 10:46:25 -04006530{
Chris Masone688b7252011-10-31 20:52:39 -04006531 struct btrfs_block_group_cache *cache;
Josef Bacikb50c6e22013-04-25 15:55:30 -04006532 int ret;
Chris Masone688b7252011-10-31 20:52:39 -04006533
Jeff Mahoney0b246af2016-06-22 18:54:23 -04006534 cache = btrfs_lookup_block_group(fs_info, bytenr);
Josef Bacikb50c6e22013-04-25 15:55:30 -04006535 if (!cache)
6536 return -EINVAL;
Chris Masone688b7252011-10-31 20:52:39 -04006537
6538 /*
6539 * pull in the free space cache (if any) so that our pin
6540 * removes the free space from the cache. We have load_only set
6541 * to one because the slow code to read in the free extents does check
6542 * the pinned extents.
6543 */
Liu Bof6373bf2012-12-27 09:01:18 +00006544 cache_block_group(cache, 1);
Chris Masone688b7252011-10-31 20:52:39 -04006545
Jeff Mahoney2ff7e612016-06-22 18:54:24 -04006546 pin_down_extent(fs_info, cache, bytenr, num_bytes, 0);
Chris Masone688b7252011-10-31 20:52:39 -04006547
6548 /* remove us from the free space cache (if we're there at all) */
Josef Bacikb50c6e22013-04-25 15:55:30 -04006549 ret = btrfs_remove_free_space(cache, bytenr, num_bytes);
Chris Masone688b7252011-10-31 20:52:39 -04006550 btrfs_put_block_group(cache);
Josef Bacikb50c6e22013-04-25 15:55:30 -04006551 return ret;
Chris Masone688b7252011-10-31 20:52:39 -04006552}
6553
Jeff Mahoney2ff7e612016-06-22 18:54:24 -04006554static int __exclude_logged_extent(struct btrfs_fs_info *fs_info,
6555 u64 start, u64 num_bytes)
Josef Bacik8c2a1a32013-06-06 13:19:32 -04006556{
6557 int ret;
6558 struct btrfs_block_group_cache *block_group;
6559 struct btrfs_caching_control *caching_ctl;
6560
Jeff Mahoney0b246af2016-06-22 18:54:23 -04006561 block_group = btrfs_lookup_block_group(fs_info, start);
Josef Bacik8c2a1a32013-06-06 13:19:32 -04006562 if (!block_group)
6563 return -EINVAL;
6564
6565 cache_block_group(block_group, 0);
6566 caching_ctl = get_caching_control(block_group);
6567
6568 if (!caching_ctl) {
6569 /* Logic error */
6570 BUG_ON(!block_group_cache_done(block_group));
6571 ret = btrfs_remove_free_space(block_group, start, num_bytes);
6572 } else {
6573 mutex_lock(&caching_ctl->mutex);
6574
6575 if (start >= caching_ctl->progress) {
Jeff Mahoney2ff7e612016-06-22 18:54:24 -04006576 ret = add_excluded_extent(fs_info, start, num_bytes);
Josef Bacik8c2a1a32013-06-06 13:19:32 -04006577 } else if (start + num_bytes <= caching_ctl->progress) {
6578 ret = btrfs_remove_free_space(block_group,
6579 start, num_bytes);
6580 } else {
6581 num_bytes = caching_ctl->progress - start;
6582 ret = btrfs_remove_free_space(block_group,
6583 start, num_bytes);
6584 if (ret)
6585 goto out_lock;
6586
6587 num_bytes = (start + num_bytes) -
6588 caching_ctl->progress;
6589 start = caching_ctl->progress;
Jeff Mahoney2ff7e612016-06-22 18:54:24 -04006590 ret = add_excluded_extent(fs_info, start, num_bytes);
Josef Bacik8c2a1a32013-06-06 13:19:32 -04006591 }
6592out_lock:
6593 mutex_unlock(&caching_ctl->mutex);
6594 put_caching_control(caching_ctl);
6595 }
6596 btrfs_put_block_group(block_group);
6597 return ret;
6598}
6599
David Sterbabcdc4282019-03-20 12:14:33 +01006600int btrfs_exclude_logged_extents(struct extent_buffer *eb)
Josef Bacik8c2a1a32013-06-06 13:19:32 -04006601{
David Sterbabcdc4282019-03-20 12:14:33 +01006602 struct btrfs_fs_info *fs_info = eb->fs_info;
Josef Bacik8c2a1a32013-06-06 13:19:32 -04006603 struct btrfs_file_extent_item *item;
6604 struct btrfs_key key;
6605 int found_type;
6606 int i;
Gu Jinxiangb89311e2018-05-22 17:46:51 +08006607 int ret = 0;
Josef Bacik8c2a1a32013-06-06 13:19:32 -04006608
Jeff Mahoney2ff7e612016-06-22 18:54:24 -04006609 if (!btrfs_fs_incompat(fs_info, MIXED_GROUPS))
Josef Bacik8c2a1a32013-06-06 13:19:32 -04006610 return 0;
6611
6612 for (i = 0; i < btrfs_header_nritems(eb); i++) {
6613 btrfs_item_key_to_cpu(eb, &key, i);
6614 if (key.type != BTRFS_EXTENT_DATA_KEY)
6615 continue;
6616 item = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
6617 found_type = btrfs_file_extent_type(eb, item);
6618 if (found_type == BTRFS_FILE_EXTENT_INLINE)
6619 continue;
6620 if (btrfs_file_extent_disk_bytenr(eb, item) == 0)
6621 continue;
6622 key.objectid = btrfs_file_extent_disk_bytenr(eb, item);
6623 key.offset = btrfs_file_extent_disk_num_bytes(eb, item);
Gu Jinxiangb89311e2018-05-22 17:46:51 +08006624 ret = __exclude_logged_extent(fs_info, key.objectid, key.offset);
6625 if (ret)
6626 break;
Josef Bacik8c2a1a32013-06-06 13:19:32 -04006627 }
6628
Gu Jinxiangb89311e2018-05-22 17:46:51 +08006629 return ret;
Josef Bacik8c2a1a32013-06-06 13:19:32 -04006630}
6631
Filipe Manana9cfa3e32016-04-26 15:39:32 +01006632static void
6633btrfs_inc_block_group_reservations(struct btrfs_block_group_cache *bg)
6634{
6635 atomic_inc(&bg->reservations);
6636}
6637
6638void btrfs_dec_block_group_reservations(struct btrfs_fs_info *fs_info,
6639 const u64 start)
6640{
6641 struct btrfs_block_group_cache *bg;
6642
6643 bg = btrfs_lookup_block_group(fs_info, start);
6644 ASSERT(bg);
6645 if (atomic_dec_and_test(&bg->reservations))
Peter Zijlstra46259562018-03-15 11:43:08 +01006646 wake_up_var(&bg->reservations);
Filipe Manana9cfa3e32016-04-26 15:39:32 +01006647 btrfs_put_block_group(bg);
6648}
6649
Filipe Manana9cfa3e32016-04-26 15:39:32 +01006650void btrfs_wait_block_group_reservations(struct btrfs_block_group_cache *bg)
6651{
6652 struct btrfs_space_info *space_info = bg->space_info;
6653
6654 ASSERT(bg->ro);
6655
6656 if (!(bg->flags & BTRFS_BLOCK_GROUP_DATA))
6657 return;
6658
6659 /*
6660 * Our block group is read only but before we set it to read only,
6661 * some task might have had allocated an extent from it already, but it
6662 * has not yet created a respective ordered extent (and added it to a
6663 * root's list of ordered extents).
6664 * Therefore wait for any task currently allocating extents, since the
6665 * block group's reservations counter is incremented while a read lock
6666 * on the groups' semaphore is held and decremented after releasing
6667 * the read access on that semaphore and creating the ordered extent.
6668 */
6669 down_write(&space_info->groups_sem);
6670 up_write(&space_info->groups_sem);
6671
Peter Zijlstra46259562018-03-15 11:43:08 +01006672 wait_var_event(&bg->reservations, !atomic_read(&bg->reservations));
Filipe Manana9cfa3e32016-04-26 15:39:32 +01006673}
6674
Josef Bacikfb25e912011-07-26 17:00:46 -04006675/**
Wang Xiaoguang4824f1f2016-07-25 15:51:39 +08006676 * btrfs_add_reserved_bytes - update the block_group and space info counters
Josef Bacikfb25e912011-07-26 17:00:46 -04006677 * @cache: The cache we are manipulating
Wang Xiaoguang18513092016-07-25 15:51:40 +08006678 * @ram_bytes: The number of bytes of file content, and will be same to
6679 * @num_bytes except for the compress path.
Josef Bacikfb25e912011-07-26 17:00:46 -04006680 * @num_bytes: The number of bytes in question
Miao Xiee570fd22014-06-19 10:42:50 +08006681 * @delalloc: The blocks are allocated for the delalloc write
Josef Bacikfb25e912011-07-26 17:00:46 -04006682 *
Xiaoguang Wang745699e2016-09-23 12:38:50 +08006683 * This is called by the allocator when it reserves space. If this is a
6684 * reservation and the block group has become read only we cannot make the
6685 * reservation and return -EAGAIN, otherwise this function always succeeds.
Yan, Zhengf0486c62010-05-16 10:46:25 -04006686 */
Wang Xiaoguang4824f1f2016-07-25 15:51:39 +08006687static int btrfs_add_reserved_bytes(struct btrfs_block_group_cache *cache,
Wang Xiaoguang18513092016-07-25 15:51:40 +08006688 u64 ram_bytes, u64 num_bytes, int delalloc)
Yan, Zhengf0486c62010-05-16 10:46:25 -04006689{
Josef Bacikfb25e912011-07-26 17:00:46 -04006690 struct btrfs_space_info *space_info = cache->space_info;
Yan, Zhengf0486c62010-05-16 10:46:25 -04006691 int ret = 0;
Jeff Mahoney79787ea2012-03-12 16:03:00 +01006692
Josef Bacikfb25e912011-07-26 17:00:46 -04006693 spin_lock(&space_info->lock);
6694 spin_lock(&cache->lock);
Wang Xiaoguang4824f1f2016-07-25 15:51:39 +08006695 if (cache->ro) {
6696 ret = -EAGAIN;
Josef Bacikfb25e912011-07-26 17:00:46 -04006697 } else {
Wang Xiaoguang4824f1f2016-07-25 15:51:39 +08006698 cache->reserved += num_bytes;
6699 space_info->bytes_reserved += num_bytes;
Qu Wenruo9f9b8e82018-10-24 20:24:01 +08006700 update_bytes_may_use(space_info, -ram_bytes);
Miao Xiee570fd22014-06-19 10:42:50 +08006701 if (delalloc)
Wang Xiaoguang4824f1f2016-07-25 15:51:39 +08006702 cache->delalloc_bytes += num_bytes;
Yan, Zhengf0486c62010-05-16 10:46:25 -04006703 }
Josef Bacikfb25e912011-07-26 17:00:46 -04006704 spin_unlock(&cache->lock);
6705 spin_unlock(&space_info->lock);
Yan, Zhengf0486c62010-05-16 10:46:25 -04006706 return ret;
6707}
6708
Wang Xiaoguang4824f1f2016-07-25 15:51:39 +08006709/**
6710 * btrfs_free_reserved_bytes - update the block_group and space info counters
6711 * @cache: The cache we are manipulating
6712 * @num_bytes: The number of bytes in question
6713 * @delalloc: The blocks are allocated for the delalloc write
6714 *
6715 * This is called by somebody who is freeing space that was never actually used
6716 * on disk. For example if you reserve some space for a new leaf in transaction
6717 * A and before transaction A commits you free that leaf, you call this with
6718 * reserve set to 0 in order to clear the reservation.
6719 */
6720
zhong jiang556f3ca2018-08-17 00:37:14 +08006721static void btrfs_free_reserved_bytes(struct btrfs_block_group_cache *cache,
6722 u64 num_bytes, int delalloc)
Wang Xiaoguang4824f1f2016-07-25 15:51:39 +08006723{
6724 struct btrfs_space_info *space_info = cache->space_info;
Wang Xiaoguang4824f1f2016-07-25 15:51:39 +08006725
6726 spin_lock(&space_info->lock);
6727 spin_lock(&cache->lock);
6728 if (cache->ro)
6729 space_info->bytes_readonly += num_bytes;
6730 cache->reserved -= num_bytes;
6731 space_info->bytes_reserved -= num_bytes;
Josef Bacik21a94f72018-10-11 15:54:03 -04006732 space_info->max_extent_size = 0;
Wang Xiaoguang4824f1f2016-07-25 15:51:39 +08006733
6734 if (delalloc)
6735 cache->delalloc_bytes -= num_bytes;
6736 spin_unlock(&cache->lock);
6737 spin_unlock(&space_info->lock);
Wang Xiaoguang4824f1f2016-07-25 15:51:39 +08006738}
David Sterba8b74c032017-02-10 19:20:56 +01006739void btrfs_prepare_extent_commit(struct btrfs_fs_info *fs_info)
Yan Zheng11833d62009-09-11 16:11:19 -04006740{
Yan Zheng11833d62009-09-11 16:11:19 -04006741 struct btrfs_caching_control *next;
6742 struct btrfs_caching_control *caching_ctl;
6743 struct btrfs_block_group_cache *cache;
6744
Josef Bacik9e351cc2014-03-13 15:42:13 -04006745 down_write(&fs_info->commit_root_sem);
Yan Zheng11833d62009-09-11 16:11:19 -04006746
6747 list_for_each_entry_safe(caching_ctl, next,
6748 &fs_info->caching_block_groups, list) {
6749 cache = caching_ctl->block_group;
6750 if (block_group_cache_done(cache)) {
6751 cache->last_byte_to_unpin = (u64)-1;
6752 list_del_init(&caching_ctl->list);
6753 put_caching_control(caching_ctl);
6754 } else {
6755 cache->last_byte_to_unpin = caching_ctl->progress;
6756 }
6757 }
6758
6759 if (fs_info->pinned_extents == &fs_info->freed_extents[0])
6760 fs_info->pinned_extents = &fs_info->freed_extents[1];
6761 else
6762 fs_info->pinned_extents = &fs_info->freed_extents[0];
6763
Josef Bacik9e351cc2014-03-13 15:42:13 -04006764 up_write(&fs_info->commit_root_sem);
Yan, Zheng8929ecfa2010-05-16 10:49:58 -04006765
6766 update_global_block_rsv(fs_info);
Yan Zheng11833d62009-09-11 16:11:19 -04006767}
6768
Josef Bacikc759c4e2015-10-02 15:25:10 -04006769/*
6770 * Returns the free cluster for the given space info and sets empty_cluster to
6771 * what it should be based on the mount options.
6772 */
6773static struct btrfs_free_cluster *
Jeff Mahoney2ff7e612016-06-22 18:54:24 -04006774fetch_cluster_info(struct btrfs_fs_info *fs_info,
6775 struct btrfs_space_info *space_info, u64 *empty_cluster)
Josef Bacikc759c4e2015-10-02 15:25:10 -04006776{
6777 struct btrfs_free_cluster *ret = NULL;
Josef Bacikc759c4e2015-10-02 15:25:10 -04006778
6779 *empty_cluster = 0;
6780 if (btrfs_mixed_space_info(space_info))
6781 return ret;
6782
Josef Bacikc759c4e2015-10-02 15:25:10 -04006783 if (space_info->flags & BTRFS_BLOCK_GROUP_METADATA) {
Jeff Mahoney0b246af2016-06-22 18:54:23 -04006784 ret = &fs_info->meta_alloc_cluster;
Hans van Kranenburg583b7232017-07-28 08:31:28 +02006785 if (btrfs_test_opt(fs_info, SSD))
6786 *empty_cluster = SZ_2M;
6787 else
Byongho Leeee221842015-12-15 01:42:10 +09006788 *empty_cluster = SZ_64K;
Hans van Kranenburg583b7232017-07-28 08:31:28 +02006789 } else if ((space_info->flags & BTRFS_BLOCK_GROUP_DATA) &&
6790 btrfs_test_opt(fs_info, SSD_SPREAD)) {
6791 *empty_cluster = SZ_2M;
Jeff Mahoney0b246af2016-06-22 18:54:23 -04006792 ret = &fs_info->data_alloc_cluster;
Josef Bacikc759c4e2015-10-02 15:25:10 -04006793 }
6794
6795 return ret;
6796}
6797
Jeff Mahoney2ff7e612016-06-22 18:54:24 -04006798static int unpin_extent_range(struct btrfs_fs_info *fs_info,
6799 u64 start, u64 end,
Filipe Manana678886b2014-12-07 21:31:47 +00006800 const bool return_free_space)
Yan Zheng11833d62009-09-11 16:11:19 -04006801{
Yan Zheng11833d62009-09-11 16:11:19 -04006802 struct btrfs_block_group_cache *cache = NULL;
Josef Bacik7b398f82012-10-22 15:52:28 -04006803 struct btrfs_space_info *space_info;
6804 struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
Josef Bacikc759c4e2015-10-02 15:25:10 -04006805 struct btrfs_free_cluster *cluster = NULL;
Yan Zheng11833d62009-09-11 16:11:19 -04006806 u64 len;
Josef Bacikc759c4e2015-10-02 15:25:10 -04006807 u64 total_unpinned = 0;
6808 u64 empty_cluster = 0;
Josef Bacik7b398f82012-10-22 15:52:28 -04006809 bool readonly;
Yan Zheng11833d62009-09-11 16:11:19 -04006810
6811 while (start <= end) {
Josef Bacik7b398f82012-10-22 15:52:28 -04006812 readonly = false;
Yan Zheng11833d62009-09-11 16:11:19 -04006813 if (!cache ||
6814 start >= cache->key.objectid + cache->key.offset) {
6815 if (cache)
6816 btrfs_put_block_group(cache);
Josef Bacikc759c4e2015-10-02 15:25:10 -04006817 total_unpinned = 0;
Yan Zheng11833d62009-09-11 16:11:19 -04006818 cache = btrfs_lookup_block_group(fs_info, start);
Jeff Mahoney79787ea2012-03-12 16:03:00 +01006819 BUG_ON(!cache); /* Logic error */
Josef Bacikc759c4e2015-10-02 15:25:10 -04006820
Jeff Mahoney2ff7e612016-06-22 18:54:24 -04006821 cluster = fetch_cluster_info(fs_info,
Josef Bacikc759c4e2015-10-02 15:25:10 -04006822 cache->space_info,
6823 &empty_cluster);
6824 empty_cluster <<= 1;
Yan Zheng11833d62009-09-11 16:11:19 -04006825 }
6826
6827 len = cache->key.objectid + cache->key.offset - start;
6828 len = min(len, end + 1 - start);
6829
6830 if (start < cache->last_byte_to_unpin) {
6831 len = min(len, cache->last_byte_to_unpin - start);
Filipe Manana678886b2014-12-07 21:31:47 +00006832 if (return_free_space)
6833 btrfs_add_free_space(cache, start, len);
Yan Zheng11833d62009-09-11 16:11:19 -04006834 }
Josef Bacik25179202008-10-29 14:49:05 -04006835
Yan, Zhengf0486c62010-05-16 10:46:25 -04006836 start += len;
Josef Bacikc759c4e2015-10-02 15:25:10 -04006837 total_unpinned += len;
Josef Bacik7b398f82012-10-22 15:52:28 -04006838 space_info = cache->space_info;
Yan, Zhengf0486c62010-05-16 10:46:25 -04006839
Josef Bacikc759c4e2015-10-02 15:25:10 -04006840 /*
6841 * If this space cluster has been marked as fragmented and we've
6842 * unpinned enough in this block group to potentially allow a
6843 * cluster to be created inside of it go ahead and clear the
6844 * fragmented check.
6845 */
6846 if (cluster && cluster->fragmented &&
6847 total_unpinned > empty_cluster) {
6848 spin_lock(&cluster->lock);
6849 cluster->fragmented = 0;
6850 spin_unlock(&cluster->lock);
6851 }
6852
Josef Bacik7b398f82012-10-22 15:52:28 -04006853 spin_lock(&space_info->lock);
Josef Bacik25179202008-10-29 14:49:05 -04006854 spin_lock(&cache->lock);
Yan Zheng11833d62009-09-11 16:11:19 -04006855 cache->pinned -= len;
Lu Fengqie2907c12018-10-24 20:24:02 +08006856 update_bytes_pinned(space_info, -len);
Josef Bacikc51e7bb2016-03-25 13:25:54 -04006857
6858 trace_btrfs_space_reservation(fs_info, "pinned",
6859 space_info->flags, len, 0);
Josef Bacik4f4db212015-09-29 11:40:47 -04006860 space_info->max_extent_size = 0;
Ethan Liendec59fa2018-07-13 16:50:42 +08006861 percpu_counter_add_batch(&space_info->total_bytes_pinned,
6862 -len, BTRFS_TOTAL_BYTES_PINNED_BATCH);
Josef Bacik7b398f82012-10-22 15:52:28 -04006863 if (cache->ro) {
6864 space_info->bytes_readonly += len;
6865 readonly = true;
6866 }
Josef Bacik25179202008-10-29 14:49:05 -04006867 spin_unlock(&cache->lock);
Josef Bacik957780e2016-05-17 13:30:55 -04006868 if (!readonly && return_free_space &&
6869 global_rsv->space_info == space_info) {
6870 u64 to_add = len;
Nikolay Borisov92ac58e2017-08-17 10:52:28 +03006871
Josef Bacik7b398f82012-10-22 15:52:28 -04006872 spin_lock(&global_rsv->lock);
6873 if (!global_rsv->full) {
Josef Bacik957780e2016-05-17 13:30:55 -04006874 to_add = min(len, global_rsv->size -
6875 global_rsv->reserved);
6876 global_rsv->reserved += to_add;
Qu Wenruo9f9b8e82018-10-24 20:24:01 +08006877 update_bytes_may_use(space_info, to_add);
Josef Bacik7b398f82012-10-22 15:52:28 -04006878 if (global_rsv->reserved >= global_rsv->size)
6879 global_rsv->full = 1;
Josef Bacik957780e2016-05-17 13:30:55 -04006880 trace_btrfs_space_reservation(fs_info,
6881 "space_info",
6882 space_info->flags,
6883 to_add, 1);
6884 len -= to_add;
Josef Bacik7b398f82012-10-22 15:52:28 -04006885 }
6886 spin_unlock(&global_rsv->lock);
Josef Bacik957780e2016-05-17 13:30:55 -04006887 /* Add to any tickets we may have */
6888 if (len)
6889 space_info_add_new_bytes(fs_info, space_info,
6890 len);
Josef Bacik7b398f82012-10-22 15:52:28 -04006891 }
6892 spin_unlock(&space_info->lock);
Yan Zheng11833d62009-09-11 16:11:19 -04006893 }
6894
6895 if (cache)
Chris Masonfa9c0d792009-04-03 09:47:43 -04006896 btrfs_put_block_group(cache);
Chris Masonccd467d2007-06-28 15:57:36 -04006897 return 0;
6898}
6899
Nikolay Borisov5ead2dd2018-03-15 16:00:26 +02006900int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans)
Chris Masona28ec192007-03-06 20:08:01 -05006901{
Nikolay Borisov5ead2dd2018-03-15 16:00:26 +02006902 struct btrfs_fs_info *fs_info = trans->fs_info;
Jeff Mahoneye33e17e2015-06-15 09:41:19 -04006903 struct btrfs_block_group_cache *block_group, *tmp;
6904 struct list_head *deleted_bgs;
Yan Zheng11833d62009-09-11 16:11:19 -04006905 struct extent_io_tree *unpin;
Chris Mason1a5bc162007-10-15 16:15:26 -04006906 u64 start;
6907 u64 end;
Chris Masona28ec192007-03-06 20:08:01 -05006908 int ret;
Chris Masona28ec192007-03-06 20:08:01 -05006909
Yan Zheng11833d62009-09-11 16:11:19 -04006910 if (fs_info->pinned_extents == &fs_info->freed_extents[0])
6911 unpin = &fs_info->freed_extents[1];
6912 else
6913 unpin = &fs_info->freed_extents[0];
6914
Jeff Mahoneye33e17e2015-06-15 09:41:19 -04006915 while (!trans->aborted) {
Filipe Manana0e6ec382018-11-16 13:04:44 +00006916 struct extent_state *cached_state = NULL;
6917
Filipe Mananad4b450c2015-01-29 19:18:25 +00006918 mutex_lock(&fs_info->unused_bg_unpin_mutex);
Chris Mason1a5bc162007-10-15 16:15:26 -04006919 ret = find_first_extent_bit(unpin, 0, &start, &end,
Filipe Manana0e6ec382018-11-16 13:04:44 +00006920 EXTENT_DIRTY, &cached_state);
Filipe Mananad4b450c2015-01-29 19:18:25 +00006921 if (ret) {
6922 mutex_unlock(&fs_info->unused_bg_unpin_mutex);
Chris Masona28ec192007-03-06 20:08:01 -05006923 break;
Filipe Mananad4b450c2015-01-29 19:18:25 +00006924 }
Liu Hui1f3c79a2009-01-05 15:57:51 -05006925
Jeff Mahoney0b246af2016-06-22 18:54:23 -04006926 if (btrfs_test_opt(fs_info, DISCARD))
Jeff Mahoney2ff7e612016-06-22 18:54:24 -04006927 ret = btrfs_discard_extent(fs_info, start,
Li Dongyang5378e602011-03-24 10:24:27 +00006928 end + 1 - start, NULL);
Liu Hui1f3c79a2009-01-05 15:57:51 -05006929
Filipe Manana0e6ec382018-11-16 13:04:44 +00006930 clear_extent_dirty(unpin, start, end, &cached_state);
Jeff Mahoney2ff7e612016-06-22 18:54:24 -04006931 unpin_extent_range(fs_info, start, end, true);
Filipe Mananad4b450c2015-01-29 19:18:25 +00006932 mutex_unlock(&fs_info->unused_bg_unpin_mutex);
Filipe Manana0e6ec382018-11-16 13:04:44 +00006933 free_extent_state(cached_state);
Chris Masonb9473432009-03-13 11:00:37 -04006934 cond_resched();
Chris Masona28ec192007-03-06 20:08:01 -05006935 }
Josef Bacik817d52f2009-07-13 21:29:25 -04006936
Jeff Mahoneye33e17e2015-06-15 09:41:19 -04006937 /*
6938 * Transaction is finished. We don't need the lock anymore. We
6939 * do need to clean up the block groups in case of a transaction
6940 * abort.
6941 */
6942 deleted_bgs = &trans->transaction->deleted_bgs;
6943 list_for_each_entry_safe(block_group, tmp, deleted_bgs, bg_list) {
6944 u64 trimmed = 0;
6945
6946 ret = -EROFS;
6947 if (!trans->aborted)
Jeff Mahoney2ff7e612016-06-22 18:54:24 -04006948 ret = btrfs_discard_extent(fs_info,
Jeff Mahoneye33e17e2015-06-15 09:41:19 -04006949 block_group->key.objectid,
6950 block_group->key.offset,
6951 &trimmed);
6952
6953 list_del_init(&block_group->bg_list);
6954 btrfs_put_block_group_trimming(block_group);
6955 btrfs_put_block_group(block_group);
6956
6957 if (ret) {
6958 const char *errstr = btrfs_decode_error(ret);
6959 btrfs_warn(fs_info,
David Sterba913e1532017-07-13 15:32:18 +02006960 "discard failed while removing blockgroup: errno=%d %s",
Jeff Mahoneye33e17e2015-06-15 09:41:19 -04006961 ret, errstr);
6962 }
6963 }
6964
Chris Masone20d96d2007-03-22 12:13:20 -04006965 return 0;
6966}
6967
Yan Zheng5d4f98a2009-06-10 10:45:14 -04006968static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
Nikolay Borisove72cb922018-06-20 15:48:57 +03006969 struct btrfs_delayed_ref_node *node, u64 parent,
6970 u64 root_objectid, u64 owner_objectid,
6971 u64 owner_offset, int refs_to_drop,
6972 struct btrfs_delayed_extent_op *extent_op)
Chris Masona28ec192007-03-06 20:08:01 -05006973{
Nikolay Borisove72cb922018-06-20 15:48:57 +03006974 struct btrfs_fs_info *info = trans->fs_info;
Chris Masone2fa7222007-03-12 16:22:34 -04006975 struct btrfs_key key;
Yan Zheng5d4f98a2009-06-10 10:45:14 -04006976 struct btrfs_path *path;
Chris Mason1261ec42007-03-20 20:35:03 -04006977 struct btrfs_root *extent_root = info->extent_root;
Chris Mason5f39d392007-10-15 16:14:19 -04006978 struct extent_buffer *leaf;
Yan Zheng5d4f98a2009-06-10 10:45:14 -04006979 struct btrfs_extent_item *ei;
6980 struct btrfs_extent_inline_ref *iref;
Chris Masona28ec192007-03-06 20:08:01 -05006981 int ret;
Yan Zheng5d4f98a2009-06-10 10:45:14 -04006982 int is_data;
Chris Mason952fcca2008-02-18 16:33:44 -05006983 int extent_slot = 0;
6984 int found_extent = 0;
6985 int num_to_del = 1;
Yan Zheng5d4f98a2009-06-10 10:45:14 -04006986 u32 item_size;
6987 u64 refs;
Qu Wenruoc682f9b2015-03-17 16:59:47 +08006988 u64 bytenr = node->bytenr;
6989 u64 num_bytes = node->num_bytes;
Josef Bacikfcebe452014-05-13 17:30:47 -07006990 int last_ref = 0;
Jeff Mahoney0b246af2016-06-22 18:54:23 -04006991 bool skinny_metadata = btrfs_fs_incompat(info, SKINNY_METADATA);
Chris Mason037e6392007-03-07 11:50:24 -05006992
Chris Mason5caf2a02007-04-02 11:20:42 -04006993 path = btrfs_alloc_path();
Chris Mason54aa1f42007-06-22 14:16:25 -04006994 if (!path)
6995 return -ENOMEM;
6996
David Sterbae4058b52015-11-27 16:31:35 +01006997 path->reada = READA_FORWARD;
Chris Masonb9473432009-03-13 11:00:37 -04006998 path->leave_spinning = 1;
Yan Zheng5d4f98a2009-06-10 10:45:14 -04006999
7000 is_data = owner_objectid >= BTRFS_FIRST_FREE_OBJECTID;
7001 BUG_ON(!is_data && refs_to_drop != 1);
7002
Josef Bacik3173a182013-03-07 14:22:04 -05007003 if (is_data)
Thomas Meyer897ca812017-10-07 16:02:21 +02007004 skinny_metadata = false;
Josef Bacik3173a182013-03-07 14:22:04 -05007005
Nikolay Borisovfbe48012018-06-20 15:48:52 +03007006 ret = lookup_extent_backref(trans, path, &iref, bytenr, num_bytes,
7007 parent, root_objectid, owner_objectid,
Yan Zheng5d4f98a2009-06-10 10:45:14 -04007008 owner_offset);
Chris Mason7bb86312007-12-11 09:25:06 -05007009 if (ret == 0) {
Chris Mason952fcca2008-02-18 16:33:44 -05007010 extent_slot = path->slots[0];
Yan Zheng5d4f98a2009-06-10 10:45:14 -04007011 while (extent_slot >= 0) {
7012 btrfs_item_key_to_cpu(path->nodes[0], &key,
Chris Mason952fcca2008-02-18 16:33:44 -05007013 extent_slot);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04007014 if (key.objectid != bytenr)
Chris Mason952fcca2008-02-18 16:33:44 -05007015 break;
Yan Zheng5d4f98a2009-06-10 10:45:14 -04007016 if (key.type == BTRFS_EXTENT_ITEM_KEY &&
7017 key.offset == num_bytes) {
Chris Mason952fcca2008-02-18 16:33:44 -05007018 found_extent = 1;
7019 break;
7020 }
Josef Bacik3173a182013-03-07 14:22:04 -05007021 if (key.type == BTRFS_METADATA_ITEM_KEY &&
7022 key.offset == owner_objectid) {
7023 found_extent = 1;
7024 break;
7025 }
Chris Mason952fcca2008-02-18 16:33:44 -05007026 if (path->slots[0] - extent_slot > 5)
7027 break;
Yan Zheng5d4f98a2009-06-10 10:45:14 -04007028 extent_slot--;
Chris Mason952fcca2008-02-18 16:33:44 -05007029 }
Nikolay Borisova79865c2018-06-21 09:45:00 +03007030
Zheng Yan31840ae2008-09-23 13:14:14 -04007031 if (!found_extent) {
Yan Zheng5d4f98a2009-06-10 10:45:14 -04007032 BUG_ON(iref);
Nikolay Borisov87cc7a82018-06-20 15:49:12 +03007033 ret = remove_extent_backref(trans, path, NULL,
Jeff Mahoney87bde3c2017-02-15 16:28:27 -05007034 refs_to_drop,
Josef Bacikfcebe452014-05-13 17:30:47 -07007035 is_data, &last_ref);
David Sterba005d6422012-09-18 07:52:32 -06007036 if (ret) {
Jeff Mahoney66642832016-06-10 18:19:25 -04007037 btrfs_abort_transaction(trans, ret);
David Sterba005d6422012-09-18 07:52:32 -06007038 goto out;
7039 }
David Sterbab3b4aa72011-04-21 01:20:15 +02007040 btrfs_release_path(path);
Chris Masonb9473432009-03-13 11:00:37 -04007041 path->leave_spinning = 1;
Yan Zheng5d4f98a2009-06-10 10:45:14 -04007042
7043 key.objectid = bytenr;
7044 key.type = BTRFS_EXTENT_ITEM_KEY;
7045 key.offset = num_bytes;
7046
Josef Bacik3173a182013-03-07 14:22:04 -05007047 if (!is_data && skinny_metadata) {
7048 key.type = BTRFS_METADATA_ITEM_KEY;
7049 key.offset = owner_objectid;
7050 }
7051
Zheng Yan31840ae2008-09-23 13:14:14 -04007052 ret = btrfs_search_slot(trans, extent_root,
7053 &key, path, -1, 1);
Josef Bacik3173a182013-03-07 14:22:04 -05007054 if (ret > 0 && skinny_metadata && path->slots[0]) {
7055 /*
7056 * Couldn't find our skinny metadata item,
7057 * see if we have ye olde extent item.
7058 */
7059 path->slots[0]--;
7060 btrfs_item_key_to_cpu(path->nodes[0], &key,
7061 path->slots[0]);
7062 if (key.objectid == bytenr &&
7063 key.type == BTRFS_EXTENT_ITEM_KEY &&
7064 key.offset == num_bytes)
7065 ret = 0;
7066 }
7067
7068 if (ret > 0 && skinny_metadata) {
7069 skinny_metadata = false;
Filipe Manana9ce49a02014-04-24 15:15:28 +01007070 key.objectid = bytenr;
Josef Bacik3173a182013-03-07 14:22:04 -05007071 key.type = BTRFS_EXTENT_ITEM_KEY;
7072 key.offset = num_bytes;
7073 btrfs_release_path(path);
7074 ret = btrfs_search_slot(trans, extent_root,
7075 &key, path, -1, 1);
7076 }
7077
Josef Bacikf3465ca2008-11-12 14:19:50 -05007078 if (ret) {
Jeff Mahoney5d163e02016-09-20 10:05:00 -04007079 btrfs_err(info,
7080 "umm, got %d back from search, was looking for %llu",
7081 ret, bytenr);
Josef Bacikb783e622011-07-13 15:03:50 +00007082 if (ret > 0)
David Sterbaa4f78752017-06-29 18:37:49 +02007083 btrfs_print_leaf(path->nodes[0]);
Josef Bacikf3465ca2008-11-12 14:19:50 -05007084 }
David Sterba005d6422012-09-18 07:52:32 -06007085 if (ret < 0) {
Jeff Mahoney66642832016-06-10 18:19:25 -04007086 btrfs_abort_transaction(trans, ret);
David Sterba005d6422012-09-18 07:52:32 -06007087 goto out;
7088 }
Zheng Yan31840ae2008-09-23 13:14:14 -04007089 extent_slot = path->slots[0];
7090 }
Dulshani Gunawardhanafae7f212013-10-31 10:30:08 +05307091 } else if (WARN_ON(ret == -ENOENT)) {
David Sterbaa4f78752017-06-29 18:37:49 +02007092 btrfs_print_leaf(path->nodes[0]);
Simon Kirbyc2cf52e2013-03-19 22:41:23 +00007093 btrfs_err(info,
7094 "unable to find ref byte nr %llu parent %llu root %llu owner %llu offset %llu",
Geert Uytterhoevenc1c9ff72013-08-20 13:20:07 +02007095 bytenr, parent, root_objectid, owner_objectid,
7096 owner_offset);
Jeff Mahoney66642832016-06-10 18:19:25 -04007097 btrfs_abort_transaction(trans, ret);
Josef Bacikc4a050b2014-03-14 16:36:53 -04007098 goto out;
Jeff Mahoney79787ea2012-03-12 16:03:00 +01007099 } else {
Jeff Mahoney66642832016-06-10 18:19:25 -04007100 btrfs_abort_transaction(trans, ret);
David Sterba005d6422012-09-18 07:52:32 -06007101 goto out;
Chris Mason7bb86312007-12-11 09:25:06 -05007102 }
Chris Mason5f39d392007-10-15 16:14:19 -04007103
7104 leaf = path->nodes[0];
Yan Zheng5d4f98a2009-06-10 10:45:14 -04007105 item_size = btrfs_item_size_nr(leaf, extent_slot);
David Sterba6d8ff4e2018-06-26 16:20:59 +02007106 if (unlikely(item_size < sizeof(*ei))) {
Nikolay Borisovba3c2b12018-06-26 16:57:36 +03007107 ret = -EINVAL;
7108 btrfs_print_v0_err(info);
7109 btrfs_abort_transaction(trans, ret);
7110 goto out;
7111 }
Chris Mason952fcca2008-02-18 16:33:44 -05007112 ei = btrfs_item_ptr(leaf, extent_slot,
Chris Mason123abc82007-03-14 14:14:43 -04007113 struct btrfs_extent_item);
Josef Bacik3173a182013-03-07 14:22:04 -05007114 if (owner_objectid < BTRFS_FIRST_FREE_OBJECTID &&
7115 key.type == BTRFS_EXTENT_ITEM_KEY) {
Yan Zheng5d4f98a2009-06-10 10:45:14 -04007116 struct btrfs_tree_block_info *bi;
7117 BUG_ON(item_size < sizeof(*ei) + sizeof(*bi));
7118 bi = (struct btrfs_tree_block_info *)(ei + 1);
7119 WARN_ON(owner_objectid != btrfs_tree_block_level(leaf, bi));
Chris Mason952fcca2008-02-18 16:33:44 -05007120 }
7121
Yan Zheng5d4f98a2009-06-10 10:45:14 -04007122 refs = btrfs_extent_refs(leaf, ei);
Josef Bacik32b02532013-04-24 16:38:50 -04007123 if (refs < refs_to_drop) {
Jeff Mahoney5d163e02016-09-20 10:05:00 -04007124 btrfs_err(info,
7125 "trying to drop %d refs but we only have %Lu for bytenr %Lu",
7126 refs_to_drop, refs, bytenr);
Josef Bacik32b02532013-04-24 16:38:50 -04007127 ret = -EINVAL;
Jeff Mahoney66642832016-06-10 18:19:25 -04007128 btrfs_abort_transaction(trans, ret);
Josef Bacik32b02532013-04-24 16:38:50 -04007129 goto out;
7130 }
Yan Zheng5d4f98a2009-06-10 10:45:14 -04007131 refs -= refs_to_drop;
7132
7133 if (refs > 0) {
7134 if (extent_op)
7135 __run_delayed_extent_op(extent_op, leaf, ei);
7136 /*
7137 * In the case of inline back ref, reference count will
7138 * be updated by remove_extent_backref
7139 */
7140 if (iref) {
7141 BUG_ON(!found_extent);
7142 } else {
7143 btrfs_set_extent_refs(leaf, ei, refs);
7144 btrfs_mark_buffer_dirty(leaf);
7145 }
7146 if (found_extent) {
Nikolay Borisov87cc7a82018-06-20 15:49:12 +03007147 ret = remove_extent_backref(trans, path, iref,
7148 refs_to_drop, is_data,
7149 &last_ref);
David Sterba005d6422012-09-18 07:52:32 -06007150 if (ret) {
Jeff Mahoney66642832016-06-10 18:19:25 -04007151 btrfs_abort_transaction(trans, ret);
David Sterba005d6422012-09-18 07:52:32 -06007152 goto out;
7153 }
Yan Zheng5d4f98a2009-06-10 10:45:14 -04007154 }
7155 } else {
Yan Zheng5d4f98a2009-06-10 10:45:14 -04007156 if (found_extent) {
7157 BUG_ON(is_data && refs_to_drop !=
Zhaolei9ed0dea2015-08-06 22:16:24 +08007158 extent_data_ref_count(path, iref));
Yan Zheng5d4f98a2009-06-10 10:45:14 -04007159 if (iref) {
7160 BUG_ON(path->slots[0] != extent_slot);
7161 } else {
7162 BUG_ON(path->slots[0] != extent_slot + 1);
7163 path->slots[0] = extent_slot;
7164 num_to_del = 2;
7165 }
Chris Mason78fae272007-03-25 11:35:08 -04007166 }
Chris Masonb9473432009-03-13 11:00:37 -04007167
Josef Bacikfcebe452014-05-13 17:30:47 -07007168 last_ref = 1;
Chris Mason952fcca2008-02-18 16:33:44 -05007169 ret = btrfs_del_items(trans, extent_root, path, path->slots[0],
7170 num_to_del);
David Sterba005d6422012-09-18 07:52:32 -06007171 if (ret) {
Jeff Mahoney66642832016-06-10 18:19:25 -04007172 btrfs_abort_transaction(trans, ret);
David Sterba005d6422012-09-18 07:52:32 -06007173 goto out;
7174 }
David Sterbab3b4aa72011-04-21 01:20:15 +02007175 btrfs_release_path(path);
David Woodhouse21af8042008-08-12 14:13:26 +01007176
Yan Zheng5d4f98a2009-06-10 10:45:14 -04007177 if (is_data) {
Jeff Mahoney5b4aace2016-06-21 10:40:19 -04007178 ret = btrfs_del_csums(trans, info, bytenr, num_bytes);
David Sterba005d6422012-09-18 07:52:32 -06007179 if (ret) {
Jeff Mahoney66642832016-06-10 18:19:25 -04007180 btrfs_abort_transaction(trans, ret);
David Sterba005d6422012-09-18 07:52:32 -06007181 goto out;
7182 }
Chris Mason459931e2008-12-10 09:10:46 -05007183 }
7184
Nikolay Borisove7355e52018-05-10 15:44:55 +03007185 ret = add_to_free_space_tree(trans, bytenr, num_bytes);
Omar Sandoval1e144fb2015-09-29 20:50:37 -07007186 if (ret) {
Jeff Mahoney66642832016-06-10 18:19:25 -04007187 btrfs_abort_transaction(trans, ret);
Omar Sandoval1e144fb2015-09-29 20:50:37 -07007188 goto out;
7189 }
7190
David Sterba6b279402019-03-20 12:10:15 +01007191 ret = update_block_group(trans, bytenr, num_bytes, 0);
David Sterba005d6422012-09-18 07:52:32 -06007192 if (ret) {
Jeff Mahoney66642832016-06-10 18:19:25 -04007193 btrfs_abort_transaction(trans, ret);
David Sterba005d6422012-09-18 07:52:32 -06007194 goto out;
7195 }
Chris Masona28ec192007-03-06 20:08:01 -05007196 }
Josef Bacikfcebe452014-05-13 17:30:47 -07007197 btrfs_release_path(path);
7198
Jeff Mahoney79787ea2012-03-12 16:03:00 +01007199out:
Chris Mason5caf2a02007-04-02 11:20:42 -04007200 btrfs_free_path(path);
Chris Masona28ec192007-03-06 20:08:01 -05007201 return ret;
7202}
7203
7204/*
Yan, Zhengf0486c62010-05-16 10:46:25 -04007205 * when we free an block, it is possible (and likely) that we free the last
Chris Mason1887be62009-03-13 10:11:24 -04007206 * delayed ref for that extent as well. This searches the delayed ref tree for
7207 * a given extent, and if there are no other delayed refs to be processed, it
7208 * removes it from the tree.
7209 */
7210static noinline int check_ref_cleanup(struct btrfs_trans_handle *trans,
Jeff Mahoney2ff7e612016-06-22 18:54:24 -04007211 u64 bytenr)
Chris Mason1887be62009-03-13 10:11:24 -04007212{
7213 struct btrfs_delayed_ref_head *head;
7214 struct btrfs_delayed_ref_root *delayed_refs;
Yan, Zhengf0486c62010-05-16 10:46:25 -04007215 int ret = 0;
Chris Mason1887be62009-03-13 10:11:24 -04007216
7217 delayed_refs = &trans->transaction->delayed_refs;
7218 spin_lock(&delayed_refs->lock);
Liu Bof72ad18e2017-01-30 12:24:37 -08007219 head = btrfs_find_delayed_ref_head(delayed_refs, bytenr);
Chris Mason1887be62009-03-13 10:11:24 -04007220 if (!head)
Chris Masoncf93da72014-01-29 07:02:40 -08007221 goto out_delayed_unlock;
Chris Mason1887be62009-03-13 10:11:24 -04007222
Josef Bacikd7df2c72014-01-23 09:21:38 -05007223 spin_lock(&head->lock);
Liu Boe3d03962018-08-23 03:51:50 +08007224 if (!RB_EMPTY_ROOT(&head->ref_tree.rb_root))
Chris Mason1887be62009-03-13 10:11:24 -04007225 goto out;
7226
Josef Bacikbedc66172018-12-03 10:20:31 -05007227 if (cleanup_extent_op(head) != NULL)
7228 goto out;
Yan Zheng5d4f98a2009-06-10 10:45:14 -04007229
Chris Mason1887be62009-03-13 10:11:24 -04007230 /*
7231 * waiting for the lock here would deadlock. If someone else has it
7232 * locked they are already in the process of dropping it anyway
7233 */
7234 if (!mutex_trylock(&head->mutex))
7235 goto out;
7236
Josef Bacikd7baffd2018-12-03 10:20:29 -05007237 btrfs_delete_ref_head(delayed_refs, head);
Josef Bacikd7df2c72014-01-23 09:21:38 -05007238 head->processing = 0;
Josef Bacikd7baffd2018-12-03 10:20:29 -05007239
Josef Bacikd7df2c72014-01-23 09:21:38 -05007240 spin_unlock(&head->lock);
Chris Mason1887be62009-03-13 10:11:24 -04007241 spin_unlock(&delayed_refs->lock);
7242
Yan, Zhengf0486c62010-05-16 10:46:25 -04007243 BUG_ON(head->extent_op);
7244 if (head->must_insert_reserved)
7245 ret = 1;
7246
Josef Bacik31890da2018-11-21 14:05:41 -05007247 btrfs_cleanup_ref_head_accounting(trans->fs_info, delayed_refs, head);
Yan, Zhengf0486c62010-05-16 10:46:25 -04007248 mutex_unlock(&head->mutex);
Josef Bacikd2788502017-09-29 15:43:57 -04007249 btrfs_put_delayed_ref_head(head);
Yan, Zhengf0486c62010-05-16 10:46:25 -04007250 return ret;
Chris Mason1887be62009-03-13 10:11:24 -04007251out:
Josef Bacikd7df2c72014-01-23 09:21:38 -05007252 spin_unlock(&head->lock);
Chris Masoncf93da72014-01-29 07:02:40 -08007253
7254out_delayed_unlock:
Chris Mason1887be62009-03-13 10:11:24 -04007255 spin_unlock(&delayed_refs->lock);
7256 return 0;
7257}
7258
Yan, Zhengf0486c62010-05-16 10:46:25 -04007259void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
7260 struct btrfs_root *root,
7261 struct extent_buffer *buf,
Jan Schmidt5581a512012-05-16 17:04:52 +02007262 u64 parent, int last_ref)
Yan, Zhengf0486c62010-05-16 10:46:25 -04007263{
Jeff Mahoney0b246af2016-06-22 18:54:23 -04007264 struct btrfs_fs_info *fs_info = root->fs_info;
Josef Bacikb150a4f2013-06-19 15:00:04 -04007265 int pin = 1;
Yan, Zhengf0486c62010-05-16 10:46:25 -04007266 int ret;
7267
7268 if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) {
Omar Sandovald7eae342017-06-06 16:45:31 -07007269 int old_ref_mod, new_ref_mod;
7270
Josef Bacikfd708b82017-09-29 15:43:50 -04007271 btrfs_ref_tree_mod(root, buf->start, buf->len, parent,
7272 root->root_key.objectid,
7273 btrfs_header_level(buf), 0,
7274 BTRFS_DROP_DELAYED_REF);
Nikolay Borisov44e1c472018-06-20 15:48:53 +03007275 ret = btrfs_add_delayed_tree_ref(trans, buf->start,
Omar Sandoval7be07912017-06-06 16:45:30 -07007276 buf->len, parent,
Jeff Mahoney0b246af2016-06-22 18:54:23 -04007277 root->root_key.objectid,
7278 btrfs_header_level(buf),
Omar Sandoval7be07912017-06-06 16:45:30 -07007279 BTRFS_DROP_DELAYED_REF, NULL,
Omar Sandovald7eae342017-06-06 16:45:31 -07007280 &old_ref_mod, &new_ref_mod);
Jeff Mahoney79787ea2012-03-12 16:03:00 +01007281 BUG_ON(ret); /* -ENOMEM */
Omar Sandovald7eae342017-06-06 16:45:31 -07007282 pin = old_ref_mod >= 0 && new_ref_mod < 0;
Yan, Zhengf0486c62010-05-16 10:46:25 -04007283 }
7284
Omar Sandoval0a16c7d2017-06-06 16:45:29 -07007285 if (last_ref && btrfs_header_generation(buf) == trans->transid) {
Filipe Manana62198722015-01-06 20:18:45 +00007286 struct btrfs_block_group_cache *cache;
7287
Yan, Zhengf0486c62010-05-16 10:46:25 -04007288 if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) {
Jeff Mahoney2ff7e612016-06-22 18:54:24 -04007289 ret = check_ref_cleanup(trans, buf->start);
Yan, Zhengf0486c62010-05-16 10:46:25 -04007290 if (!ret)
Josef Bacik37be25b2011-08-05 10:25:38 -04007291 goto out;
Yan, Zhengf0486c62010-05-16 10:46:25 -04007292 }
7293
Omar Sandoval4da8b762017-06-06 16:45:28 -07007294 pin = 0;
Jeff Mahoney0b246af2016-06-22 18:54:23 -04007295 cache = btrfs_lookup_block_group(fs_info, buf->start);
Filipe Manana62198722015-01-06 20:18:45 +00007296
Yan, Zhengf0486c62010-05-16 10:46:25 -04007297 if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) {
Jeff Mahoney2ff7e612016-06-22 18:54:24 -04007298 pin_down_extent(fs_info, cache, buf->start,
7299 buf->len, 1);
Filipe Manana62198722015-01-06 20:18:45 +00007300 btrfs_put_block_group(cache);
Josef Bacik37be25b2011-08-05 10:25:38 -04007301 goto out;
Yan, Zhengf0486c62010-05-16 10:46:25 -04007302 }
7303
7304 WARN_ON(test_bit(EXTENT_BUFFER_DIRTY, &buf->bflags));
7305
7306 btrfs_add_free_space(cache, buf->start, buf->len);
Wang Xiaoguang4824f1f2016-07-25 15:51:39 +08007307 btrfs_free_reserved_bytes(cache, buf->len, 0);
Filipe Manana62198722015-01-06 20:18:45 +00007308 btrfs_put_block_group(cache);
Jeff Mahoney71ff6432016-09-06 16:00:42 -04007309 trace_btrfs_reserved_extent_free(fs_info, buf->start, buf->len);
Yan, Zhengf0486c62010-05-16 10:46:25 -04007310 }
7311out:
Josef Bacikb150a4f2013-06-19 15:00:04 -04007312 if (pin)
Nikolay Borisov29d2b842018-03-30 12:58:47 +03007313 add_pinned_bytes(fs_info, buf->len, true,
Josef Bacikb150a4f2013-06-19 15:00:04 -04007314 root->root_key.objectid);
7315
Omar Sandoval0a16c7d2017-06-06 16:45:29 -07007316 if (last_ref) {
7317 /*
7318 * Deleting the buffer, clear the corrupt flag since it doesn't
7319 * matter anymore.
7320 */
7321 clear_bit(EXTENT_BUFFER_CORRUPT, &buf->bflags);
7322 }
Yan, Zhengf0486c62010-05-16 10:46:25 -04007323}
7324
Jeff Mahoney79787ea2012-03-12 16:03:00 +01007325/* Can return -ENOMEM */
Jeff Mahoney2ff7e612016-06-22 18:54:24 -04007326int btrfs_free_extent(struct btrfs_trans_handle *trans,
Josef Bacik84f7d8e2017-09-29 15:43:49 -04007327 struct btrfs_root *root,
Arne Jansen66d7e7f2011-09-12 15:26:38 +02007328 u64 bytenr, u64 num_bytes, u64 parent, u64 root_objectid,
Filipe Mananab06c4bf2015-10-23 07:52:54 +01007329 u64 owner, u64 offset)
Chris Mason925baed2008-06-25 16:01:30 -04007330{
Josef Bacik84f7d8e2017-09-29 15:43:49 -04007331 struct btrfs_fs_info *fs_info = root->fs_info;
Omar Sandovald7eae342017-06-06 16:45:31 -07007332 int old_ref_mod, new_ref_mod;
Chris Mason925baed2008-06-25 16:01:30 -04007333 int ret;
7334
Jeff Mahoneyf5ee5c92016-06-21 09:52:41 -04007335 if (btrfs_is_testing(fs_info))
Josef Bacikfaa2dbf2014-05-07 17:06:09 -04007336 return 0;
David Sterbafccb84c2014-09-29 23:53:21 +02007337
Josef Bacikfd708b82017-09-29 15:43:50 -04007338 if (root_objectid != BTRFS_TREE_LOG_OBJECTID)
7339 btrfs_ref_tree_mod(root, bytenr, num_bytes, parent,
7340 root_objectid, owner, offset,
7341 BTRFS_DROP_DELAYED_REF);
7342
Chris Mason56bec292009-03-13 10:10:06 -04007343 /*
7344 * tree log blocks never actually go into the extent allocation
7345 * tree, just update pinning info and exit early.
Chris Mason56bec292009-03-13 10:10:06 -04007346 */
Yan Zheng5d4f98a2009-06-10 10:45:14 -04007347 if (root_objectid == BTRFS_TREE_LOG_OBJECTID) {
7348 WARN_ON(owner >= BTRFS_FIRST_FREE_OBJECTID);
Chris Masonb9473432009-03-13 11:00:37 -04007349 /* unlocks the pinned mutex */
Jeff Mahoney2ff7e612016-06-22 18:54:24 -04007350 btrfs_pin_extent(fs_info, bytenr, num_bytes, 1);
Omar Sandovald7eae342017-06-06 16:45:31 -07007351 old_ref_mod = new_ref_mod = 0;
Chris Mason56bec292009-03-13 10:10:06 -04007352 ret = 0;
Yan Zheng5d4f98a2009-06-10 10:45:14 -04007353 } else if (owner < BTRFS_FIRST_FREE_OBJECTID) {
Nikolay Borisov44e1c472018-06-20 15:48:53 +03007354 ret = btrfs_add_delayed_tree_ref(trans, bytenr,
Omar Sandoval7be07912017-06-06 16:45:30 -07007355 num_bytes, parent,
7356 root_objectid, (int)owner,
7357 BTRFS_DROP_DELAYED_REF, NULL,
Omar Sandovald7eae342017-06-06 16:45:31 -07007358 &old_ref_mod, &new_ref_mod);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04007359 } else {
Nikolay Borisov88a979c2018-06-20 15:48:54 +03007360 ret = btrfs_add_delayed_data_ref(trans, bytenr,
Omar Sandoval7be07912017-06-06 16:45:30 -07007361 num_bytes, parent,
7362 root_objectid, owner, offset,
7363 0, BTRFS_DROP_DELAYED_REF,
Omar Sandovald7eae342017-06-06 16:45:31 -07007364 &old_ref_mod, &new_ref_mod);
Chris Mason56bec292009-03-13 10:10:06 -04007365 }
Omar Sandovald7eae342017-06-06 16:45:31 -07007366
Nikolay Borisov29d2b842018-03-30 12:58:47 +03007367 if (ret == 0 && old_ref_mod >= 0 && new_ref_mod < 0) {
7368 bool metadata = owner < BTRFS_FIRST_FREE_OBJECTID;
7369
7370 add_pinned_bytes(fs_info, num_bytes, metadata, root_objectid);
7371 }
Omar Sandovald7eae342017-06-06 16:45:31 -07007372
Chris Mason925baed2008-06-25 16:01:30 -04007373 return ret;
7374}
7375
Chris Masonfec577f2007-02-26 10:40:21 -05007376/*
Josef Bacik817d52f2009-07-13 21:29:25 -04007377 * when we wait for progress in the block group caching, its because
7378 * our allocation attempt failed at least once. So, we must sleep
7379 * and let some progress happen before we try again.
7380 *
7381 * This function will sleep at least once waiting for new free space to
7382 * show up, and then it will check the block group free space numbers
7383 * for our min num_bytes. Another option is to have it go ahead
7384 * and look in the rbtree for a free extent of a given size, but this
7385 * is a good start.
Josef Bacik36cce922013-08-05 11:15:21 -04007386 *
7387 * Callers of this must check if cache->cached == BTRFS_CACHE_ERROR before using
7388 * any of the information in this block group.
Josef Bacik817d52f2009-07-13 21:29:25 -04007389 */
Josef Bacik36cce922013-08-05 11:15:21 -04007390static noinline void
Josef Bacik817d52f2009-07-13 21:29:25 -04007391wait_block_group_cache_progress(struct btrfs_block_group_cache *cache,
7392 u64 num_bytes)
7393{
Yan Zheng11833d62009-09-11 16:11:19 -04007394 struct btrfs_caching_control *caching_ctl;
Josef Bacik817d52f2009-07-13 21:29:25 -04007395
Yan Zheng11833d62009-09-11 16:11:19 -04007396 caching_ctl = get_caching_control(cache);
7397 if (!caching_ctl)
Josef Bacik36cce922013-08-05 11:15:21 -04007398 return;
Josef Bacik817d52f2009-07-13 21:29:25 -04007399
Yan Zheng11833d62009-09-11 16:11:19 -04007400 wait_event(caching_ctl->wait, block_group_cache_done(cache) ||
Li Zefan34d52cb2011-03-29 13:46:06 +08007401 (cache->free_space_ctl->free_space >= num_bytes));
Yan Zheng11833d62009-09-11 16:11:19 -04007402
7403 put_caching_control(caching_ctl);
Yan Zheng11833d62009-09-11 16:11:19 -04007404}
7405
7406static noinline int
7407wait_block_group_cache_done(struct btrfs_block_group_cache *cache)
7408{
7409 struct btrfs_caching_control *caching_ctl;
Josef Bacik36cce922013-08-05 11:15:21 -04007410 int ret = 0;
Yan Zheng11833d62009-09-11 16:11:19 -04007411
7412 caching_ctl = get_caching_control(cache);
7413 if (!caching_ctl)
Josef Bacik36cce922013-08-05 11:15:21 -04007414 return (cache->cached == BTRFS_CACHE_ERROR) ? -EIO : 0;
Yan Zheng11833d62009-09-11 16:11:19 -04007415
7416 wait_event(caching_ctl->wait, block_group_cache_done(cache));
Josef Bacik36cce922013-08-05 11:15:21 -04007417 if (cache->cached == BTRFS_CACHE_ERROR)
7418 ret = -EIO;
Yan Zheng11833d62009-09-11 16:11:19 -04007419 put_caching_control(caching_ctl);
Josef Bacik36cce922013-08-05 11:15:21 -04007420 return ret;
Josef Bacik817d52f2009-07-13 21:29:25 -04007421}
7422
7423enum btrfs_loop_type {
Josef Bacik285ff5a2012-01-13 15:27:45 -05007424 LOOP_CACHING_NOWAIT = 0,
7425 LOOP_CACHING_WAIT = 1,
7426 LOOP_ALLOC_CHUNK = 2,
7427 LOOP_NO_EMPTY_SIZE = 3,
Josef Bacik817d52f2009-07-13 21:29:25 -04007428};
7429
Miao Xiee570fd22014-06-19 10:42:50 +08007430static inline void
7431btrfs_lock_block_group(struct btrfs_block_group_cache *cache,
7432 int delalloc)
7433{
7434 if (delalloc)
7435 down_read(&cache->data_rwsem);
7436}
7437
7438static inline void
7439btrfs_grab_block_group(struct btrfs_block_group_cache *cache,
7440 int delalloc)
7441{
7442 btrfs_get_block_group(cache);
7443 if (delalloc)
7444 down_read(&cache->data_rwsem);
7445}
7446
7447static struct btrfs_block_group_cache *
7448btrfs_lock_cluster(struct btrfs_block_group_cache *block_group,
7449 struct btrfs_free_cluster *cluster,
7450 int delalloc)
7451{
Sudip Mukherjee89771cc2016-02-16 13:32:47 +05307452 struct btrfs_block_group_cache *used_bg = NULL;
Geert Uytterhoeven6719afd2014-06-22 14:30:09 +02007453
Miao Xiee570fd22014-06-19 10:42:50 +08007454 spin_lock(&cluster->refill_lock);
Geert Uytterhoeven6719afd2014-06-22 14:30:09 +02007455 while (1) {
7456 used_bg = cluster->block_group;
7457 if (!used_bg)
7458 return NULL;
7459
7460 if (used_bg == block_group)
7461 return used_bg;
7462
7463 btrfs_get_block_group(used_bg);
7464
7465 if (!delalloc)
7466 return used_bg;
7467
7468 if (down_read_trylock(&used_bg->data_rwsem))
7469 return used_bg;
7470
7471 spin_unlock(&cluster->refill_lock);
7472
Liu Boe321f8a2016-11-30 16:11:04 -08007473 /* We should only have one-level nested. */
7474 down_read_nested(&used_bg->data_rwsem, SINGLE_DEPTH_NESTING);
Geert Uytterhoeven6719afd2014-06-22 14:30:09 +02007475
7476 spin_lock(&cluster->refill_lock);
Miao Xiee570fd22014-06-19 10:42:50 +08007477 if (used_bg == cluster->block_group)
7478 return used_bg;
7479
7480 up_read(&used_bg->data_rwsem);
7481 btrfs_put_block_group(used_bg);
7482 }
Miao Xiee570fd22014-06-19 10:42:50 +08007483}
7484
7485static inline void
7486btrfs_release_block_group(struct btrfs_block_group_cache *cache,
7487 int delalloc)
7488{
7489 if (delalloc)
7490 up_read(&cache->data_rwsem);
7491 btrfs_put_block_group(cache);
7492}
7493
Josef Bacik817d52f2009-07-13 21:29:25 -04007494/*
Qu Wenruob4bd7452018-11-02 09:39:47 +08007495 * Structure used internally for find_free_extent() function. Wraps needed
7496 * parameters.
7497 */
7498struct find_free_extent_ctl {
7499 /* Basic allocation info */
7500 u64 ram_bytes;
7501 u64 num_bytes;
7502 u64 empty_size;
7503 u64 flags;
7504 int delalloc;
7505
7506 /* Where to start the search inside the bg */
7507 u64 search_start;
7508
7509 /* For clustered allocation */
7510 u64 empty_cluster;
7511
7512 bool have_caching_bg;
7513 bool orig_have_caching_bg;
7514
7515 /* RAID index, converted from flags */
7516 int index;
7517
Qu Wenruoe72d79d2018-11-02 09:39:50 +08007518 /*
7519 * Current loop number, check find_free_extent_update_loop() for details
7520 */
Qu Wenruob4bd7452018-11-02 09:39:47 +08007521 int loop;
7522
7523 /*
7524 * Whether we're refilling a cluster, if true we need to re-search
7525 * current block group but don't try to refill the cluster again.
7526 */
7527 bool retry_clustered;
7528
7529 /*
7530 * Whether we're updating free space cache, if true we need to re-search
7531 * current block group but don't try updating free space cache again.
7532 */
7533 bool retry_unclustered;
7534
7535 /* If current block group is cached */
7536 int cached;
7537
7538 /* Max contiguous hole found */
7539 u64 max_extent_size;
7540
7541 /* Total free space from free space cache, not always contiguous */
7542 u64 total_free_space;
7543
7544 /* Found result */
7545 u64 found_offset;
7546};
7547
Qu Wenruod06e3bb2018-11-02 09:39:48 +08007548
7549/*
7550 * Helper function for find_free_extent().
7551 *
7552 * Return -ENOENT to inform caller that we need fallback to unclustered mode.
7553 * Return -EAGAIN to inform caller that we need to re-search this block group
7554 * Return >0 to inform caller that we find nothing
7555 * Return 0 means we have found a location and set ffe_ctl->found_offset.
7556 */
7557static int find_free_extent_clustered(struct btrfs_block_group_cache *bg,
7558 struct btrfs_free_cluster *last_ptr,
7559 struct find_free_extent_ctl *ffe_ctl,
7560 struct btrfs_block_group_cache **cluster_bg_ret)
7561{
7562 struct btrfs_fs_info *fs_info = bg->fs_info;
7563 struct btrfs_block_group_cache *cluster_bg;
7564 u64 aligned_cluster;
7565 u64 offset;
7566 int ret;
7567
7568 cluster_bg = btrfs_lock_cluster(bg, last_ptr, ffe_ctl->delalloc);
7569 if (!cluster_bg)
7570 goto refill_cluster;
7571 if (cluster_bg != bg && (cluster_bg->ro ||
7572 !block_group_bits(cluster_bg, ffe_ctl->flags)))
7573 goto release_cluster;
7574
7575 offset = btrfs_alloc_from_cluster(cluster_bg, last_ptr,
7576 ffe_ctl->num_bytes, cluster_bg->key.objectid,
7577 &ffe_ctl->max_extent_size);
7578 if (offset) {
7579 /* We have a block, we're done */
7580 spin_unlock(&last_ptr->refill_lock);
7581 trace_btrfs_reserve_extent_cluster(cluster_bg,
7582 ffe_ctl->search_start, ffe_ctl->num_bytes);
7583 *cluster_bg_ret = cluster_bg;
7584 ffe_ctl->found_offset = offset;
7585 return 0;
7586 }
7587 WARN_ON(last_ptr->block_group != cluster_bg);
7588
7589release_cluster:
7590 /*
7591 * If we are on LOOP_NO_EMPTY_SIZE, we can't set up a new clusters, so
7592 * lets just skip it and let the allocator find whatever block it can
7593 * find. If we reach this point, we will have tried the cluster
7594 * allocator plenty of times and not have found anything, so we are
7595 * likely way too fragmented for the clustering stuff to find anything.
7596 *
7597 * However, if the cluster is taken from the current block group,
7598 * release the cluster first, so that we stand a better chance of
7599 * succeeding in the unclustered allocation.
7600 */
7601 if (ffe_ctl->loop >= LOOP_NO_EMPTY_SIZE && cluster_bg != bg) {
7602 spin_unlock(&last_ptr->refill_lock);
7603 btrfs_release_block_group(cluster_bg, ffe_ctl->delalloc);
7604 return -ENOENT;
7605 }
7606
7607 /* This cluster didn't work out, free it and start over */
7608 btrfs_return_cluster_to_free_space(NULL, last_ptr);
7609
7610 if (cluster_bg != bg)
7611 btrfs_release_block_group(cluster_bg, ffe_ctl->delalloc);
7612
7613refill_cluster:
7614 if (ffe_ctl->loop >= LOOP_NO_EMPTY_SIZE) {
7615 spin_unlock(&last_ptr->refill_lock);
7616 return -ENOENT;
7617 }
7618
7619 aligned_cluster = max_t(u64,
7620 ffe_ctl->empty_cluster + ffe_ctl->empty_size,
7621 bg->full_stripe_len);
7622 ret = btrfs_find_space_cluster(fs_info, bg, last_ptr,
7623 ffe_ctl->search_start, ffe_ctl->num_bytes,
7624 aligned_cluster);
7625 if (ret == 0) {
7626 /* Now pull our allocation out of this cluster */
7627 offset = btrfs_alloc_from_cluster(bg, last_ptr,
7628 ffe_ctl->num_bytes, ffe_ctl->search_start,
7629 &ffe_ctl->max_extent_size);
7630 if (offset) {
7631 /* We found one, proceed */
7632 spin_unlock(&last_ptr->refill_lock);
7633 trace_btrfs_reserve_extent_cluster(bg,
7634 ffe_ctl->search_start,
7635 ffe_ctl->num_bytes);
7636 ffe_ctl->found_offset = offset;
7637 return 0;
7638 }
7639 } else if (!ffe_ctl->cached && ffe_ctl->loop > LOOP_CACHING_NOWAIT &&
7640 !ffe_ctl->retry_clustered) {
7641 spin_unlock(&last_ptr->refill_lock);
7642
7643 ffe_ctl->retry_clustered = true;
7644 wait_block_group_cache_progress(bg, ffe_ctl->num_bytes +
7645 ffe_ctl->empty_cluster + ffe_ctl->empty_size);
7646 return -EAGAIN;
7647 }
7648 /*
7649 * At this point we either didn't find a cluster or we weren't able to
7650 * allocate a block from our cluster. Free the cluster we've been
7651 * trying to use, and go to the next block group.
7652 */
7653 btrfs_return_cluster_to_free_space(NULL, last_ptr);
7654 spin_unlock(&last_ptr->refill_lock);
7655 return 1;
7656}
7657
Qu Wenruob4bd7452018-11-02 09:39:47 +08007658/*
Qu Wenruoe1a41842018-11-02 09:39:49 +08007659 * Return >0 to inform caller that we find nothing
7660 * Return 0 when we found an free extent and set ffe_ctrl->found_offset
7661 * Return -EAGAIN to inform caller that we need to re-search this block group
7662 */
7663static int find_free_extent_unclustered(struct btrfs_block_group_cache *bg,
7664 struct btrfs_free_cluster *last_ptr,
7665 struct find_free_extent_ctl *ffe_ctl)
7666{
7667 u64 offset;
7668
7669 /*
7670 * We are doing an unclustered allocation, set the fragmented flag so
7671 * we don't bother trying to setup a cluster again until we get more
7672 * space.
7673 */
7674 if (unlikely(last_ptr)) {
7675 spin_lock(&last_ptr->lock);
7676 last_ptr->fragmented = 1;
7677 spin_unlock(&last_ptr->lock);
7678 }
7679 if (ffe_ctl->cached) {
7680 struct btrfs_free_space_ctl *free_space_ctl;
7681
7682 free_space_ctl = bg->free_space_ctl;
7683 spin_lock(&free_space_ctl->tree_lock);
7684 if (free_space_ctl->free_space <
7685 ffe_ctl->num_bytes + ffe_ctl->empty_cluster +
7686 ffe_ctl->empty_size) {
7687 ffe_ctl->total_free_space = max_t(u64,
7688 ffe_ctl->total_free_space,
7689 free_space_ctl->free_space);
7690 spin_unlock(&free_space_ctl->tree_lock);
7691 return 1;
7692 }
7693 spin_unlock(&free_space_ctl->tree_lock);
7694 }
7695
7696 offset = btrfs_find_space_for_alloc(bg, ffe_ctl->search_start,
7697 ffe_ctl->num_bytes, ffe_ctl->empty_size,
7698 &ffe_ctl->max_extent_size);
7699
7700 /*
7701 * If we didn't find a chunk, and we haven't failed on this block group
7702 * before, and this block group is in the middle of caching and we are
7703 * ok with waiting, then go ahead and wait for progress to be made, and
7704 * set @retry_unclustered to true.
7705 *
7706 * If @retry_unclustered is true then we've already waited on this
7707 * block group once and should move on to the next block group.
7708 */
7709 if (!offset && !ffe_ctl->retry_unclustered && !ffe_ctl->cached &&
7710 ffe_ctl->loop > LOOP_CACHING_NOWAIT) {
7711 wait_block_group_cache_progress(bg, ffe_ctl->num_bytes +
7712 ffe_ctl->empty_size);
7713 ffe_ctl->retry_unclustered = true;
7714 return -EAGAIN;
7715 } else if (!offset) {
7716 return 1;
7717 }
7718 ffe_ctl->found_offset = offset;
7719 return 0;
7720}
7721
7722/*
Qu Wenruoe72d79d2018-11-02 09:39:50 +08007723 * Return >0 means caller needs to re-search for free extent
7724 * Return 0 means we have the needed free extent.
7725 * Return <0 means we failed to locate any free extent.
7726 */
7727static int find_free_extent_update_loop(struct btrfs_fs_info *fs_info,
7728 struct btrfs_free_cluster *last_ptr,
7729 struct btrfs_key *ins,
7730 struct find_free_extent_ctl *ffe_ctl,
7731 int full_search, bool use_cluster)
7732{
7733 struct btrfs_root *root = fs_info->extent_root;
7734 int ret;
7735
7736 if ((ffe_ctl->loop == LOOP_CACHING_NOWAIT) &&
7737 ffe_ctl->have_caching_bg && !ffe_ctl->orig_have_caching_bg)
7738 ffe_ctl->orig_have_caching_bg = true;
7739
7740 if (!ins->objectid && ffe_ctl->loop >= LOOP_CACHING_WAIT &&
7741 ffe_ctl->have_caching_bg)
7742 return 1;
7743
7744 if (!ins->objectid && ++(ffe_ctl->index) < BTRFS_NR_RAID_TYPES)
7745 return 1;
7746
7747 if (ins->objectid) {
7748 if (!use_cluster && last_ptr) {
7749 spin_lock(&last_ptr->lock);
7750 last_ptr->window_start = ins->objectid;
7751 spin_unlock(&last_ptr->lock);
7752 }
7753 return 0;
7754 }
7755
7756 /*
7757 * LOOP_CACHING_NOWAIT, search partially cached block groups, kicking
7758 * caching kthreads as we move along
7759 * LOOP_CACHING_WAIT, search everything, and wait if our bg is caching
7760 * LOOP_ALLOC_CHUNK, force a chunk allocation and try again
7761 * LOOP_NO_EMPTY_SIZE, set empty_size and empty_cluster to 0 and try
7762 * again
7763 */
7764 if (ffe_ctl->loop < LOOP_NO_EMPTY_SIZE) {
7765 ffe_ctl->index = 0;
7766 if (ffe_ctl->loop == LOOP_CACHING_NOWAIT) {
7767 /*
7768 * We want to skip the LOOP_CACHING_WAIT step if we
7769 * don't have any uncached bgs and we've already done a
7770 * full search through.
7771 */
7772 if (ffe_ctl->orig_have_caching_bg || !full_search)
7773 ffe_ctl->loop = LOOP_CACHING_WAIT;
7774 else
7775 ffe_ctl->loop = LOOP_ALLOC_CHUNK;
7776 } else {
7777 ffe_ctl->loop++;
7778 }
7779
7780 if (ffe_ctl->loop == LOOP_ALLOC_CHUNK) {
7781 struct btrfs_trans_handle *trans;
7782 int exist = 0;
7783
7784 trans = current->journal_info;
7785 if (trans)
7786 exist = 1;
7787 else
7788 trans = btrfs_join_transaction(root);
7789
7790 if (IS_ERR(trans)) {
7791 ret = PTR_ERR(trans);
7792 return ret;
7793 }
7794
7795 ret = do_chunk_alloc(trans, ffe_ctl->flags,
7796 CHUNK_ALLOC_FORCE);
7797
7798 /*
7799 * If we can't allocate a new chunk we've already looped
7800 * through at least once, move on to the NO_EMPTY_SIZE
7801 * case.
7802 */
7803 if (ret == -ENOSPC)
7804 ffe_ctl->loop = LOOP_NO_EMPTY_SIZE;
7805
7806 /* Do not bail out on ENOSPC since we can do more. */
7807 if (ret < 0 && ret != -ENOSPC)
7808 btrfs_abort_transaction(trans, ret);
7809 else
7810 ret = 0;
7811 if (!exist)
7812 btrfs_end_transaction(trans);
7813 if (ret)
7814 return ret;
7815 }
7816
7817 if (ffe_ctl->loop == LOOP_NO_EMPTY_SIZE) {
7818 /*
7819 * Don't loop again if we already have no empty_size and
7820 * no empty_cluster.
7821 */
7822 if (ffe_ctl->empty_size == 0 &&
7823 ffe_ctl->empty_cluster == 0)
7824 return -ENOSPC;
7825 ffe_ctl->empty_size = 0;
7826 ffe_ctl->empty_cluster = 0;
7827 }
7828 return 1;
7829 }
7830 return -ENOSPC;
7831}
7832
7833/*
Chris Masonfec577f2007-02-26 10:40:21 -05007834 * walks the btree of allocated extents and find a hole of a given size.
7835 * The key ins is changed to record the hole:
Miao Xiea4820392013-09-09 13:19:42 +08007836 * ins->objectid == start position
Chris Mason62e27492007-03-15 12:56:47 -04007837 * ins->flags = BTRFS_EXTENT_ITEM_KEY
Miao Xiea4820392013-09-09 13:19:42 +08007838 * ins->offset == the size of the hole.
Chris Masonfec577f2007-02-26 10:40:21 -05007839 * Any available blocks before search_start are skipped.
Miao Xiea4820392013-09-09 13:19:42 +08007840 *
7841 * If there is no suitable free space, we will record the max size of
7842 * the free space extent currently.
Qu Wenruoe72d79d2018-11-02 09:39:50 +08007843 *
7844 * The overall logic and call chain:
7845 *
7846 * find_free_extent()
7847 * |- Iterate through all block groups
7848 * | |- Get a valid block group
7849 * | |- Try to do clustered allocation in that block group
7850 * | |- Try to do unclustered allocation in that block group
7851 * | |- Check if the result is valid
7852 * | | |- If valid, then exit
7853 * | |- Jump to next block group
7854 * |
7855 * |- Push harder to find free extents
7856 * |- If not found, re-iterate all block groups
Chris Masonfec577f2007-02-26 10:40:21 -05007857 */
Jeff Mahoney87bde3c2017-02-15 16:28:27 -05007858static noinline int find_free_extent(struct btrfs_fs_info *fs_info,
Wang Xiaoguang18513092016-07-25 15:51:40 +08007859 u64 ram_bytes, u64 num_bytes, u64 empty_size,
7860 u64 hint_byte, struct btrfs_key *ins,
7861 u64 flags, int delalloc)
Chris Masonfec577f2007-02-26 10:40:21 -05007862{
Josef Bacik80eb2342008-10-29 14:49:05 -04007863 int ret = 0;
Chris Masonfa9c0d792009-04-03 09:47:43 -04007864 struct btrfs_free_cluster *last_ptr = NULL;
Josef Bacik80eb2342008-10-29 14:49:05 -04007865 struct btrfs_block_group_cache *block_group = NULL;
Qu Wenruob4bd7452018-11-02 09:39:47 +08007866 struct find_free_extent_ctl ffe_ctl = {0};
Josef Bacik80eb2342008-10-29 14:49:05 -04007867 struct btrfs_space_info *space_info;
Josef Bacik67377732010-09-16 16:19:09 -04007868 bool use_cluster = true;
Josef Bacika5e681d2015-10-01 14:54:10 -04007869 bool full_search = false;
Chris Masonfec577f2007-02-26 10:40:21 -05007870
Jeff Mahoney0b246af2016-06-22 18:54:23 -04007871 WARN_ON(num_bytes < fs_info->sectorsize);
Qu Wenruob4bd7452018-11-02 09:39:47 +08007872
7873 ffe_ctl.ram_bytes = ram_bytes;
7874 ffe_ctl.num_bytes = num_bytes;
7875 ffe_ctl.empty_size = empty_size;
7876 ffe_ctl.flags = flags;
7877 ffe_ctl.search_start = 0;
7878 ffe_ctl.retry_clustered = false;
7879 ffe_ctl.retry_unclustered = false;
7880 ffe_ctl.delalloc = delalloc;
7881 ffe_ctl.index = btrfs_bg_flags_to_raid_index(flags);
7882 ffe_ctl.have_caching_bg = false;
7883 ffe_ctl.orig_have_caching_bg = false;
7884 ffe_ctl.found_offset = 0;
7885
David Sterba962a2982014-06-04 18:41:45 +02007886 ins->type = BTRFS_EXTENT_ITEM_KEY;
Josef Bacik80eb2342008-10-29 14:49:05 -04007887 ins->objectid = 0;
7888 ins->offset = 0;
Chris Masonb1a4d962007-04-04 15:27:52 -04007889
Jeff Mahoney71ff6432016-09-06 16:00:42 -04007890 trace_find_free_extent(fs_info, num_bytes, empty_size, flags);
Josef Bacik3f7de032011-11-10 08:29:20 -05007891
Jeff Mahoney0b246af2016-06-22 18:54:23 -04007892 space_info = __find_space_info(fs_info, flags);
Josef Bacik1b1d1f62010-03-19 20:49:55 +00007893 if (!space_info) {
Jeff Mahoney0b246af2016-06-22 18:54:23 -04007894 btrfs_err(fs_info, "No space info for %llu", flags);
Josef Bacik1b1d1f62010-03-19 20:49:55 +00007895 return -ENOSPC;
7896 }
Josef Bacik2552d172009-04-03 10:14:19 -04007897
Josef Bacik67377732010-09-16 16:19:09 -04007898 /*
Josef Bacik4f4db212015-09-29 11:40:47 -04007899 * If our free space is heavily fragmented we may not be able to make
7900 * big contiguous allocations, so instead of doing the expensive search
7901 * for free space, simply return ENOSPC with our max_extent_size so we
7902 * can go ahead and search for a more manageable chunk.
7903 *
7904 * If our max_extent_size is large enough for our allocation simply
7905 * disable clustering since we will likely not be able to find enough
7906 * space to create a cluster and induce latency trying.
Josef Bacik67377732010-09-16 16:19:09 -04007907 */
Josef Bacik4f4db212015-09-29 11:40:47 -04007908 if (unlikely(space_info->max_extent_size)) {
7909 spin_lock(&space_info->lock);
7910 if (space_info->max_extent_size &&
7911 num_bytes > space_info->max_extent_size) {
7912 ins->offset = space_info->max_extent_size;
7913 spin_unlock(&space_info->lock);
7914 return -ENOSPC;
7915 } else if (space_info->max_extent_size) {
7916 use_cluster = false;
7917 }
7918 spin_unlock(&space_info->lock);
Chris Mason239b14b2008-03-24 15:02:07 -04007919 }
7920
Qu Wenruob4bd7452018-11-02 09:39:47 +08007921 last_ptr = fetch_cluster_info(fs_info, space_info,
7922 &ffe_ctl.empty_cluster);
Chris Mason239b14b2008-03-24 15:02:07 -04007923 if (last_ptr) {
Chris Masonfa9c0d792009-04-03 09:47:43 -04007924 spin_lock(&last_ptr->lock);
7925 if (last_ptr->block_group)
7926 hint_byte = last_ptr->window_start;
Josef Bacikc759c4e2015-10-02 15:25:10 -04007927 if (last_ptr->fragmented) {
7928 /*
7929 * We still set window_start so we can keep track of the
7930 * last place we found an allocation to try and save
7931 * some time.
7932 */
7933 hint_byte = last_ptr->window_start;
7934 use_cluster = false;
7935 }
Chris Masonfa9c0d792009-04-03 09:47:43 -04007936 spin_unlock(&last_ptr->lock);
Chris Mason239b14b2008-03-24 15:02:07 -04007937 }
Chris Masonfa9c0d792009-04-03 09:47:43 -04007938
Qu Wenruob4bd7452018-11-02 09:39:47 +08007939 ffe_ctl.search_start = max(ffe_ctl.search_start,
7940 first_logical_byte(fs_info, 0));
7941 ffe_ctl.search_start = max(ffe_ctl.search_start, hint_byte);
7942 if (ffe_ctl.search_start == hint_byte) {
7943 block_group = btrfs_lookup_block_group(fs_info,
7944 ffe_ctl.search_start);
Josef Bacik817d52f2009-07-13 21:29:25 -04007945 /*
7946 * we don't want to use the block group if it doesn't match our
7947 * allocation bits, or if its not cached.
Josef Bacikccf0e722009-11-10 21:23:48 -05007948 *
7949 * However if we are re-searching with an ideal block group
7950 * picked out then we don't care that the block group is cached.
Josef Bacik817d52f2009-07-13 21:29:25 -04007951 */
David Sterbab6919a52013-04-29 13:39:40 +00007952 if (block_group && block_group_bits(block_group, flags) &&
Josef Bacik285ff5a2012-01-13 15:27:45 -05007953 block_group->cached != BTRFS_CACHE_NO) {
Josef Bacik2552d172009-04-03 10:14:19 -04007954 down_read(&space_info->groups_sem);
Chris Mason44fb5512009-06-04 15:34:51 -04007955 if (list_empty(&block_group->list) ||
7956 block_group->ro) {
7957 /*
7958 * someone is removing this block group,
7959 * we can't jump into the have_block_group
7960 * target because our list pointers are not
7961 * valid
7962 */
7963 btrfs_put_block_group(block_group);
7964 up_read(&space_info->groups_sem);
Josef Bacikccf0e722009-11-10 21:23:48 -05007965 } else {
Qu Wenruob4bd7452018-11-02 09:39:47 +08007966 ffe_ctl.index = btrfs_bg_flags_to_raid_index(
Qu Wenruo3e72ee82018-01-30 18:20:45 +08007967 block_group->flags);
Miao Xiee570fd22014-06-19 10:42:50 +08007968 btrfs_lock_block_group(block_group, delalloc);
Chris Mason44fb5512009-06-04 15:34:51 -04007969 goto have_block_group;
Josef Bacikccf0e722009-11-10 21:23:48 -05007970 }
Josef Bacik2552d172009-04-03 10:14:19 -04007971 } else if (block_group) {
Chris Masonfa9c0d792009-04-03 09:47:43 -04007972 btrfs_put_block_group(block_group);
Josef Bacik2552d172009-04-03 10:14:19 -04007973 }
Chris Mason42e70e72008-11-07 18:17:11 -05007974 }
Josef Bacik2552d172009-04-03 10:14:19 -04007975search:
Qu Wenruob4bd7452018-11-02 09:39:47 +08007976 ffe_ctl.have_caching_bg = false;
7977 if (ffe_ctl.index == btrfs_bg_flags_to_raid_index(flags) ||
7978 ffe_ctl.index == 0)
Josef Bacika5e681d2015-10-01 14:54:10 -04007979 full_search = true;
Josef Bacik80eb2342008-10-29 14:49:05 -04007980 down_read(&space_info->groups_sem);
Qu Wenruob4bd7452018-11-02 09:39:47 +08007981 list_for_each_entry(block_group,
7982 &space_info->block_groups[ffe_ctl.index], list) {
Jeff Mahoney14443932017-07-19 23:25:51 -04007983 /* If the block group is read-only, we can skip it entirely. */
7984 if (unlikely(block_group->ro))
7985 continue;
7986
Miao Xiee570fd22014-06-19 10:42:50 +08007987 btrfs_grab_block_group(block_group, delalloc);
Qu Wenruob4bd7452018-11-02 09:39:47 +08007988 ffe_ctl.search_start = block_group->key.objectid;
Chris Mason42e70e72008-11-07 18:17:11 -05007989
Chris Mason83a50de2010-12-13 15:06:46 -05007990 /*
7991 * this can happen if we end up cycling through all the
7992 * raid types, but we want to make sure we only allocate
7993 * for the proper type.
7994 */
David Sterbab6919a52013-04-29 13:39:40 +00007995 if (!block_group_bits(block_group, flags)) {
Bart Van Asschebece2e82018-06-20 10:03:31 -07007996 u64 extra = BTRFS_BLOCK_GROUP_DUP |
Chris Mason83a50de2010-12-13 15:06:46 -05007997 BTRFS_BLOCK_GROUP_RAID1 |
David Woodhouse53b381b2013-01-29 18:40:14 -05007998 BTRFS_BLOCK_GROUP_RAID5 |
7999 BTRFS_BLOCK_GROUP_RAID6 |
Chris Mason83a50de2010-12-13 15:06:46 -05008000 BTRFS_BLOCK_GROUP_RAID10;
8001
8002 /*
8003 * if they asked for extra copies and this block group
8004 * doesn't provide them, bail. This does allow us to
8005 * fill raid0 from raid1.
8006 */
David Sterbab6919a52013-04-29 13:39:40 +00008007 if ((flags & extra) && !(block_group->flags & extra))
Chris Mason83a50de2010-12-13 15:06:46 -05008008 goto loop;
8009 }
8010
Josef Bacik2552d172009-04-03 10:14:19 -04008011have_block_group:
Qu Wenruob4bd7452018-11-02 09:39:47 +08008012 ffe_ctl.cached = block_group_cache_done(block_group);
8013 if (unlikely(!ffe_ctl.cached)) {
8014 ffe_ctl.have_caching_bg = true;
Liu Bof6373bf2012-12-27 09:01:18 +00008015 ret = cache_block_group(block_group, 0);
Chris Mason1d4284b2012-03-28 20:31:37 -04008016 BUG_ON(ret < 0);
8017 ret = 0;
Josef Bacikea6a4782008-11-20 12:16:16 -05008018 }
8019
Josef Bacik36cce922013-08-05 11:15:21 -04008020 if (unlikely(block_group->cached == BTRFS_CACHE_ERROR))
8021 goto loop;
Josef Bacik0f9dd462008-09-23 13:14:11 -04008022
Josef Bacik0a243252009-09-11 16:11:20 -04008023 /*
Alexandre Oliva062c05c2011-12-07 19:50:42 -05008024 * Ok we want to try and use the cluster allocator, so
8025 * lets look there
Josef Bacik0a243252009-09-11 16:11:20 -04008026 */
Josef Bacikc759c4e2015-10-02 15:25:10 -04008027 if (last_ptr && use_cluster) {
Qu Wenruod06e3bb2018-11-02 09:39:48 +08008028 struct btrfs_block_group_cache *cluster_bg = NULL;
Chris Mason44fb5512009-06-04 15:34:51 -04008029
Qu Wenruod06e3bb2018-11-02 09:39:48 +08008030 ret = find_free_extent_clustered(block_group, last_ptr,
8031 &ffe_ctl, &cluster_bg);
Alexandre Oliva274bd4f2011-12-07 20:08:40 -05008032
Qu Wenruod06e3bb2018-11-02 09:39:48 +08008033 if (ret == 0) {
8034 if (cluster_bg && cluster_bg != block_group) {
Miao Xiee570fd22014-06-19 10:42:50 +08008035 btrfs_release_block_group(block_group,
8036 delalloc);
Qu Wenruod06e3bb2018-11-02 09:39:48 +08008037 block_group = cluster_bg;
Miao Xie215a63d2014-01-15 20:00:56 +08008038 }
Chris Masonfa9c0d792009-04-03 09:47:43 -04008039 goto checks;
Qu Wenruod06e3bb2018-11-02 09:39:48 +08008040 } else if (ret == -EAGAIN) {
Josef Bacik817d52f2009-07-13 21:29:25 -04008041 goto have_block_group;
Qu Wenruod06e3bb2018-11-02 09:39:48 +08008042 } else if (ret > 0) {
8043 goto loop;
Chris Masonfa9c0d792009-04-03 09:47:43 -04008044 }
Qu Wenruod06e3bb2018-11-02 09:39:48 +08008045 /* ret == -ENOENT case falls through */
Chris Masonfa9c0d792009-04-03 09:47:43 -04008046 }
8047
Qu Wenruoe1a41842018-11-02 09:39:49 +08008048 ret = find_free_extent_unclustered(block_group, last_ptr,
8049 &ffe_ctl);
8050 if (ret == -EAGAIN)
Josef Bacik817d52f2009-07-13 21:29:25 -04008051 goto have_block_group;
Qu Wenruoe1a41842018-11-02 09:39:49 +08008052 else if (ret > 0)
Josef Bacik1cdda9b2009-10-06 10:04:28 -04008053 goto loop;
Qu Wenruoe1a41842018-11-02 09:39:49 +08008054 /* ret == 0 case falls through */
Chris Masonfa9c0d792009-04-03 09:47:43 -04008055checks:
Qu Wenruob4bd7452018-11-02 09:39:47 +08008056 ffe_ctl.search_start = round_up(ffe_ctl.found_offset,
8057 fs_info->stripesize);
Chris Masone37c9e62007-05-09 20:13:14 -04008058
Josef Bacik2552d172009-04-03 10:14:19 -04008059 /* move on to the next group */
Qu Wenruob4bd7452018-11-02 09:39:47 +08008060 if (ffe_ctl.search_start + num_bytes >
Miao Xie215a63d2014-01-15 20:00:56 +08008061 block_group->key.objectid + block_group->key.offset) {
Qu Wenruob4bd7452018-11-02 09:39:47 +08008062 btrfs_add_free_space(block_group, ffe_ctl.found_offset,
8063 num_bytes);
Josef Bacik2552d172009-04-03 10:14:19 -04008064 goto loop;
Josef Bacik6226cb02009-04-03 10:14:18 -04008065 }
Josef Bacik80eb2342008-10-29 14:49:05 -04008066
Qu Wenruob4bd7452018-11-02 09:39:47 +08008067 if (ffe_ctl.found_offset < ffe_ctl.search_start)
8068 btrfs_add_free_space(block_group, ffe_ctl.found_offset,
8069 ffe_ctl.search_start - ffe_ctl.found_offset);
Josef Bacik6226cb02009-04-03 10:14:18 -04008070
Wang Xiaoguang18513092016-07-25 15:51:40 +08008071 ret = btrfs_add_reserved_bytes(block_group, ram_bytes,
8072 num_bytes, delalloc);
Yan, Zhengf0486c62010-05-16 10:46:25 -04008073 if (ret == -EAGAIN) {
Qu Wenruob4bd7452018-11-02 09:39:47 +08008074 btrfs_add_free_space(block_group, ffe_ctl.found_offset,
8075 num_bytes);
Yan, Zhengf0486c62010-05-16 10:46:25 -04008076 goto loop;
8077 }
Filipe Manana9cfa3e32016-04-26 15:39:32 +01008078 btrfs_inc_block_group_reservations(block_group);
Yan Zheng11833d62009-09-11 16:11:19 -04008079
Josef Bacik2552d172009-04-03 10:14:19 -04008080 /* we are all good, lets return */
Qu Wenruob4bd7452018-11-02 09:39:47 +08008081 ins->objectid = ffe_ctl.search_start;
Yan, Zhengf0486c62010-05-16 10:46:25 -04008082 ins->offset = num_bytes;
8083
Qu Wenruob4bd7452018-11-02 09:39:47 +08008084 trace_btrfs_reserve_extent(block_group, ffe_ctl.search_start,
8085 num_bytes);
Miao Xiee570fd22014-06-19 10:42:50 +08008086 btrfs_release_block_group(block_group, delalloc);
Josef Bacik2552d172009-04-03 10:14:19 -04008087 break;
8088loop:
Qu Wenruob4bd7452018-11-02 09:39:47 +08008089 ffe_ctl.retry_clustered = false;
8090 ffe_ctl.retry_unclustered = false;
Qu Wenruo3e72ee82018-01-30 18:20:45 +08008091 BUG_ON(btrfs_bg_flags_to_raid_index(block_group->flags) !=
Qu Wenruob4bd7452018-11-02 09:39:47 +08008092 ffe_ctl.index);
Miao Xiee570fd22014-06-19 10:42:50 +08008093 btrfs_release_block_group(block_group, delalloc);
Jeff Mahoney14443932017-07-19 23:25:51 -04008094 cond_resched();
Josef Bacik2552d172009-04-03 10:14:19 -04008095 }
8096 up_read(&space_info->groups_sem);
Chris Masonf5a31e12008-11-10 11:47:09 -05008097
Qu Wenruoe72d79d2018-11-02 09:39:50 +08008098 ret = find_free_extent_update_loop(fs_info, last_ptr, ins, &ffe_ctl,
8099 full_search, use_cluster);
8100 if (ret > 0)
Miao Xie60d2adb2011-09-09 17:34:35 +08008101 goto search;
8102
Josef Bacik4f4db212015-09-29 11:40:47 -04008103 if (ret == -ENOSPC) {
Qu Wenruob4bd7452018-11-02 09:39:47 +08008104 /*
8105 * Use ffe_ctl->total_free_space as fallback if we can't find
8106 * any contiguous hole.
8107 */
8108 if (!ffe_ctl.max_extent_size)
8109 ffe_ctl.max_extent_size = ffe_ctl.total_free_space;
Josef Bacik4f4db212015-09-29 11:40:47 -04008110 spin_lock(&space_info->lock);
Qu Wenruob4bd7452018-11-02 09:39:47 +08008111 space_info->max_extent_size = ffe_ctl.max_extent_size;
Josef Bacik4f4db212015-09-29 11:40:47 -04008112 spin_unlock(&space_info->lock);
Qu Wenruob4bd7452018-11-02 09:39:47 +08008113 ins->offset = ffe_ctl.max_extent_size;
Josef Bacik4f4db212015-09-29 11:40:47 -04008114 }
Chris Mason0f70abe2007-02-28 16:46:22 -05008115 return ret;
Chris Masonfec577f2007-02-26 10:40:21 -05008116}
Chris Masonec44a352008-04-28 15:29:52 -04008117
Josef Bacikb78e5612018-11-21 14:03:07 -05008118#define DUMP_BLOCK_RSV(fs_info, rsv_name) \
8119do { \
8120 struct btrfs_block_rsv *__rsv = &(fs_info)->rsv_name; \
8121 spin_lock(&__rsv->lock); \
8122 btrfs_info(fs_info, #rsv_name ": size %llu reserved %llu", \
8123 __rsv->size, __rsv->reserved); \
8124 spin_unlock(&__rsv->lock); \
8125} while (0)
8126
Jeff Mahoneyab8d0fc2016-09-20 10:05:02 -04008127static void dump_space_info(struct btrfs_fs_info *fs_info,
8128 struct btrfs_space_info *info, u64 bytes,
Josef Bacik9ed74f22009-09-11 16:12:44 -04008129 int dump_block_groups)
Josef Bacik0f9dd462008-09-23 13:14:11 -04008130{
8131 struct btrfs_block_group_cache *cache;
Yan, Zhengb742bb822010-05-16 10:46:24 -04008132 int index = 0;
Josef Bacik0f9dd462008-09-23 13:14:11 -04008133
Josef Bacik9ed74f22009-09-11 16:12:44 -04008134 spin_lock(&info->lock);
Jeff Mahoneyab8d0fc2016-09-20 10:05:02 -04008135 btrfs_info(fs_info, "space_info %llu has %llu free, is %sfull",
8136 info->flags,
Liu Bo41361352017-02-13 15:42:21 -08008137 info->total_bytes - btrfs_space_info_used(info, true),
8138 info->full ? "" : "not ");
Jeff Mahoneyab8d0fc2016-09-20 10:05:02 -04008139 btrfs_info(fs_info,
8140 "space_info total=%llu, used=%llu, pinned=%llu, reserved=%llu, may_use=%llu, readonly=%llu",
8141 info->total_bytes, info->bytes_used, info->bytes_pinned,
8142 info->bytes_reserved, info->bytes_may_use,
8143 info->bytes_readonly);
Josef Bacik9ed74f22009-09-11 16:12:44 -04008144 spin_unlock(&info->lock);
8145
Josef Bacikb78e5612018-11-21 14:03:07 -05008146 DUMP_BLOCK_RSV(fs_info, global_block_rsv);
8147 DUMP_BLOCK_RSV(fs_info, trans_block_rsv);
8148 DUMP_BLOCK_RSV(fs_info, chunk_block_rsv);
8149 DUMP_BLOCK_RSV(fs_info, delayed_block_rsv);
8150 DUMP_BLOCK_RSV(fs_info, delayed_refs_rsv);
8151
Josef Bacik9ed74f22009-09-11 16:12:44 -04008152 if (!dump_block_groups)
8153 return;
Josef Bacik0f9dd462008-09-23 13:14:11 -04008154
Josef Bacik80eb2342008-10-29 14:49:05 -04008155 down_read(&info->groups_sem);
Yan, Zhengb742bb822010-05-16 10:46:24 -04008156again:
8157 list_for_each_entry(cache, &info->block_groups[index], list) {
Josef Bacik0f9dd462008-09-23 13:14:11 -04008158 spin_lock(&cache->lock);
Jeff Mahoneyab8d0fc2016-09-20 10:05:02 -04008159 btrfs_info(fs_info,
8160 "block group %llu has %llu bytes, %llu used %llu pinned %llu reserved %s",
8161 cache->key.objectid, cache->key.offset,
8162 btrfs_block_group_used(&cache->item), cache->pinned,
8163 cache->reserved, cache->ro ? "[readonly]" : "");
Josef Bacik0f9dd462008-09-23 13:14:11 -04008164 btrfs_dump_free_space(cache, bytes);
8165 spin_unlock(&cache->lock);
8166 }
Yan, Zhengb742bb822010-05-16 10:46:24 -04008167 if (++index < BTRFS_NR_RAID_TYPES)
8168 goto again;
Josef Bacik80eb2342008-10-29 14:49:05 -04008169 up_read(&info->groups_sem);
Josef Bacik0f9dd462008-09-23 13:14:11 -04008170}
Zheng Yane8569812008-09-26 10:05:48 -04008171
Nikolay Borisov6f47c702018-03-13 12:22:32 +02008172/*
8173 * btrfs_reserve_extent - entry point to the extent allocator. Tries to find a
8174 * hole that is at least as big as @num_bytes.
8175 *
8176 * @root - The root that will contain this extent
8177 *
8178 * @ram_bytes - The amount of space in ram that @num_bytes take. This
8179 * is used for accounting purposes. This value differs
8180 * from @num_bytes only in the case of compressed extents.
8181 *
8182 * @num_bytes - Number of bytes to allocate on-disk.
8183 *
8184 * @min_alloc_size - Indicates the minimum amount of space that the
8185 * allocator should try to satisfy. In some cases
8186 * @num_bytes may be larger than what is required and if
8187 * the filesystem is fragmented then allocation fails.
8188 * However, the presence of @min_alloc_size gives a
8189 * chance to try and satisfy the smaller allocation.
8190 *
8191 * @empty_size - A hint that you plan on doing more COW. This is the
8192 * size in bytes the allocator should try to find free
8193 * next to the block it returns. This is just a hint and
8194 * may be ignored by the allocator.
8195 *
8196 * @hint_byte - Hint to the allocator to start searching above the byte
8197 * address passed. It might be ignored.
8198 *
8199 * @ins - This key is modified to record the found hole. It will
8200 * have the following values:
8201 * ins->objectid == start position
8202 * ins->flags = BTRFS_EXTENT_ITEM_KEY
8203 * ins->offset == the size of the hole.
8204 *
8205 * @is_data - Boolean flag indicating whether an extent is
8206 * allocated for data (true) or metadata (false)
8207 *
8208 * @delalloc - Boolean flag indicating whether this allocation is for
8209 * delalloc or not. If 'true' data_rwsem of block groups
8210 * is going to be acquired.
8211 *
8212 *
8213 * Returns 0 when an allocation succeeded or < 0 when an error occurred. In
8214 * case -ENOSPC is returned then @ins->offset will contain the size of the
8215 * largest available hole the allocator managed to find.
8216 */
Wang Xiaoguang18513092016-07-25 15:51:40 +08008217int btrfs_reserve_extent(struct btrfs_root *root, u64 ram_bytes,
Yan Zheng11833d62009-09-11 16:11:19 -04008218 u64 num_bytes, u64 min_alloc_size,
8219 u64 empty_size, u64 hint_byte,
Miao Xiee570fd22014-06-19 10:42:50 +08008220 struct btrfs_key *ins, int is_data, int delalloc)
Chris Masonfec577f2007-02-26 10:40:21 -05008221{
Jeff Mahoneyab8d0fc2016-09-20 10:05:02 -04008222 struct btrfs_fs_info *fs_info = root->fs_info;
Josef Bacik36af4e02015-09-25 16:13:11 -04008223 bool final_tried = num_bytes == min_alloc_size;
David Sterbab6919a52013-04-29 13:39:40 +00008224 u64 flags;
Chris Masonfec577f2007-02-26 10:40:21 -05008225 int ret;
Chris Mason925baed2008-06-25 16:01:30 -04008226
Jeff Mahoney1b868262017-05-17 11:38:35 -04008227 flags = get_alloc_profile_by_root(root, is_data);
Chris Mason98d20f62008-04-14 09:46:10 -04008228again:
Jeff Mahoney0b246af2016-06-22 18:54:23 -04008229 WARN_ON(num_bytes < fs_info->sectorsize);
Jeff Mahoney87bde3c2017-02-15 16:28:27 -05008230 ret = find_free_extent(fs_info, ram_bytes, num_bytes, empty_size,
Wang Xiaoguang18513092016-07-25 15:51:40 +08008231 hint_byte, ins, flags, delalloc);
Filipe Manana9cfa3e32016-04-26 15:39:32 +01008232 if (!ret && !is_data) {
Jeff Mahoneyab8d0fc2016-09-20 10:05:02 -04008233 btrfs_dec_block_group_reservations(fs_info, ins->objectid);
Filipe Manana9cfa3e32016-04-26 15:39:32 +01008234 } else if (ret == -ENOSPC) {
Miao Xiea4820392013-09-09 13:19:42 +08008235 if (!final_tried && ins->offset) {
8236 num_bytes = min(num_bytes >> 1, ins->offset);
Jeff Mahoneyda170662016-06-15 09:22:56 -04008237 num_bytes = round_down(num_bytes,
Jeff Mahoney0b246af2016-06-22 18:54:23 -04008238 fs_info->sectorsize);
Miao Xie9e622d62012-01-26 15:01:12 -05008239 num_bytes = max(num_bytes, min_alloc_size);
Wang Xiaoguang18513092016-07-25 15:51:40 +08008240 ram_bytes = num_bytes;
Miao Xie9e622d62012-01-26 15:01:12 -05008241 if (num_bytes == min_alloc_size)
8242 final_tried = true;
8243 goto again;
Jeff Mahoneyab8d0fc2016-09-20 10:05:02 -04008244 } else if (btrfs_test_opt(fs_info, ENOSPC_DEBUG)) {
Miao Xie9e622d62012-01-26 15:01:12 -05008245 struct btrfs_space_info *sinfo;
Josef Bacik0f9dd462008-09-23 13:14:11 -04008246
Jeff Mahoneyab8d0fc2016-09-20 10:05:02 -04008247 sinfo = __find_space_info(fs_info, flags);
Jeff Mahoney0b246af2016-06-22 18:54:23 -04008248 btrfs_err(fs_info,
Jeff Mahoney5d163e02016-09-20 10:05:00 -04008249 "allocation failed flags %llu, wanted %llu",
8250 flags, num_bytes);
Jeff Mahoney53804282012-03-01 14:56:28 +01008251 if (sinfo)
Jeff Mahoneyab8d0fc2016-09-20 10:05:02 -04008252 dump_space_info(fs_info, sinfo, num_bytes, 1);
Miao Xie9e622d62012-01-26 15:01:12 -05008253 }
Chris Mason925baed2008-06-25 16:01:30 -04008254 }
Josef Bacik0f9dd462008-09-23 13:14:11 -04008255
8256 return ret;
Chris Masone6dcd2d2008-07-17 12:53:50 -04008257}
8258
Jeff Mahoney2ff7e612016-06-22 18:54:24 -04008259static int __btrfs_free_reserved_extent(struct btrfs_fs_info *fs_info,
Miao Xiee570fd22014-06-19 10:42:50 +08008260 u64 start, u64 len,
8261 int pin, int delalloc)
Chris Mason65b51a02008-08-01 15:11:20 -04008262{
Josef Bacik0f9dd462008-09-23 13:14:11 -04008263 struct btrfs_block_group_cache *cache;
Liu Hui1f3c79a2009-01-05 15:57:51 -05008264 int ret = 0;
Josef Bacik0f9dd462008-09-23 13:14:11 -04008265
Jeff Mahoney0b246af2016-06-22 18:54:23 -04008266 cache = btrfs_lookup_block_group(fs_info, start);
Josef Bacik0f9dd462008-09-23 13:14:11 -04008267 if (!cache) {
Jeff Mahoney0b246af2016-06-22 18:54:23 -04008268 btrfs_err(fs_info, "Unable to find block group for %llu",
8269 start);
Josef Bacik0f9dd462008-09-23 13:14:11 -04008270 return -ENOSPC;
8271 }
Liu Hui1f3c79a2009-01-05 15:57:51 -05008272
Chris Masone688b7252011-10-31 20:52:39 -04008273 if (pin)
Jeff Mahoney2ff7e612016-06-22 18:54:24 -04008274 pin_down_extent(fs_info, cache, start, len, 1);
Chris Masone688b7252011-10-31 20:52:39 -04008275 else {
Jeff Mahoney0b246af2016-06-22 18:54:23 -04008276 if (btrfs_test_opt(fs_info, DISCARD))
Jeff Mahoney2ff7e612016-06-22 18:54:24 -04008277 ret = btrfs_discard_extent(fs_info, start, len, NULL);
Chris Masone688b7252011-10-31 20:52:39 -04008278 btrfs_add_free_space(cache, start, len);
Wang Xiaoguang4824f1f2016-07-25 15:51:39 +08008279 btrfs_free_reserved_bytes(cache, len, delalloc);
Jeff Mahoney71ff6432016-09-06 16:00:42 -04008280 trace_btrfs_reserved_extent_free(fs_info, start, len);
Chris Masone688b7252011-10-31 20:52:39 -04008281 }
Dongsheng Yang31193212014-12-12 16:44:35 +08008282
Chris Masonfa9c0d792009-04-03 09:47:43 -04008283 btrfs_put_block_group(cache);
Chris Masone6dcd2d2008-07-17 12:53:50 -04008284 return ret;
8285}
8286
Jeff Mahoney2ff7e612016-06-22 18:54:24 -04008287int btrfs_free_reserved_extent(struct btrfs_fs_info *fs_info,
Miao Xiee570fd22014-06-19 10:42:50 +08008288 u64 start, u64 len, int delalloc)
Chris Masone688b7252011-10-31 20:52:39 -04008289{
Jeff Mahoney2ff7e612016-06-22 18:54:24 -04008290 return __btrfs_free_reserved_extent(fs_info, start, len, 0, delalloc);
Chris Masone688b7252011-10-31 20:52:39 -04008291}
8292
Jeff Mahoney2ff7e612016-06-22 18:54:24 -04008293int btrfs_free_and_pin_reserved_extent(struct btrfs_fs_info *fs_info,
Chris Masone688b7252011-10-31 20:52:39 -04008294 u64 start, u64 len)
8295{
Jeff Mahoney2ff7e612016-06-22 18:54:24 -04008296 return __btrfs_free_reserved_extent(fs_info, start, len, 1, 0);
Chris Masone688b7252011-10-31 20:52:39 -04008297}
8298
Yan Zheng5d4f98a2009-06-10 10:45:14 -04008299static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
Yan Zheng5d4f98a2009-06-10 10:45:14 -04008300 u64 parent, u64 root_objectid,
8301 u64 flags, u64 owner, u64 offset,
8302 struct btrfs_key *ins, int ref_mod)
Chris Masone6dcd2d2008-07-17 12:53:50 -04008303{
Nikolay Borisovef89b822018-06-20 15:48:58 +03008304 struct btrfs_fs_info *fs_info = trans->fs_info;
Chris Masone6dcd2d2008-07-17 12:53:50 -04008305 int ret;
Chris Masone6dcd2d2008-07-17 12:53:50 -04008306 struct btrfs_extent_item *extent_item;
Yan Zheng5d4f98a2009-06-10 10:45:14 -04008307 struct btrfs_extent_inline_ref *iref;
Chris Masone6dcd2d2008-07-17 12:53:50 -04008308 struct btrfs_path *path;
Yan Zheng5d4f98a2009-06-10 10:45:14 -04008309 struct extent_buffer *leaf;
8310 int type;
8311 u32 size;
Chris Masonf2654de2007-06-26 12:20:46 -04008312
Yan Zheng5d4f98a2009-06-10 10:45:14 -04008313 if (parent > 0)
8314 type = BTRFS_SHARED_DATA_REF_KEY;
8315 else
8316 type = BTRFS_EXTENT_DATA_REF_KEY;
Zheng Yan31840ae2008-09-23 13:14:14 -04008317
Yan Zheng5d4f98a2009-06-10 10:45:14 -04008318 size = sizeof(*extent_item) + btrfs_extent_inline_ref_size(type);
Chris Mason7bb86312007-12-11 09:25:06 -05008319
8320 path = btrfs_alloc_path();
Tsutomu Itohdb5b4932011-03-23 08:14:16 +00008321 if (!path)
8322 return -ENOMEM;
Chris Mason47e4bb92008-02-01 14:51:59 -05008323
Chris Masonb9473432009-03-13 11:00:37 -04008324 path->leave_spinning = 1;
Yan Zheng5d4f98a2009-06-10 10:45:14 -04008325 ret = btrfs_insert_empty_item(trans, fs_info->extent_root, path,
8326 ins, size);
Jeff Mahoney79787ea2012-03-12 16:03:00 +01008327 if (ret) {
8328 btrfs_free_path(path);
8329 return ret;
8330 }
Josef Bacik0f9dd462008-09-23 13:14:11 -04008331
Yan Zheng5d4f98a2009-06-10 10:45:14 -04008332 leaf = path->nodes[0];
8333 extent_item = btrfs_item_ptr(leaf, path->slots[0],
Chris Mason47e4bb92008-02-01 14:51:59 -05008334 struct btrfs_extent_item);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04008335 btrfs_set_extent_refs(leaf, extent_item, ref_mod);
8336 btrfs_set_extent_generation(leaf, extent_item, trans->transid);
8337 btrfs_set_extent_flags(leaf, extent_item,
8338 flags | BTRFS_EXTENT_FLAG_DATA);
Chris Mason47e4bb92008-02-01 14:51:59 -05008339
Yan Zheng5d4f98a2009-06-10 10:45:14 -04008340 iref = (struct btrfs_extent_inline_ref *)(extent_item + 1);
8341 btrfs_set_extent_inline_ref_type(leaf, iref, type);
8342 if (parent > 0) {
8343 struct btrfs_shared_data_ref *ref;
8344 ref = (struct btrfs_shared_data_ref *)(iref + 1);
8345 btrfs_set_extent_inline_ref_offset(leaf, iref, parent);
8346 btrfs_set_shared_data_ref_count(leaf, ref, ref_mod);
8347 } else {
8348 struct btrfs_extent_data_ref *ref;
8349 ref = (struct btrfs_extent_data_ref *)(&iref->offset);
8350 btrfs_set_extent_data_ref_root(leaf, ref, root_objectid);
8351 btrfs_set_extent_data_ref_objectid(leaf, ref, owner);
8352 btrfs_set_extent_data_ref_offset(leaf, ref, offset);
8353 btrfs_set_extent_data_ref_count(leaf, ref, ref_mod);
8354 }
Chris Mason47e4bb92008-02-01 14:51:59 -05008355
8356 btrfs_mark_buffer_dirty(path->nodes[0]);
Chris Mason7bb86312007-12-11 09:25:06 -05008357 btrfs_free_path(path);
Chris Masonf510cfe2007-10-15 16:14:48 -04008358
Nikolay Borisov25a356d2018-05-10 15:44:54 +03008359 ret = remove_from_free_space_tree(trans, ins->objectid, ins->offset);
Omar Sandoval1e144fb2015-09-29 20:50:37 -07008360 if (ret)
8361 return ret;
8362
David Sterba6b279402019-03-20 12:10:15 +01008363 ret = update_block_group(trans, ins->objectid, ins->offset, 1);
Jeff Mahoney79787ea2012-03-12 16:03:00 +01008364 if (ret) { /* -ENOENT, logic error */
Simon Kirbyc2cf52e2013-03-19 22:41:23 +00008365 btrfs_err(fs_info, "update block group failed for %llu %llu",
Geert Uytterhoevenc1c9ff72013-08-20 13:20:07 +02008366 ins->objectid, ins->offset);
Chris Masonf5947062008-02-04 10:10:13 -05008367 BUG();
8368 }
Jeff Mahoney71ff6432016-09-06 16:00:42 -04008369 trace_btrfs_reserved_extent_alloc(fs_info, ins->objectid, ins->offset);
Chris Masone6dcd2d2008-07-17 12:53:50 -04008370 return ret;
8371}
8372
Yan Zheng5d4f98a2009-06-10 10:45:14 -04008373static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
Nikolay Borisov4e6bd4e2018-05-21 12:27:21 +03008374 struct btrfs_delayed_ref_node *node,
Nikolay Borisov21ebfbe2018-05-21 12:27:22 +03008375 struct btrfs_delayed_extent_op *extent_op)
Yan Zheng5d4f98a2009-06-10 10:45:14 -04008376{
Nikolay Borisov9dcdbe02018-05-21 12:27:20 +03008377 struct btrfs_fs_info *fs_info = trans->fs_info;
Yan Zheng5d4f98a2009-06-10 10:45:14 -04008378 int ret;
Yan Zheng5d4f98a2009-06-10 10:45:14 -04008379 struct btrfs_extent_item *extent_item;
Nikolay Borisov4e6bd4e2018-05-21 12:27:21 +03008380 struct btrfs_key extent_key;
Yan Zheng5d4f98a2009-06-10 10:45:14 -04008381 struct btrfs_tree_block_info *block_info;
8382 struct btrfs_extent_inline_ref *iref;
8383 struct btrfs_path *path;
8384 struct extent_buffer *leaf;
Nikolay Borisov4e6bd4e2018-05-21 12:27:21 +03008385 struct btrfs_delayed_tree_ref *ref;
Josef Bacik3173a182013-03-07 14:22:04 -05008386 u32 size = sizeof(*extent_item) + sizeof(*iref);
Nikolay Borisov4e6bd4e2018-05-21 12:27:21 +03008387 u64 num_bytes;
Nikolay Borisov21ebfbe2018-05-21 12:27:22 +03008388 u64 flags = extent_op->flags_to_set;
Jeff Mahoney0b246af2016-06-22 18:54:23 -04008389 bool skinny_metadata = btrfs_fs_incompat(fs_info, SKINNY_METADATA);
Josef Bacik3173a182013-03-07 14:22:04 -05008390
Nikolay Borisov4e6bd4e2018-05-21 12:27:21 +03008391 ref = btrfs_delayed_node_to_tree_ref(node);
8392
Nikolay Borisov4e6bd4e2018-05-21 12:27:21 +03008393 extent_key.objectid = node->bytenr;
8394 if (skinny_metadata) {
8395 extent_key.offset = ref->level;
8396 extent_key.type = BTRFS_METADATA_ITEM_KEY;
8397 num_bytes = fs_info->nodesize;
8398 } else {
8399 extent_key.offset = node->num_bytes;
8400 extent_key.type = BTRFS_EXTENT_ITEM_KEY;
Josef Bacik3173a182013-03-07 14:22:04 -05008401 size += sizeof(*block_info);
Nikolay Borisov4e6bd4e2018-05-21 12:27:21 +03008402 num_bytes = node->num_bytes;
8403 }
Yan Zheng5d4f98a2009-06-10 10:45:14 -04008404
8405 path = btrfs_alloc_path();
Josef Bacik80ee54b2018-10-11 15:54:22 -04008406 if (!path)
Mark Fashehd8926bb2011-07-13 10:38:47 -07008407 return -ENOMEM;
Yan Zheng5d4f98a2009-06-10 10:45:14 -04008408
8409 path->leave_spinning = 1;
8410 ret = btrfs_insert_empty_item(trans, fs_info->extent_root, path,
Nikolay Borisov4e6bd4e2018-05-21 12:27:21 +03008411 &extent_key, size);
Jeff Mahoney79787ea2012-03-12 16:03:00 +01008412 if (ret) {
Chris Masondd825252015-04-01 08:36:05 -07008413 btrfs_free_path(path);
Jeff Mahoney79787ea2012-03-12 16:03:00 +01008414 return ret;
8415 }
Yan Zheng5d4f98a2009-06-10 10:45:14 -04008416
8417 leaf = path->nodes[0];
8418 extent_item = btrfs_item_ptr(leaf, path->slots[0],
8419 struct btrfs_extent_item);
8420 btrfs_set_extent_refs(leaf, extent_item, 1);
8421 btrfs_set_extent_generation(leaf, extent_item, trans->transid);
8422 btrfs_set_extent_flags(leaf, extent_item,
8423 flags | BTRFS_EXTENT_FLAG_TREE_BLOCK);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04008424
Josef Bacik3173a182013-03-07 14:22:04 -05008425 if (skinny_metadata) {
8426 iref = (struct btrfs_extent_inline_ref *)(extent_item + 1);
8427 } else {
8428 block_info = (struct btrfs_tree_block_info *)(extent_item + 1);
Nikolay Borisov21ebfbe2018-05-21 12:27:22 +03008429 btrfs_set_tree_block_key(leaf, block_info, &extent_op->key);
Nikolay Borisov4e6bd4e2018-05-21 12:27:21 +03008430 btrfs_set_tree_block_level(leaf, block_info, ref->level);
Josef Bacik3173a182013-03-07 14:22:04 -05008431 iref = (struct btrfs_extent_inline_ref *)(block_info + 1);
8432 }
Yan Zheng5d4f98a2009-06-10 10:45:14 -04008433
Nikolay Borisovd4b20732018-05-21 12:27:23 +03008434 if (node->type == BTRFS_SHARED_BLOCK_REF_KEY) {
Yan Zheng5d4f98a2009-06-10 10:45:14 -04008435 BUG_ON(!(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF));
8436 btrfs_set_extent_inline_ref_type(leaf, iref,
8437 BTRFS_SHARED_BLOCK_REF_KEY);
Nikolay Borisovd4b20732018-05-21 12:27:23 +03008438 btrfs_set_extent_inline_ref_offset(leaf, iref, ref->parent);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04008439 } else {
8440 btrfs_set_extent_inline_ref_type(leaf, iref,
8441 BTRFS_TREE_BLOCK_REF_KEY);
Nikolay Borisov4e6bd4e2018-05-21 12:27:21 +03008442 btrfs_set_extent_inline_ref_offset(leaf, iref, ref->root);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04008443 }
8444
8445 btrfs_mark_buffer_dirty(leaf);
8446 btrfs_free_path(path);
8447
Nikolay Borisov4e6bd4e2018-05-21 12:27:21 +03008448 ret = remove_from_free_space_tree(trans, extent_key.objectid,
8449 num_bytes);
Omar Sandoval1e144fb2015-09-29 20:50:37 -07008450 if (ret)
8451 return ret;
8452
David Sterba6b279402019-03-20 12:10:15 +01008453 ret = update_block_group(trans, extent_key.objectid,
Jeff Mahoney6202df62016-06-22 18:54:22 -04008454 fs_info->nodesize, 1);
Jeff Mahoney79787ea2012-03-12 16:03:00 +01008455 if (ret) { /* -ENOENT, logic error */
Simon Kirbyc2cf52e2013-03-19 22:41:23 +00008456 btrfs_err(fs_info, "update block group failed for %llu %llu",
Nikolay Borisov4e6bd4e2018-05-21 12:27:21 +03008457 extent_key.objectid, extent_key.offset);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04008458 BUG();
8459 }
Josef Bacik0be5dc62013-10-07 15:18:52 -04008460
Nikolay Borisov4e6bd4e2018-05-21 12:27:21 +03008461 trace_btrfs_reserved_extent_alloc(fs_info, extent_key.objectid,
Jeff Mahoney0b246af2016-06-22 18:54:23 -04008462 fs_info->nodesize);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04008463 return ret;
8464}
8465
8466int btrfs_alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
Josef Bacik84f7d8e2017-09-29 15:43:49 -04008467 struct btrfs_root *root, u64 owner,
Qu Wenruo5846a3c2015-10-26 14:11:18 +08008468 u64 offset, u64 ram_bytes,
8469 struct btrfs_key *ins)
Chris Masone6dcd2d2008-07-17 12:53:50 -04008470{
8471 int ret;
Chris Mason1c2308f82008-09-23 13:14:13 -04008472
Josef Bacik84f7d8e2017-09-29 15:43:49 -04008473 BUG_ON(root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID);
Chris Mason56bec292009-03-13 10:10:06 -04008474
Josef Bacikfd708b82017-09-29 15:43:50 -04008475 btrfs_ref_tree_mod(root, ins->objectid, ins->offset, 0,
8476 root->root_key.objectid, owner, offset,
8477 BTRFS_ADD_DELAYED_EXTENT);
8478
Nikolay Borisov88a979c2018-06-20 15:48:54 +03008479 ret = btrfs_add_delayed_data_ref(trans, ins->objectid,
Josef Bacik84f7d8e2017-09-29 15:43:49 -04008480 ins->offset, 0,
8481 root->root_key.objectid, owner,
Omar Sandoval7be07912017-06-06 16:45:30 -07008482 offset, ram_bytes,
8483 BTRFS_ADD_DELAYED_EXTENT, NULL, NULL);
Chris Masone6dcd2d2008-07-17 12:53:50 -04008484 return ret;
8485}
Chris Masone02119d2008-09-05 16:13:11 -04008486
8487/*
8488 * this is used by the tree logging recovery code. It records that
8489 * an extent has been allocated and makes sure to clear the free
8490 * space cache bits as well
8491 */
Yan Zheng5d4f98a2009-06-10 10:45:14 -04008492int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans,
Yan Zheng5d4f98a2009-06-10 10:45:14 -04008493 u64 root_objectid, u64 owner, u64 offset,
8494 struct btrfs_key *ins)
Chris Masone02119d2008-09-05 16:13:11 -04008495{
Nikolay Borisov61da2ab2018-06-20 15:49:13 +03008496 struct btrfs_fs_info *fs_info = trans->fs_info;
Chris Masone02119d2008-09-05 16:13:11 -04008497 int ret;
8498 struct btrfs_block_group_cache *block_group;
Wang Xiaoguanged7a6942016-08-26 11:33:14 +08008499 struct btrfs_space_info *space_info;
Josef Bacik8c2a1a32013-06-06 13:19:32 -04008500
8501 /*
8502 * Mixed block groups will exclude before processing the log so we only
Nicholas D Steeves01327612016-05-19 21:18:45 -04008503 * need to do the exclude dance if this fs isn't mixed.
Josef Bacik8c2a1a32013-06-06 13:19:32 -04008504 */
Jeff Mahoney0b246af2016-06-22 18:54:23 -04008505 if (!btrfs_fs_incompat(fs_info, MIXED_GROUPS)) {
Jeff Mahoney2ff7e612016-06-22 18:54:24 -04008506 ret = __exclude_logged_extent(fs_info, ins->objectid,
8507 ins->offset);
Josef Bacik8c2a1a32013-06-06 13:19:32 -04008508 if (ret)
8509 return ret;
8510 }
Chris Masone02119d2008-09-05 16:13:11 -04008511
Jeff Mahoney0b246af2016-06-22 18:54:23 -04008512 block_group = btrfs_lookup_block_group(fs_info, ins->objectid);
Josef Bacik8c2a1a32013-06-06 13:19:32 -04008513 if (!block_group)
8514 return -EINVAL;
Yan Zheng11833d62009-09-11 16:11:19 -04008515
Wang Xiaoguanged7a6942016-08-26 11:33:14 +08008516 space_info = block_group->space_info;
8517 spin_lock(&space_info->lock);
8518 spin_lock(&block_group->lock);
8519 space_info->bytes_reserved += ins->offset;
8520 block_group->reserved += ins->offset;
8521 spin_unlock(&block_group->lock);
8522 spin_unlock(&space_info->lock);
8523
Nikolay Borisovef89b822018-06-20 15:48:58 +03008524 ret = alloc_reserved_file_extent(trans, 0, root_objectid, 0, owner,
8525 offset, ins, 1);
Josef Bacikb50c6e22013-04-25 15:55:30 -04008526 btrfs_put_block_group(block_group);
Chris Masone02119d2008-09-05 16:13:11 -04008527 return ret;
8528}
8529
Eric Sandeen48a3b632013-04-25 20:41:01 +00008530static struct extent_buffer *
8531btrfs_init_new_buffer(struct btrfs_trans_handle *trans, struct btrfs_root *root,
Nikolay Borisovbc877d22018-06-18 14:13:19 +03008532 u64 bytenr, int level, u64 owner)
Chris Mason65b51a02008-08-01 15:11:20 -04008533{
Jeff Mahoney0b246af2016-06-22 18:54:23 -04008534 struct btrfs_fs_info *fs_info = root->fs_info;
Chris Mason65b51a02008-08-01 15:11:20 -04008535 struct extent_buffer *buf;
8536
Jeff Mahoney2ff7e612016-06-22 18:54:24 -04008537 buf = btrfs_find_create_tree_block(fs_info, bytenr);
Liu Boc871b0f2016-06-06 12:01:23 -07008538 if (IS_ERR(buf))
8539 return buf;
8540
Qu Wenruob72c3ab2018-08-21 09:53:47 +08008541 /*
8542 * Extra safety check in case the extent tree is corrupted and extent
8543 * allocator chooses to use a tree block which is already used and
8544 * locked.
8545 */
8546 if (buf->lock_owner == current->pid) {
8547 btrfs_err_rl(fs_info,
8548"tree block %llu owner %llu already locked by pid=%d, extent tree corruption detected",
8549 buf->start, btrfs_header_owner(buf), current->pid);
8550 free_extent_buffer(buf);
8551 return ERR_PTR(-EUCLEAN);
8552 }
8553
Chris Mason85d4e462011-07-26 16:11:19 -04008554 btrfs_set_buffer_lockdep_class(root->root_key.objectid, buf, level);
Chris Mason65b51a02008-08-01 15:11:20 -04008555 btrfs_tree_lock(buf);
David Sterba6a884d7d2019-03-20 14:30:02 +01008556 btrfs_clean_tree_block(buf);
Josef Bacik3083ee22012-03-09 16:01:49 -05008557 clear_bit(EXTENT_BUFFER_STALE, &buf->bflags);
Chris Masonb4ce94d2009-02-04 09:25:08 -05008558
David Sterba8bead252018-04-04 02:03:48 +02008559 btrfs_set_lock_blocking_write(buf);
David Sterba4db8c522015-12-03 13:06:46 +01008560 set_extent_buffer_uptodate(buf);
Chris Masonb4ce94d2009-02-04 09:25:08 -05008561
Nikolay Borisovbc877d22018-06-18 14:13:19 +03008562 memzero_extent_buffer(buf, 0, sizeof(struct btrfs_header));
8563 btrfs_set_header_level(buf, level);
8564 btrfs_set_header_bytenr(buf, buf->start);
8565 btrfs_set_header_generation(buf, trans->transid);
8566 btrfs_set_header_backref_rev(buf, BTRFS_MIXED_BACKREF_REV);
8567 btrfs_set_header_owner(buf, owner);
Nikolay Borisovde37aa52018-10-30 16:43:24 +02008568 write_extent_buffer_fsid(buf, fs_info->fs_devices->metadata_uuid);
Nikolay Borisovbc877d22018-06-18 14:13:19 +03008569 write_extent_buffer_chunk_tree_uuid(buf, fs_info->chunk_tree_uuid);
Chris Masond0c803c2008-09-11 16:17:57 -04008570 if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID) {
Filipe Manana656f30d2014-09-26 12:25:56 +01008571 buf->log_index = root->log_transid % 2;
Yan, Zheng8cef4e12009-11-12 09:33:26 +00008572 /*
8573 * we allow two log transactions at a time, use different
Andrea Gelmini52042d82018-11-28 12:05:13 +01008574 * EXTENT bit to differentiate dirty pages.
Yan, Zheng8cef4e12009-11-12 09:33:26 +00008575 */
Filipe Manana656f30d2014-09-26 12:25:56 +01008576 if (buf->log_index == 0)
Yan, Zheng8cef4e12009-11-12 09:33:26 +00008577 set_extent_dirty(&root->dirty_log_pages, buf->start,
8578 buf->start + buf->len - 1, GFP_NOFS);
8579 else
8580 set_extent_new(&root->dirty_log_pages, buf->start,
David Sterba3744dbe2016-04-26 23:54:39 +02008581 buf->start + buf->len - 1);
Chris Masond0c803c2008-09-11 16:17:57 -04008582 } else {
Filipe Manana656f30d2014-09-26 12:25:56 +01008583 buf->log_index = -1;
Chris Masond0c803c2008-09-11 16:17:57 -04008584 set_extent_dirty(&trans->transaction->dirty_pages, buf->start,
8585 buf->start + buf->len - 1, GFP_NOFS);
8586 }
Jeff Mahoney64c12922016-06-08 00:36:38 -04008587 trans->dirty = true;
Chris Masonb4ce94d2009-02-04 09:25:08 -05008588 /* this returns a buffer locked for blocking */
Chris Mason65b51a02008-08-01 15:11:20 -04008589 return buf;
8590}
8591
Yan, Zhengf0486c62010-05-16 10:46:25 -04008592static struct btrfs_block_rsv *
8593use_block_rsv(struct btrfs_trans_handle *trans,
8594 struct btrfs_root *root, u32 blocksize)
8595{
Jeff Mahoney0b246af2016-06-22 18:54:23 -04008596 struct btrfs_fs_info *fs_info = root->fs_info;
Yan, Zhengf0486c62010-05-16 10:46:25 -04008597 struct btrfs_block_rsv *block_rsv;
Jeff Mahoney0b246af2016-06-22 18:54:23 -04008598 struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
Yan, Zhengf0486c62010-05-16 10:46:25 -04008599 int ret;
Miao Xied88033d2013-05-13 13:55:12 +00008600 bool global_updated = false;
Yan, Zhengf0486c62010-05-16 10:46:25 -04008601
8602 block_rsv = get_block_rsv(trans, root);
8603
Miao Xieb586b322013-05-13 13:55:10 +00008604 if (unlikely(block_rsv->size == 0))
8605 goto try_reserve;
Miao Xied88033d2013-05-13 13:55:12 +00008606again:
Yan, Zhengf0486c62010-05-16 10:46:25 -04008607 ret = block_rsv_use_bytes(block_rsv, blocksize);
8608 if (!ret)
8609 return block_rsv;
8610
Miao Xieb586b322013-05-13 13:55:10 +00008611 if (block_rsv->failfast)
8612 return ERR_PTR(ret);
8613
Miao Xied88033d2013-05-13 13:55:12 +00008614 if (block_rsv->type == BTRFS_BLOCK_RSV_GLOBAL && !global_updated) {
8615 global_updated = true;
Jeff Mahoney0b246af2016-06-22 18:54:23 -04008616 update_global_block_rsv(fs_info);
Miao Xied88033d2013-05-13 13:55:12 +00008617 goto again;
8618 }
8619
Josef Bacikba2c4d42018-12-03 10:20:33 -05008620 /*
8621 * The global reserve still exists to save us from ourselves, so don't
8622 * warn_on if we are short on our delayed refs reserve.
8623 */
8624 if (block_rsv->type != BTRFS_BLOCK_RSV_DELREFS &&
8625 btrfs_test_opt(fs_info, ENOSPC_DEBUG)) {
Miao Xieb586b322013-05-13 13:55:10 +00008626 static DEFINE_RATELIMIT_STATE(_rs,
8627 DEFAULT_RATELIMIT_INTERVAL * 10,
8628 /*DEFAULT_RATELIMIT_BURST*/ 1);
8629 if (__ratelimit(&_rs))
8630 WARN(1, KERN_DEBUG
Frank Holtonefe120a2013-12-20 11:37:06 -05008631 "BTRFS: block rsv returned %d\n", ret);
Miao Xieb586b322013-05-13 13:55:10 +00008632 }
8633try_reserve:
8634 ret = reserve_metadata_bytes(root, block_rsv, blocksize,
8635 BTRFS_RESERVE_NO_FLUSH);
8636 if (!ret)
8637 return block_rsv;
8638 /*
8639 * If we couldn't reserve metadata bytes try and use some from
Miao Xie5881cfc2013-05-13 13:55:11 +00008640 * the global reserve if its space type is the same as the global
8641 * reservation.
Miao Xieb586b322013-05-13 13:55:10 +00008642 */
Miao Xie5881cfc2013-05-13 13:55:11 +00008643 if (block_rsv->type != BTRFS_BLOCK_RSV_GLOBAL &&
8644 block_rsv->space_info == global_rsv->space_info) {
Miao Xieb586b322013-05-13 13:55:10 +00008645 ret = block_rsv_use_bytes(global_rsv, blocksize);
8646 if (!ret)
8647 return global_rsv;
8648 }
8649 return ERR_PTR(ret);
Yan, Zhengf0486c62010-05-16 10:46:25 -04008650}
8651
Josef Bacik8c2a3ca2012-01-10 10:31:31 -05008652static void unuse_block_rsv(struct btrfs_fs_info *fs_info,
8653 struct btrfs_block_rsv *block_rsv, u32 blocksize)
Yan, Zhengf0486c62010-05-16 10:46:25 -04008654{
Lu Fengqi3a584172018-08-04 21:10:55 +08008655 block_rsv_add_bytes(block_rsv, blocksize, false);
Qu Wenruoff6bc372017-12-21 13:42:04 +08008656 block_rsv_release_bytes(fs_info, block_rsv, NULL, 0, NULL);
Yan, Zhengf0486c62010-05-16 10:46:25 -04008657}
8658
Chris Masonfec577f2007-02-26 10:40:21 -05008659/*
Yan, Zhengf0486c62010-05-16 10:46:25 -04008660 * finds a free extent and does all the dirty work required for allocation
Omar Sandoval67b78592015-02-24 02:47:04 -08008661 * returns the tree buffer or an ERR_PTR on error.
Chris Masonfec577f2007-02-26 10:40:21 -05008662 */
David Sterba4d75f8a2014-06-15 01:54:12 +02008663struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans,
Omar Sandoval310712b2017-01-17 23:24:37 -08008664 struct btrfs_root *root,
8665 u64 parent, u64 root_objectid,
8666 const struct btrfs_disk_key *key,
8667 int level, u64 hint,
8668 u64 empty_size)
Chris Masonfec577f2007-02-26 10:40:21 -05008669{
Jeff Mahoney0b246af2016-06-22 18:54:23 -04008670 struct btrfs_fs_info *fs_info = root->fs_info;
Chris Masone2fa7222007-03-12 16:22:34 -04008671 struct btrfs_key ins;
Yan, Zhengf0486c62010-05-16 10:46:25 -04008672 struct btrfs_block_rsv *block_rsv;
Chris Mason5f39d392007-10-15 16:14:19 -04008673 struct extent_buffer *buf;
Omar Sandoval67b78592015-02-24 02:47:04 -08008674 struct btrfs_delayed_extent_op *extent_op;
Yan, Zhengf0486c62010-05-16 10:46:25 -04008675 u64 flags = 0;
8676 int ret;
Jeff Mahoney0b246af2016-06-22 18:54:23 -04008677 u32 blocksize = fs_info->nodesize;
8678 bool skinny_metadata = btrfs_fs_incompat(fs_info, SKINNY_METADATA);
Yan, Zhengf0486c62010-05-16 10:46:25 -04008679
David Sterba05653ef2016-07-15 15:23:37 +02008680#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
Jeff Mahoney0b246af2016-06-22 18:54:23 -04008681 if (btrfs_is_testing(fs_info)) {
Josef Bacikfaa2dbf2014-05-07 17:06:09 -04008682 buf = btrfs_init_new_buffer(trans, root, root->alloc_bytenr,
Nikolay Borisovbc877d22018-06-18 14:13:19 +03008683 level, root_objectid);
Josef Bacikfaa2dbf2014-05-07 17:06:09 -04008684 if (!IS_ERR(buf))
8685 root->alloc_bytenr += blocksize;
8686 return buf;
8687 }
David Sterba05653ef2016-07-15 15:23:37 +02008688#endif
David Sterbafccb84c2014-09-29 23:53:21 +02008689
Yan, Zhengf0486c62010-05-16 10:46:25 -04008690 block_rsv = use_block_rsv(trans, root, blocksize);
8691 if (IS_ERR(block_rsv))
8692 return ERR_CAST(block_rsv);
8693
Wang Xiaoguang18513092016-07-25 15:51:40 +08008694 ret = btrfs_reserve_extent(root, blocksize, blocksize, blocksize,
Miao Xiee570fd22014-06-19 10:42:50 +08008695 empty_size, hint, &ins, 0, 0);
Omar Sandoval67b78592015-02-24 02:47:04 -08008696 if (ret)
8697 goto out_unuse;
Chris Mason55c69072008-01-09 15:55:33 -05008698
Nikolay Borisovbc877d22018-06-18 14:13:19 +03008699 buf = btrfs_init_new_buffer(trans, root, ins.objectid, level,
8700 root_objectid);
Omar Sandoval67b78592015-02-24 02:47:04 -08008701 if (IS_ERR(buf)) {
8702 ret = PTR_ERR(buf);
8703 goto out_free_reserved;
8704 }
Yan, Zhengf0486c62010-05-16 10:46:25 -04008705
8706 if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
8707 if (parent == 0)
8708 parent = ins.objectid;
8709 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8710 } else
8711 BUG_ON(parent > 0);
8712
8713 if (root_objectid != BTRFS_TREE_LOG_OBJECTID) {
Miao Xie78a61842012-11-21 02:21:28 +00008714 extent_op = btrfs_alloc_delayed_extent_op();
Omar Sandoval67b78592015-02-24 02:47:04 -08008715 if (!extent_op) {
8716 ret = -ENOMEM;
8717 goto out_free_buf;
8718 }
Yan, Zhengf0486c62010-05-16 10:46:25 -04008719 if (key)
8720 memcpy(&extent_op->key, key, sizeof(extent_op->key));
8721 else
8722 memset(&extent_op->key, 0, sizeof(extent_op->key));
8723 extent_op->flags_to_set = flags;
David Sterba35b3ad52015-11-30 16:51:29 +01008724 extent_op->update_key = skinny_metadata ? false : true;
8725 extent_op->update_flags = true;
8726 extent_op->is_data = false;
Josef Bacikb1c79e02013-05-09 13:49:30 -04008727 extent_op->level = level;
Yan, Zhengf0486c62010-05-16 10:46:25 -04008728
Josef Bacikfd708b82017-09-29 15:43:50 -04008729 btrfs_ref_tree_mod(root, ins.objectid, ins.offset, parent,
8730 root_objectid, level, 0,
8731 BTRFS_ADD_DELAYED_EXTENT);
Nikolay Borisov44e1c472018-06-20 15:48:53 +03008732 ret = btrfs_add_delayed_tree_ref(trans, ins.objectid,
Omar Sandoval7be07912017-06-06 16:45:30 -07008733 ins.offset, parent,
8734 root_objectid, level,
Omar Sandoval67b78592015-02-24 02:47:04 -08008735 BTRFS_ADD_DELAYED_EXTENT,
Omar Sandoval7be07912017-06-06 16:45:30 -07008736 extent_op, NULL, NULL);
Omar Sandoval67b78592015-02-24 02:47:04 -08008737 if (ret)
8738 goto out_free_delayed;
Yan, Zhengf0486c62010-05-16 10:46:25 -04008739 }
Chris Masonfec577f2007-02-26 10:40:21 -05008740 return buf;
Omar Sandoval67b78592015-02-24 02:47:04 -08008741
8742out_free_delayed:
8743 btrfs_free_delayed_extent_op(extent_op);
8744out_free_buf:
8745 free_extent_buffer(buf);
8746out_free_reserved:
Jeff Mahoney2ff7e612016-06-22 18:54:24 -04008747 btrfs_free_reserved_extent(fs_info, ins.objectid, ins.offset, 0);
Omar Sandoval67b78592015-02-24 02:47:04 -08008748out_unuse:
Jeff Mahoney0b246af2016-06-22 18:54:23 -04008749 unuse_block_rsv(fs_info, block_rsv, blocksize);
Omar Sandoval67b78592015-02-24 02:47:04 -08008750 return ERR_PTR(ret);
Chris Masonfec577f2007-02-26 10:40:21 -05008751}
Chris Masona28ec192007-03-06 20:08:01 -05008752
Yan Zheng2c47e6052009-06-27 21:07:35 -04008753struct walk_control {
8754 u64 refs[BTRFS_MAX_LEVEL];
8755 u64 flags[BTRFS_MAX_LEVEL];
8756 struct btrfs_key update_progress;
Josef Bacikaea6f022019-02-06 15:46:15 -05008757 struct btrfs_key drop_progress;
8758 int drop_level;
Yan Zheng2c47e6052009-06-27 21:07:35 -04008759 int stage;
8760 int level;
8761 int shared_level;
8762 int update_ref;
8763 int keep_locks;
Yan, Zheng1c4850e2009-09-21 15:55:59 -04008764 int reada_slot;
8765 int reada_count;
Josef Bacik78c52d92019-02-06 15:46:14 -05008766 int restarted;
Yan Zheng2c47e6052009-06-27 21:07:35 -04008767};
8768
8769#define DROP_REFERENCE 1
8770#define UPDATE_BACKREF 2
8771
Yan, Zheng1c4850e2009-09-21 15:55:59 -04008772static noinline void reada_walk_down(struct btrfs_trans_handle *trans,
8773 struct btrfs_root *root,
8774 struct walk_control *wc,
8775 struct btrfs_path *path)
8776{
Jeff Mahoney0b246af2016-06-22 18:54:23 -04008777 struct btrfs_fs_info *fs_info = root->fs_info;
Yan, Zheng1c4850e2009-09-21 15:55:59 -04008778 u64 bytenr;
8779 u64 generation;
8780 u64 refs;
Yan, Zheng94fcca92009-10-09 09:25:16 -04008781 u64 flags;
Yan, Zheng1c4850e2009-09-21 15:55:59 -04008782 u32 nritems;
Yan, Zheng1c4850e2009-09-21 15:55:59 -04008783 struct btrfs_key key;
8784 struct extent_buffer *eb;
8785 int ret;
8786 int slot;
8787 int nread = 0;
8788
8789 if (path->slots[wc->level] < wc->reada_slot) {
8790 wc->reada_count = wc->reada_count * 2 / 3;
8791 wc->reada_count = max(wc->reada_count, 2);
8792 } else {
8793 wc->reada_count = wc->reada_count * 3 / 2;
8794 wc->reada_count = min_t(int, wc->reada_count,
Jeff Mahoney0b246af2016-06-22 18:54:23 -04008795 BTRFS_NODEPTRS_PER_BLOCK(fs_info));
Yan, Zheng1c4850e2009-09-21 15:55:59 -04008796 }
8797
8798 eb = path->nodes[wc->level];
8799 nritems = btrfs_header_nritems(eb);
Yan, Zheng1c4850e2009-09-21 15:55:59 -04008800
8801 for (slot = path->slots[wc->level]; slot < nritems; slot++) {
8802 if (nread >= wc->reada_count)
8803 break;
8804
8805 cond_resched();
8806 bytenr = btrfs_node_blockptr(eb, slot);
8807 generation = btrfs_node_ptr_generation(eb, slot);
8808
8809 if (slot == path->slots[wc->level])
8810 goto reada;
8811
8812 if (wc->stage == UPDATE_BACKREF &&
8813 generation <= root->root_key.offset)
8814 continue;
8815
Yan, Zheng94fcca92009-10-09 09:25:16 -04008816 /* We don't lock the tree block, it's OK to be racy here */
Jeff Mahoney2ff7e612016-06-22 18:54:24 -04008817 ret = btrfs_lookup_extent_info(trans, fs_info, bytenr,
Josef Bacik3173a182013-03-07 14:22:04 -05008818 wc->level - 1, 1, &refs,
8819 &flags);
Jeff Mahoney79787ea2012-03-12 16:03:00 +01008820 /* We don't care about errors in readahead. */
8821 if (ret < 0)
8822 continue;
Yan, Zheng94fcca92009-10-09 09:25:16 -04008823 BUG_ON(refs == 0);
8824
Yan, Zheng1c4850e2009-09-21 15:55:59 -04008825 if (wc->stage == DROP_REFERENCE) {
Yan, Zheng1c4850e2009-09-21 15:55:59 -04008826 if (refs == 1)
8827 goto reada;
8828
Yan, Zheng94fcca92009-10-09 09:25:16 -04008829 if (wc->level == 1 &&
8830 (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF))
8831 continue;
Yan, Zheng1c4850e2009-09-21 15:55:59 -04008832 if (!wc->update_ref ||
8833 generation <= root->root_key.offset)
8834 continue;
8835 btrfs_node_key_to_cpu(eb, &key, slot);
8836 ret = btrfs_comp_cpu_keys(&key,
8837 &wc->update_progress);
8838 if (ret < 0)
8839 continue;
Yan, Zheng94fcca92009-10-09 09:25:16 -04008840 } else {
8841 if (wc->level == 1 &&
8842 (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF))
8843 continue;
Yan, Zheng1c4850e2009-09-21 15:55:59 -04008844 }
8845reada:
Jeff Mahoney2ff7e612016-06-22 18:54:24 -04008846 readahead_tree_block(fs_info, bytenr);
Yan, Zheng1c4850e2009-09-21 15:55:59 -04008847 nread++;
8848 }
8849 wc->reada_slot = slot;
8850}
8851
Chris Mason9aca1d52007-03-13 11:09:37 -04008852/*
Liu Bo2c016dc2012-12-26 15:32:17 +08008853 * helper to process tree block while walking down the tree.
Yan Zheng2c47e6052009-06-27 21:07:35 -04008854 *
Yan Zheng2c47e6052009-06-27 21:07:35 -04008855 * when wc->stage == UPDATE_BACKREF, this function updates
8856 * back refs for pointers in the block.
8857 *
8858 * NOTE: return value 1 means we should stop walking down.
Yan Zhengf82d02d2008-10-29 14:49:05 -04008859 */
Yan Zheng2c47e6052009-06-27 21:07:35 -04008860static noinline int walk_down_proc(struct btrfs_trans_handle *trans,
8861 struct btrfs_root *root,
8862 struct btrfs_path *path,
Yan, Zheng94fcca92009-10-09 09:25:16 -04008863 struct walk_control *wc, int lookup_info)
Yan Zheng2c47e6052009-06-27 21:07:35 -04008864{
Jeff Mahoney2ff7e612016-06-22 18:54:24 -04008865 struct btrfs_fs_info *fs_info = root->fs_info;
Yan Zheng2c47e6052009-06-27 21:07:35 -04008866 int level = wc->level;
8867 struct extent_buffer *eb = path->nodes[level];
Yan Zheng2c47e6052009-06-27 21:07:35 -04008868 u64 flag = BTRFS_BLOCK_FLAG_FULL_BACKREF;
8869 int ret;
8870
8871 if (wc->stage == UPDATE_BACKREF &&
8872 btrfs_header_owner(eb) != root->root_key.objectid)
8873 return 1;
8874
8875 /*
8876 * when reference count of tree block is 1, it won't increase
8877 * again. once full backref flag is set, we never clear it.
8878 */
Yan, Zheng94fcca92009-10-09 09:25:16 -04008879 if (lookup_info &&
8880 ((wc->stage == DROP_REFERENCE && wc->refs[level] != 1) ||
8881 (wc->stage == UPDATE_BACKREF && !(wc->flags[level] & flag)))) {
Yan Zheng2c47e6052009-06-27 21:07:35 -04008882 BUG_ON(!path->locks[level]);
Jeff Mahoney2ff7e612016-06-22 18:54:24 -04008883 ret = btrfs_lookup_extent_info(trans, fs_info,
Josef Bacik3173a182013-03-07 14:22:04 -05008884 eb->start, level, 1,
Yan Zheng2c47e6052009-06-27 21:07:35 -04008885 &wc->refs[level],
8886 &wc->flags[level]);
Jeff Mahoney79787ea2012-03-12 16:03:00 +01008887 BUG_ON(ret == -ENOMEM);
8888 if (ret)
8889 return ret;
Yan Zheng2c47e6052009-06-27 21:07:35 -04008890 BUG_ON(wc->refs[level] == 0);
8891 }
8892
Yan Zheng2c47e6052009-06-27 21:07:35 -04008893 if (wc->stage == DROP_REFERENCE) {
8894 if (wc->refs[level] > 1)
8895 return 1;
8896
8897 if (path->locks[level] && !wc->keep_locks) {
Chris Masonbd681512011-07-16 15:23:14 -04008898 btrfs_tree_unlock_rw(eb, path->locks[level]);
Yan Zheng2c47e6052009-06-27 21:07:35 -04008899 path->locks[level] = 0;
8900 }
8901 return 0;
8902 }
8903
8904 /* wc->stage == UPDATE_BACKREF */
8905 if (!(wc->flags[level] & flag)) {
8906 BUG_ON(!path->locks[level]);
Josef Bacike339a6b2014-07-02 10:54:25 -07008907 ret = btrfs_inc_ref(trans, root, eb, 1);
Jeff Mahoney79787ea2012-03-12 16:03:00 +01008908 BUG_ON(ret); /* -ENOMEM */
Josef Bacike339a6b2014-07-02 10:54:25 -07008909 ret = btrfs_dec_ref(trans, root, eb, 0);
Jeff Mahoney79787ea2012-03-12 16:03:00 +01008910 BUG_ON(ret); /* -ENOMEM */
Jeff Mahoney2ff7e612016-06-22 18:54:24 -04008911 ret = btrfs_set_disk_extent_flags(trans, fs_info, eb->start,
Josef Bacikb1c79e02013-05-09 13:49:30 -04008912 eb->len, flag,
8913 btrfs_header_level(eb), 0);
Jeff Mahoney79787ea2012-03-12 16:03:00 +01008914 BUG_ON(ret); /* -ENOMEM */
Yan Zheng2c47e6052009-06-27 21:07:35 -04008915 wc->flags[level] |= flag;
8916 }
8917
8918 /*
8919 * the block is shared by multiple trees, so it's not good to
8920 * keep the tree lock
8921 */
8922 if (path->locks[level] && level > 0) {
Chris Masonbd681512011-07-16 15:23:14 -04008923 btrfs_tree_unlock_rw(eb, path->locks[level]);
Yan Zheng2c47e6052009-06-27 21:07:35 -04008924 path->locks[level] = 0;
8925 }
8926 return 0;
8927}
8928
8929/*
Josef Bacik78c52d92019-02-06 15:46:14 -05008930 * This is used to verify a ref exists for this root to deal with a bug where we
8931 * would have a drop_progress key that hadn't been updated properly.
8932 */
8933static int check_ref_exists(struct btrfs_trans_handle *trans,
8934 struct btrfs_root *root, u64 bytenr, u64 parent,
8935 int level)
8936{
8937 struct btrfs_path *path;
8938 struct btrfs_extent_inline_ref *iref;
8939 int ret;
8940
8941 path = btrfs_alloc_path();
8942 if (!path)
8943 return -ENOMEM;
8944
8945 ret = lookup_extent_backref(trans, path, &iref, bytenr,
8946 root->fs_info->nodesize, parent,
8947 root->root_key.objectid, level, 0);
8948 btrfs_free_path(path);
8949 if (ret == -ENOENT)
8950 return 0;
8951 if (ret < 0)
8952 return ret;
8953 return 1;
8954}
8955
8956/*
Liu Bo2c016dc2012-12-26 15:32:17 +08008957 * helper to process tree block pointer.
Yan, Zheng1c4850e2009-09-21 15:55:59 -04008958 *
8959 * when wc->stage == DROP_REFERENCE, this function checks
8960 * reference count of the block pointed to. if the block
8961 * is shared and we need update back refs for the subtree
8962 * rooted at the block, this function changes wc->stage to
8963 * UPDATE_BACKREF. if the block is shared and there is no
8964 * need to update back, this function drops the reference
8965 * to the block.
8966 *
8967 * NOTE: return value 1 means we should stop walking down.
8968 */
8969static noinline int do_walk_down(struct btrfs_trans_handle *trans,
8970 struct btrfs_root *root,
8971 struct btrfs_path *path,
Yan, Zheng94fcca92009-10-09 09:25:16 -04008972 struct walk_control *wc, int *lookup_info)
Yan, Zheng1c4850e2009-09-21 15:55:59 -04008973{
Jeff Mahoney0b246af2016-06-22 18:54:23 -04008974 struct btrfs_fs_info *fs_info = root->fs_info;
Yan, Zheng1c4850e2009-09-21 15:55:59 -04008975 u64 bytenr;
8976 u64 generation;
8977 u64 parent;
Yan, Zheng1c4850e2009-09-21 15:55:59 -04008978 struct btrfs_key key;
Qu Wenruo581c1762018-03-29 09:08:11 +08008979 struct btrfs_key first_key;
Yan, Zheng1c4850e2009-09-21 15:55:59 -04008980 struct extent_buffer *next;
8981 int level = wc->level;
8982 int reada = 0;
8983 int ret = 0;
Mark Fasheh11526512014-07-17 12:39:01 -07008984 bool need_account = false;
Yan, Zheng1c4850e2009-09-21 15:55:59 -04008985
8986 generation = btrfs_node_ptr_generation(path->nodes[level],
8987 path->slots[level]);
8988 /*
8989 * if the lower level block was created before the snapshot
8990 * was created, we know there is no need to update back refs
8991 * for the subtree
8992 */
8993 if (wc->stage == UPDATE_BACKREF &&
Yan, Zheng94fcca92009-10-09 09:25:16 -04008994 generation <= root->root_key.offset) {
8995 *lookup_info = 1;
Yan, Zheng1c4850e2009-09-21 15:55:59 -04008996 return 1;
Yan, Zheng94fcca92009-10-09 09:25:16 -04008997 }
Yan, Zheng1c4850e2009-09-21 15:55:59 -04008998
8999 bytenr = btrfs_node_blockptr(path->nodes[level], path->slots[level]);
Qu Wenruo581c1762018-03-29 09:08:11 +08009000 btrfs_node_key_to_cpu(path->nodes[level], &first_key,
9001 path->slots[level]);
Yan, Zheng1c4850e2009-09-21 15:55:59 -04009002
Jeff Mahoney0b246af2016-06-22 18:54:23 -04009003 next = find_extent_buffer(fs_info, bytenr);
Yan, Zheng1c4850e2009-09-21 15:55:59 -04009004 if (!next) {
Jeff Mahoney2ff7e612016-06-22 18:54:24 -04009005 next = btrfs_find_create_tree_block(fs_info, bytenr);
Liu Boc871b0f2016-06-06 12:01:23 -07009006 if (IS_ERR(next))
9007 return PTR_ERR(next);
9008
Josef Bacikb2aaaa32013-07-05 17:05:38 -04009009 btrfs_set_buffer_lockdep_class(root->root_key.objectid, next,
9010 level - 1);
Yan, Zheng1c4850e2009-09-21 15:55:59 -04009011 reada = 1;
9012 }
9013 btrfs_tree_lock(next);
David Sterba8bead252018-04-04 02:03:48 +02009014 btrfs_set_lock_blocking_write(next);
Yan, Zheng1c4850e2009-09-21 15:55:59 -04009015
Jeff Mahoney2ff7e612016-06-22 18:54:24 -04009016 ret = btrfs_lookup_extent_info(trans, fs_info, bytenr, level - 1, 1,
Yan, Zheng94fcca92009-10-09 09:25:16 -04009017 &wc->refs[level - 1],
9018 &wc->flags[level - 1]);
Josef Bacik48672682016-09-23 13:23:28 +02009019 if (ret < 0)
9020 goto out_unlock;
Jeff Mahoney79787ea2012-03-12 16:03:00 +01009021
Simon Kirbyc2cf52e2013-03-19 22:41:23 +00009022 if (unlikely(wc->refs[level - 1] == 0)) {
Jeff Mahoney0b246af2016-06-22 18:54:23 -04009023 btrfs_err(fs_info, "Missing references.");
Josef Bacik48672682016-09-23 13:23:28 +02009024 ret = -EIO;
9025 goto out_unlock;
Simon Kirbyc2cf52e2013-03-19 22:41:23 +00009026 }
Yan, Zheng94fcca92009-10-09 09:25:16 -04009027 *lookup_info = 0;
Yan, Zheng1c4850e2009-09-21 15:55:59 -04009028
Yan, Zheng94fcca92009-10-09 09:25:16 -04009029 if (wc->stage == DROP_REFERENCE) {
Yan, Zheng1c4850e2009-09-21 15:55:59 -04009030 if (wc->refs[level - 1] > 1) {
Mark Fasheh11526512014-07-17 12:39:01 -07009031 need_account = true;
Yan, Zheng94fcca92009-10-09 09:25:16 -04009032 if (level == 1 &&
9033 (wc->flags[0] & BTRFS_BLOCK_FLAG_FULL_BACKREF))
9034 goto skip;
9035
Yan, Zheng1c4850e2009-09-21 15:55:59 -04009036 if (!wc->update_ref ||
9037 generation <= root->root_key.offset)
9038 goto skip;
9039
9040 btrfs_node_key_to_cpu(path->nodes[level], &key,
9041 path->slots[level]);
9042 ret = btrfs_comp_cpu_keys(&key, &wc->update_progress);
9043 if (ret < 0)
9044 goto skip;
9045
9046 wc->stage = UPDATE_BACKREF;
9047 wc->shared_level = level - 1;
9048 }
Yan, Zheng94fcca92009-10-09 09:25:16 -04009049 } else {
9050 if (level == 1 &&
9051 (wc->flags[0] & BTRFS_BLOCK_FLAG_FULL_BACKREF))
9052 goto skip;
Yan, Zheng1c4850e2009-09-21 15:55:59 -04009053 }
9054
Chris Masonb9fab912012-05-06 07:23:47 -04009055 if (!btrfs_buffer_uptodate(next, generation, 0)) {
Yan, Zheng1c4850e2009-09-21 15:55:59 -04009056 btrfs_tree_unlock(next);
9057 free_extent_buffer(next);
9058 next = NULL;
Yan, Zheng94fcca92009-10-09 09:25:16 -04009059 *lookup_info = 1;
Yan, Zheng1c4850e2009-09-21 15:55:59 -04009060 }
9061
9062 if (!next) {
9063 if (reada && level == 1)
9064 reada_walk_down(trans, root, wc, path);
Qu Wenruo581c1762018-03-29 09:08:11 +08009065 next = read_tree_block(fs_info, bytenr, generation, level - 1,
9066 &first_key);
Liu Bo64c043d2015-05-25 17:30:15 +08009067 if (IS_ERR(next)) {
9068 return PTR_ERR(next);
9069 } else if (!extent_buffer_uptodate(next)) {
Josef Bacik416bc652013-04-23 14:17:42 -04009070 free_extent_buffer(next);
Tsutomu Itoh97d9a8a2011-03-24 06:33:21 +00009071 return -EIO;
Josef Bacik416bc652013-04-23 14:17:42 -04009072 }
Yan, Zheng1c4850e2009-09-21 15:55:59 -04009073 btrfs_tree_lock(next);
David Sterba8bead252018-04-04 02:03:48 +02009074 btrfs_set_lock_blocking_write(next);
Yan, Zheng1c4850e2009-09-21 15:55:59 -04009075 }
9076
9077 level--;
Josef Bacik48672682016-09-23 13:23:28 +02009078 ASSERT(level == btrfs_header_level(next));
9079 if (level != btrfs_header_level(next)) {
9080 btrfs_err(root->fs_info, "mismatched level");
9081 ret = -EIO;
9082 goto out_unlock;
9083 }
Yan, Zheng1c4850e2009-09-21 15:55:59 -04009084 path->nodes[level] = next;
9085 path->slots[level] = 0;
Chris Masonbd681512011-07-16 15:23:14 -04009086 path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
Yan, Zheng1c4850e2009-09-21 15:55:59 -04009087 wc->level = level;
9088 if (wc->level == 1)
9089 wc->reada_slot = 0;
9090 return 0;
9091skip:
9092 wc->refs[level - 1] = 0;
9093 wc->flags[level - 1] = 0;
Yan, Zheng94fcca92009-10-09 09:25:16 -04009094 if (wc->stage == DROP_REFERENCE) {
9095 if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
9096 parent = path->nodes[level]->start;
9097 } else {
Josef Bacik48672682016-09-23 13:23:28 +02009098 ASSERT(root->root_key.objectid ==
Yan, Zheng94fcca92009-10-09 09:25:16 -04009099 btrfs_header_owner(path->nodes[level]));
Josef Bacik48672682016-09-23 13:23:28 +02009100 if (root->root_key.objectid !=
9101 btrfs_header_owner(path->nodes[level])) {
9102 btrfs_err(root->fs_info,
9103 "mismatched block owner");
9104 ret = -EIO;
9105 goto out_unlock;
9106 }
Yan, Zheng94fcca92009-10-09 09:25:16 -04009107 parent = 0;
9108 }
Yan, Zheng1c4850e2009-09-21 15:55:59 -04009109
Qu Wenruo2cd86d32018-09-27 14:42:33 +08009110 /*
Josef Bacik78c52d92019-02-06 15:46:14 -05009111 * If we had a drop_progress we need to verify the refs are set
9112 * as expected. If we find our ref then we know that from here
9113 * on out everything should be correct, and we can clear the
9114 * ->restarted flag.
9115 */
9116 if (wc->restarted) {
9117 ret = check_ref_exists(trans, root, bytenr, parent,
9118 level - 1);
9119 if (ret < 0)
9120 goto out_unlock;
9121 if (ret == 0)
9122 goto no_delete;
9123 ret = 0;
9124 wc->restarted = 0;
9125 }
9126
9127 /*
Qu Wenruo2cd86d32018-09-27 14:42:33 +08009128 * Reloc tree doesn't contribute to qgroup numbers, and we have
9129 * already accounted them at merge time (replace_path),
9130 * thus we could skip expensive subtree trace here.
9131 */
9132 if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID &&
9133 need_account) {
Lu Fengqideb40622018-07-18 14:45:38 +08009134 ret = btrfs_qgroup_trace_subtree(trans, next,
Qu Wenruo33d1f052016-10-18 09:31:28 +08009135 generation, level - 1);
Mark Fasheh11526512014-07-17 12:39:01 -07009136 if (ret) {
Jeff Mahoney0b246af2016-06-22 18:54:23 -04009137 btrfs_err_rl(fs_info,
Jeff Mahoney5d163e02016-09-20 10:05:00 -04009138 "Error %d accounting shared subtree. Quota is out of sync, rescan required.",
9139 ret);
Mark Fasheh11526512014-07-17 12:39:01 -07009140 }
9141 }
Josef Bacikaea6f022019-02-06 15:46:15 -05009142
9143 /*
9144 * We need to update the next key in our walk control so we can
9145 * update the drop_progress key accordingly. We don't care if
9146 * find_next_key doesn't find a key because that means we're at
9147 * the end and are going to clean up now.
9148 */
9149 wc->drop_level = level;
9150 find_next_key(path, level, &wc->drop_progress);
9151
Qu Wenruo01e0da42018-12-10 15:01:03 +08009152 ret = btrfs_free_extent(trans, root, bytenr, fs_info->nodesize,
Jeff Mahoney2ff7e612016-06-22 18:54:24 -04009153 parent, root->root_key.objectid,
9154 level - 1, 0);
Josef Bacik48672682016-09-23 13:23:28 +02009155 if (ret)
9156 goto out_unlock;
Yan, Zheng1c4850e2009-09-21 15:55:59 -04009157 }
Josef Bacik78c52d92019-02-06 15:46:14 -05009158no_delete:
Josef Bacik48672682016-09-23 13:23:28 +02009159 *lookup_info = 1;
9160 ret = 1;
9161
9162out_unlock:
Yan, Zheng1c4850e2009-09-21 15:55:59 -04009163 btrfs_tree_unlock(next);
9164 free_extent_buffer(next);
Josef Bacik48672682016-09-23 13:23:28 +02009165
9166 return ret;
Yan, Zheng1c4850e2009-09-21 15:55:59 -04009167}
9168
9169/*
Liu Bo2c016dc2012-12-26 15:32:17 +08009170 * helper to process tree block while walking up the tree.
Yan Zheng2c47e6052009-06-27 21:07:35 -04009171 *
9172 * when wc->stage == DROP_REFERENCE, this function drops
9173 * reference count on the block.
9174 *
9175 * when wc->stage == UPDATE_BACKREF, this function changes
9176 * wc->stage back to DROP_REFERENCE if we changed wc->stage
9177 * to UPDATE_BACKREF previously while processing the block.
9178 *
9179 * NOTE: return value 1 means we should stop walking up.
9180 */
9181static noinline int walk_up_proc(struct btrfs_trans_handle *trans,
9182 struct btrfs_root *root,
9183 struct btrfs_path *path,
9184 struct walk_control *wc)
9185{
Jeff Mahoney0b246af2016-06-22 18:54:23 -04009186 struct btrfs_fs_info *fs_info = root->fs_info;
Yan, Zhengf0486c62010-05-16 10:46:25 -04009187 int ret;
Yan Zheng2c47e6052009-06-27 21:07:35 -04009188 int level = wc->level;
9189 struct extent_buffer *eb = path->nodes[level];
9190 u64 parent = 0;
9191
9192 if (wc->stage == UPDATE_BACKREF) {
9193 BUG_ON(wc->shared_level < level);
9194 if (level < wc->shared_level)
9195 goto out;
9196
Yan Zheng2c47e6052009-06-27 21:07:35 -04009197 ret = find_next_key(path, level + 1, &wc->update_progress);
9198 if (ret > 0)
9199 wc->update_ref = 0;
9200
9201 wc->stage = DROP_REFERENCE;
9202 wc->shared_level = -1;
9203 path->slots[level] = 0;
9204
9205 /*
9206 * check reference count again if the block isn't locked.
9207 * we should start walking down the tree again if reference
9208 * count is one.
9209 */
9210 if (!path->locks[level]) {
9211 BUG_ON(level == 0);
9212 btrfs_tree_lock(eb);
David Sterba8bead252018-04-04 02:03:48 +02009213 btrfs_set_lock_blocking_write(eb);
Chris Masonbd681512011-07-16 15:23:14 -04009214 path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
Yan Zheng2c47e6052009-06-27 21:07:35 -04009215
Jeff Mahoney2ff7e612016-06-22 18:54:24 -04009216 ret = btrfs_lookup_extent_info(trans, fs_info,
Josef Bacik3173a182013-03-07 14:22:04 -05009217 eb->start, level, 1,
Yan Zheng2c47e6052009-06-27 21:07:35 -04009218 &wc->refs[level],
9219 &wc->flags[level]);
Jeff Mahoney79787ea2012-03-12 16:03:00 +01009220 if (ret < 0) {
9221 btrfs_tree_unlock_rw(eb, path->locks[level]);
Liu Bo3268a242012-12-28 09:33:19 +00009222 path->locks[level] = 0;
Jeff Mahoney79787ea2012-03-12 16:03:00 +01009223 return ret;
9224 }
Yan Zheng2c47e6052009-06-27 21:07:35 -04009225 BUG_ON(wc->refs[level] == 0);
9226 if (wc->refs[level] == 1) {
Chris Masonbd681512011-07-16 15:23:14 -04009227 btrfs_tree_unlock_rw(eb, path->locks[level]);
Liu Bo3268a242012-12-28 09:33:19 +00009228 path->locks[level] = 0;
Yan Zheng2c47e6052009-06-27 21:07:35 -04009229 return 1;
9230 }
Yan Zheng2c47e6052009-06-27 21:07:35 -04009231 }
9232 }
9233
9234 /* wc->stage == DROP_REFERENCE */
9235 BUG_ON(wc->refs[level] > 1 && !path->locks[level]);
9236
9237 if (wc->refs[level] == 1) {
9238 if (level == 0) {
9239 if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF)
Josef Bacike339a6b2014-07-02 10:54:25 -07009240 ret = btrfs_dec_ref(trans, root, eb, 1);
Yan Zheng2c47e6052009-06-27 21:07:35 -04009241 else
Josef Bacike339a6b2014-07-02 10:54:25 -07009242 ret = btrfs_dec_ref(trans, root, eb, 0);
Jeff Mahoney79787ea2012-03-12 16:03:00 +01009243 BUG_ON(ret); /* -ENOMEM */
Lu Fengqi8d38d7e2018-07-18 14:45:37 +08009244 ret = btrfs_qgroup_trace_leaf_items(trans, eb);
Mark Fasheh11526512014-07-17 12:39:01 -07009245 if (ret) {
Jeff Mahoney0b246af2016-06-22 18:54:23 -04009246 btrfs_err_rl(fs_info,
Jeff Mahoney5d163e02016-09-20 10:05:00 -04009247 "error %d accounting leaf items. Quota is out of sync, rescan required.",
9248 ret);
Mark Fasheh11526512014-07-17 12:39:01 -07009249 }
Yan Zheng2c47e6052009-06-27 21:07:35 -04009250 }
David Sterba6a884d7d2019-03-20 14:30:02 +01009251 /* make block locked assertion in btrfs_clean_tree_block happy */
Yan Zheng2c47e6052009-06-27 21:07:35 -04009252 if (!path->locks[level] &&
9253 btrfs_header_generation(eb) == trans->transid) {
9254 btrfs_tree_lock(eb);
David Sterba8bead252018-04-04 02:03:48 +02009255 btrfs_set_lock_blocking_write(eb);
Chris Masonbd681512011-07-16 15:23:14 -04009256 path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
Yan Zheng2c47e6052009-06-27 21:07:35 -04009257 }
David Sterba6a884d7d2019-03-20 14:30:02 +01009258 btrfs_clean_tree_block(eb);
Yan Zheng2c47e6052009-06-27 21:07:35 -04009259 }
9260
9261 if (eb == root->node) {
9262 if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF)
9263 parent = eb->start;
Qu Wenruo65c6e822018-08-21 09:42:03 +08009264 else if (root->root_key.objectid != btrfs_header_owner(eb))
9265 goto owner_mismatch;
Yan Zheng2c47e6052009-06-27 21:07:35 -04009266 } else {
9267 if (wc->flags[level + 1] & BTRFS_BLOCK_FLAG_FULL_BACKREF)
9268 parent = path->nodes[level + 1]->start;
Qu Wenruo65c6e822018-08-21 09:42:03 +08009269 else if (root->root_key.objectid !=
9270 btrfs_header_owner(path->nodes[level + 1]))
9271 goto owner_mismatch;
Yan Zheng2c47e6052009-06-27 21:07:35 -04009272 }
9273
Jan Schmidt5581a512012-05-16 17:04:52 +02009274 btrfs_free_tree_block(trans, root, eb, parent, wc->refs[level] == 1);
Yan Zheng2c47e6052009-06-27 21:07:35 -04009275out:
9276 wc->refs[level] = 0;
9277 wc->flags[level] = 0;
Yan, Zhengf0486c62010-05-16 10:46:25 -04009278 return 0;
Qu Wenruo65c6e822018-08-21 09:42:03 +08009279
9280owner_mismatch:
9281 btrfs_err_rl(fs_info, "unexpected tree owner, have %llu expect %llu",
9282 btrfs_header_owner(eb), root->root_key.objectid);
9283 return -EUCLEAN;
Yan Zheng2c47e6052009-06-27 21:07:35 -04009284}
9285
Yan Zheng5d4f98a2009-06-10 10:45:14 -04009286static noinline int walk_down_tree(struct btrfs_trans_handle *trans,
9287 struct btrfs_root *root,
Yan Zheng2c47e6052009-06-27 21:07:35 -04009288 struct btrfs_path *path,
9289 struct walk_control *wc)
Yan Zhengf82d02d2008-10-29 14:49:05 -04009290{
Yan Zheng2c47e6052009-06-27 21:07:35 -04009291 int level = wc->level;
Yan, Zheng94fcca92009-10-09 09:25:16 -04009292 int lookup_info = 1;
Yan Zhengf82d02d2008-10-29 14:49:05 -04009293 int ret;
9294
Yan Zheng2c47e6052009-06-27 21:07:35 -04009295 while (level >= 0) {
Yan, Zheng94fcca92009-10-09 09:25:16 -04009296 ret = walk_down_proc(trans, root, path, wc, lookup_info);
Yan Zheng2c47e6052009-06-27 21:07:35 -04009297 if (ret > 0)
Yan Zhengf82d02d2008-10-29 14:49:05 -04009298 break;
Yan Zhengf82d02d2008-10-29 14:49:05 -04009299
Yan Zheng2c47e6052009-06-27 21:07:35 -04009300 if (level == 0)
9301 break;
9302
Yan, Zheng7a7965f2010-02-01 02:41:17 +00009303 if (path->slots[level] >=
9304 btrfs_header_nritems(path->nodes[level]))
9305 break;
9306
Yan, Zheng94fcca92009-10-09 09:25:16 -04009307 ret = do_walk_down(trans, root, path, wc, &lookup_info);
Yan, Zheng1c4850e2009-09-21 15:55:59 -04009308 if (ret > 0) {
9309 path->slots[level]++;
9310 continue;
Miao Xie90d2c51d2010-03-25 12:37:12 +00009311 } else if (ret < 0)
9312 return ret;
Yan, Zheng1c4850e2009-09-21 15:55:59 -04009313 level = wc->level;
Yan Zhengf82d02d2008-10-29 14:49:05 -04009314 }
Yan Zhengf82d02d2008-10-29 14:49:05 -04009315 return 0;
9316}
9317
Chris Masond3977122009-01-05 21:25:51 -05009318static noinline int walk_up_tree(struct btrfs_trans_handle *trans,
Chris Mason98ed5172008-01-03 10:01:48 -05009319 struct btrfs_root *root,
Yan Zhengf82d02d2008-10-29 14:49:05 -04009320 struct btrfs_path *path,
Yan Zheng2c47e6052009-06-27 21:07:35 -04009321 struct walk_control *wc, int max_level)
Chris Mason20524f02007-03-10 06:35:47 -05009322{
Yan Zheng2c47e6052009-06-27 21:07:35 -04009323 int level = wc->level;
Chris Mason20524f02007-03-10 06:35:47 -05009324 int ret;
Chris Mason9f3a7422007-08-07 15:52:19 -04009325
Yan Zheng2c47e6052009-06-27 21:07:35 -04009326 path->slots[level] = btrfs_header_nritems(path->nodes[level]);
9327 while (level < max_level && path->nodes[level]) {
9328 wc->level = level;
9329 if (path->slots[level] + 1 <
9330 btrfs_header_nritems(path->nodes[level])) {
9331 path->slots[level]++;
Chris Mason20524f02007-03-10 06:35:47 -05009332 return 0;
9333 } else {
Yan Zheng2c47e6052009-06-27 21:07:35 -04009334 ret = walk_up_proc(trans, root, path, wc);
9335 if (ret > 0)
9336 return 0;
Qu Wenruo65c6e822018-08-21 09:42:03 +08009337 if (ret < 0)
9338 return ret;
Chris Masonbd56b302009-02-04 09:27:02 -05009339
Yan Zheng2c47e6052009-06-27 21:07:35 -04009340 if (path->locks[level]) {
Chris Masonbd681512011-07-16 15:23:14 -04009341 btrfs_tree_unlock_rw(path->nodes[level],
9342 path->locks[level]);
Yan Zheng2c47e6052009-06-27 21:07:35 -04009343 path->locks[level] = 0;
Yan Zhengf82d02d2008-10-29 14:49:05 -04009344 }
Yan Zheng2c47e6052009-06-27 21:07:35 -04009345 free_extent_buffer(path->nodes[level]);
9346 path->nodes[level] = NULL;
9347 level++;
Chris Mason20524f02007-03-10 06:35:47 -05009348 }
9349 }
9350 return 1;
9351}
9352
Chris Mason9aca1d52007-03-13 11:09:37 -04009353/*
Yan Zheng2c47e6052009-06-27 21:07:35 -04009354 * drop a subvolume tree.
9355 *
9356 * this function traverses the tree freeing any blocks that only
9357 * referenced by the tree.
9358 *
9359 * when a shared tree block is found. this function decreases its
9360 * reference count by one. if update_ref is true, this function
9361 * also make sure backrefs for the shared block and all lower level
9362 * blocks are properly updated.
David Sterba9d1a2a32013-03-12 15:13:28 +00009363 *
9364 * If called with for_reloc == 0, may exit early with -EAGAIN
Chris Mason9aca1d52007-03-13 11:09:37 -04009365 */
Jeff Mahoney2c536792011-10-03 23:22:41 -04009366int btrfs_drop_snapshot(struct btrfs_root *root,
Arne Jansen66d7e7f2011-09-12 15:26:38 +02009367 struct btrfs_block_rsv *block_rsv, int update_ref,
9368 int for_reloc)
Chris Mason20524f02007-03-10 06:35:47 -05009369{
Jeff Mahoneyab8d0fc2016-09-20 10:05:02 -04009370 struct btrfs_fs_info *fs_info = root->fs_info;
Chris Mason5caf2a02007-04-02 11:20:42 -04009371 struct btrfs_path *path;
Yan Zheng2c47e6052009-06-27 21:07:35 -04009372 struct btrfs_trans_handle *trans;
Jeff Mahoneyab8d0fc2016-09-20 10:05:02 -04009373 struct btrfs_root *tree_root = fs_info->tree_root;
Chris Mason9f3a7422007-08-07 15:52:19 -04009374 struct btrfs_root_item *root_item = &root->root_item;
Yan Zheng2c47e6052009-06-27 21:07:35 -04009375 struct walk_control *wc;
9376 struct btrfs_key key;
9377 int err = 0;
9378 int ret;
9379 int level;
Josef Bacikd29a9f62013-07-17 19:30:20 -04009380 bool root_dropped = false;
Chris Mason20524f02007-03-10 06:35:47 -05009381
Misono Tomohiro4fd786e2018-08-06 14:25:24 +09009382 btrfs_debug(fs_info, "Drop subvolume %llu", root->root_key.objectid);
Mark Fasheh11526512014-07-17 12:39:01 -07009383
Chris Mason5caf2a02007-04-02 11:20:42 -04009384 path = btrfs_alloc_path();
Tsutomu Itohcb1b69f2011-08-09 07:11:13 +00009385 if (!path) {
9386 err = -ENOMEM;
9387 goto out;
9388 }
Chris Mason20524f02007-03-10 06:35:47 -05009389
Yan Zheng2c47e6052009-06-27 21:07:35 -04009390 wc = kzalloc(sizeof(*wc), GFP_NOFS);
Mark Fasheh38a1a912011-07-13 10:59:59 -07009391 if (!wc) {
9392 btrfs_free_path(path);
Tsutomu Itohcb1b69f2011-08-09 07:11:13 +00009393 err = -ENOMEM;
9394 goto out;
Mark Fasheh38a1a912011-07-13 10:59:59 -07009395 }
Yan Zheng2c47e6052009-06-27 21:07:35 -04009396
Yan, Zhenga22285a2010-05-16 10:48:46 -04009397 trans = btrfs_start_transaction(tree_root, 0);
Jeff Mahoney79787ea2012-03-12 16:03:00 +01009398 if (IS_ERR(trans)) {
9399 err = PTR_ERR(trans);
9400 goto out_free;
9401 }
Tsutomu Itoh98d5dc12011-01-20 06:19:37 +00009402
Josef Bacik0568e822018-11-30 11:52:14 -05009403 err = btrfs_run_delayed_items(trans);
9404 if (err)
9405 goto out_end_trans;
9406
Yan, Zheng3fd0a552010-05-16 10:49:59 -04009407 if (block_rsv)
9408 trans->block_rsv = block_rsv;
Yan Zheng2c47e6052009-06-27 21:07:35 -04009409
Josef Bacik83354f02018-11-30 11:52:13 -05009410 /*
9411 * This will help us catch people modifying the fs tree while we're
9412 * dropping it. It is unsafe to mess with the fs tree while it's being
9413 * dropped as we unlock the root node and parent nodes as we walk down
9414 * the tree, assuming nothing will change. If something does change
9415 * then we'll have stale information and drop references to blocks we've
9416 * already dropped.
9417 */
9418 set_bit(BTRFS_ROOT_DELETING, &root->state);
Chris Mason9f3a7422007-08-07 15:52:19 -04009419 if (btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
Yan Zheng2c47e6052009-06-27 21:07:35 -04009420 level = btrfs_header_level(root->node);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04009421 path->nodes[level] = btrfs_lock_root_node(root);
David Sterba8bead252018-04-04 02:03:48 +02009422 btrfs_set_lock_blocking_write(path->nodes[level]);
Chris Mason9f3a7422007-08-07 15:52:19 -04009423 path->slots[level] = 0;
Chris Masonbd681512011-07-16 15:23:14 -04009424 path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
Yan Zheng2c47e6052009-06-27 21:07:35 -04009425 memset(&wc->update_progress, 0,
9426 sizeof(wc->update_progress));
Chris Mason9f3a7422007-08-07 15:52:19 -04009427 } else {
Chris Mason9f3a7422007-08-07 15:52:19 -04009428 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
Yan Zheng2c47e6052009-06-27 21:07:35 -04009429 memcpy(&wc->update_progress, &key,
9430 sizeof(wc->update_progress));
9431
Chris Mason6702ed42007-08-07 16:15:09 -04009432 level = root_item->drop_level;
Yan Zheng2c47e6052009-06-27 21:07:35 -04009433 BUG_ON(level == 0);
Chris Mason6702ed42007-08-07 16:15:09 -04009434 path->lowest_level = level;
Yan Zheng2c47e6052009-06-27 21:07:35 -04009435 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
9436 path->lowest_level = 0;
9437 if (ret < 0) {
9438 err = ret;
Jeff Mahoney79787ea2012-03-12 16:03:00 +01009439 goto out_end_trans;
Chris Mason9f3a7422007-08-07 15:52:19 -04009440 }
Yan, Zheng1c4850e2009-09-21 15:55:59 -04009441 WARN_ON(ret > 0);
Yan Zheng2c47e6052009-06-27 21:07:35 -04009442
Chris Mason7d9eb122008-07-08 14:19:17 -04009443 /*
9444 * unlock our path, this is safe because only this
9445 * function is allowed to delete this snapshot
9446 */
Yan Zheng5d4f98a2009-06-10 10:45:14 -04009447 btrfs_unlock_up_safe(path, 0);
Chris Mason9aca1d52007-03-13 11:09:37 -04009448
Yan Zheng2c47e6052009-06-27 21:07:35 -04009449 level = btrfs_header_level(root->node);
9450 while (1) {
9451 btrfs_tree_lock(path->nodes[level]);
David Sterba8bead252018-04-04 02:03:48 +02009452 btrfs_set_lock_blocking_write(path->nodes[level]);
Josef Bacikfec386a2013-07-15 12:41:42 -04009453 path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
Yan Zheng2c47e6052009-06-27 21:07:35 -04009454
Jeff Mahoney2ff7e612016-06-22 18:54:24 -04009455 ret = btrfs_lookup_extent_info(trans, fs_info,
Yan Zheng2c47e6052009-06-27 21:07:35 -04009456 path->nodes[level]->start,
Josef Bacik3173a182013-03-07 14:22:04 -05009457 level, 1, &wc->refs[level],
Yan Zheng2c47e6052009-06-27 21:07:35 -04009458 &wc->flags[level]);
Jeff Mahoney79787ea2012-03-12 16:03:00 +01009459 if (ret < 0) {
9460 err = ret;
9461 goto out_end_trans;
9462 }
Yan Zheng2c47e6052009-06-27 21:07:35 -04009463 BUG_ON(wc->refs[level] == 0);
9464
9465 if (level == root_item->drop_level)
9466 break;
9467
9468 btrfs_tree_unlock(path->nodes[level]);
Josef Bacikfec386a2013-07-15 12:41:42 -04009469 path->locks[level] = 0;
Yan Zheng2c47e6052009-06-27 21:07:35 -04009470 WARN_ON(wc->refs[level] != 1);
9471 level--;
9472 }
9473 }
9474
Josef Bacik78c52d92019-02-06 15:46:14 -05009475 wc->restarted = test_bit(BTRFS_ROOT_DEAD_TREE, &root->state);
Yan Zheng2c47e6052009-06-27 21:07:35 -04009476 wc->level = level;
9477 wc->shared_level = -1;
9478 wc->stage = DROP_REFERENCE;
9479 wc->update_ref = update_ref;
9480 wc->keep_locks = 0;
Jeff Mahoney0b246af2016-06-22 18:54:23 -04009481 wc->reada_count = BTRFS_NODEPTRS_PER_BLOCK(fs_info);
Yan Zheng2c47e6052009-06-27 21:07:35 -04009482
9483 while (1) {
David Sterba9d1a2a32013-03-12 15:13:28 +00009484
Yan Zheng2c47e6052009-06-27 21:07:35 -04009485 ret = walk_down_tree(trans, root, path, wc);
9486 if (ret < 0) {
9487 err = ret;
Chris Masone7a84562008-06-25 16:01:31 -04009488 break;
9489 }
Yan Zheng2c47e6052009-06-27 21:07:35 -04009490
9491 ret = walk_up_tree(trans, root, path, wc, BTRFS_MAX_LEVEL);
9492 if (ret < 0) {
9493 err = ret;
9494 break;
9495 }
9496
9497 if (ret > 0) {
9498 BUG_ON(wc->stage != DROP_REFERENCE);
9499 break;
9500 }
9501
9502 if (wc->stage == DROP_REFERENCE) {
Josef Bacikaea6f022019-02-06 15:46:15 -05009503 wc->drop_level = wc->level;
9504 btrfs_node_key_to_cpu(path->nodes[wc->drop_level],
9505 &wc->drop_progress,
9506 path->slots[wc->drop_level]);
Yan Zheng2c47e6052009-06-27 21:07:35 -04009507 }
Josef Bacikaea6f022019-02-06 15:46:15 -05009508 btrfs_cpu_key_to_disk(&root_item->drop_progress,
9509 &wc->drop_progress);
9510 root_item->drop_level = wc->drop_level;
Yan Zheng2c47e6052009-06-27 21:07:35 -04009511
9512 BUG_ON(wc->level == 0);
Jeff Mahoney3a45bb22016-09-09 21:39:03 -04009513 if (btrfs_should_end_transaction(trans) ||
Jeff Mahoney2ff7e612016-06-22 18:54:24 -04009514 (!for_reloc && btrfs_need_cleaner_sleep(fs_info))) {
Yan Zheng2c47e6052009-06-27 21:07:35 -04009515 ret = btrfs_update_root(trans, tree_root,
9516 &root->root_key,
9517 root_item);
Jeff Mahoney79787ea2012-03-12 16:03:00 +01009518 if (ret) {
Jeff Mahoney66642832016-06-10 18:19:25 -04009519 btrfs_abort_transaction(trans, ret);
Jeff Mahoney79787ea2012-03-12 16:03:00 +01009520 err = ret;
9521 goto out_end_trans;
9522 }
Yan Zheng2c47e6052009-06-27 21:07:35 -04009523
Jeff Mahoney3a45bb22016-09-09 21:39:03 -04009524 btrfs_end_transaction_throttle(trans);
Jeff Mahoney2ff7e612016-06-22 18:54:24 -04009525 if (!for_reloc && btrfs_need_cleaner_sleep(fs_info)) {
Jeff Mahoneyab8d0fc2016-09-20 10:05:02 -04009526 btrfs_debug(fs_info,
9527 "drop snapshot early exit");
Josef Bacik3c8f2422013-07-15 11:57:06 -04009528 err = -EAGAIN;
9529 goto out_free;
9530 }
9531
Yan, Zhenga22285a2010-05-16 10:48:46 -04009532 trans = btrfs_start_transaction(tree_root, 0);
Jeff Mahoney79787ea2012-03-12 16:03:00 +01009533 if (IS_ERR(trans)) {
9534 err = PTR_ERR(trans);
9535 goto out_free;
9536 }
Yan, Zheng3fd0a552010-05-16 10:49:59 -04009537 if (block_rsv)
9538 trans->block_rsv = block_rsv;
Chris Masonc3e69d52009-03-13 10:17:05 -04009539 }
Chris Mason20524f02007-03-10 06:35:47 -05009540 }
David Sterbab3b4aa72011-04-21 01:20:15 +02009541 btrfs_release_path(path);
Jeff Mahoney79787ea2012-03-12 16:03:00 +01009542 if (err)
9543 goto out_end_trans;
Yan Zheng2c47e6052009-06-27 21:07:35 -04009544
Lu Fengqiab9ce7d2018-08-01 11:32:27 +08009545 ret = btrfs_del_root(trans, &root->root_key);
Jeff Mahoney79787ea2012-03-12 16:03:00 +01009546 if (ret) {
Jeff Mahoney66642832016-06-10 18:19:25 -04009547 btrfs_abort_transaction(trans, ret);
Jeff Mahoneye19182c2017-12-04 13:11:45 -05009548 err = ret;
Jeff Mahoney79787ea2012-03-12 16:03:00 +01009549 goto out_end_trans;
9550 }
Yan Zheng2c47e6052009-06-27 21:07:35 -04009551
Yan, Zheng76dda932009-09-21 16:00:26 -04009552 if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
Miao Xiecb517ea2013-05-15 07:48:19 +00009553 ret = btrfs_find_root(tree_root, &root->root_key, path,
9554 NULL, NULL);
Jeff Mahoney79787ea2012-03-12 16:03:00 +01009555 if (ret < 0) {
Jeff Mahoney66642832016-06-10 18:19:25 -04009556 btrfs_abort_transaction(trans, ret);
Jeff Mahoney79787ea2012-03-12 16:03:00 +01009557 err = ret;
9558 goto out_end_trans;
9559 } else if (ret > 0) {
Josef Bacik84cd9482010-12-08 12:24:01 -05009560 /* if we fail to delete the orphan item this time
9561 * around, it'll get picked up the next time.
9562 *
9563 * The most common failure here is just -ENOENT.
9564 */
9565 btrfs_del_orphan_item(trans, tree_root,
9566 root->root_key.objectid);
Yan, Zheng76dda932009-09-21 16:00:26 -04009567 }
9568 }
9569
Miao Xie27cdeb72014-04-02 19:51:05 +08009570 if (test_bit(BTRFS_ROOT_IN_RADIX, &root->state)) {
Josef Bacik2b9dbef2015-09-15 10:07:04 -04009571 btrfs_add_dropped_root(trans, root);
Yan, Zheng76dda932009-09-21 16:00:26 -04009572 } else {
9573 free_extent_buffer(root->node);
9574 free_extent_buffer(root->commit_root);
Miao Xieb0feb9d2013-05-15 07:48:20 +00009575 btrfs_put_fs_root(root);
Yan, Zheng76dda932009-09-21 16:00:26 -04009576 }
Josef Bacikd29a9f62013-07-17 19:30:20 -04009577 root_dropped = true;
Jeff Mahoney79787ea2012-03-12 16:03:00 +01009578out_end_trans:
Jeff Mahoney3a45bb22016-09-09 21:39:03 -04009579 btrfs_end_transaction_throttle(trans);
Jeff Mahoney79787ea2012-03-12 16:03:00 +01009580out_free:
Yan Zheng2c47e6052009-06-27 21:07:35 -04009581 kfree(wc);
Chris Mason5caf2a02007-04-02 11:20:42 -04009582 btrfs_free_path(path);
Tsutomu Itohcb1b69f2011-08-09 07:11:13 +00009583out:
Josef Bacikd29a9f62013-07-17 19:30:20 -04009584 /*
9585 * So if we need to stop dropping the snapshot for whatever reason we
9586 * need to make sure to add it back to the dead root list so that we
9587 * keep trying to do the work later. This also cleans up roots if we
9588 * don't have it in the radix (like when we recover after a power fail
9589 * or unmount) so we don't leak memory.
9590 */
Thomas Meyer897ca812017-10-07 16:02:21 +02009591 if (!for_reloc && !root_dropped)
Josef Bacikd29a9f62013-07-17 19:30:20 -04009592 btrfs_add_dead_root(root);
Wang Shilong90515e72014-01-07 17:26:58 +08009593 if (err && err != -EAGAIN)
Jeff Mahoneyab8d0fc2016-09-20 10:05:02 -04009594 btrfs_handle_fs_error(fs_info, err, NULL);
Jeff Mahoney2c536792011-10-03 23:22:41 -04009595 return err;
Chris Mason20524f02007-03-10 06:35:47 -05009596}
Chris Mason9078a3e2007-04-26 16:46:15 -04009597
Yan Zheng2c47e6052009-06-27 21:07:35 -04009598/*
9599 * drop subtree rooted at tree block 'node'.
9600 *
9601 * NOTE: this function will unlock and release tree block 'node'
Arne Jansen66d7e7f2011-09-12 15:26:38 +02009602 * only used by relocation code
Yan Zheng2c47e6052009-06-27 21:07:35 -04009603 */
Yan Zhengf82d02d2008-10-29 14:49:05 -04009604int btrfs_drop_subtree(struct btrfs_trans_handle *trans,
9605 struct btrfs_root *root,
9606 struct extent_buffer *node,
9607 struct extent_buffer *parent)
9608{
Jeff Mahoney0b246af2016-06-22 18:54:23 -04009609 struct btrfs_fs_info *fs_info = root->fs_info;
Yan Zhengf82d02d2008-10-29 14:49:05 -04009610 struct btrfs_path *path;
Yan Zheng2c47e6052009-06-27 21:07:35 -04009611 struct walk_control *wc;
Yan Zhengf82d02d2008-10-29 14:49:05 -04009612 int level;
9613 int parent_level;
9614 int ret = 0;
9615 int wret;
9616
Yan Zheng2c47e6052009-06-27 21:07:35 -04009617 BUG_ON(root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID);
9618
Yan Zhengf82d02d2008-10-29 14:49:05 -04009619 path = btrfs_alloc_path();
Tsutomu Itohdb5b4932011-03-23 08:14:16 +00009620 if (!path)
9621 return -ENOMEM;
Yan Zhengf82d02d2008-10-29 14:49:05 -04009622
Yan Zheng2c47e6052009-06-27 21:07:35 -04009623 wc = kzalloc(sizeof(*wc), GFP_NOFS);
Tsutomu Itohdb5b4932011-03-23 08:14:16 +00009624 if (!wc) {
9625 btrfs_free_path(path);
9626 return -ENOMEM;
9627 }
Yan Zheng2c47e6052009-06-27 21:07:35 -04009628
Chris Masonb9447ef82009-03-09 11:45:38 -04009629 btrfs_assert_tree_locked(parent);
Yan Zhengf82d02d2008-10-29 14:49:05 -04009630 parent_level = btrfs_header_level(parent);
9631 extent_buffer_get(parent);
9632 path->nodes[parent_level] = parent;
9633 path->slots[parent_level] = btrfs_header_nritems(parent);
9634
Chris Masonb9447ef82009-03-09 11:45:38 -04009635 btrfs_assert_tree_locked(node);
Yan Zhengf82d02d2008-10-29 14:49:05 -04009636 level = btrfs_header_level(node);
Yan Zhengf82d02d2008-10-29 14:49:05 -04009637 path->nodes[level] = node;
9638 path->slots[level] = 0;
Chris Masonbd681512011-07-16 15:23:14 -04009639 path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
Yan Zheng2c47e6052009-06-27 21:07:35 -04009640
9641 wc->refs[parent_level] = 1;
9642 wc->flags[parent_level] = BTRFS_BLOCK_FLAG_FULL_BACKREF;
9643 wc->level = level;
9644 wc->shared_level = -1;
9645 wc->stage = DROP_REFERENCE;
9646 wc->update_ref = 0;
9647 wc->keep_locks = 1;
Jeff Mahoney0b246af2016-06-22 18:54:23 -04009648 wc->reada_count = BTRFS_NODEPTRS_PER_BLOCK(fs_info);
Yan Zhengf82d02d2008-10-29 14:49:05 -04009649
9650 while (1) {
Yan Zheng2c47e6052009-06-27 21:07:35 -04009651 wret = walk_down_tree(trans, root, path, wc);
9652 if (wret < 0) {
Yan Zhengf82d02d2008-10-29 14:49:05 -04009653 ret = wret;
Yan Zhengf82d02d2008-10-29 14:49:05 -04009654 break;
Yan Zheng2c47e6052009-06-27 21:07:35 -04009655 }
Yan Zhengf82d02d2008-10-29 14:49:05 -04009656
Yan Zheng2c47e6052009-06-27 21:07:35 -04009657 wret = walk_up_tree(trans, root, path, wc, parent_level);
Yan Zhengf82d02d2008-10-29 14:49:05 -04009658 if (wret < 0)
9659 ret = wret;
9660 if (wret != 0)
9661 break;
9662 }
9663
Yan Zheng2c47e6052009-06-27 21:07:35 -04009664 kfree(wc);
Yan Zhengf82d02d2008-10-29 14:49:05 -04009665 btrfs_free_path(path);
9666 return ret;
9667}
9668
Jeff Mahoney6202df62016-06-22 18:54:22 -04009669static u64 update_block_group_flags(struct btrfs_fs_info *fs_info, u64 flags)
Chris Masonec44a352008-04-28 15:29:52 -04009670{
9671 u64 num_devices;
Ilya Dryomovfc67c452012-03-27 17:09:17 +03009672 u64 stripped;
Chris Masonec44a352008-04-28 15:29:52 -04009673
Ilya Dryomovfc67c452012-03-27 17:09:17 +03009674 /*
9675 * if restripe for this chunk_type is on pick target profile and
9676 * return, otherwise do the usual balance
9677 */
Jeff Mahoney6202df62016-06-22 18:54:22 -04009678 stripped = get_restripe_target(fs_info, flags);
Ilya Dryomovfc67c452012-03-27 17:09:17 +03009679 if (stripped)
9680 return extended_to_chunk(stripped);
Ilya Dryomove4d8ec02012-01-16 22:04:48 +02009681
Jeff Mahoney6202df62016-06-22 18:54:22 -04009682 num_devices = fs_info->fs_devices->rw_devices;
Chris Masoncd02dca2010-12-13 14:56:23 -05009683
Ilya Dryomovfc67c452012-03-27 17:09:17 +03009684 stripped = BTRFS_BLOCK_GROUP_RAID0 |
David Woodhouse53b381b2013-01-29 18:40:14 -05009685 BTRFS_BLOCK_GROUP_RAID5 | BTRFS_BLOCK_GROUP_RAID6 |
Ilya Dryomovfc67c452012-03-27 17:09:17 +03009686 BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID10;
9687
Chris Masonec44a352008-04-28 15:29:52 -04009688 if (num_devices == 1) {
9689 stripped |= BTRFS_BLOCK_GROUP_DUP;
9690 stripped = flags & ~stripped;
9691
9692 /* turn raid0 into single device chunks */
9693 if (flags & BTRFS_BLOCK_GROUP_RAID0)
9694 return stripped;
9695
9696 /* turn mirroring into duplication */
9697 if (flags & (BTRFS_BLOCK_GROUP_RAID1 |
9698 BTRFS_BLOCK_GROUP_RAID10))
9699 return stripped | BTRFS_BLOCK_GROUP_DUP;
Chris Masonec44a352008-04-28 15:29:52 -04009700 } else {
9701 /* they already had raid on here, just return */
Chris Masonec44a352008-04-28 15:29:52 -04009702 if (flags & stripped)
9703 return flags;
9704
9705 stripped |= BTRFS_BLOCK_GROUP_DUP;
9706 stripped = flags & ~stripped;
9707
9708 /* switch duplicated blocks with raid1 */
9709 if (flags & BTRFS_BLOCK_GROUP_DUP)
9710 return stripped | BTRFS_BLOCK_GROUP_RAID1;
9711
Ilya Dryomove3176ca2012-03-27 17:09:16 +03009712 /* this is drive concat, leave it alone */
Chris Masonec44a352008-04-28 15:29:52 -04009713 }
Ilya Dryomove3176ca2012-03-27 17:09:16 +03009714
Chris Masonec44a352008-04-28 15:29:52 -04009715 return flags;
9716}
9717
Zhaolei868f4012015-08-05 16:43:27 +08009718static int inc_block_group_ro(struct btrfs_block_group_cache *cache, int force)
Chris Mason0ef3e662008-05-24 14:04:53 -04009719{
Yan, Zhengf0486c62010-05-16 10:46:25 -04009720 struct btrfs_space_info *sinfo = cache->space_info;
9721 u64 num_bytes;
Qu Wenruo3ece54e2019-01-30 13:07:51 +08009722 u64 sinfo_used;
Miao Xie199c36e2011-07-15 10:34:36 +00009723 u64 min_allocable_bytes;
Yan, Zhengf0486c62010-05-16 10:46:25 -04009724 int ret = -ENOSPC;
Chris Mason0ef3e662008-05-24 14:04:53 -04009725
Miao Xie199c36e2011-07-15 10:34:36 +00009726 /*
9727 * We need some metadata space and system metadata space for
9728 * allocating chunks in some corner cases until we force to set
9729 * it to be readonly.
9730 */
9731 if ((sinfo->flags &
9732 (BTRFS_BLOCK_GROUP_SYSTEM | BTRFS_BLOCK_GROUP_METADATA)) &&
9733 !force)
Byongho Leeee221842015-12-15 01:42:10 +09009734 min_allocable_bytes = SZ_1M;
Miao Xie199c36e2011-07-15 10:34:36 +00009735 else
9736 min_allocable_bytes = 0;
9737
Yan, Zhengf0486c62010-05-16 10:46:25 -04009738 spin_lock(&sinfo->lock);
9739 spin_lock(&cache->lock);
WuBo61cfea92011-07-26 03:30:11 +00009740
9741 if (cache->ro) {
Zhaolei868f4012015-08-05 16:43:27 +08009742 cache->ro++;
WuBo61cfea92011-07-26 03:30:11 +00009743 ret = 0;
9744 goto out;
9745 }
9746
Yan, Zhengf0486c62010-05-16 10:46:25 -04009747 num_bytes = cache->key.offset - cache->reserved - cache->pinned -
9748 cache->bytes_super - btrfs_block_group_used(&cache->item);
Qu Wenruo3ece54e2019-01-30 13:07:51 +08009749 sinfo_used = btrfs_space_info_used(sinfo, true);
Chris Mason7d9eb122008-07-08 14:19:17 -04009750
Qu Wenruo3ece54e2019-01-30 13:07:51 +08009751 if (sinfo_used + num_bytes + min_allocable_bytes <=
9752 sinfo->total_bytes) {
Yan, Zhengf0486c62010-05-16 10:46:25 -04009753 sinfo->bytes_readonly += num_bytes;
Zhaolei868f4012015-08-05 16:43:27 +08009754 cache->ro++;
Josef Bacik633c0aa2014-10-31 09:49:34 -04009755 list_add_tail(&cache->ro_list, &sinfo->ro_bgs);
Yan, Zhengf0486c62010-05-16 10:46:25 -04009756 ret = 0;
9757 }
WuBo61cfea92011-07-26 03:30:11 +00009758out:
Yan, Zhengf0486c62010-05-16 10:46:25 -04009759 spin_unlock(&cache->lock);
9760 spin_unlock(&sinfo->lock);
Qu Wenruo3ece54e2019-01-30 13:07:51 +08009761 if (ret == -ENOSPC && btrfs_test_opt(cache->fs_info, ENOSPC_DEBUG)) {
9762 btrfs_info(cache->fs_info,
9763 "unable to make block group %llu ro",
9764 cache->key.objectid);
9765 btrfs_info(cache->fs_info,
9766 "sinfo_used=%llu bg_num_bytes=%llu min_allocable=%llu",
9767 sinfo_used, num_bytes, min_allocable_bytes);
9768 dump_space_info(cache->fs_info, cache->space_info, 0, 0);
9769 }
Yan, Zhengf0486c62010-05-16 10:46:25 -04009770 return ret;
Chris Mason0ef3e662008-05-24 14:04:53 -04009771}
9772
Nikolay Borisovc83488a2018-06-20 15:49:14 +03009773int btrfs_inc_block_group_ro(struct btrfs_block_group_cache *cache)
Yan Zheng5d4f98a2009-06-10 10:45:14 -04009774
9775{
Nikolay Borisovc83488a2018-06-20 15:49:14 +03009776 struct btrfs_fs_info *fs_info = cache->fs_info;
Yan, Zhengf0486c62010-05-16 10:46:25 -04009777 struct btrfs_trans_handle *trans;
9778 u64 alloc_flags;
9779 int ret;
9780
Chris Mason1bbc6212015-04-06 12:46:08 -07009781again:
Jeff Mahoney5e00f192017-02-15 16:28:29 -05009782 trans = btrfs_join_transaction(fs_info->extent_root);
Jeff Mahoney79787ea2012-03-12 16:03:00 +01009783 if (IS_ERR(trans))
9784 return PTR_ERR(trans);
Yan, Zhengf0486c62010-05-16 10:46:25 -04009785
Chris Mason1bbc6212015-04-06 12:46:08 -07009786 /*
9787 * we're not allowed to set block groups readonly after the dirty
9788 * block groups cache has started writing. If it already started,
9789 * back off and let this transaction commit
9790 */
Jeff Mahoney0b246af2016-06-22 18:54:23 -04009791 mutex_lock(&fs_info->ro_block_group_mutex);
Josef Bacik3204d332015-09-24 10:46:10 -04009792 if (test_bit(BTRFS_TRANS_DIRTY_BG_RUN, &trans->transaction->flags)) {
Chris Mason1bbc6212015-04-06 12:46:08 -07009793 u64 transid = trans->transid;
9794
Jeff Mahoney0b246af2016-06-22 18:54:23 -04009795 mutex_unlock(&fs_info->ro_block_group_mutex);
Jeff Mahoney3a45bb22016-09-09 21:39:03 -04009796 btrfs_end_transaction(trans);
Chris Mason1bbc6212015-04-06 12:46:08 -07009797
Jeff Mahoney2ff7e612016-06-22 18:54:24 -04009798 ret = btrfs_wait_for_commit(fs_info, transid);
Chris Mason1bbc6212015-04-06 12:46:08 -07009799 if (ret)
9800 return ret;
9801 goto again;
9802 }
9803
Chris Mason153c35b2015-05-19 18:54:41 -07009804 /*
9805 * if we are changing raid levels, try to allocate a corresponding
9806 * block group with the new raid level.
9807 */
Jeff Mahoney0b246af2016-06-22 18:54:23 -04009808 alloc_flags = update_block_group_flags(fs_info, cache->flags);
Chris Mason153c35b2015-05-19 18:54:41 -07009809 if (alloc_flags != cache->flags) {
Nikolay Borisov01458822018-06-20 15:49:05 +03009810 ret = do_chunk_alloc(trans, alloc_flags,
Chris Mason153c35b2015-05-19 18:54:41 -07009811 CHUNK_ALLOC_FORCE);
9812 /*
9813 * ENOSPC is allowed here, we may have enough space
9814 * already allocated at the new raid level to
9815 * carry on
9816 */
9817 if (ret == -ENOSPC)
9818 ret = 0;
9819 if (ret < 0)
9820 goto out;
9821 }
Chris Mason1bbc6212015-04-06 12:46:08 -07009822
Zhaolei868f4012015-08-05 16:43:27 +08009823 ret = inc_block_group_ro(cache, 0);
Yan, Zhengf0486c62010-05-16 10:46:25 -04009824 if (!ret)
9825 goto out;
Jeff Mahoney2ff7e612016-06-22 18:54:24 -04009826 alloc_flags = get_alloc_profile(fs_info, cache->space_info->flags);
Nikolay Borisov01458822018-06-20 15:49:05 +03009827 ret = do_chunk_alloc(trans, alloc_flags, CHUNK_ALLOC_FORCE);
Yan, Zhengf0486c62010-05-16 10:46:25 -04009828 if (ret < 0)
9829 goto out;
Zhaolei868f4012015-08-05 16:43:27 +08009830 ret = inc_block_group_ro(cache, 0);
Yan, Zhengf0486c62010-05-16 10:46:25 -04009831out:
Shaohua Li2f081082015-01-09 10:40:15 -08009832 if (cache->flags & BTRFS_BLOCK_GROUP_SYSTEM) {
Jeff Mahoney0b246af2016-06-22 18:54:23 -04009833 alloc_flags = update_block_group_flags(fs_info, cache->flags);
David Sterba34441362016-10-04 19:34:27 +02009834 mutex_lock(&fs_info->chunk_mutex);
Nikolay Borisov451a2c12018-06-20 15:49:07 +03009835 check_system_chunk(trans, alloc_flags);
David Sterba34441362016-10-04 19:34:27 +02009836 mutex_unlock(&fs_info->chunk_mutex);
Shaohua Li2f081082015-01-09 10:40:15 -08009837 }
Jeff Mahoney0b246af2016-06-22 18:54:23 -04009838 mutex_unlock(&fs_info->ro_block_group_mutex);
Shaohua Li2f081082015-01-09 10:40:15 -08009839
Jeff Mahoney3a45bb22016-09-09 21:39:03 -04009840 btrfs_end_transaction(trans);
Yan, Zhengf0486c62010-05-16 10:46:25 -04009841 return ret;
9842}
9843
Nikolay Borisov43a7e992018-06-20 15:49:15 +03009844int btrfs_force_chunk_alloc(struct btrfs_trans_handle *trans, u64 type)
Chris Masonc87f08c2011-02-16 13:57:04 -05009845{
Nikolay Borisov43a7e992018-06-20 15:49:15 +03009846 u64 alloc_flags = get_alloc_profile(trans->fs_info, type);
Jeff Mahoney2ff7e612016-06-22 18:54:24 -04009847
Nikolay Borisov01458822018-06-20 15:49:05 +03009848 return do_chunk_alloc(trans, alloc_flags, CHUNK_ALLOC_FORCE);
Chris Masonc87f08c2011-02-16 13:57:04 -05009849}
9850
Miao Xie6d07bce2011-01-05 10:07:31 +00009851/*
9852 * helper to account the unused space of all the readonly block group in the
Josef Bacik633c0aa2014-10-31 09:49:34 -04009853 * space_info. takes mirrors into account.
Miao Xie6d07bce2011-01-05 10:07:31 +00009854 */
Josef Bacik633c0aa2014-10-31 09:49:34 -04009855u64 btrfs_account_ro_block_groups_free_space(struct btrfs_space_info *sinfo)
Miao Xie6d07bce2011-01-05 10:07:31 +00009856{
9857 struct btrfs_block_group_cache *block_group;
9858 u64 free_bytes = 0;
9859 int factor;
9860
Nicholas D Steeves01327612016-05-19 21:18:45 -04009861 /* It's df, we don't care if it's racy */
Josef Bacik633c0aa2014-10-31 09:49:34 -04009862 if (list_empty(&sinfo->ro_bgs))
9863 return 0;
9864
9865 spin_lock(&sinfo->lock);
9866 list_for_each_entry(block_group, &sinfo->ro_bgs, ro_list) {
Miao Xie6d07bce2011-01-05 10:07:31 +00009867 spin_lock(&block_group->lock);
9868
9869 if (!block_group->ro) {
9870 spin_unlock(&block_group->lock);
9871 continue;
9872 }
9873
David Sterba46df06b2018-07-13 20:46:30 +02009874 factor = btrfs_bg_type_to_factor(block_group->flags);
Miao Xie6d07bce2011-01-05 10:07:31 +00009875 free_bytes += (block_group->key.offset -
9876 btrfs_block_group_used(&block_group->item)) *
9877 factor;
9878
9879 spin_unlock(&block_group->lock);
9880 }
Miao Xie6d07bce2011-01-05 10:07:31 +00009881 spin_unlock(&sinfo->lock);
9882
9883 return free_bytes;
9884}
9885
Jeff Mahoney2ff7e612016-06-22 18:54:24 -04009886void btrfs_dec_block_group_ro(struct btrfs_block_group_cache *cache)
Yan, Zhengf0486c62010-05-16 10:46:25 -04009887{
9888 struct btrfs_space_info *sinfo = cache->space_info;
9889 u64 num_bytes;
9890
9891 BUG_ON(!cache->ro);
9892
9893 spin_lock(&sinfo->lock);
9894 spin_lock(&cache->lock);
Zhaolei868f4012015-08-05 16:43:27 +08009895 if (!--cache->ro) {
9896 num_bytes = cache->key.offset - cache->reserved -
9897 cache->pinned - cache->bytes_super -
9898 btrfs_block_group_used(&cache->item);
9899 sinfo->bytes_readonly -= num_bytes;
9900 list_del_init(&cache->ro_list);
9901 }
Yan, Zhengf0486c62010-05-16 10:46:25 -04009902 spin_unlock(&cache->lock);
9903 spin_unlock(&sinfo->lock);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04009904}
9905
Josef Bacikba1bf482009-09-11 16:11:19 -04009906/*
Andrea Gelmini52042d82018-11-28 12:05:13 +01009907 * Checks to see if it's even possible to relocate this block group.
Josef Bacikba1bf482009-09-11 16:11:19 -04009908 *
9909 * @return - -1 if it's not a good idea to relocate this block group, 0 if its
9910 * ok to go ahead and try.
9911 */
Jeff Mahoney6bccf3a2016-06-21 21:16:51 -04009912int btrfs_can_relocate(struct btrfs_fs_info *fs_info, u64 bytenr)
Zheng Yan1a40e232008-09-26 10:09:34 -04009913{
Zheng Yan1a40e232008-09-26 10:09:34 -04009914 struct btrfs_block_group_cache *block_group;
Josef Bacikba1bf482009-09-11 16:11:19 -04009915 struct btrfs_space_info *space_info;
Jeff Mahoney0b246af2016-06-22 18:54:23 -04009916 struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
Josef Bacikba1bf482009-09-11 16:11:19 -04009917 struct btrfs_device *device;
liubocdcb7252011-08-03 10:15:25 +00009918 u64 min_free;
Josef Bacik6719db62011-08-20 08:29:51 -04009919 u64 dev_min = 1;
9920 u64 dev_nr = 0;
Ilya Dryomov4a5e98f2012-03-27 17:09:17 +03009921 u64 target;
Qu Wenruo0305bc22016-03-23 11:38:17 +08009922 int debug;
liubocdcb7252011-08-03 10:15:25 +00009923 int index;
Josef Bacikba1bf482009-09-11 16:11:19 -04009924 int full = 0;
9925 int ret = 0;
Chris Masonedbd8d42007-12-21 16:27:24 -05009926
Jeff Mahoney0b246af2016-06-22 18:54:23 -04009927 debug = btrfs_test_opt(fs_info, ENOSPC_DEBUG);
Qu Wenruo0305bc22016-03-23 11:38:17 +08009928
Jeff Mahoney0b246af2016-06-22 18:54:23 -04009929 block_group = btrfs_lookup_block_group(fs_info, bytenr);
Zheng Yan1a40e232008-09-26 10:09:34 -04009930
Josef Bacikba1bf482009-09-11 16:11:19 -04009931 /* odd, couldn't find the block group, leave it alone */
Qu Wenruo0305bc22016-03-23 11:38:17 +08009932 if (!block_group) {
9933 if (debug)
Jeff Mahoney0b246af2016-06-22 18:54:23 -04009934 btrfs_warn(fs_info,
Qu Wenruo0305bc22016-03-23 11:38:17 +08009935 "can't find block group for bytenr %llu",
9936 bytenr);
Josef Bacikba1bf482009-09-11 16:11:19 -04009937 return -1;
Qu Wenruo0305bc22016-03-23 11:38:17 +08009938 }
Chris Masonedbd8d42007-12-21 16:27:24 -05009939
liubocdcb7252011-08-03 10:15:25 +00009940 min_free = btrfs_block_group_used(&block_group->item);
9941
Josef Bacikba1bf482009-09-11 16:11:19 -04009942 /* no bytes used, we're good */
liubocdcb7252011-08-03 10:15:25 +00009943 if (!min_free)
Josef Bacikba1bf482009-09-11 16:11:19 -04009944 goto out;
Chris Mason323da792008-05-09 11:46:48 -04009945
Josef Bacikba1bf482009-09-11 16:11:19 -04009946 space_info = block_group->space_info;
9947 spin_lock(&space_info->lock);
Chris Mason323da792008-05-09 11:46:48 -04009948
Josef Bacikba1bf482009-09-11 16:11:19 -04009949 full = space_info->full;
Zheng Yan1a40e232008-09-26 10:09:34 -04009950
Josef Bacikba1bf482009-09-11 16:11:19 -04009951 /*
9952 * if this is the last block group we have in this space, we can't
Chris Mason7ce618d2009-09-22 14:48:44 -04009953 * relocate it unless we're able to allocate a new chunk below.
9954 *
9955 * Otherwise, we need to make sure we have room in the space to handle
9956 * all of the extents from this block group. If we can, we're good
Josef Bacikba1bf482009-09-11 16:11:19 -04009957 */
Chris Mason7ce618d2009-09-22 14:48:44 -04009958 if ((space_info->total_bytes != block_group->key.offset) &&
Liu Bo41361352017-02-13 15:42:21 -08009959 (btrfs_space_info_used(space_info, false) + min_free <
9960 space_info->total_bytes)) {
Josef Bacikba1bf482009-09-11 16:11:19 -04009961 spin_unlock(&space_info->lock);
9962 goto out;
9963 }
9964 spin_unlock(&space_info->lock);
Zheng Yan1a40e232008-09-26 10:09:34 -04009965
Josef Bacikba1bf482009-09-11 16:11:19 -04009966 /*
9967 * ok we don't have enough space, but maybe we have free space on our
9968 * devices to allocate new chunks for relocation, so loop through our
Ilya Dryomov4a5e98f2012-03-27 17:09:17 +03009969 * alloc devices and guess if we have enough space. if this block
9970 * group is going to be restriped, run checks against the target
9971 * profile instead of the current one.
Josef Bacikba1bf482009-09-11 16:11:19 -04009972 */
9973 ret = -1;
Chris Mason4313b392008-01-03 09:08:48 -05009974
liubocdcb7252011-08-03 10:15:25 +00009975 /*
9976 * index:
9977 * 0: raid10
9978 * 1: raid1
9979 * 2: dup
9980 * 3: raid0
9981 * 4: single
9982 */
Jeff Mahoney0b246af2016-06-22 18:54:23 -04009983 target = get_restripe_target(fs_info, block_group->flags);
Ilya Dryomov4a5e98f2012-03-27 17:09:17 +03009984 if (target) {
Qu Wenruo3e72ee82018-01-30 18:20:45 +08009985 index = btrfs_bg_flags_to_raid_index(extended_to_chunk(target));
Ilya Dryomov4a5e98f2012-03-27 17:09:17 +03009986 } else {
9987 /*
9988 * this is just a balance, so if we were marked as full
9989 * we know there is no space for a new chunk
9990 */
Qu Wenruo0305bc22016-03-23 11:38:17 +08009991 if (full) {
9992 if (debug)
Jeff Mahoney0b246af2016-06-22 18:54:23 -04009993 btrfs_warn(fs_info,
9994 "no space to alloc new chunk for block group %llu",
9995 block_group->key.objectid);
Ilya Dryomov4a5e98f2012-03-27 17:09:17 +03009996 goto out;
Qu Wenruo0305bc22016-03-23 11:38:17 +08009997 }
Ilya Dryomov4a5e98f2012-03-27 17:09:17 +03009998
Qu Wenruo3e72ee82018-01-30 18:20:45 +08009999 index = btrfs_bg_flags_to_raid_index(block_group->flags);
Ilya Dryomov4a5e98f2012-03-27 17:09:17 +030010000 }
10001
Miao Xiee6ec7162013-01-17 05:38:51 +000010002 if (index == BTRFS_RAID_RAID10) {
liubocdcb7252011-08-03 10:15:25 +000010003 dev_min = 4;
Josef Bacik6719db62011-08-20 08:29:51 -040010004 /* Divide by 2 */
10005 min_free >>= 1;
Miao Xiee6ec7162013-01-17 05:38:51 +000010006 } else if (index == BTRFS_RAID_RAID1) {
liubocdcb7252011-08-03 10:15:25 +000010007 dev_min = 2;
Miao Xiee6ec7162013-01-17 05:38:51 +000010008 } else if (index == BTRFS_RAID_DUP) {
Josef Bacik6719db62011-08-20 08:29:51 -040010009 /* Multiply by 2 */
10010 min_free <<= 1;
Miao Xiee6ec7162013-01-17 05:38:51 +000010011 } else if (index == BTRFS_RAID_RAID0) {
liubocdcb7252011-08-03 10:15:25 +000010012 dev_min = fs_devices->rw_devices;
David Sterba47c57132015-02-20 18:43:47 +010010013 min_free = div64_u64(min_free, dev_min);
liubocdcb7252011-08-03 10:15:25 +000010014 }
10015
Jeff Mahoney0b246af2016-06-22 18:54:23 -040010016 mutex_lock(&fs_info->chunk_mutex);
Josef Bacikba1bf482009-09-11 16:11:19 -040010017 list_for_each_entry(device, &fs_devices->alloc_list, dev_alloc_list) {
Miao Xie7bfc8372011-01-05 10:07:26 +000010018 u64 dev_offset;
Chris Masonea8c2812008-08-04 23:17:27 -040010019
Josef Bacikba1bf482009-09-11 16:11:19 -040010020 /*
10021 * check to make sure we can actually find a chunk with enough
10022 * space to fit our block group in.
10023 */
Stefan Behrens63a212a2012-11-05 18:29:28 +010010024 if (device->total_bytes > device->bytes_used + min_free &&
Anand Jain401e29c2017-12-04 12:54:55 +080010025 !test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state)) {
Nikolay Borisov60dfdf22019-03-27 14:24:14 +020010026 ret = find_free_dev_extent(device, min_free,
Miao Xie7bfc8372011-01-05 10:07:26 +000010027 &dev_offset, NULL);
Josef Bacikba1bf482009-09-11 16:11:19 -040010028 if (!ret)
liubocdcb7252011-08-03 10:15:25 +000010029 dev_nr++;
10030
10031 if (dev_nr >= dev_min)
Yan73e48b22008-01-03 14:14:39 -050010032 break;
liubocdcb7252011-08-03 10:15:25 +000010033
Josef Bacikba1bf482009-09-11 16:11:19 -040010034 ret = -1;
Yan73e48b22008-01-03 14:14:39 -050010035 }
Chris Masonedbd8d42007-12-21 16:27:24 -050010036 }
Qu Wenruo0305bc22016-03-23 11:38:17 +080010037 if (debug && ret == -1)
Jeff Mahoney0b246af2016-06-22 18:54:23 -040010038 btrfs_warn(fs_info,
10039 "no space to allocate a new chunk for block group %llu",
10040 block_group->key.objectid);
10041 mutex_unlock(&fs_info->chunk_mutex);
Chris Masonedbd8d42007-12-21 16:27:24 -050010042out:
Josef Bacikba1bf482009-09-11 16:11:19 -040010043 btrfs_put_block_group(block_group);
Chris Masonedbd8d42007-12-21 16:27:24 -050010044 return ret;
10045}
10046
Jeff Mahoney6bccf3a2016-06-21 21:16:51 -040010047static int find_first_block_group(struct btrfs_fs_info *fs_info,
10048 struct btrfs_path *path,
10049 struct btrfs_key *key)
Chris Mason0b86a832008-03-24 15:01:56 -040010050{
Jeff Mahoney6bccf3a2016-06-21 21:16:51 -040010051 struct btrfs_root *root = fs_info->extent_root;
Chris Mason925baed2008-06-25 16:01:30 -040010052 int ret = 0;
Chris Mason0b86a832008-03-24 15:01:56 -040010053 struct btrfs_key found_key;
10054 struct extent_buffer *leaf;
Qu Wenruo514c7dc2018-08-01 10:37:16 +080010055 struct btrfs_block_group_item bg;
10056 u64 flags;
Chris Mason0b86a832008-03-24 15:01:56 -040010057 int slot;
10058
10059 ret = btrfs_search_slot(NULL, root, key, path, 0, 0);
10060 if (ret < 0)
Chris Mason925baed2008-06-25 16:01:30 -040010061 goto out;
10062
Chris Masond3977122009-01-05 21:25:51 -050010063 while (1) {
Chris Mason0b86a832008-03-24 15:01:56 -040010064 slot = path->slots[0];
10065 leaf = path->nodes[0];
10066 if (slot >= btrfs_header_nritems(leaf)) {
10067 ret = btrfs_next_leaf(root, path);
10068 if (ret == 0)
10069 continue;
10070 if (ret < 0)
Chris Mason925baed2008-06-25 16:01:30 -040010071 goto out;
Chris Mason0b86a832008-03-24 15:01:56 -040010072 break;
10073 }
10074 btrfs_item_key_to_cpu(leaf, &found_key, slot);
10075
10076 if (found_key.objectid >= key->objectid &&
Chris Mason925baed2008-06-25 16:01:30 -040010077 found_key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
Liu Bo6fb37b72016-06-22 18:31:27 -070010078 struct extent_map_tree *em_tree;
10079 struct extent_map *em;
10080
10081 em_tree = &root->fs_info->mapping_tree.map_tree;
10082 read_lock(&em_tree->lock);
10083 em = lookup_extent_mapping(em_tree, found_key.objectid,
10084 found_key.offset);
10085 read_unlock(&em_tree->lock);
10086 if (!em) {
Jeff Mahoney0b246af2016-06-22 18:54:23 -040010087 btrfs_err(fs_info,
Liu Bo6fb37b72016-06-22 18:31:27 -070010088 "logical %llu len %llu found bg but no related chunk",
10089 found_key.objectid, found_key.offset);
10090 ret = -ENOENT;
Qu Wenruo514c7dc2018-08-01 10:37:16 +080010091 } else if (em->start != found_key.objectid ||
10092 em->len != found_key.offset) {
10093 btrfs_err(fs_info,
10094 "block group %llu len %llu mismatch with chunk %llu len %llu",
10095 found_key.objectid, found_key.offset,
10096 em->start, em->len);
10097 ret = -EUCLEAN;
Liu Bo6fb37b72016-06-22 18:31:27 -070010098 } else {
Qu Wenruo514c7dc2018-08-01 10:37:16 +080010099 read_extent_buffer(leaf, &bg,
10100 btrfs_item_ptr_offset(leaf, slot),
10101 sizeof(bg));
10102 flags = btrfs_block_group_flags(&bg) &
10103 BTRFS_BLOCK_GROUP_TYPE_MASK;
10104
10105 if (flags != (em->map_lookup->type &
10106 BTRFS_BLOCK_GROUP_TYPE_MASK)) {
10107 btrfs_err(fs_info,
10108"block group %llu len %llu type flags 0x%llx mismatch with chunk type flags 0x%llx",
10109 found_key.objectid,
10110 found_key.offset, flags,
10111 (BTRFS_BLOCK_GROUP_TYPE_MASK &
10112 em->map_lookup->type));
10113 ret = -EUCLEAN;
10114 } else {
10115 ret = 0;
10116 }
Liu Bo6fb37b72016-06-22 18:31:27 -070010117 }
Josef Bacik187ee582016-08-18 15:30:06 -040010118 free_extent_map(em);
Chris Mason925baed2008-06-25 16:01:30 -040010119 goto out;
10120 }
Chris Mason0b86a832008-03-24 15:01:56 -040010121 path->slots[0]++;
10122 }
Chris Mason925baed2008-06-25 16:01:30 -040010123out:
Chris Mason0b86a832008-03-24 15:01:56 -040010124 return ret;
10125}
10126
Josef Bacik0af3d002010-06-21 14:48:16 -040010127void btrfs_put_block_group_cache(struct btrfs_fs_info *info)
10128{
10129 struct btrfs_block_group_cache *block_group;
10130 u64 last = 0;
10131
10132 while (1) {
10133 struct inode *inode;
10134
10135 block_group = btrfs_lookup_first_block_group(info, last);
10136 while (block_group) {
Josef Bacik3aa7c7a2018-09-12 10:45:45 -040010137 wait_block_group_cache_done(block_group);
Josef Bacik0af3d002010-06-21 14:48:16 -040010138 spin_lock(&block_group->lock);
10139 if (block_group->iref)
10140 break;
10141 spin_unlock(&block_group->lock);
Jeff Mahoney2ff7e612016-06-22 18:54:24 -040010142 block_group = next_block_group(info, block_group);
Josef Bacik0af3d002010-06-21 14:48:16 -040010143 }
10144 if (!block_group) {
10145 if (last == 0)
10146 break;
10147 last = 0;
10148 continue;
10149 }
10150
10151 inode = block_group->inode;
10152 block_group->iref = 0;
10153 block_group->inode = NULL;
10154 spin_unlock(&block_group->lock);
Liu Bof3bca802016-07-20 17:33:44 -070010155 ASSERT(block_group->io_ctl.inode == NULL);
Josef Bacik0af3d002010-06-21 14:48:16 -040010156 iput(inode);
10157 last = block_group->key.objectid + block_group->key.offset;
10158 btrfs_put_block_group(block_group);
10159 }
10160}
10161
Filipe Manana5cdd7db2017-02-01 22:39:50 +000010162/*
10163 * Must be called only after stopping all workers, since we could have block
10164 * group caching kthreads running, and therefore they could race with us if we
10165 * freed the block groups before stopping them.
10166 */
Zheng Yan1a40e232008-09-26 10:09:34 -040010167int btrfs_free_block_groups(struct btrfs_fs_info *info)
10168{
10169 struct btrfs_block_group_cache *block_group;
Chris Mason4184ea72009-03-10 12:39:20 -040010170 struct btrfs_space_info *space_info;
Yan Zheng11833d62009-09-11 16:11:19 -040010171 struct btrfs_caching_control *caching_ctl;
Zheng Yan1a40e232008-09-26 10:09:34 -040010172 struct rb_node *n;
10173
Josef Bacik9e351cc2014-03-13 15:42:13 -040010174 down_write(&info->commit_root_sem);
Yan Zheng11833d62009-09-11 16:11:19 -040010175 while (!list_empty(&info->caching_block_groups)) {
10176 caching_ctl = list_entry(info->caching_block_groups.next,
10177 struct btrfs_caching_control, list);
10178 list_del(&caching_ctl->list);
10179 put_caching_control(caching_ctl);
10180 }
Josef Bacik9e351cc2014-03-13 15:42:13 -040010181 up_write(&info->commit_root_sem);
Yan Zheng11833d62009-09-11 16:11:19 -040010182
Josef Bacik47ab2a62014-09-18 11:20:02 -040010183 spin_lock(&info->unused_bgs_lock);
10184 while (!list_empty(&info->unused_bgs)) {
10185 block_group = list_first_entry(&info->unused_bgs,
10186 struct btrfs_block_group_cache,
10187 bg_list);
10188 list_del_init(&block_group->bg_list);
10189 btrfs_put_block_group(block_group);
10190 }
10191 spin_unlock(&info->unused_bgs_lock);
10192
Zheng Yan1a40e232008-09-26 10:09:34 -040010193 spin_lock(&info->block_group_cache_lock);
10194 while ((n = rb_last(&info->block_group_cache_tree)) != NULL) {
10195 block_group = rb_entry(n, struct btrfs_block_group_cache,
10196 cache_node);
Zheng Yan1a40e232008-09-26 10:09:34 -040010197 rb_erase(&block_group->cache_node,
10198 &info->block_group_cache_tree);
Filipe Manana01eacb22014-12-04 18:38:30 +000010199 RB_CLEAR_NODE(&block_group->cache_node);
Yan Zhengd899e052008-10-30 14:25:28 -040010200 spin_unlock(&info->block_group_cache_lock);
10201
Josef Bacik80eb2342008-10-29 14:49:05 -040010202 down_write(&block_group->space_info->groups_sem);
Zheng Yan1a40e232008-09-26 10:09:34 -040010203 list_del(&block_group->list);
Josef Bacik80eb2342008-10-29 14:49:05 -040010204 up_write(&block_group->space_info->groups_sem);
Yan Zhengd2fb3432008-12-11 16:30:39 -050010205
Josef Bacik3c148742011-02-02 15:53:47 +000010206 /*
10207 * We haven't cached this block group, which means we could
10208 * possibly have excluded extents on this block group.
10209 */
Josef Bacik36cce922013-08-05 11:15:21 -040010210 if (block_group->cached == BTRFS_CACHE_NO ||
10211 block_group->cached == BTRFS_CACHE_ERROR)
Nikolay Borisov9e715da2018-06-20 15:49:08 +030010212 free_excluded_extents(block_group);
Josef Bacik3c148742011-02-02 15:53:47 +000010213
Josef Bacik817d52f2009-07-13 21:29:25 -040010214 btrfs_remove_free_space_cache(block_group);
Filipe Manana5cdd7db2017-02-01 22:39:50 +000010215 ASSERT(block_group->cached != BTRFS_CACHE_STARTED);
Liu Bof3bca802016-07-20 17:33:44 -070010216 ASSERT(list_empty(&block_group->dirty_list));
10217 ASSERT(list_empty(&block_group->io_list));
10218 ASSERT(list_empty(&block_group->bg_list));
10219 ASSERT(atomic_read(&block_group->count) == 1);
Josef Bacik11dfe352009-11-13 20:12:59 +000010220 btrfs_put_block_group(block_group);
Yan Zhengd899e052008-10-30 14:25:28 -040010221
10222 spin_lock(&info->block_group_cache_lock);
Zheng Yan1a40e232008-09-26 10:09:34 -040010223 }
10224 spin_unlock(&info->block_group_cache_lock);
Chris Mason4184ea72009-03-10 12:39:20 -040010225
10226 /* now that all the block groups are freed, go through and
10227 * free all the space_info structs. This is only called during
10228 * the final stages of unmount, and so we know nobody is
10229 * using them. We call synchronize_rcu() once before we start,
10230 * just to be on the safe side.
10231 */
10232 synchronize_rcu();
10233
Yan, Zheng8929ecfa2010-05-16 10:49:58 -040010234 release_global_block_rsv(info);
10235
Dulshani Gunawardhana67871252013-10-31 10:33:04 +053010236 while (!list_empty(&info->space_info)) {
Jeff Mahoney6ab0a202013-11-01 13:07:04 -040010237 int i;
10238
Chris Mason4184ea72009-03-10 12:39:20 -040010239 space_info = list_entry(info->space_info.next,
10240 struct btrfs_space_info,
10241 list);
Josef Bacikd555b6c32016-03-25 13:25:51 -040010242
10243 /*
10244 * Do not hide this behind enospc_debug, this is actually
10245 * important and indicates a real bug if this happens.
10246 */
10247 if (WARN_ON(space_info->bytes_pinned > 0 ||
David Sterbab069e0c2013-02-08 21:28:17 +000010248 space_info->bytes_reserved > 0 ||
Josef Bacikd555b6c32016-03-25 13:25:51 -040010249 space_info->bytes_may_use > 0))
Jeff Mahoneyab8d0fc2016-09-20 10:05:02 -040010250 dump_space_info(info, space_info, 0, 0);
Chris Mason4184ea72009-03-10 12:39:20 -040010251 list_del(&space_info->list);
Jeff Mahoney6ab0a202013-11-01 13:07:04 -040010252 for (i = 0; i < BTRFS_NR_RAID_TYPES; i++) {
10253 struct kobject *kobj;
Jeff Mahoneyc1895442014-05-27 12:59:57 -040010254 kobj = space_info->block_group_kobjs[i];
10255 space_info->block_group_kobjs[i] = NULL;
10256 if (kobj) {
Jeff Mahoney6ab0a202013-11-01 13:07:04 -040010257 kobject_del(kobj);
10258 kobject_put(kobj);
10259 }
10260 }
10261 kobject_del(&space_info->kobj);
10262 kobject_put(&space_info->kobj);
Chris Mason4184ea72009-03-10 12:39:20 -040010263 }
Zheng Yan1a40e232008-09-26 10:09:34 -040010264 return 0;
10265}
10266
Jeff Mahoney75cb3792018-03-20 15:25:26 -040010267/* link_block_group will queue up kobjects to add when we're reclaim-safe */
10268void btrfs_add_raid_kobjects(struct btrfs_fs_info *fs_info)
10269{
10270 struct btrfs_space_info *space_info;
10271 struct raid_kobject *rkobj;
10272 LIST_HEAD(list);
10273 int index;
10274 int ret = 0;
10275
10276 spin_lock(&fs_info->pending_raid_kobjs_lock);
10277 list_splice_init(&fs_info->pending_raid_kobjs, &list);
10278 spin_unlock(&fs_info->pending_raid_kobjs_lock);
10279
10280 list_for_each_entry(rkobj, &list, list) {
10281 space_info = __find_space_info(fs_info, rkobj->flags);
10282 index = btrfs_bg_flags_to_raid_index(rkobj->flags);
10283
10284 ret = kobject_add(&rkobj->kobj, &space_info->kobj,
10285 "%s", get_raid_name(index));
10286 if (ret) {
10287 kobject_put(&rkobj->kobj);
10288 break;
10289 }
10290 }
10291 if (ret)
10292 btrfs_warn(fs_info,
10293 "failed to add kobject for block cache, ignoring");
10294}
10295
Nikolay Borisovc434d212017-08-21 12:43:50 +030010296static void link_block_group(struct btrfs_block_group_cache *cache)
Yan, Zhengb742bb822010-05-16 10:46:24 -040010297{
Nikolay Borisovc434d212017-08-21 12:43:50 +030010298 struct btrfs_space_info *space_info = cache->space_info;
Jeff Mahoney75cb3792018-03-20 15:25:26 -040010299 struct btrfs_fs_info *fs_info = cache->fs_info;
Qu Wenruo3e72ee82018-01-30 18:20:45 +080010300 int index = btrfs_bg_flags_to_raid_index(cache->flags);
Jeff Mahoneyed55b6a2014-03-26 14:11:26 -040010301 bool first = false;
Yan, Zhengb742bb822010-05-16 10:46:24 -040010302
10303 down_write(&space_info->groups_sem);
Jeff Mahoneyed55b6a2014-03-26 14:11:26 -040010304 if (list_empty(&space_info->block_groups[index]))
10305 first = true;
10306 list_add_tail(&cache->list, &space_info->block_groups[index]);
10307 up_write(&space_info->groups_sem);
10308
10309 if (first) {
Jeff Mahoney75cb3792018-03-20 15:25:26 -040010310 struct raid_kobject *rkobj = kzalloc(sizeof(*rkobj), GFP_NOFS);
10311 if (!rkobj) {
10312 btrfs_warn(cache->fs_info,
10313 "couldn't alloc memory for raid level kobject");
10314 return;
Jeff Mahoney6ab0a202013-11-01 13:07:04 -040010315 }
Jeff Mahoney75cb3792018-03-20 15:25:26 -040010316 rkobj->flags = cache->flags;
10317 kobject_init(&rkobj->kobj, &btrfs_raid_ktype);
10318
10319 spin_lock(&fs_info->pending_raid_kobjs_lock);
10320 list_add_tail(&rkobj->list, &fs_info->pending_raid_kobjs);
10321 spin_unlock(&fs_info->pending_raid_kobjs_lock);
Jeff Mahoneyc1895442014-05-27 12:59:57 -040010322 space_info->block_group_kobjs[index] = &rkobj->kobj;
Jeff Mahoney6ab0a202013-11-01 13:07:04 -040010323 }
Yan, Zhengb742bb822010-05-16 10:46:24 -040010324}
10325
Miao Xie920e4a52014-01-15 20:00:55 +080010326static struct btrfs_block_group_cache *
Jeff Mahoney2ff7e612016-06-22 18:54:24 -040010327btrfs_create_block_group_cache(struct btrfs_fs_info *fs_info,
10328 u64 start, u64 size)
Miao Xie920e4a52014-01-15 20:00:55 +080010329{
10330 struct btrfs_block_group_cache *cache;
10331
10332 cache = kzalloc(sizeof(*cache), GFP_NOFS);
10333 if (!cache)
10334 return NULL;
10335
10336 cache->free_space_ctl = kzalloc(sizeof(*cache->free_space_ctl),
10337 GFP_NOFS);
10338 if (!cache->free_space_ctl) {
10339 kfree(cache);
10340 return NULL;
10341 }
10342
10343 cache->key.objectid = start;
10344 cache->key.offset = size;
10345 cache->key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
10346
Jeff Mahoney0b246af2016-06-22 18:54:23 -040010347 cache->fs_info = fs_info;
Nikolay Borisove4ff5fb2017-07-19 10:48:42 +030010348 cache->full_stripe_len = btrfs_full_stripe_len(fs_info, start);
Omar Sandoval1e144fb2015-09-29 20:50:37 -070010349 set_free_space_tree_thresholds(cache);
10350
Miao Xie920e4a52014-01-15 20:00:55 +080010351 atomic_set(&cache->count, 1);
10352 spin_lock_init(&cache->lock);
Miao Xiee570fd22014-06-19 10:42:50 +080010353 init_rwsem(&cache->data_rwsem);
Miao Xie920e4a52014-01-15 20:00:55 +080010354 INIT_LIST_HEAD(&cache->list);
10355 INIT_LIST_HEAD(&cache->cluster_list);
Josef Bacik47ab2a62014-09-18 11:20:02 -040010356 INIT_LIST_HEAD(&cache->bg_list);
Josef Bacik633c0aa2014-10-31 09:49:34 -040010357 INIT_LIST_HEAD(&cache->ro_list);
Josef Bacikce93ec52014-11-17 15:45:48 -050010358 INIT_LIST_HEAD(&cache->dirty_list);
Chris Masonc9dc4c62015-04-04 17:14:42 -070010359 INIT_LIST_HEAD(&cache->io_list);
Miao Xie920e4a52014-01-15 20:00:55 +080010360 btrfs_init_free_space_ctl(cache);
Filipe Manana04216822014-11-27 21:14:15 +000010361 atomic_set(&cache->trimming, 0);
Omar Sandovala5ed9182015-09-29 20:50:35 -070010362 mutex_init(&cache->free_space_lock);
Qu Wenruo0966a7b2017-04-14 08:35:54 +080010363 btrfs_init_full_stripe_locks_tree(&cache->full_stripe_locks_root);
Miao Xie920e4a52014-01-15 20:00:55 +080010364
10365 return cache;
10366}
10367
Qu Wenruo7ef49512018-08-01 10:37:17 +080010368
10369/*
10370 * Iterate all chunks and verify that each of them has the corresponding block
10371 * group
10372 */
10373static int check_chunk_block_group_mappings(struct btrfs_fs_info *fs_info)
10374{
10375 struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree;
10376 struct extent_map *em;
10377 struct btrfs_block_group_cache *bg;
10378 u64 start = 0;
10379 int ret = 0;
10380
10381 while (1) {
10382 read_lock(&map_tree->map_tree.lock);
10383 /*
10384 * lookup_extent_mapping will return the first extent map
10385 * intersecting the range, so setting @len to 1 is enough to
10386 * get the first chunk.
10387 */
10388 em = lookup_extent_mapping(&map_tree->map_tree, start, 1);
10389 read_unlock(&map_tree->map_tree.lock);
10390 if (!em)
10391 break;
10392
10393 bg = btrfs_lookup_block_group(fs_info, em->start);
10394 if (!bg) {
10395 btrfs_err(fs_info,
10396 "chunk start=%llu len=%llu doesn't have corresponding block group",
10397 em->start, em->len);
10398 ret = -EUCLEAN;
10399 free_extent_map(em);
10400 break;
10401 }
10402 if (bg->key.objectid != em->start ||
10403 bg->key.offset != em->len ||
10404 (bg->flags & BTRFS_BLOCK_GROUP_TYPE_MASK) !=
10405 (em->map_lookup->type & BTRFS_BLOCK_GROUP_TYPE_MASK)) {
10406 btrfs_err(fs_info,
10407"chunk start=%llu len=%llu flags=0x%llx doesn't match block group start=%llu len=%llu flags=0x%llx",
10408 em->start, em->len,
10409 em->map_lookup->type & BTRFS_BLOCK_GROUP_TYPE_MASK,
10410 bg->key.objectid, bg->key.offset,
10411 bg->flags & BTRFS_BLOCK_GROUP_TYPE_MASK);
10412 ret = -EUCLEAN;
10413 free_extent_map(em);
10414 btrfs_put_block_group(bg);
10415 break;
10416 }
10417 start = em->start + em->len;
10418 free_extent_map(em);
10419 btrfs_put_block_group(bg);
10420 }
10421 return ret;
10422}
10423
Jeff Mahoney5b4aace2016-06-21 10:40:19 -040010424int btrfs_read_block_groups(struct btrfs_fs_info *info)
Chris Mason9078a3e2007-04-26 16:46:15 -040010425{
10426 struct btrfs_path *path;
10427 int ret;
Chris Mason9078a3e2007-04-26 16:46:15 -040010428 struct btrfs_block_group_cache *cache;
Chris Mason6324fbf2008-03-24 15:01:59 -040010429 struct btrfs_space_info *space_info;
Chris Mason9078a3e2007-04-26 16:46:15 -040010430 struct btrfs_key key;
10431 struct btrfs_key found_key;
Chris Mason5f39d392007-10-15 16:14:19 -040010432 struct extent_buffer *leaf;
Josef Bacik0af3d002010-06-21 14:48:16 -040010433 int need_clear = 0;
10434 u64 cache_gen;
Liu Bo49303382016-08-25 18:08:27 -070010435 u64 feature;
10436 int mixed;
10437
10438 feature = btrfs_super_incompat_flags(info->super_copy);
10439 mixed = !!(feature & BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS);
Chris Mason96b51792007-10-15 16:15:19 -040010440
Chris Mason9078a3e2007-04-26 16:46:15 -040010441 key.objectid = 0;
Chris Mason0b86a832008-03-24 15:01:56 -040010442 key.offset = 0;
David Sterba962a2982014-06-04 18:41:45 +020010443 key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
Chris Mason9078a3e2007-04-26 16:46:15 -040010444 path = btrfs_alloc_path();
10445 if (!path)
10446 return -ENOMEM;
David Sterbae4058b52015-11-27 16:31:35 +010010447 path->reada = READA_FORWARD;
Chris Mason9078a3e2007-04-26 16:46:15 -040010448
Jeff Mahoney0b246af2016-06-22 18:54:23 -040010449 cache_gen = btrfs_super_cache_generation(info->super_copy);
10450 if (btrfs_test_opt(info, SPACE_CACHE) &&
10451 btrfs_super_generation(info->super_copy) != cache_gen)
Josef Bacik0af3d002010-06-21 14:48:16 -040010452 need_clear = 1;
Jeff Mahoney0b246af2016-06-22 18:54:23 -040010453 if (btrfs_test_opt(info, CLEAR_CACHE))
Josef Bacik88c2ba32010-09-21 14:21:34 -040010454 need_clear = 1;
Josef Bacik0af3d002010-06-21 14:48:16 -040010455
Chris Masond3977122009-01-05 21:25:51 -050010456 while (1) {
Jeff Mahoney6bccf3a2016-06-21 21:16:51 -040010457 ret = find_first_block_group(info, path, &key);
Yan, Zhengb742bb822010-05-16 10:46:24 -040010458 if (ret > 0)
10459 break;
Chris Mason0b86a832008-03-24 15:01:56 -040010460 if (ret != 0)
10461 goto error;
Miao Xie920e4a52014-01-15 20:00:55 +080010462
Chris Mason5f39d392007-10-15 16:14:19 -040010463 leaf = path->nodes[0];
10464 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
Miao Xie920e4a52014-01-15 20:00:55 +080010465
Jeff Mahoney2ff7e612016-06-22 18:54:24 -040010466 cache = btrfs_create_block_group_cache(info, found_key.objectid,
Miao Xie920e4a52014-01-15 20:00:55 +080010467 found_key.offset);
Chris Mason9078a3e2007-04-26 16:46:15 -040010468 if (!cache) {
Chris Mason0b86a832008-03-24 15:01:56 -040010469 ret = -ENOMEM;
Yan, Zhengf0486c62010-05-16 10:46:25 -040010470 goto error;
Chris Mason9078a3e2007-04-26 16:46:15 -040010471 }
Josef Bacik96303082009-07-13 21:29:25 -040010472
Liu Bocf7c1ef2012-07-06 03:31:34 -060010473 if (need_clear) {
10474 /*
10475 * When we mount with old space cache, we need to
10476 * set BTRFS_DC_CLEAR and set dirty flag.
10477 *
10478 * a) Setting 'BTRFS_DC_CLEAR' makes sure that we
10479 * truncate the old free space cache inode and
10480 * setup a new one.
10481 * b) Setting 'dirty flag' makes sure that we flush
10482 * the new space cache info onto disk.
10483 */
Jeff Mahoney0b246af2016-06-22 18:54:23 -040010484 if (btrfs_test_opt(info, SPACE_CACHE))
Josef Bacikce93ec52014-11-17 15:45:48 -050010485 cache->disk_cache_state = BTRFS_DC_CLEAR;
Liu Bocf7c1ef2012-07-06 03:31:34 -060010486 }
Josef Bacik0af3d002010-06-21 14:48:16 -040010487
Chris Mason5f39d392007-10-15 16:14:19 -040010488 read_extent_buffer(leaf, &cache->item,
10489 btrfs_item_ptr_offset(leaf, path->slots[0]),
10490 sizeof(cache->item));
Miao Xie920e4a52014-01-15 20:00:55 +080010491 cache->flags = btrfs_block_group_flags(&cache->item);
Liu Bo49303382016-08-25 18:08:27 -070010492 if (!mixed &&
10493 ((cache->flags & BTRFS_BLOCK_GROUP_METADATA) &&
10494 (cache->flags & BTRFS_BLOCK_GROUP_DATA))) {
10495 btrfs_err(info,
10496"bg %llu is a mixed block group but filesystem hasn't enabled mixed block groups",
10497 cache->key.objectid);
10498 ret = -EINVAL;
10499 goto error;
10500 }
Chris Mason0b86a832008-03-24 15:01:56 -040010501
Chris Mason9078a3e2007-04-26 16:46:15 -040010502 key.objectid = found_key.objectid + found_key.offset;
David Sterbab3b4aa72011-04-21 01:20:15 +020010503 btrfs_release_path(path);
Li Zefan34d52cb2011-03-29 13:46:06 +080010504
Josef Bacik817d52f2009-07-13 21:29:25 -040010505 /*
Josef Bacik3c148742011-02-02 15:53:47 +000010506 * We need to exclude the super stripes now so that the space
10507 * info has super bytes accounted for, otherwise we'll think
10508 * we have more space than we actually do.
10509 */
Nikolay Borisov3c4da652018-06-20 15:49:09 +030010510 ret = exclude_super_stripes(cache);
Josef Bacik835d9742013-03-19 12:13:25 -040010511 if (ret) {
10512 /*
10513 * We may have excluded something, so call this just in
10514 * case.
10515 */
Nikolay Borisov9e715da2018-06-20 15:49:08 +030010516 free_excluded_extents(cache);
Miao Xie920e4a52014-01-15 20:00:55 +080010517 btrfs_put_block_group(cache);
Josef Bacik835d9742013-03-19 12:13:25 -040010518 goto error;
10519 }
Josef Bacik3c148742011-02-02 15:53:47 +000010520
10521 /*
Josef Bacik817d52f2009-07-13 21:29:25 -040010522 * check for two cases, either we are full, and therefore
10523 * don't need to bother with the caching work since we won't
10524 * find any space, or we are empty, and we can just add all
Andrea Gelmini52042d82018-11-28 12:05:13 +010010525 * the space in and be done with it. This saves us _a_lot_ of
Josef Bacik817d52f2009-07-13 21:29:25 -040010526 * time, particularly in the full case.
10527 */
10528 if (found_key.offset == btrfs_block_group_used(&cache->item)) {
Yan Zheng11833d62009-09-11 16:11:19 -040010529 cache->last_byte_to_unpin = (u64)-1;
Josef Bacik817d52f2009-07-13 21:29:25 -040010530 cache->cached = BTRFS_CACHE_FINISHED;
Nikolay Borisov9e715da2018-06-20 15:49:08 +030010531 free_excluded_extents(cache);
Josef Bacik817d52f2009-07-13 21:29:25 -040010532 } else if (btrfs_block_group_used(&cache->item) == 0) {
Yan Zheng11833d62009-09-11 16:11:19 -040010533 cache->last_byte_to_unpin = (u64)-1;
Josef Bacik817d52f2009-07-13 21:29:25 -040010534 cache->cached = BTRFS_CACHE_FINISHED;
Nikolay Borisov4457c1c2018-05-10 15:44:45 +030010535 add_new_free_space(cache, found_key.objectid,
Josef Bacik817d52f2009-07-13 21:29:25 -040010536 found_key.objectid +
10537 found_key.offset);
Nikolay Borisov9e715da2018-06-20 15:49:08 +030010538 free_excluded_extents(cache);
Josef Bacik817d52f2009-07-13 21:29:25 -040010539 }
Chris Mason96b51792007-10-15 16:15:19 -040010540
Jeff Mahoney0b246af2016-06-22 18:54:23 -040010541 ret = btrfs_add_block_group_cache(info, cache);
Josef Bacik8c579fe2013-04-02 12:40:42 -040010542 if (ret) {
10543 btrfs_remove_free_space_cache(cache);
10544 btrfs_put_block_group(cache);
10545 goto error;
10546 }
10547
Jeff Mahoney0b246af2016-06-22 18:54:23 -040010548 trace_btrfs_add_block_group(info, cache, 0);
Nikolay Borisovd2006e62017-05-22 09:35:50 +030010549 update_space_info(info, cache->flags, found_key.offset,
10550 btrfs_block_group_used(&cache->item),
10551 cache->bytes_super, &space_info);
Josef Bacik8c579fe2013-04-02 12:40:42 -040010552
Chris Mason6324fbf2008-03-24 15:01:59 -040010553 cache->space_info = space_info;
Josef Bacik1b2da372009-09-11 16:11:20 -040010554
Nikolay Borisovc434d212017-08-21 12:43:50 +030010555 link_block_group(cache);
Chris Mason6324fbf2008-03-24 15:01:59 -040010556
Jeff Mahoney0b246af2016-06-22 18:54:23 -040010557 set_avail_alloc_bits(info, cache->flags);
Jeff Mahoney2ff7e612016-06-22 18:54:24 -040010558 if (btrfs_chunk_readonly(info, cache->key.objectid)) {
Zhaolei868f4012015-08-05 16:43:27 +080010559 inc_block_group_ro(cache, 1);
Josef Bacik47ab2a62014-09-18 11:20:02 -040010560 } else if (btrfs_block_group_used(&cache->item) == 0) {
Qu Wenruo031f24d2018-05-22 16:43:47 +080010561 ASSERT(list_empty(&cache->bg_list));
10562 btrfs_mark_bg_unused(cache);
Josef Bacik47ab2a62014-09-18 11:20:02 -040010563 }
Chris Mason9078a3e2007-04-26 16:46:15 -040010564 }
Yan, Zhengb742bb822010-05-16 10:46:24 -040010565
Jeff Mahoney0b246af2016-06-22 18:54:23 -040010566 list_for_each_entry_rcu(space_info, &info->space_info, list) {
Jeff Mahoney2ff7e612016-06-22 18:54:24 -040010567 if (!(get_alloc_profile(info, space_info->flags) &
Yan, Zhengb742bb822010-05-16 10:46:24 -040010568 (BTRFS_BLOCK_GROUP_RAID10 |
10569 BTRFS_BLOCK_GROUP_RAID1 |
David Woodhouse53b381b2013-01-29 18:40:14 -050010570 BTRFS_BLOCK_GROUP_RAID5 |
10571 BTRFS_BLOCK_GROUP_RAID6 |
Yan, Zhengb742bb822010-05-16 10:46:24 -040010572 BTRFS_BLOCK_GROUP_DUP)))
10573 continue;
10574 /*
10575 * avoid allocating from un-mirrored block group if there are
10576 * mirrored block groups.
10577 */
chandan1095cc02013-07-16 12:28:56 +053010578 list_for_each_entry(cache,
10579 &space_info->block_groups[BTRFS_RAID_RAID0],
10580 list)
Zhaolei868f4012015-08-05 16:43:27 +080010581 inc_block_group_ro(cache, 1);
chandan1095cc02013-07-16 12:28:56 +053010582 list_for_each_entry(cache,
10583 &space_info->block_groups[BTRFS_RAID_SINGLE],
10584 list)
Zhaolei868f4012015-08-05 16:43:27 +080010585 inc_block_group_ro(cache, 1);
Yan, Zhengb742bb822010-05-16 10:46:24 -040010586 }
Yan, Zhengf0486c62010-05-16 10:46:25 -040010587
Jeff Mahoney75cb3792018-03-20 15:25:26 -040010588 btrfs_add_raid_kobjects(info);
Yan, Zhengf0486c62010-05-16 10:46:25 -040010589 init_global_block_rsv(info);
Qu Wenruo7ef49512018-08-01 10:37:17 +080010590 ret = check_chunk_block_group_mappings(info);
Chris Mason0b86a832008-03-24 15:01:56 -040010591error:
Chris Mason9078a3e2007-04-26 16:46:15 -040010592 btrfs_free_path(path);
Chris Mason0b86a832008-03-24 15:01:56 -040010593 return ret;
Chris Mason9078a3e2007-04-26 16:46:15 -040010594}
Chris Mason6324fbf2008-03-24 15:01:59 -040010595
Nikolay Borisov6c686b32018-02-07 17:55:40 +020010596void btrfs_create_pending_block_groups(struct btrfs_trans_handle *trans)
Josef Bacikea658ba2012-09-11 16:57:25 -040010597{
Nikolay Borisov6c686b32018-02-07 17:55:40 +020010598 struct btrfs_fs_info *fs_info = trans->fs_info;
Josef Bacik545e3362018-09-28 07:18:02 -040010599 struct btrfs_block_group_cache *block_group;
Jeff Mahoney0b246af2016-06-22 18:54:23 -040010600 struct btrfs_root *extent_root = fs_info->extent_root;
Josef Bacikea658ba2012-09-11 16:57:25 -040010601 struct btrfs_block_group_item item;
10602 struct btrfs_key key;
10603 int ret = 0;
10604
Filipe Manana5ce55552018-10-12 10:03:55 +010010605 if (!trans->can_flush_pending_bgs)
10606 return;
10607
Josef Bacik545e3362018-09-28 07:18:02 -040010608 while (!list_empty(&trans->new_bgs)) {
10609 block_group = list_first_entry(&trans->new_bgs,
10610 struct btrfs_block_group_cache,
10611 bg_list);
Josef Bacikea658ba2012-09-11 16:57:25 -040010612 if (ret)
Filipe Mananac92f6be2014-11-26 15:28:55 +000010613 goto next;
Josef Bacikea658ba2012-09-11 16:57:25 -040010614
10615 spin_lock(&block_group->lock);
10616 memcpy(&item, &block_group->item, sizeof(item));
10617 memcpy(&key, &block_group->key, sizeof(key));
10618 spin_unlock(&block_group->lock);
10619
10620 ret = btrfs_insert_item(trans, extent_root, &key, &item,
10621 sizeof(item));
10622 if (ret)
Jeff Mahoney66642832016-06-10 18:19:25 -040010623 btrfs_abort_transaction(trans, ret);
Nikolay Borisov97aff912018-07-20 19:37:53 +030010624 ret = btrfs_finish_chunk_alloc(trans, key.objectid, key.offset);
Josef Bacik6df9a952013-06-27 13:22:46 -040010625 if (ret)
Jeff Mahoney66642832016-06-10 18:19:25 -040010626 btrfs_abort_transaction(trans, ret);
Nikolay Borisove4e07112018-05-10 15:44:41 +030010627 add_block_group_free_space(trans, block_group);
Omar Sandoval1e144fb2015-09-29 20:50:37 -070010628 /* already aborted the transaction if it failed. */
Filipe Mananac92f6be2014-11-26 15:28:55 +000010629next:
Josef Bacikba2c4d42018-12-03 10:20:33 -050010630 btrfs_delayed_refs_rsv_release(fs_info, 1);
Filipe Mananac92f6be2014-11-26 15:28:55 +000010631 list_del_init(&block_group->bg_list);
Josef Bacikea658ba2012-09-11 16:57:25 -040010632 }
Filipe Manana5ce55552018-10-12 10:03:55 +010010633 btrfs_trans_release_chunk_metadata(trans);
Josef Bacikea658ba2012-09-11 16:57:25 -040010634}
10635
Nikolay Borisove7e02092018-06-20 15:48:55 +030010636int btrfs_make_block_group(struct btrfs_trans_handle *trans, u64 bytes_used,
Nikolay Borisov01744842017-07-27 14:22:11 +030010637 u64 type, u64 chunk_offset, u64 size)
Chris Mason6324fbf2008-03-24 15:01:59 -040010638{
Nikolay Borisove7e02092018-06-20 15:48:55 +030010639 struct btrfs_fs_info *fs_info = trans->fs_info;
Chris Mason6324fbf2008-03-24 15:01:59 -040010640 struct btrfs_block_group_cache *cache;
Jeff Mahoney0b246af2016-06-22 18:54:23 -040010641 int ret;
Chris Mason6324fbf2008-03-24 15:01:59 -040010642
David Sterba90787762019-03-20 13:28:05 +010010643 btrfs_set_log_full_commit(trans);
Chris Masone02119d2008-09-05 16:13:11 -040010644
Jeff Mahoney2ff7e612016-06-22 18:54:24 -040010645 cache = btrfs_create_block_group_cache(fs_info, chunk_offset, size);
Josef Bacik0f9dd462008-09-23 13:14:11 -040010646 if (!cache)
10647 return -ENOMEM;
Li Zefan34d52cb2011-03-29 13:46:06 +080010648
Chris Mason6324fbf2008-03-24 15:01:59 -040010649 btrfs_set_block_group_used(&cache->item, bytes_used);
Nikolay Borisov01744842017-07-27 14:22:11 +030010650 btrfs_set_block_group_chunk_objectid(&cache->item,
10651 BTRFS_FIRST_CHUNK_TREE_OBJECTID);
Chris Mason6324fbf2008-03-24 15:01:59 -040010652 btrfs_set_block_group_flags(&cache->item, type);
10653
Miao Xie920e4a52014-01-15 20:00:55 +080010654 cache->flags = type;
Yan Zheng11833d62009-09-11 16:11:19 -040010655 cache->last_byte_to_unpin = (u64)-1;
Josef Bacik817d52f2009-07-13 21:29:25 -040010656 cache->cached = BTRFS_CACHE_FINISHED;
Omar Sandoval1e144fb2015-09-29 20:50:37 -070010657 cache->needs_free_space = 1;
Nikolay Borisov3c4da652018-06-20 15:49:09 +030010658 ret = exclude_super_stripes(cache);
Josef Bacik835d9742013-03-19 12:13:25 -040010659 if (ret) {
10660 /*
10661 * We may have excluded something, so call this just in
10662 * case.
10663 */
Nikolay Borisov9e715da2018-06-20 15:49:08 +030010664 free_excluded_extents(cache);
Miao Xie920e4a52014-01-15 20:00:55 +080010665 btrfs_put_block_group(cache);
Josef Bacik835d9742013-03-19 12:13:25 -040010666 return ret;
10667 }
Josef Bacik96303082009-07-13 21:29:25 -040010668
Nikolay Borisov4457c1c2018-05-10 15:44:45 +030010669 add_new_free_space(cache, chunk_offset, chunk_offset + size);
Josef Bacik817d52f2009-07-13 21:29:25 -040010670
Nikolay Borisov9e715da2018-06-20 15:49:08 +030010671 free_excluded_extents(cache);
Yan Zheng11833d62009-09-11 16:11:19 -040010672
Josef Bacikd0bd4562015-09-23 14:54:14 -040010673#ifdef CONFIG_BTRFS_DEBUG
Jeff Mahoney2ff7e612016-06-22 18:54:24 -040010674 if (btrfs_should_fragment_free_space(cache)) {
Josef Bacikd0bd4562015-09-23 14:54:14 -040010675 u64 new_bytes_used = size - bytes_used;
10676
10677 bytes_used += new_bytes_used >> 1;
Jeff Mahoney2ff7e612016-06-22 18:54:24 -040010678 fragment_free_space(cache);
Josef Bacikd0bd4562015-09-23 14:54:14 -040010679 }
10680#endif
Filipe Manana2e6e5182015-05-12 00:28:11 +010010681 /*
Nikolay Borisov2be12ef2017-05-22 09:35:49 +030010682 * Ensure the corresponding space_info object is created and
10683 * assigned to our block group. We want our bg to be added to the rbtree
10684 * with its ->space_info set.
Filipe Manana2e6e5182015-05-12 00:28:11 +010010685 */
Nikolay Borisov2be12ef2017-05-22 09:35:49 +030010686 cache->space_info = __find_space_info(fs_info, cache->flags);
Jeff Mahoneydc2d3002018-03-20 15:25:25 -040010687 ASSERT(cache->space_info);
Filipe Manana2e6e5182015-05-12 00:28:11 +010010688
Jeff Mahoney0b246af2016-06-22 18:54:23 -040010689 ret = btrfs_add_block_group_cache(fs_info, cache);
Josef Bacik8c579fe2013-04-02 12:40:42 -040010690 if (ret) {
10691 btrfs_remove_free_space_cache(cache);
10692 btrfs_put_block_group(cache);
10693 return ret;
10694 }
10695
Filipe Manana2e6e5182015-05-12 00:28:11 +010010696 /*
10697 * Now that our block group has its ->space_info set and is inserted in
10698 * the rbtree, update the space info's counters.
10699 */
Jeff Mahoney0b246af2016-06-22 18:54:23 -040010700 trace_btrfs_add_block_group(fs_info, cache, 1);
Nikolay Borisovd2006e62017-05-22 09:35:50 +030010701 update_space_info(fs_info, cache->flags, size, bytes_used,
Josef Bacike40edf22016-03-25 13:25:47 -040010702 cache->bytes_super, &cache->space_info);
Jeff Mahoney0b246af2016-06-22 18:54:23 -040010703 update_global_block_rsv(fs_info);
Josef Bacik1b2da372009-09-11 16:11:20 -040010704
Nikolay Borisovc434d212017-08-21 12:43:50 +030010705 link_block_group(cache);
Chris Mason6324fbf2008-03-24 15:01:59 -040010706
Josef Bacik47ab2a62014-09-18 11:20:02 -040010707 list_add_tail(&cache->bg_list, &trans->new_bgs);
Josef Bacikba2c4d42018-12-03 10:20:33 -050010708 trans->delayed_ref_updates++;
10709 btrfs_update_delayed_refs_rsv(trans);
Chris Mason6324fbf2008-03-24 15:01:59 -040010710
Jeff Mahoney0b246af2016-06-22 18:54:23 -040010711 set_avail_alloc_bits(fs_info, type);
Chris Mason6324fbf2008-03-24 15:01:59 -040010712 return 0;
10713}
Zheng Yan1a40e232008-09-26 10:09:34 -040010714
Ilya Dryomov10ea00f2012-01-16 22:04:47 +020010715static void clear_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags)
10716{
Ilya Dryomov899c81e2012-03-27 17:09:16 +030010717 u64 extra_flags = chunk_to_extended(flags) &
10718 BTRFS_EXTENDED_PROFILE_MASK;
Ilya Dryomov10ea00f2012-01-16 22:04:47 +020010719
Miao Xiede98ced2013-01-29 10:13:12 +000010720 write_seqlock(&fs_info->profiles_lock);
Ilya Dryomov10ea00f2012-01-16 22:04:47 +020010721 if (flags & BTRFS_BLOCK_GROUP_DATA)
10722 fs_info->avail_data_alloc_bits &= ~extra_flags;
10723 if (flags & BTRFS_BLOCK_GROUP_METADATA)
10724 fs_info->avail_metadata_alloc_bits &= ~extra_flags;
10725 if (flags & BTRFS_BLOCK_GROUP_SYSTEM)
10726 fs_info->avail_system_alloc_bits &= ~extra_flags;
Miao Xiede98ced2013-01-29 10:13:12 +000010727 write_sequnlock(&fs_info->profiles_lock);
Ilya Dryomov10ea00f2012-01-16 22:04:47 +020010728}
10729
Zheng Yan1a40e232008-09-26 10:09:34 -040010730int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
Nikolay Borisov5a98ec02018-06-20 15:48:56 +030010731 u64 group_start, struct extent_map *em)
Zheng Yan1a40e232008-09-26 10:09:34 -040010732{
Nikolay Borisov5a98ec02018-06-20 15:48:56 +030010733 struct btrfs_fs_info *fs_info = trans->fs_info;
Jeff Mahoney6bccf3a2016-06-21 21:16:51 -040010734 struct btrfs_root *root = fs_info->extent_root;
Zheng Yan1a40e232008-09-26 10:09:34 -040010735 struct btrfs_path *path;
10736 struct btrfs_block_group_cache *block_group;
Chris Mason44fb5512009-06-04 15:34:51 -040010737 struct btrfs_free_cluster *cluster;
Jeff Mahoney0b246af2016-06-22 18:54:23 -040010738 struct btrfs_root *tree_root = fs_info->tree_root;
Zheng Yan1a40e232008-09-26 10:09:34 -040010739 struct btrfs_key key;
Josef Bacik0af3d002010-06-21 14:48:16 -040010740 struct inode *inode;
Jeff Mahoneyc1895442014-05-27 12:59:57 -040010741 struct kobject *kobj = NULL;
Zheng Yan1a40e232008-09-26 10:09:34 -040010742 int ret;
Ilya Dryomov10ea00f2012-01-16 22:04:47 +020010743 int index;
Josef Bacik89a55892010-10-14 14:52:27 -040010744 int factor;
Filipe Manana4f69cb92014-11-26 15:28:51 +000010745 struct btrfs_caching_control *caching_ctl = NULL;
Filipe Manana04216822014-11-27 21:14:15 +000010746 bool remove_em;
Josef Bacikba2c4d42018-12-03 10:20:33 -050010747 bool remove_rsv = false;
Zheng Yan1a40e232008-09-26 10:09:34 -040010748
Jeff Mahoney6bccf3a2016-06-21 21:16:51 -040010749 block_group = btrfs_lookup_block_group(fs_info, group_start);
Zheng Yan1a40e232008-09-26 10:09:34 -040010750 BUG_ON(!block_group);
Yan Zhengc146afa2008-11-12 14:34:12 -050010751 BUG_ON(!block_group->ro);
Zheng Yan1a40e232008-09-26 10:09:34 -040010752
Qu Wenruo4ed0a7a2018-04-26 17:17:20 +080010753 trace_btrfs_remove_block_group(block_group);
liubo9f7c43c2011-03-07 02:13:33 +000010754 /*
10755 * Free the reserved super bytes from this block group before
10756 * remove it.
10757 */
Nikolay Borisov9e715da2018-06-20 15:49:08 +030010758 free_excluded_extents(block_group);
Josef Bacikfd708b82017-09-29 15:43:50 -040010759 btrfs_free_ref_tree_range(fs_info, block_group->key.objectid,
10760 block_group->key.offset);
liubo9f7c43c2011-03-07 02:13:33 +000010761
Zheng Yan1a40e232008-09-26 10:09:34 -040010762 memcpy(&key, &block_group->key, sizeof(key));
Qu Wenruo3e72ee82018-01-30 18:20:45 +080010763 index = btrfs_bg_flags_to_raid_index(block_group->flags);
David Sterba46df06b2018-07-13 20:46:30 +020010764 factor = btrfs_bg_type_to_factor(block_group->flags);
Zheng Yan1a40e232008-09-26 10:09:34 -040010765
Chris Mason44fb5512009-06-04 15:34:51 -040010766 /* make sure this block group isn't part of an allocation cluster */
Jeff Mahoney0b246af2016-06-22 18:54:23 -040010767 cluster = &fs_info->data_alloc_cluster;
Chris Mason44fb5512009-06-04 15:34:51 -040010768 spin_lock(&cluster->refill_lock);
10769 btrfs_return_cluster_to_free_space(block_group, cluster);
10770 spin_unlock(&cluster->refill_lock);
10771
10772 /*
10773 * make sure this block group isn't part of a metadata
10774 * allocation cluster
10775 */
Jeff Mahoney0b246af2016-06-22 18:54:23 -040010776 cluster = &fs_info->meta_alloc_cluster;
Chris Mason44fb5512009-06-04 15:34:51 -040010777 spin_lock(&cluster->refill_lock);
10778 btrfs_return_cluster_to_free_space(block_group, cluster);
10779 spin_unlock(&cluster->refill_lock);
10780
Zheng Yan1a40e232008-09-26 10:09:34 -040010781 path = btrfs_alloc_path();
Mark Fashehd8926bb2011-07-13 10:38:47 -070010782 if (!path) {
10783 ret = -ENOMEM;
10784 goto out;
10785 }
Zheng Yan1a40e232008-09-26 10:09:34 -040010786
Chris Mason1bbc6212015-04-06 12:46:08 -070010787 /*
10788 * get the inode first so any iput calls done for the io_list
10789 * aren't the final iput (no unlinks allowed now)
10790 */
Jeff Mahoney77ab86b2017-02-15 16:28:30 -050010791 inode = lookup_free_space_inode(fs_info, block_group, path);
Chris Mason1bbc6212015-04-06 12:46:08 -070010792
10793 mutex_lock(&trans->transaction->cache_write_mutex);
10794 /*
Andrea Gelmini52042d82018-11-28 12:05:13 +010010795 * Make sure our free space cache IO is done before removing the
Chris Mason1bbc6212015-04-06 12:46:08 -070010796 * free space inode
10797 */
10798 spin_lock(&trans->transaction->dirty_bgs_lock);
10799 if (!list_empty(&block_group->io_list)) {
10800 list_del_init(&block_group->io_list);
10801
10802 WARN_ON(!IS_ERR(inode) && inode != block_group->io_ctl.inode);
10803
10804 spin_unlock(&trans->transaction->dirty_bgs_lock);
Jeff Mahoneyafdb5712016-09-09 12:09:35 -040010805 btrfs_wait_cache_io(trans, block_group, path);
Chris Mason1bbc6212015-04-06 12:46:08 -070010806 btrfs_put_block_group(block_group);
10807 spin_lock(&trans->transaction->dirty_bgs_lock);
10808 }
10809
10810 if (!list_empty(&block_group->dirty_list)) {
10811 list_del_init(&block_group->dirty_list);
Josef Bacikba2c4d42018-12-03 10:20:33 -050010812 remove_rsv = true;
Chris Mason1bbc6212015-04-06 12:46:08 -070010813 btrfs_put_block_group(block_group);
10814 }
10815 spin_unlock(&trans->transaction->dirty_bgs_lock);
10816 mutex_unlock(&trans->transaction->cache_write_mutex);
10817
Josef Bacik0af3d002010-06-21 14:48:16 -040010818 if (!IS_ERR(inode)) {
Nikolay Borisov73f2e542017-02-20 13:50:59 +020010819 ret = btrfs_orphan_add(trans, BTRFS_I(inode));
Jeff Mahoney79787ea2012-03-12 16:03:00 +010010820 if (ret) {
10821 btrfs_add_delayed_iput(inode);
10822 goto out;
10823 }
Josef Bacik0af3d002010-06-21 14:48:16 -040010824 clear_nlink(inode);
10825 /* One for the block groups ref */
10826 spin_lock(&block_group->lock);
10827 if (block_group->iref) {
10828 block_group->iref = 0;
10829 block_group->inode = NULL;
10830 spin_unlock(&block_group->lock);
10831 iput(inode);
10832 } else {
10833 spin_unlock(&block_group->lock);
10834 }
10835 /* One for our lookup ref */
Josef Bacik455757c2011-09-19 12:26:24 -040010836 btrfs_add_delayed_iput(inode);
Josef Bacik0af3d002010-06-21 14:48:16 -040010837 }
10838
10839 key.objectid = BTRFS_FREE_SPACE_OBJECTID;
10840 key.offset = block_group->key.objectid;
10841 key.type = 0;
10842
10843 ret = btrfs_search_slot(trans, tree_root, &key, path, -1, 1);
10844 if (ret < 0)
10845 goto out;
10846 if (ret > 0)
David Sterbab3b4aa72011-04-21 01:20:15 +020010847 btrfs_release_path(path);
Josef Bacik0af3d002010-06-21 14:48:16 -040010848 if (ret == 0) {
10849 ret = btrfs_del_item(trans, tree_root, path);
10850 if (ret)
10851 goto out;
David Sterbab3b4aa72011-04-21 01:20:15 +020010852 btrfs_release_path(path);
Josef Bacik0af3d002010-06-21 14:48:16 -040010853 }
10854
Jeff Mahoney0b246af2016-06-22 18:54:23 -040010855 spin_lock(&fs_info->block_group_cache_lock);
Zheng Yan1a40e232008-09-26 10:09:34 -040010856 rb_erase(&block_group->cache_node,
Jeff Mahoney0b246af2016-06-22 18:54:23 -040010857 &fs_info->block_group_cache_tree);
Filipe Manana292cbd52014-11-26 15:28:50 +000010858 RB_CLEAR_NODE(&block_group->cache_node);
Liu Boa1897fd2012-12-27 09:01:23 +000010859
Jeff Mahoney0b246af2016-06-22 18:54:23 -040010860 if (fs_info->first_logical_byte == block_group->key.objectid)
10861 fs_info->first_logical_byte = (u64)-1;
10862 spin_unlock(&fs_info->block_group_cache_lock);
Josef Bacik817d52f2009-07-13 21:29:25 -040010863
Josef Bacik80eb2342008-10-29 14:49:05 -040010864 down_write(&block_group->space_info->groups_sem);
Chris Mason44fb5512009-06-04 15:34:51 -040010865 /*
10866 * we must use list_del_init so people can check to see if they
10867 * are still on the list after taking the semaphore
10868 */
10869 list_del_init(&block_group->list);
Jeff Mahoney6ab0a202013-11-01 13:07:04 -040010870 if (list_empty(&block_group->space_info->block_groups[index])) {
Jeff Mahoneyc1895442014-05-27 12:59:57 -040010871 kobj = block_group->space_info->block_group_kobjs[index];
10872 block_group->space_info->block_group_kobjs[index] = NULL;
Jeff Mahoney0b246af2016-06-22 18:54:23 -040010873 clear_avail_alloc_bits(fs_info, block_group->flags);
Jeff Mahoney6ab0a202013-11-01 13:07:04 -040010874 }
Josef Bacik80eb2342008-10-29 14:49:05 -040010875 up_write(&block_group->space_info->groups_sem);
Jeff Mahoneyc1895442014-05-27 12:59:57 -040010876 if (kobj) {
10877 kobject_del(kobj);
10878 kobject_put(kobj);
10879 }
Zheng Yan1a40e232008-09-26 10:09:34 -040010880
Filipe Manana4f69cb92014-11-26 15:28:51 +000010881 if (block_group->has_caching_ctl)
10882 caching_ctl = get_caching_control(block_group);
Josef Bacik817d52f2009-07-13 21:29:25 -040010883 if (block_group->cached == BTRFS_CACHE_STARTED)
Yan Zheng11833d62009-09-11 16:11:19 -040010884 wait_block_group_cache_done(block_group);
Filipe Manana4f69cb92014-11-26 15:28:51 +000010885 if (block_group->has_caching_ctl) {
Jeff Mahoney0b246af2016-06-22 18:54:23 -040010886 down_write(&fs_info->commit_root_sem);
Filipe Manana4f69cb92014-11-26 15:28:51 +000010887 if (!caching_ctl) {
10888 struct btrfs_caching_control *ctl;
10889
10890 list_for_each_entry(ctl,
Jeff Mahoney0b246af2016-06-22 18:54:23 -040010891 &fs_info->caching_block_groups, list)
Filipe Manana4f69cb92014-11-26 15:28:51 +000010892 if (ctl->block_group == block_group) {
10893 caching_ctl = ctl;
Elena Reshetova1e4f4712017-03-03 10:55:14 +020010894 refcount_inc(&caching_ctl->count);
Filipe Manana4f69cb92014-11-26 15:28:51 +000010895 break;
10896 }
10897 }
10898 if (caching_ctl)
10899 list_del_init(&caching_ctl->list);
Jeff Mahoney0b246af2016-06-22 18:54:23 -040010900 up_write(&fs_info->commit_root_sem);
Filipe Manana4f69cb92014-11-26 15:28:51 +000010901 if (caching_ctl) {
10902 /* Once for the caching bgs list and once for us. */
10903 put_caching_control(caching_ctl);
10904 put_caching_control(caching_ctl);
10905 }
10906 }
Josef Bacik817d52f2009-07-13 21:29:25 -040010907
Josef Bacikce93ec52014-11-17 15:45:48 -050010908 spin_lock(&trans->transaction->dirty_bgs_lock);
Nikolay Borisov9a0ec832019-01-30 16:50:49 +020010909 WARN_ON(!list_empty(&block_group->dirty_list));
10910 WARN_ON(!list_empty(&block_group->io_list));
Josef Bacikce93ec52014-11-17 15:45:48 -050010911 spin_unlock(&trans->transaction->dirty_bgs_lock);
Nikolay Borisov9a0ec832019-01-30 16:50:49 +020010912
Josef Bacik817d52f2009-07-13 21:29:25 -040010913 btrfs_remove_free_space_cache(block_group);
10914
Yan Zhengc146afa2008-11-12 14:34:12 -050010915 spin_lock(&block_group->space_info->lock);
Filipe Manana75c68e92015-01-16 13:24:40 +000010916 list_del_init(&block_group->ro_list);
Zhao Lei18d018a2015-02-24 20:07:44 +080010917
Jeff Mahoney0b246af2016-06-22 18:54:23 -040010918 if (btrfs_test_opt(fs_info, ENOSPC_DEBUG)) {
Zhao Lei18d018a2015-02-24 20:07:44 +080010919 WARN_ON(block_group->space_info->total_bytes
10920 < block_group->key.offset);
10921 WARN_ON(block_group->space_info->bytes_readonly
10922 < block_group->key.offset);
10923 WARN_ON(block_group->space_info->disk_total
10924 < block_group->key.offset * factor);
10925 }
Yan Zhengc146afa2008-11-12 14:34:12 -050010926 block_group->space_info->total_bytes -= block_group->key.offset;
10927 block_group->space_info->bytes_readonly -= block_group->key.offset;
Josef Bacik89a55892010-10-14 14:52:27 -040010928 block_group->space_info->disk_total -= block_group->key.offset * factor;
Zhao Lei18d018a2015-02-24 20:07:44 +080010929
Yan Zhengc146afa2008-11-12 14:34:12 -050010930 spin_unlock(&block_group->space_info->lock);
Chris Mason283bb192009-07-24 16:30:55 -040010931
Josef Bacik0af3d002010-06-21 14:48:16 -040010932 memcpy(&key, &block_group->key, sizeof(key));
10933
David Sterba34441362016-10-04 19:34:27 +020010934 mutex_lock(&fs_info->chunk_mutex);
Filipe Manana04216822014-11-27 21:14:15 +000010935 spin_lock(&block_group->lock);
10936 block_group->removed = 1;
10937 /*
10938 * At this point trimming can't start on this block group, because we
10939 * removed the block group from the tree fs_info->block_group_cache_tree
10940 * so no one can't find it anymore and even if someone already got this
10941 * block group before we removed it from the rbtree, they have already
10942 * incremented block_group->trimming - if they didn't, they won't find
10943 * any free space entries because we already removed them all when we
10944 * called btrfs_remove_free_space_cache().
10945 *
10946 * And we must not remove the extent map from the fs_info->mapping_tree
10947 * to prevent the same logical address range and physical device space
10948 * ranges from being reused for a new block group. This is because our
10949 * fs trim operation (btrfs_trim_fs() / btrfs_ioctl_fitrim()) is
10950 * completely transactionless, so while it is trimming a range the
10951 * currently running transaction might finish and a new one start,
10952 * allowing for new block groups to be created that can reuse the same
10953 * physical device locations unless we take this special care.
Jeff Mahoneye33e17e2015-06-15 09:41:19 -040010954 *
10955 * There may also be an implicit trim operation if the file system
10956 * is mounted with -odiscard. The same protections must remain
10957 * in place until the extents have been discarded completely when
10958 * the transaction commit has completed.
Filipe Manana04216822014-11-27 21:14:15 +000010959 */
10960 remove_em = (atomic_read(&block_group->trimming) == 0);
Filipe Manana04216822014-11-27 21:14:15 +000010961 spin_unlock(&block_group->lock);
Filipe Manana04216822014-11-27 21:14:15 +000010962
10963 if (remove_em) {
10964 struct extent_map_tree *em_tree;
10965
Jeff Mahoney0b246af2016-06-22 18:54:23 -040010966 em_tree = &fs_info->mapping_tree.map_tree;
Filipe Manana04216822014-11-27 21:14:15 +000010967 write_lock(&em_tree->lock);
10968 remove_extent_mapping(em_tree, em);
10969 write_unlock(&em_tree->lock);
10970 /* once for the tree */
10971 free_extent_map(em);
10972 }
10973
David Sterba34441362016-10-04 19:34:27 +020010974 mutex_unlock(&fs_info->chunk_mutex);
Filipe Manana8dbcd102014-12-02 18:07:49 +000010975
Nikolay Borisovf3f72772018-05-10 15:44:46 +030010976 ret = remove_block_group_free_space(trans, block_group);
Omar Sandoval1e144fb2015-09-29 20:50:37 -070010977 if (ret)
10978 goto out;
10979
Chris Masonfa9c0d792009-04-03 09:47:43 -040010980 btrfs_put_block_group(block_group);
10981 btrfs_put_block_group(block_group);
Zheng Yan1a40e232008-09-26 10:09:34 -040010982
10983 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
10984 if (ret > 0)
10985 ret = -EIO;
10986 if (ret < 0)
10987 goto out;
10988
10989 ret = btrfs_del_item(trans, root, path);
10990out:
Josef Bacikba2c4d42018-12-03 10:20:33 -050010991 if (remove_rsv)
10992 btrfs_delayed_refs_rsv_release(fs_info, 1);
Zheng Yan1a40e232008-09-26 10:09:34 -040010993 btrfs_free_path(path);
10994 return ret;
10995}
liuboacce9522011-01-06 19:30:25 +080010996
Filipe Manana8eab77f2015-11-13 23:57:16 +000010997struct btrfs_trans_handle *
Filipe Manana7fd01182015-11-13 23:57:17 +000010998btrfs_start_trans_remove_block_group(struct btrfs_fs_info *fs_info,
10999 const u64 chunk_offset)
Filipe Manana8eab77f2015-11-13 23:57:16 +000011000{
Filipe Manana7fd01182015-11-13 23:57:17 +000011001 struct extent_map_tree *em_tree = &fs_info->mapping_tree.map_tree;
11002 struct extent_map *em;
11003 struct map_lookup *map;
11004 unsigned int num_items;
11005
11006 read_lock(&em_tree->lock);
11007 em = lookup_extent_mapping(em_tree, chunk_offset, 1);
11008 read_unlock(&em_tree->lock);
11009 ASSERT(em && em->start == chunk_offset);
11010
Filipe Manana8eab77f2015-11-13 23:57:16 +000011011 /*
Filipe Manana7fd01182015-11-13 23:57:17 +000011012 * We need to reserve 3 + N units from the metadata space info in order
11013 * to remove a block group (done at btrfs_remove_chunk() and at
11014 * btrfs_remove_block_group()), which are used for:
11015 *
Filipe Manana8eab77f2015-11-13 23:57:16 +000011016 * 1 unit for adding the free space inode's orphan (located in the tree
11017 * of tree roots).
Filipe Manana7fd01182015-11-13 23:57:17 +000011018 * 1 unit for deleting the block group item (located in the extent
11019 * tree).
11020 * 1 unit for deleting the free space item (located in tree of tree
11021 * roots).
11022 * N units for deleting N device extent items corresponding to each
11023 * stripe (located in the device tree).
11024 *
11025 * In order to remove a block group we also need to reserve units in the
11026 * system space info in order to update the chunk tree (update one or
11027 * more device items and remove one chunk item), but this is done at
11028 * btrfs_remove_chunk() through a call to check_system_chunk().
Filipe Manana8eab77f2015-11-13 23:57:16 +000011029 */
Jeff Mahoney95617d62015-06-03 10:55:48 -040011030 map = em->map_lookup;
Filipe Manana7fd01182015-11-13 23:57:17 +000011031 num_items = 3 + map->num_stripes;
11032 free_extent_map(em);
11033
Filipe Manana8eab77f2015-11-13 23:57:16 +000011034 return btrfs_start_transaction_fallback_global_rsv(fs_info->extent_root,
Filipe Manana7fd01182015-11-13 23:57:17 +000011035 num_items, 1);
Filipe Manana8eab77f2015-11-13 23:57:16 +000011036}
11037
Josef Bacik47ab2a62014-09-18 11:20:02 -040011038/*
11039 * Process the unused_bgs list and remove any that don't have any allocated
11040 * space inside of them.
11041 */
11042void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)
11043{
11044 struct btrfs_block_group_cache *block_group;
11045 struct btrfs_space_info *space_info;
Josef Bacik47ab2a62014-09-18 11:20:02 -040011046 struct btrfs_trans_handle *trans;
11047 int ret = 0;
11048
Josef Bacikafcdd122016-09-02 15:40:02 -040011049 if (!test_bit(BTRFS_FS_OPEN, &fs_info->flags))
Josef Bacik47ab2a62014-09-18 11:20:02 -040011050 return;
11051
11052 spin_lock(&fs_info->unused_bgs_lock);
11053 while (!list_empty(&fs_info->unused_bgs)) {
11054 u64 start, end;
Jeff Mahoneye33e17e2015-06-15 09:41:19 -040011055 int trimming;
Josef Bacik47ab2a62014-09-18 11:20:02 -040011056
11057 block_group = list_first_entry(&fs_info->unused_bgs,
11058 struct btrfs_block_group_cache,
11059 bg_list);
Josef Bacik47ab2a62014-09-18 11:20:02 -040011060 list_del_init(&block_group->bg_list);
Zhao Leiaefbe9a2015-09-29 21:03:54 +080011061
11062 space_info = block_group->space_info;
11063
Josef Bacik47ab2a62014-09-18 11:20:02 -040011064 if (ret || btrfs_mixed_space_info(space_info)) {
11065 btrfs_put_block_group(block_group);
11066 continue;
11067 }
11068 spin_unlock(&fs_info->unused_bgs_lock);
11069
Zhao Leid5f2e332015-10-08 18:46:44 +080011070 mutex_lock(&fs_info->delete_unused_bgs_mutex);
Filipe Manana67c5e7d2015-06-11 00:58:53 +010011071
Josef Bacik47ab2a62014-09-18 11:20:02 -040011072 /* Don't want to race with allocators so take the groups_sem */
11073 down_write(&space_info->groups_sem);
11074 spin_lock(&block_group->lock);
Qu Wenruo43794442018-06-22 12:35:00 +080011075 if (block_group->reserved || block_group->pinned ||
Josef Bacik47ab2a62014-09-18 11:20:02 -040011076 btrfs_block_group_used(&block_group->item) ||
Chris Mason19c4d2f2016-10-10 13:43:31 -070011077 block_group->ro ||
Zhao Leiaefbe9a2015-09-29 21:03:54 +080011078 list_is_singular(&block_group->list)) {
Josef Bacik47ab2a62014-09-18 11:20:02 -040011079 /*
11080 * We want to bail if we made new allocations or have
11081 * outstanding allocations in this block group. We do
11082 * the ro check in case balance is currently acting on
11083 * this block group.
11084 */
Qu Wenruo4ed0a7a2018-04-26 17:17:20 +080011085 trace_btrfs_skip_unused_block_group(block_group);
Josef Bacik47ab2a62014-09-18 11:20:02 -040011086 spin_unlock(&block_group->lock);
11087 up_write(&space_info->groups_sem);
11088 goto next;
11089 }
11090 spin_unlock(&block_group->lock);
11091
11092 /* We don't want to force the issue, only flip if it's ok. */
Zhaolei868f4012015-08-05 16:43:27 +080011093 ret = inc_block_group_ro(block_group, 0);
Josef Bacik47ab2a62014-09-18 11:20:02 -040011094 up_write(&space_info->groups_sem);
11095 if (ret < 0) {
11096 ret = 0;
11097 goto next;
11098 }
11099
11100 /*
11101 * Want to do this before we do anything else so we can recover
11102 * properly if we fail to join the transaction.
11103 */
Filipe Manana7fd01182015-11-13 23:57:17 +000011104 trans = btrfs_start_trans_remove_block_group(fs_info,
11105 block_group->key.objectid);
Josef Bacik47ab2a62014-09-18 11:20:02 -040011106 if (IS_ERR(trans)) {
Jeff Mahoney2ff7e612016-06-22 18:54:24 -040011107 btrfs_dec_block_group_ro(block_group);
Josef Bacik47ab2a62014-09-18 11:20:02 -040011108 ret = PTR_ERR(trans);
11109 goto next;
11110 }
11111
11112 /*
11113 * We could have pending pinned extents for this block group,
11114 * just delete them, we don't care about them anymore.
11115 */
11116 start = block_group->key.objectid;
11117 end = start + block_group->key.offset - 1;
Filipe Mananad4b450c2015-01-29 19:18:25 +000011118 /*
11119 * Hold the unused_bg_unpin_mutex lock to avoid racing with
11120 * btrfs_finish_extent_commit(). If we are at transaction N,
11121 * another task might be running finish_extent_commit() for the
11122 * previous transaction N - 1, and have seen a range belonging
11123 * to the block group in freed_extents[] before we were able to
11124 * clear the whole block group range from freed_extents[]. This
11125 * means that task can lookup for the block group after we
11126 * unpinned it from freed_extents[] and removed it, leading to
11127 * a BUG_ON() at btrfs_unpin_extent_range().
11128 */
11129 mutex_lock(&fs_info->unused_bg_unpin_mutex);
Filipe Manana758eb512014-11-03 14:08:39 +000011130 ret = clear_extent_bits(&fs_info->freed_extents[0], start, end,
David Sterba91166212016-04-26 23:54:39 +020011131 EXTENT_DIRTY);
Filipe Manana758eb512014-11-03 14:08:39 +000011132 if (ret) {
Filipe Mananad4b450c2015-01-29 19:18:25 +000011133 mutex_unlock(&fs_info->unused_bg_unpin_mutex);
Jeff Mahoney2ff7e612016-06-22 18:54:24 -040011134 btrfs_dec_block_group_ro(block_group);
Filipe Manana758eb512014-11-03 14:08:39 +000011135 goto end_trans;
11136 }
11137 ret = clear_extent_bits(&fs_info->freed_extents[1], start, end,
David Sterba91166212016-04-26 23:54:39 +020011138 EXTENT_DIRTY);
Filipe Manana758eb512014-11-03 14:08:39 +000011139 if (ret) {
Filipe Mananad4b450c2015-01-29 19:18:25 +000011140 mutex_unlock(&fs_info->unused_bg_unpin_mutex);
Jeff Mahoney2ff7e612016-06-22 18:54:24 -040011141 btrfs_dec_block_group_ro(block_group);
Filipe Manana758eb512014-11-03 14:08:39 +000011142 goto end_trans;
11143 }
Filipe Mananad4b450c2015-01-29 19:18:25 +000011144 mutex_unlock(&fs_info->unused_bg_unpin_mutex);
Josef Bacik47ab2a62014-09-18 11:20:02 -040011145
11146 /* Reset pinned so btrfs_put_block_group doesn't complain */
Zhao Leic30666d2015-02-25 14:17:20 +080011147 spin_lock(&space_info->lock);
11148 spin_lock(&block_group->lock);
11149
Lu Fengqie2907c12018-10-24 20:24:02 +080011150 update_bytes_pinned(space_info, -block_group->pinned);
Zhao Leic30666d2015-02-25 14:17:20 +080011151 space_info->bytes_readonly += block_group->pinned;
Ethan Liendec59fa2018-07-13 16:50:42 +080011152 percpu_counter_add_batch(&space_info->total_bytes_pinned,
11153 -block_group->pinned,
11154 BTRFS_TOTAL_BYTES_PINNED_BATCH);
Josef Bacik47ab2a62014-09-18 11:20:02 -040011155 block_group->pinned = 0;
11156
Zhao Leic30666d2015-02-25 14:17:20 +080011157 spin_unlock(&block_group->lock);
11158 spin_unlock(&space_info->lock);
11159
Jeff Mahoneye33e17e2015-06-15 09:41:19 -040011160 /* DISCARD can flip during remount */
Jeff Mahoney0b246af2016-06-22 18:54:23 -040011161 trimming = btrfs_test_opt(fs_info, DISCARD);
Jeff Mahoneye33e17e2015-06-15 09:41:19 -040011162
11163 /* Implicit trim during transaction commit. */
11164 if (trimming)
11165 btrfs_get_block_group_trimming(block_group);
11166
Josef Bacik47ab2a62014-09-18 11:20:02 -040011167 /*
11168 * Btrfs_remove_chunk will abort the transaction if things go
11169 * horribly wrong.
11170 */
Nikolay Borisov97aff912018-07-20 19:37:53 +030011171 ret = btrfs_remove_chunk(trans, block_group->key.objectid);
Jeff Mahoneye33e17e2015-06-15 09:41:19 -040011172
11173 if (ret) {
11174 if (trimming)
11175 btrfs_put_block_group_trimming(block_group);
11176 goto end_trans;
11177 }
11178
11179 /*
11180 * If we're not mounted with -odiscard, we can just forget
11181 * about this block group. Otherwise we'll need to wait
11182 * until transaction commit to do the actual discard.
11183 */
11184 if (trimming) {
Filipe Manana348a0012015-11-27 12:16:16 +000011185 spin_lock(&fs_info->unused_bgs_lock);
11186 /*
11187 * A concurrent scrub might have added us to the list
11188 * fs_info->unused_bgs, so use a list_move operation
11189 * to add the block group to the deleted_bgs list.
11190 */
Jeff Mahoneye33e17e2015-06-15 09:41:19 -040011191 list_move(&block_group->bg_list,
11192 &trans->transaction->deleted_bgs);
Filipe Manana348a0012015-11-27 12:16:16 +000011193 spin_unlock(&fs_info->unused_bgs_lock);
Jeff Mahoneye33e17e2015-06-15 09:41:19 -040011194 btrfs_get_block_group(block_group);
11195 }
Filipe Manana758eb512014-11-03 14:08:39 +000011196end_trans:
Jeff Mahoney3a45bb22016-09-09 21:39:03 -040011197 btrfs_end_transaction(trans);
Josef Bacik47ab2a62014-09-18 11:20:02 -040011198next:
Zhao Leid5f2e332015-10-08 18:46:44 +080011199 mutex_unlock(&fs_info->delete_unused_bgs_mutex);
Josef Bacik47ab2a62014-09-18 11:20:02 -040011200 btrfs_put_block_group(block_group);
11201 spin_lock(&fs_info->unused_bgs_lock);
11202 }
11203 spin_unlock(&fs_info->unused_bgs_lock);
11204}
11205
liuboc59021f2011-03-07 02:13:14 +000011206int btrfs_init_space_info(struct btrfs_fs_info *fs_info)
11207{
liubo1aba86d2011-04-08 08:44:37 +000011208 struct btrfs_super_block *disk_super;
11209 u64 features;
11210 u64 flags;
11211 int mixed = 0;
liuboc59021f2011-03-07 02:13:14 +000011212 int ret;
11213
David Sterba6c417612011-04-13 15:41:04 +020011214 disk_super = fs_info->super_copy;
liubo1aba86d2011-04-08 08:44:37 +000011215 if (!btrfs_super_root(disk_super))
Dan Carpenter0dc924c52016-01-13 15:21:17 +030011216 return -EINVAL;
liuboc59021f2011-03-07 02:13:14 +000011217
liubo1aba86d2011-04-08 08:44:37 +000011218 features = btrfs_super_incompat_flags(disk_super);
11219 if (features & BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)
11220 mixed = 1;
liuboc59021f2011-03-07 02:13:14 +000011221
liubo1aba86d2011-04-08 08:44:37 +000011222 flags = BTRFS_BLOCK_GROUP_SYSTEM;
Lu Fengqi4ca61682018-05-28 14:30:27 +080011223 ret = create_space_info(fs_info, flags);
liuboc59021f2011-03-07 02:13:14 +000011224 if (ret)
liubo1aba86d2011-04-08 08:44:37 +000011225 goto out;
liuboc59021f2011-03-07 02:13:14 +000011226
liubo1aba86d2011-04-08 08:44:37 +000011227 if (mixed) {
11228 flags = BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_DATA;
Lu Fengqi4ca61682018-05-28 14:30:27 +080011229 ret = create_space_info(fs_info, flags);
liubo1aba86d2011-04-08 08:44:37 +000011230 } else {
11231 flags = BTRFS_BLOCK_GROUP_METADATA;
Lu Fengqi4ca61682018-05-28 14:30:27 +080011232 ret = create_space_info(fs_info, flags);
liubo1aba86d2011-04-08 08:44:37 +000011233 if (ret)
11234 goto out;
11235
11236 flags = BTRFS_BLOCK_GROUP_DATA;
Lu Fengqi4ca61682018-05-28 14:30:27 +080011237 ret = create_space_info(fs_info, flags);
liubo1aba86d2011-04-08 08:44:37 +000011238 }
11239out:
liuboc59021f2011-03-07 02:13:14 +000011240 return ret;
11241}
11242
Jeff Mahoney2ff7e612016-06-22 18:54:24 -040011243int btrfs_error_unpin_extent_range(struct btrfs_fs_info *fs_info,
11244 u64 start, u64 end)
liuboacce9522011-01-06 19:30:25 +080011245{
Jeff Mahoney2ff7e612016-06-22 18:54:24 -040011246 return unpin_extent_range(fs_info, start, end, false);
liuboacce9522011-01-06 19:30:25 +080011247}
11248
Jeff Mahoney499f3772015-06-15 09:41:17 -040011249/*
11250 * It used to be that old block groups would be left around forever.
11251 * Iterating over them would be enough to trim unused space. Since we
11252 * now automatically remove them, we also need to iterate over unallocated
11253 * space.
11254 *
11255 * We don't want a transaction for this since the discard may take a
11256 * substantial amount of time. We don't require that a transaction be
11257 * running, but we do need to take a running transaction into account
Jeff Mahoneyfee7acc2018-09-06 17:18:16 -040011258 * to ensure that we're not discarding chunks that were released or
11259 * allocated in the current transaction.
Jeff Mahoney499f3772015-06-15 09:41:17 -040011260 *
11261 * Holding the chunks lock will prevent other threads from allocating
11262 * or releasing chunks, but it won't prevent a running transaction
11263 * from committing and releasing the memory that the pending chunks
11264 * list head uses. For that, we need to take a reference to the
Jeff Mahoneyfee7acc2018-09-06 17:18:16 -040011265 * transaction and hold the commit root sem. We only need to hold
11266 * it while performing the free space search since we have already
11267 * held back allocations.
Jeff Mahoney499f3772015-06-15 09:41:17 -040011268 */
11269static int btrfs_trim_free_extents(struct btrfs_device *device,
Nikolay Borisovc2d1b3a2019-03-25 14:31:21 +020011270 struct fstrim_range *range, u64 *trimmed)
Jeff Mahoney499f3772015-06-15 09:41:17 -040011271{
Nikolay Borisov929be172019-03-27 14:24:18 +020011272 u64 start, len = 0, end = 0;
Jeff Mahoney499f3772015-06-15 09:41:17 -040011273 int ret;
11274
Nikolay Borisov929be172019-03-27 14:24:18 +020011275 start = max_t(u64, range->start, SZ_1M);
Jeff Mahoney499f3772015-06-15 09:41:17 -040011276 *trimmed = 0;
11277
Jeff Mahoney0be88e32018-09-06 17:18:15 -040011278 /* Discard not supported = nothing to do. */
11279 if (!blk_queue_discard(bdev_get_queue(device->bdev)))
11280 return 0;
11281
Andrea Gelmini52042d82018-11-28 12:05:13 +010011282 /* Not writable = nothing to do. */
Anand Jainebbede42017-12-04 12:54:52 +080011283 if (!test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state))
Jeff Mahoney499f3772015-06-15 09:41:17 -040011284 return 0;
11285
11286 /* No free space = nothing to do. */
11287 if (device->total_bytes <= device->bytes_used)
11288 return 0;
11289
11290 ret = 0;
11291
11292 while (1) {
Jeff Mahoneyfb456252016-06-22 18:54:56 -040011293 struct btrfs_fs_info *fs_info = device->fs_info;
Jeff Mahoney499f3772015-06-15 09:41:17 -040011294 u64 bytes;
11295
11296 ret = mutex_lock_interruptible(&fs_info->chunk_mutex);
11297 if (ret)
Jeff Mahoneyfee7acc2018-09-06 17:18:16 -040011298 break;
Jeff Mahoney499f3772015-06-15 09:41:17 -040011299
Nikolay Borisov929be172019-03-27 14:24:18 +020011300 find_first_clear_extent_bit(&device->alloc_state, start,
11301 &start, &end,
11302 CHUNK_TRIMMED | CHUNK_ALLOCATED);
11303 /*
11304 * If find_first_clear_extent_bit find a range that spans the
11305 * end of the device it will set end to -1, in this case it's up
11306 * to the caller to trim the value to the size of the device.
11307 */
11308 end = min(end, device->total_bytes - 1);
11309 len = end - start + 1;
Jeff Mahoney499f3772015-06-15 09:41:17 -040011310
Nikolay Borisov929be172019-03-27 14:24:18 +020011311 /* We didn't find any extents */
11312 if (!len) {
Jeff Mahoney499f3772015-06-15 09:41:17 -040011313 mutex_unlock(&fs_info->chunk_mutex);
Nikolay Borisov929be172019-03-27 14:24:18 +020011314 ret = 0;
Jeff Mahoney499f3772015-06-15 09:41:17 -040011315 break;
11316 }
11317
Nikolay Borisov929be172019-03-27 14:24:18 +020011318 /* Keep going until we satisfy minlen or reach end of space */
11319 if (len < range->minlen) {
11320 mutex_unlock(&fs_info->chunk_mutex);
11321 start += len;
11322 continue;
11323 }
11324
Nikolay Borisovc2d1b3a2019-03-25 14:31:21 +020011325 /* If we are out of the passed range break */
11326 if (start > range->start + range->len - 1) {
11327 mutex_unlock(&fs_info->chunk_mutex);
Nikolay Borisovc2d1b3a2019-03-25 14:31:21 +020011328 break;
11329 }
11330
11331 start = max(range->start, start);
11332 len = min(range->len, len);
11333
Nikolay Borisov929be172019-03-27 14:24:18 +020011334 ret = btrfs_issue_discard(device->bdev, start, len,
11335 &bytes);
11336 if (!ret)
11337 set_extent_bits(&device->alloc_state, start,
11338 start + bytes - 1,
11339 CHUNK_TRIMMED);
Jeff Mahoney499f3772015-06-15 09:41:17 -040011340 mutex_unlock(&fs_info->chunk_mutex);
11341
11342 if (ret)
11343 break;
11344
11345 start += len;
11346 *trimmed += bytes;
11347
Nikolay Borisovc2d1b3a2019-03-25 14:31:21 +020011348 /* We've trimmed enough */
11349 if (*trimmed >= range->len)
11350 break;
11351
Jeff Mahoney499f3772015-06-15 09:41:17 -040011352 if (fatal_signal_pending(current)) {
11353 ret = -ERESTARTSYS;
11354 break;
11355 }
11356
11357 cond_resched();
11358 }
11359
11360 return ret;
11361}
11362
Qu Wenruo93bba242018-09-07 14:16:23 +080011363/*
11364 * Trim the whole filesystem by:
11365 * 1) trimming the free space in each block group
11366 * 2) trimming the unallocated space on each device
11367 *
11368 * This will also continue trimming even if a block group or device encounters
11369 * an error. The return value will be the last error, or 0 if nothing bad
11370 * happens.
11371 */
Jeff Mahoney2ff7e612016-06-22 18:54:24 -040011372int btrfs_trim_fs(struct btrfs_fs_info *fs_info, struct fstrim_range *range)
Li Dongyangf7039b12011-03-24 10:24:28 +000011373{
Li Dongyangf7039b12011-03-24 10:24:28 +000011374 struct btrfs_block_group_cache *cache = NULL;
Jeff Mahoney499f3772015-06-15 09:41:17 -040011375 struct btrfs_device *device;
11376 struct list_head *devices;
Li Dongyangf7039b12011-03-24 10:24:28 +000011377 u64 group_trimmed;
11378 u64 start;
11379 u64 end;
11380 u64 trimmed = 0;
Qu Wenruo93bba242018-09-07 14:16:23 +080011381 u64 bg_failed = 0;
11382 u64 dev_failed = 0;
11383 int bg_ret = 0;
11384 int dev_ret = 0;
Li Dongyangf7039b12011-03-24 10:24:28 +000011385 int ret = 0;
11386
Qu Wenruo6ba9fc82018-09-07 14:16:24 +080011387 cache = btrfs_lookup_first_block_group(fs_info, range->start);
Qu Wenruo93bba242018-09-07 14:16:23 +080011388 for (; cache; cache = next_block_group(fs_info, cache)) {
Li Dongyangf7039b12011-03-24 10:24:28 +000011389 if (cache->key.objectid >= (range->start + range->len)) {
11390 btrfs_put_block_group(cache);
11391 break;
11392 }
11393
11394 start = max(range->start, cache->key.objectid);
11395 end = min(range->start + range->len,
11396 cache->key.objectid + cache->key.offset);
11397
11398 if (end - start >= range->minlen) {
11399 if (!block_group_cache_done(cache)) {
Liu Bof6373bf2012-12-27 09:01:18 +000011400 ret = cache_block_group(cache, 0);
Josef Bacik1be41b72013-06-12 13:56:06 -040011401 if (ret) {
Qu Wenruo93bba242018-09-07 14:16:23 +080011402 bg_failed++;
11403 bg_ret = ret;
11404 continue;
Josef Bacik1be41b72013-06-12 13:56:06 -040011405 }
11406 ret = wait_block_group_cache_done(cache);
11407 if (ret) {
Qu Wenruo93bba242018-09-07 14:16:23 +080011408 bg_failed++;
11409 bg_ret = ret;
11410 continue;
Josef Bacik1be41b72013-06-12 13:56:06 -040011411 }
Li Dongyangf7039b12011-03-24 10:24:28 +000011412 }
11413 ret = btrfs_trim_block_group(cache,
11414 &group_trimmed,
11415 start,
11416 end,
11417 range->minlen);
11418
11419 trimmed += group_trimmed;
11420 if (ret) {
Qu Wenruo93bba242018-09-07 14:16:23 +080011421 bg_failed++;
11422 bg_ret = ret;
11423 continue;
Li Dongyangf7039b12011-03-24 10:24:28 +000011424 }
11425 }
Li Dongyangf7039b12011-03-24 10:24:28 +000011426 }
11427
Qu Wenruo93bba242018-09-07 14:16:23 +080011428 if (bg_failed)
11429 btrfs_warn(fs_info,
11430 "failed to trim %llu block group(s), last error %d",
11431 bg_failed, bg_ret);
Jeff Mahoney0b246af2016-06-22 18:54:23 -040011432 mutex_lock(&fs_info->fs_devices->device_list_mutex);
Jeff Mahoneyd4e329d2018-09-06 17:18:14 -040011433 devices = &fs_info->fs_devices->devices;
11434 list_for_each_entry(device, devices, dev_list) {
Nikolay Borisovc2d1b3a2019-03-25 14:31:21 +020011435 ret = btrfs_trim_free_extents(device, range, &group_trimmed);
Qu Wenruo93bba242018-09-07 14:16:23 +080011436 if (ret) {
11437 dev_failed++;
11438 dev_ret = ret;
Jeff Mahoney499f3772015-06-15 09:41:17 -040011439 break;
Qu Wenruo93bba242018-09-07 14:16:23 +080011440 }
Jeff Mahoney499f3772015-06-15 09:41:17 -040011441
11442 trimmed += group_trimmed;
11443 }
Jeff Mahoney0b246af2016-06-22 18:54:23 -040011444 mutex_unlock(&fs_info->fs_devices->device_list_mutex);
Jeff Mahoney499f3772015-06-15 09:41:17 -040011445
Qu Wenruo93bba242018-09-07 14:16:23 +080011446 if (dev_failed)
11447 btrfs_warn(fs_info,
11448 "failed to trim %llu device(s), last error %d",
11449 dev_failed, dev_ret);
Li Dongyangf7039b12011-03-24 10:24:28 +000011450 range->len = trimmed;
Qu Wenruo93bba242018-09-07 14:16:23 +080011451 if (bg_ret)
11452 return bg_ret;
11453 return dev_ret;
Li Dongyangf7039b12011-03-24 10:24:28 +000011454}
Miao Xie8257b2d2014-03-06 13:38:19 +080011455
11456/*
David Sterbaea14b57f2017-06-22 02:19:11 +020011457 * btrfs_{start,end}_write_no_snapshotting() are similar to
Filipe Manana9ea24bb2014-10-29 11:57:59 +000011458 * mnt_{want,drop}_write(), they are used to prevent some tasks from writing
11459 * data into the page cache through nocow before the subvolume is snapshoted,
11460 * but flush the data into disk after the snapshot creation, or to prevent
David Sterbaea14b57f2017-06-22 02:19:11 +020011461 * operations while snapshotting is ongoing and that cause the snapshot to be
Filipe Manana9ea24bb2014-10-29 11:57:59 +000011462 * inconsistent (writes followed by expanding truncates for example).
Miao Xie8257b2d2014-03-06 13:38:19 +080011463 */
David Sterbaea14b57f2017-06-22 02:19:11 +020011464void btrfs_end_write_no_snapshotting(struct btrfs_root *root)
Miao Xie8257b2d2014-03-06 13:38:19 +080011465{
11466 percpu_counter_dec(&root->subv_writers->counter);
David Sterba093258e2018-02-26 16:15:17 +010011467 cond_wake_up(&root->subv_writers->wait);
Miao Xie8257b2d2014-03-06 13:38:19 +080011468}
11469
David Sterbaea14b57f2017-06-22 02:19:11 +020011470int btrfs_start_write_no_snapshotting(struct btrfs_root *root)
Miao Xie8257b2d2014-03-06 13:38:19 +080011471{
David Sterbaea14b57f2017-06-22 02:19:11 +020011472 if (atomic_read(&root->will_be_snapshotted))
Miao Xie8257b2d2014-03-06 13:38:19 +080011473 return 0;
11474
11475 percpu_counter_inc(&root->subv_writers->counter);
11476 /*
11477 * Make sure counter is updated before we check for snapshot creation.
11478 */
11479 smp_mb();
David Sterbaea14b57f2017-06-22 02:19:11 +020011480 if (atomic_read(&root->will_be_snapshotted)) {
11481 btrfs_end_write_no_snapshotting(root);
Miao Xie8257b2d2014-03-06 13:38:19 +080011482 return 0;
11483 }
11484 return 1;
11485}
Zhao Lei0bc19f902016-01-06 18:56:36 +080011486
Zhao Lei0bc19f902016-01-06 18:56:36 +080011487void btrfs_wait_for_snapshot_creation(struct btrfs_root *root)
11488{
11489 while (true) {
11490 int ret;
11491
David Sterbaea14b57f2017-06-22 02:19:11 +020011492 ret = btrfs_start_write_no_snapshotting(root);
Zhao Lei0bc19f902016-01-06 18:56:36 +080011493 if (ret)
11494 break;
Peter Zijlstra46259562018-03-15 11:43:08 +010011495 wait_var_event(&root->will_be_snapshotted,
11496 !atomic_read(&root->will_be_snapshotted));
Zhao Lei0bc19f902016-01-06 18:56:36 +080011497 }
11498}
Qu Wenruo031f24d2018-05-22 16:43:47 +080011499
11500void btrfs_mark_bg_unused(struct btrfs_block_group_cache *bg)
11501{
11502 struct btrfs_fs_info *fs_info = bg->fs_info;
11503
11504 spin_lock(&fs_info->unused_bgs_lock);
11505 if (list_empty(&bg->bg_list)) {
11506 btrfs_get_block_group(bg);
11507 trace_btrfs_add_unused_block_group(bg);
11508 list_add_tail(&bg->bg_list, &fs_info->unused_bgs);
11509 }
11510 spin_unlock(&fs_info->unused_bgs_lock);
11511}