blob: f5db03669eadfb5c3ad4e05b3dcf94323249c8ce [file] [log] [blame]
Chris Mason6cbd5572007-06-12 09:07:21 -04001/*
2 * Copyright (C) 2007 Oracle. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 021110-1307, USA.
17 */
Zach Brownec6b9102007-07-11 10:00:37 -040018#include <linux/sched.h>
Chris Masonedbd8d42007-12-21 16:27:24 -050019#include <linux/pagemap.h>
Chris Masonec44a352008-04-28 15:29:52 -040020#include <linux/writeback.h>
David Woodhouse21af8042008-08-12 14:13:26 +010021#include <linux/blkdev.h>
Chris Masonb7a9f292009-02-04 09:23:45 -050022#include <linux/sort.h>
Chris Mason4184ea72009-03-10 12:39:20 -040023#include <linux/rcupdate.h>
Josef Bacik817d52f2009-07-13 21:29:25 -040024#include <linux/kthread.h>
Tejun Heo5a0e3ad2010-03-24 17:04:11 +090025#include <linux/slab.h>
Chris Mason4b4e25f2008-11-20 10:22:27 -050026#include "compat.h"
Chris Mason74493f72007-12-11 09:25:06 -050027#include "hash.h"
Chris Masonfec577f2007-02-26 10:40:21 -050028#include "ctree.h"
29#include "disk-io.h"
30#include "print-tree.h"
Chris Masone089f052007-03-16 16:20:31 -040031#include "transaction.h"
Chris Mason0b86a832008-03-24 15:01:56 -040032#include "volumes.h"
Chris Mason925baed2008-06-25 16:01:30 -040033#include "locking.h"
Chris Masonfa9c0d792009-04-03 09:47:43 -040034#include "free-space-cache.h"
Chris Masonfec577f2007-02-26 10:40:21 -050035
Josef Bacikf3465ca2008-11-12 14:19:50 -050036static int update_block_group(struct btrfs_trans_handle *trans,
37 struct btrfs_root *root,
38 u64 bytenr, u64 num_bytes, int alloc,
39 int mark_free);
Yan Zheng11833d62009-09-11 16:11:19 -040040static int update_reserved_extents(struct btrfs_block_group_cache *cache,
41 u64 num_bytes, int reserve);
Yan Zheng5d4f98a2009-06-10 10:45:14 -040042static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
43 struct btrfs_root *root,
44 u64 bytenr, u64 num_bytes, u64 parent,
45 u64 root_objectid, u64 owner_objectid,
46 u64 owner_offset, int refs_to_drop,
47 struct btrfs_delayed_extent_op *extra_op);
48static void __run_delayed_extent_op(struct btrfs_delayed_extent_op *extent_op,
49 struct extent_buffer *leaf,
50 struct btrfs_extent_item *ei);
51static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
52 struct btrfs_root *root,
53 u64 parent, u64 root_objectid,
54 u64 flags, u64 owner, u64 offset,
55 struct btrfs_key *ins, int ref_mod);
56static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
57 struct btrfs_root *root,
58 u64 parent, u64 root_objectid,
59 u64 flags, struct btrfs_disk_key *key,
60 int level, struct btrfs_key *ins);
Josef Bacik6a632092009-02-20 11:00:09 -050061static int do_chunk_alloc(struct btrfs_trans_handle *trans,
62 struct btrfs_root *extent_root, u64 alloc_bytes,
63 u64 flags, int force);
Yan Zheng11833d62009-09-11 16:11:19 -040064static int pin_down_bytes(struct btrfs_trans_handle *trans,
65 struct btrfs_root *root,
66 struct btrfs_path *path,
67 u64 bytenr, u64 num_bytes,
68 int is_data, int reserved,
69 struct extent_buffer **must_clean);
70static int find_next_key(struct btrfs_path *path, int level,
71 struct btrfs_key *key);
Josef Bacik9ed74f22009-09-11 16:12:44 -040072static void dump_space_info(struct btrfs_space_info *info, u64 bytes,
73 int dump_block_groups);
Josef Bacik6a632092009-02-20 11:00:09 -050074
Josef Bacik817d52f2009-07-13 21:29:25 -040075static noinline int
76block_group_cache_done(struct btrfs_block_group_cache *cache)
77{
78 smp_mb();
79 return cache->cached == BTRFS_CACHE_FINISHED;
80}
81
Josef Bacik0f9dd462008-09-23 13:14:11 -040082static int block_group_bits(struct btrfs_block_group_cache *cache, u64 bits)
83{
84 return (cache->flags & bits) == bits;
85}
86
/*
 * Take an extra reference on a block group cache object.  Pair with
 * btrfs_put_block_group(), which frees the object on the final put.
 */
void btrfs_get_block_group(struct btrfs_block_group_cache *cache)
{
	atomic_inc(&cache->count);
}
91
92void btrfs_put_block_group(struct btrfs_block_group_cache *cache)
93{
94 if (atomic_dec_and_test(&cache->count))
95 kfree(cache);
96}
97
Josef Bacik0f9dd462008-09-23 13:14:11 -040098/*
99 * this adds the block group to the fs_info rb tree for the block group
100 * cache
101 */
Christoph Hellwigb2950862008-12-02 09:54:17 -0500102static int btrfs_add_block_group_cache(struct btrfs_fs_info *info,
Josef Bacik0f9dd462008-09-23 13:14:11 -0400103 struct btrfs_block_group_cache *block_group)
104{
105 struct rb_node **p;
106 struct rb_node *parent = NULL;
107 struct btrfs_block_group_cache *cache;
108
109 spin_lock(&info->block_group_cache_lock);
110 p = &info->block_group_cache_tree.rb_node;
111
112 while (*p) {
113 parent = *p;
114 cache = rb_entry(parent, struct btrfs_block_group_cache,
115 cache_node);
116 if (block_group->key.objectid < cache->key.objectid) {
117 p = &(*p)->rb_left;
118 } else if (block_group->key.objectid > cache->key.objectid) {
119 p = &(*p)->rb_right;
120 } else {
121 spin_unlock(&info->block_group_cache_lock);
122 return -EEXIST;
123 }
124 }
125
126 rb_link_node(&block_group->cache_node, parent, p);
127 rb_insert_color(&block_group->cache_node,
128 &info->block_group_cache_tree);
129 spin_unlock(&info->block_group_cache_lock);
130
131 return 0;
132}
133
/*
 * This will return the block group at or after bytenr if contains is 0, else
 * it will return the block group that contains the bytenr
 *
 * On success the returned block group carries an extra reference taken via
 * btrfs_get_block_group(); the caller must drop it.  Returns NULL when no
 * matching group exists.
 */
static struct btrfs_block_group_cache *
block_group_cache_tree_search(struct btrfs_fs_info *info, u64 bytenr,
			      int contains)
{
	struct btrfs_block_group_cache *cache, *ret = NULL;
	struct rb_node *n;
	u64 end, start;

	spin_lock(&info->block_group_cache_lock);
	n = info->block_group_cache_tree.rb_node;

	while (n) {
		cache = rb_entry(n, struct btrfs_block_group_cache,
				 cache_node);
		/* [start, end] is the byte range covered by this group */
		end = cache->key.objectid + cache->key.offset - 1;
		start = cache->key.objectid;

		if (bytenr < start) {
			/*
			 * bytenr is before this group; remember the lowest
			 * such group as the "at or after" candidate.
			 */
			if (!contains && (!ret || start < ret->key.objectid))
				ret = cache;
			n = n->rb_left;
		} else if (bytenr > start) {
			/* inside the group's range counts as a containment hit */
			if (contains && bytenr <= end) {
				ret = cache;
				break;
			}
			n = n->rb_right;
		} else {
			/* exact start match satisfies both modes */
			ret = cache;
			break;
		}
	}
	if (ret)
		btrfs_get_block_group(ret);
	spin_unlock(&info->block_group_cache_lock);

	return ret;
}
176
Yan Zheng11833d62009-09-11 16:11:19 -0400177static int add_excluded_extent(struct btrfs_root *root,
178 u64 start, u64 num_bytes)
Josef Bacik817d52f2009-07-13 21:29:25 -0400179{
Yan Zheng11833d62009-09-11 16:11:19 -0400180 u64 end = start + num_bytes - 1;
181 set_extent_bits(&root->fs_info->freed_extents[0],
182 start, end, EXTENT_UPTODATE, GFP_NOFS);
183 set_extent_bits(&root->fs_info->freed_extents[1],
184 start, end, EXTENT_UPTODATE, GFP_NOFS);
185 return 0;
Josef Bacik817d52f2009-07-13 21:29:25 -0400186}
187
Yan Zheng11833d62009-09-11 16:11:19 -0400188static void free_excluded_extents(struct btrfs_root *root,
189 struct btrfs_block_group_cache *cache)
Josef Bacik817d52f2009-07-13 21:29:25 -0400190{
Yan Zheng11833d62009-09-11 16:11:19 -0400191 u64 start, end;
192
193 start = cache->key.objectid;
194 end = start + cache->key.offset - 1;
195
196 clear_extent_bits(&root->fs_info->freed_extents[0],
197 start, end, EXTENT_UPTODATE, GFP_NOFS);
198 clear_extent_bits(&root->fs_info->freed_extents[1],
199 start, end, EXTENT_UPTODATE, GFP_NOFS);
200}
201
/*
 * Exclude the on-disk super block copies (and anything before the first
 * super block offset) that fall inside this block group from free-space
 * caching, accumulating the excluded byte count in cache->bytes_super.
 * Always returns 0; failures of the helpers are fatal (BUG_ON).
 */
static int exclude_super_stripes(struct btrfs_root *root,
				 struct btrfs_block_group_cache *cache)
{
	u64 bytenr;
	u64 *logical;
	int stripe_len;
	int i, nr, ret;

	/*
	 * The area below BTRFS_SUPER_INFO_OFFSET is never usable; exclude
	 * whatever part of it overlaps the start of this block group.
	 */
	if (cache->key.objectid < BTRFS_SUPER_INFO_OFFSET) {
		stripe_len = BTRFS_SUPER_INFO_OFFSET - cache->key.objectid;
		cache->bytes_super += stripe_len;
		ret = add_excluded_extent(root, cache->key.objectid,
					  stripe_len);
		BUG_ON(ret);
	}

	for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
		bytenr = btrfs_sb_offset(i);
		/*
		 * Map the super block mirror's physical location back to
		 * logical addresses inside this block group; logical[] is
		 * allocated by btrfs_rmap_block and freed below.
		 */
		ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
				       cache->key.objectid, bytenr,
				       0, &logical, &nr, &stripe_len);
		BUG_ON(ret);

		while (nr--) {
			cache->bytes_super += stripe_len;
			ret = add_excluded_extent(root, logical[nr],
						  stripe_len);
			BUG_ON(ret);
		}

		kfree(logical);
	}
	return 0;
}
236
Yan Zheng11833d62009-09-11 16:11:19 -0400237static struct btrfs_caching_control *
238get_caching_control(struct btrfs_block_group_cache *cache)
239{
240 struct btrfs_caching_control *ctl;
241
242 spin_lock(&cache->lock);
243 if (cache->cached != BTRFS_CACHE_STARTED) {
244 spin_unlock(&cache->lock);
245 return NULL;
246 }
247
248 ctl = cache->caching_ctl;
249 atomic_inc(&ctl->count);
250 spin_unlock(&cache->lock);
251 return ctl;
252}
253
254static void put_caching_control(struct btrfs_caching_control *ctl)
255{
256 if (atomic_dec_and_test(&ctl->count))
257 kfree(ctl);
258}
259
/*
 * this is only called by cache_block_group, since we could have freed extents
 * we need to check the pinned_extents for any extents that can't be used yet
 * since their free space will be released as soon as the transaction commits.
 *
 * Walks [start, end), skipping over ranges marked DIRTY/UPTODATE in
 * info->pinned_extents, and feeds the remaining gaps to
 * btrfs_add_free_space().  Returns the total number of bytes added.
 */
static u64 add_new_free_space(struct btrfs_block_group_cache *block_group,
			      struct btrfs_fs_info *info, u64 start, u64 end)
{
	u64 extent_start, extent_end, size, total_added = 0;
	int ret;

	while (start < end) {
		/* find the next pinned/excluded range at or after start */
		ret = find_first_extent_bit(info->pinned_extents, start,
					    &extent_start, &extent_end,
					    EXTENT_DIRTY | EXTENT_UPTODATE);
		if (ret)
			break;

		if (extent_start <= start) {
			/* start sits inside a pinned range; jump past it */
			start = extent_end + 1;
		} else if (extent_start > start && extent_start < end) {
			/* free gap between start and the pinned range */
			size = extent_start - start;
			total_added += size;
			ret = btrfs_add_free_space(block_group, start,
						   size);
			BUG_ON(ret);
			start = extent_end + 1;
		} else {
			/* the pinned range begins at/after end; tail below */
			break;
		}
	}

	/* whatever remains before end is entirely free */
	if (start < end) {
		size = end - start;
		total_added += size;
		ret = btrfs_add_free_space(block_group, start, size);
		BUG_ON(ret);
	}

	return total_added;
}
301
/*
 * Kernel thread that populates a block group's free-space cache by walking
 * the extent tree (commit root) and recording the gaps between extent items
 * via add_new_free_space().  On completion it marks the group
 * BTRFS_CACHE_FINISHED and drops all the references cache_block_group()
 * took on its behalf.
 *
 * NOTE(review): the early return on btrfs_alloc_path() failure skips the
 * put_caching_control()/atomic_dec()/btrfs_put_block_group() cleanup at the
 * bottom and appears to leak those references — confirm against later
 * upstream fixes.
 */
static int caching_kthread(void *data)
{
	struct btrfs_block_group_cache *block_group = data;
	struct btrfs_fs_info *fs_info = block_group->fs_info;
	struct btrfs_caching_control *caching_ctl = block_group->caching_ctl;
	struct btrfs_root *extent_root = fs_info->extent_root;
	struct btrfs_path *path;
	struct extent_buffer *leaf;
	struct btrfs_key key;
	u64 total_found = 0;
	u64 last = 0;
	u32 nritems;
	int ret = 0;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	/* super block copies inside the group must never become free space */
	exclude_super_stripes(extent_root, block_group);
	spin_lock(&block_group->space_info->lock);
	block_group->space_info->bytes_super += block_group->bytes_super;
	spin_unlock(&block_group->space_info->lock);

	last = max_t(u64, block_group->key.objectid, BTRFS_SUPER_INFO_OFFSET);

	/*
	 * We don't want to deadlock with somebody trying to allocate a new
	 * extent for the extent root while also trying to search the extent
	 * root to add free space. So we skip locking and search the commit
	 * root, since it's read-only
	 */
	path->skip_locking = 1;
	path->search_commit_root = 1;
	path->reada = 2;

	key.objectid = last;
	key.offset = 0;
	key.type = BTRFS_EXTENT_ITEM_KEY;
again:
	mutex_lock(&caching_ctl->mutex);
	/* need to make sure the commit_root doesn't disappear */
	down_read(&fs_info->extent_commit_sem);

	ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
	if (ret < 0)
		goto err;

	leaf = path->nodes[0];
	nritems = btrfs_header_nritems(leaf);

	while (1) {
		smp_mb();
		/* filesystem is going away; pretend we reached the end */
		if (fs_info->closing > 1) {
			last = (u64)-1;
			break;
		}

		if (path->slots[0] < nritems) {
			btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
		} else {
			ret = find_next_key(path, 0, &key);
			if (ret)
				break;

			/*
			 * Leaf exhausted: publish progress, drop all locks so
			 * a transaction commit can proceed, then restart the
			 * search from the next key.
			 */
			caching_ctl->progress = last;
			btrfs_release_path(extent_root, path);
			up_read(&fs_info->extent_commit_sem);
			mutex_unlock(&caching_ctl->mutex);
			if (btrfs_transaction_in_commit(fs_info))
				schedule_timeout(1);
			else
				cond_resched();
			goto again;
		}

		/* skip items that start before our block group */
		if (key.objectid < block_group->key.objectid) {
			path->slots[0]++;
			continue;
		}

		/* past the end of the block group: done */
		if (key.objectid >= block_group->key.objectid +
		    block_group->key.offset)
			break;

		if (key.type == BTRFS_EXTENT_ITEM_KEY) {
			/* the gap [last, key.objectid) is free space */
			total_found += add_new_free_space(block_group,
							  fs_info, last,
							  key.objectid);
			last = key.objectid + key.offset;

			/* wake waiters every ~2MB of discovered free space */
			if (total_found > (1024 * 1024 * 2)) {
				total_found = 0;
				wake_up(&caching_ctl->wait);
			}
		}
		path->slots[0]++;
	}
	ret = 0;

	/* account the tail gap after the last extent item */
	total_found += add_new_free_space(block_group, fs_info, last,
					  block_group->key.objectid +
					  block_group->key.offset);
	caching_ctl->progress = (u64)-1;

	spin_lock(&block_group->lock);
	block_group->caching_ctl = NULL;
	block_group->cached = BTRFS_CACHE_FINISHED;
	spin_unlock(&block_group->lock);

err:
	btrfs_free_path(path);
	up_read(&fs_info->extent_commit_sem);

	free_excluded_extents(extent_root, block_group);

	mutex_unlock(&caching_ctl->mutex);
	wake_up(&caching_ctl->wait);

	put_caching_control(caching_ctl);
	atomic_dec(&block_group->space_info->caching_threads);
	/* drop the reference cache_block_group() took for this thread */
	btrfs_put_block_group(block_group);

	return 0;
}
426
/*
 * Kick off background free-space caching for a block group.  Allocates a
 * caching control, transitions the group from BTRFS_CACHE_NO to
 * BTRFS_CACHE_STARTED under cache->lock (racing starters lose and return 0),
 * registers the control on fs_info->caching_block_groups, and spawns
 * caching_kthread.  Returns 0 if caching is started or already under way.
 */
static int cache_block_group(struct btrfs_block_group_cache *cache)
{
	struct btrfs_fs_info *fs_info = cache->fs_info;
	struct btrfs_caching_control *caching_ctl;
	struct task_struct *tsk;
	int ret = 0;

	/* cheap unlocked fast path; re-checked under the lock below */
	smp_mb();
	if (cache->cached != BTRFS_CACHE_NO)
		return 0;

	/*
	 * NOTE(review): GFP_KERNEL allocation with BUG_ON on failure — a
	 * -ENOMEM return would be recoverable; confirm callers can handle it.
	 */
	caching_ctl = kzalloc(sizeof(*caching_ctl), GFP_KERNEL);
	BUG_ON(!caching_ctl);

	INIT_LIST_HEAD(&caching_ctl->list);
	mutex_init(&caching_ctl->mutex);
	init_waitqueue_head(&caching_ctl->wait);
	caching_ctl->block_group = cache;
	caching_ctl->progress = cache->key.objectid;
	/* one for caching kthread, one for caching block group list */
	atomic_set(&caching_ctl->count, 2);

	spin_lock(&cache->lock);
	/* somebody else may have started caching while we allocated */
	if (cache->cached != BTRFS_CACHE_NO) {
		spin_unlock(&cache->lock);
		kfree(caching_ctl);
		return 0;
	}
	cache->caching_ctl = caching_ctl;
	cache->cached = BTRFS_CACHE_STARTED;
	spin_unlock(&cache->lock);

	down_write(&fs_info->extent_commit_sem);
	list_add_tail(&caching_ctl->list, &fs_info->caching_block_groups);
	up_write(&fs_info->extent_commit_sem);

	atomic_inc(&cache->space_info->caching_threads);
	/* reference dropped by caching_kthread when it finishes */
	btrfs_get_block_group(cache);

	/*
	 * NOTE(review): the trailing "\n" ends up in the kthread's task
	 * name — looks unintentional; verify before changing.
	 */
	tsk = kthread_run(caching_kthread, cache, "btrfs-cache-%llu\n",
			  cache->key.objectid);
	if (IS_ERR(tsk)) {
		ret = PTR_ERR(tsk);
		printk(KERN_ERR "error running thread %d\n", ret);
		BUG();
	}

	return ret;
}
476
Josef Bacik0f9dd462008-09-23 13:14:11 -0400477/*
478 * return the block group that starts at or after bytenr
479 */
Chris Masond3977122009-01-05 21:25:51 -0500480static struct btrfs_block_group_cache *
481btrfs_lookup_first_block_group(struct btrfs_fs_info *info, u64 bytenr)
Chris Mason0ef3e662008-05-24 14:04:53 -0400482{
Josef Bacik0f9dd462008-09-23 13:14:11 -0400483 struct btrfs_block_group_cache *cache;
Chris Mason0ef3e662008-05-24 14:04:53 -0400484
Josef Bacik0f9dd462008-09-23 13:14:11 -0400485 cache = block_group_cache_tree_search(info, bytenr, 0);
Chris Mason0ef3e662008-05-24 14:04:53 -0400486
Josef Bacik0f9dd462008-09-23 13:14:11 -0400487 return cache;
Chris Mason0ef3e662008-05-24 14:04:53 -0400488}
489
Josef Bacik0f9dd462008-09-23 13:14:11 -0400490/*
Sankar P9f556842009-05-14 13:52:22 -0400491 * return the block group that contains the given bytenr
Josef Bacik0f9dd462008-09-23 13:14:11 -0400492 */
Chris Masond3977122009-01-05 21:25:51 -0500493struct btrfs_block_group_cache *btrfs_lookup_block_group(
494 struct btrfs_fs_info *info,
495 u64 bytenr)
Chris Masonbe744172007-05-06 10:15:01 -0400496{
Josef Bacik0f9dd462008-09-23 13:14:11 -0400497 struct btrfs_block_group_cache *cache;
Chris Masonbe744172007-05-06 10:15:01 -0400498
Josef Bacik0f9dd462008-09-23 13:14:11 -0400499 cache = block_group_cache_tree_search(info, bytenr, 1);
Chris Mason96b51792007-10-15 16:15:19 -0400500
Josef Bacik0f9dd462008-09-23 13:14:11 -0400501 return cache;
Chris Masonbe744172007-05-06 10:15:01 -0400502}
Chris Mason0b86a832008-03-24 15:01:56 -0400503
Josef Bacik0f9dd462008-09-23 13:14:11 -0400504static struct btrfs_space_info *__find_space_info(struct btrfs_fs_info *info,
505 u64 flags)
Chris Mason6324fbf2008-03-24 15:01:59 -0400506{
Josef Bacik0f9dd462008-09-23 13:14:11 -0400507 struct list_head *head = &info->space_info;
Josef Bacik0f9dd462008-09-23 13:14:11 -0400508 struct btrfs_space_info *found;
Chris Mason4184ea72009-03-10 12:39:20 -0400509
Yan, Zhengb742bb822010-05-16 10:46:24 -0400510 flags &= BTRFS_BLOCK_GROUP_DATA | BTRFS_BLOCK_GROUP_SYSTEM |
511 BTRFS_BLOCK_GROUP_METADATA;
512
Chris Mason4184ea72009-03-10 12:39:20 -0400513 rcu_read_lock();
514 list_for_each_entry_rcu(found, head, list) {
515 if (found->flags == flags) {
516 rcu_read_unlock();
Josef Bacik0f9dd462008-09-23 13:14:11 -0400517 return found;
Chris Mason4184ea72009-03-10 12:39:20 -0400518 }
Josef Bacik0f9dd462008-09-23 13:14:11 -0400519 }
Chris Mason4184ea72009-03-10 12:39:20 -0400520 rcu_read_unlock();
Josef Bacik0f9dd462008-09-23 13:14:11 -0400521 return NULL;
Chris Mason6324fbf2008-03-24 15:01:59 -0400522}
523
Chris Mason4184ea72009-03-10 12:39:20 -0400524/*
525 * after adding space to the filesystem, we need to clear the full flags
526 * on all the space infos.
527 */
528void btrfs_clear_space_info_full(struct btrfs_fs_info *info)
529{
530 struct list_head *head = &info->space_info;
531 struct btrfs_space_info *found;
532
533 rcu_read_lock();
534 list_for_each_entry_rcu(found, head, list)
535 found->full = 0;
536 rcu_read_unlock();
537}
538
Josef Bacik80eb2342008-10-29 14:49:05 -0400539static u64 div_factor(u64 num, int factor)
540{
541 if (factor == 10)
542 return num;
543 num *= factor;
544 do_div(num, 10);
545 return num;
546}
547
/*
 * Scan the block groups for a metadata group that is less than ~90% used
 * (factor 9) starting at max(search_hint, search_start); if none is found,
 * wrap back to search_start and retry, then relax to a full search (include
 * read-only groups, factor 10).  Returns the start of the chosen group, or
 * 0 if nothing qualified.
 *
 * NOTE(review): the @owner parameter is accepted but never used here.
 */
u64 btrfs_find_block_group(struct btrfs_root *root,
			   u64 search_start, u64 search_hint, int owner)
{
	struct btrfs_block_group_cache *cache;
	u64 used;
	u64 last = max(search_hint, search_start);
	u64 group_start = 0;
	int full_search = 0;
	int factor = 9;
	int wrapped = 0;
again:
	while (1) {
		cache = btrfs_lookup_first_block_group(root->fs_info, last);
		if (!cache)
			break;

		spin_lock(&cache->lock);
		/* advance past this group for the next iteration */
		last = cache->key.objectid + cache->key.offset;
		used = btrfs_block_group_used(&cache->item);

		/* read-only groups only qualify on the relaxed full pass */
		if ((full_search || !cache->ro) &&
		    block_group_bits(cache, BTRFS_BLOCK_GROUP_METADATA)) {
			if (used + cache->pinned + cache->reserved <
			    div_factor(cache->key.offset, factor)) {
				group_start = cache->key.objectid;
				spin_unlock(&cache->lock);
				btrfs_put_block_group(cache);
				goto found;
			}
		}
		spin_unlock(&cache->lock);
		btrfs_put_block_group(cache);
		cond_resched();
	}
	/* first retry: wrap around to search_start */
	if (!wrapped) {
		last = search_start;
		wrapped = 1;
		goto again;
	}
	/* second retry: include ro groups and drop the fullness threshold */
	if (!full_search && factor < 10) {
		last = search_start;
		full_search = 1;
		factor = 10;
		goto again;
	}
found:
	return group_start;
}
Josef Bacik0f9dd462008-09-23 13:14:11 -0400596
Chris Masone02119d2008-09-05 16:13:11 -0400597/* simple helper to search for an existing extent at a given offset */
Zheng Yan31840ae2008-09-23 13:14:14 -0400598int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len)
Chris Masone02119d2008-09-05 16:13:11 -0400599{
600 int ret;
601 struct btrfs_key key;
Zheng Yan31840ae2008-09-23 13:14:14 -0400602 struct btrfs_path *path;
Chris Masone02119d2008-09-05 16:13:11 -0400603
Zheng Yan31840ae2008-09-23 13:14:14 -0400604 path = btrfs_alloc_path();
605 BUG_ON(!path);
Chris Masone02119d2008-09-05 16:13:11 -0400606 key.objectid = start;
607 key.offset = len;
608 btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
609 ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, path,
610 0, 0);
Zheng Yan31840ae2008-09-23 13:14:14 -0400611 btrfs_free_path(path);
Chris Mason7bb86312007-12-11 09:25:06 -0500612 return ret;
613}
614
Chris Masond8d5f3e2007-12-11 12:42:00 -0500615/*
616 * Back reference rules. Back refs have three main goals:
617 *
618 * 1) differentiate between all holders of references to an extent so that
619 * when a reference is dropped we can make sure it was a valid reference
620 * before freeing the extent.
621 *
622 * 2) Provide enough information to quickly find the holders of an extent
623 * if we notice a given block is corrupted or bad.
624 *
625 * 3) Make it easy to migrate blocks for FS shrinking or storage pool
626 * maintenance. This is actually the same as #2, but with a slightly
627 * different use case.
628 *
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400629 * There are two kinds of back refs. The implicit back refs is optimized
630 * for pointers in non-shared tree blocks. For a given pointer in a block,
631 * back refs of this kind provide information about the block's owner tree
632 * and the pointer's key. These information allow us to find the block by
633 * b-tree searching. The full back refs is for pointers in tree blocks not
634 * referenced by their owner trees. The location of tree block is recorded
635 * in the back refs. Actually the full back refs is generic, and can be
636 * used in all cases the implicit back refs is used. The major shortcoming
637 * of the full back refs is its overhead. Every time a tree block gets
638 * COWed, we have to update back refs entry for all pointers in it.
639 *
640 * For a newly allocated tree block, we use implicit back refs for
641 * pointers in it. This means most tree related operations only involve
642 * implicit back refs. For a tree block created in old transaction, the
643 * only way to drop a reference to it is COW it. So we can detect the
644 * event that tree block loses its owner tree's reference and do the
645 * back refs conversion.
646 *
647 * When a tree block is COW'd through a tree, there are four cases:
648 *
649 * The reference count of the block is one and the tree is the block's
650 * owner tree. Nothing to do in this case.
651 *
652 * The reference count of the block is one and the tree is not the
653 * block's owner tree. In this case, full back refs is used for pointers
654 * in the block. Remove these full back refs, add implicit back refs for
655 * every pointers in the new block.
656 *
657 * The reference count of the block is greater than one and the tree is
658 * the block's owner tree. In this case, implicit back refs is used for
659 * pointers in the block. Add full back refs for every pointers in the
660 * block, increase lower level extents' reference counts. The original
661 * implicit back refs are entailed to the new block.
662 *
663 * The reference count of the block is greater than one and the tree is
664 * not the block's owner tree. Add implicit back refs for every pointer in
665 * the new block, increase lower level extents' reference count.
666 *
667 * Back Reference Key composing:
668 *
669 * The key objectid corresponds to the first byte in the extent,
670 * The key type is used to differentiate between types of back refs.
671 * There are different meanings of the key offset for different types
672 * of back refs.
673 *
Chris Masond8d5f3e2007-12-11 12:42:00 -0500674 * File extents can be referenced by:
675 *
676 * - multiple snapshots, subvolumes, or different generations in one subvol
Zheng Yan31840ae2008-09-23 13:14:14 -0400677 * - different files inside a single subvolume
Chris Masond8d5f3e2007-12-11 12:42:00 -0500678 * - different offsets inside a file (bookend extents in file.c)
679 *
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400680 * The extent ref structure for the implicit back refs has fields for:
Chris Masond8d5f3e2007-12-11 12:42:00 -0500681 *
682 * - Objectid of the subvolume root
Chris Masond8d5f3e2007-12-11 12:42:00 -0500683 * - objectid of the file holding the reference
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400684 * - original offset in the file
685 * - how many bookend extents
Zheng Yan31840ae2008-09-23 13:14:14 -0400686 *
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400687 * The key offset for the implicit back refs is hash of the first
688 * three fields.
Chris Masond8d5f3e2007-12-11 12:42:00 -0500689 *
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400690 * The extent ref structure for the full back refs has field for:
Chris Masond8d5f3e2007-12-11 12:42:00 -0500691 *
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400692 * - number of pointers in the tree leaf
Chris Masond8d5f3e2007-12-11 12:42:00 -0500693 *
 * The key offset for the full back refs is the first byte of
 * the tree leaf
Chris Masond8d5f3e2007-12-11 12:42:00 -0500696 *
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400697 * When a file extent is allocated, the implicit back refs are used
 698 * and the fields are filled in:
Chris Masond8d5f3e2007-12-11 12:42:00 -0500699 *
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400700 * (root_key.objectid, inode objectid, offset in file, 1)
701 *
 702 * When a file extent is removed during file truncation, we find the
703 * corresponding implicit back refs and check the following fields:
704 *
705 * (btrfs_header_owner(leaf), inode objectid, offset in file)
Chris Masond8d5f3e2007-12-11 12:42:00 -0500706 *
707 * Btree extents can be referenced by:
708 *
709 * - Different subvolumes
Chris Masond8d5f3e2007-12-11 12:42:00 -0500710 *
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400711 * Both the implicit back refs and the full back refs for tree blocks
712 * only consist of key. The key offset for the implicit back refs is
713 * objectid of block's owner tree. The key offset for the full back refs
714 * is the first byte of parent block.
Chris Masond8d5f3e2007-12-11 12:42:00 -0500715 *
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400716 * When implicit back refs are used, information about the lowest key and
 717 * level of the tree block is required. This information is stored in
 718 * the tree block info structure.
Chris Masond8d5f3e2007-12-11 12:42:00 -0500719 */
Zheng Yan31840ae2008-09-23 13:14:14 -0400720
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Convert a v0 extent item into the current extent item layout,
 * in place in the extent tree.
 *
 * @owner:      objectid of the tree/file owning the extent, or (u64)-1
 *              to discover it by scanning forward to the extent's v0
 *              ref item
 * @extra_size: additional bytes the caller needs room for beyond the
 *              converted item
 *
 * The path must point at the v0 item on entry.  The item is re-searched
 * with enough free space for the new layout and extended; tree block
 * extents additionally get a struct btrfs_tree_block_info header.
 * Returns 0 on success or a negative errno.
 */
static int convert_extent_item_v0(struct btrfs_trans_handle *trans,
				  struct btrfs_root *root,
				  struct btrfs_path *path,
				  u64 owner, u32 extra_size)
{
	struct btrfs_extent_item *item;
	struct btrfs_extent_item_v0 *ei0;
	struct btrfs_extent_ref_v0 *ref0;
	struct btrfs_tree_block_info *bi;
	struct extent_buffer *leaf;
	struct btrfs_key key;
	struct btrfs_key found_key;
	u32 new_size = sizeof(*item);
	u64 refs;
	int ret;

	leaf = path->nodes[0];
	/* only genuine v0 items may be converted */
	BUG_ON(btrfs_item_size_nr(leaf, path->slots[0]) != sizeof(*ei0));

	btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
	ei0 = btrfs_item_ptr(leaf, path->slots[0],
			     struct btrfs_extent_item_v0);
	refs = btrfs_extent_refs_v0(leaf, ei0);

	if (owner == (u64)-1) {
		/* scan forward for the v0 ref item to learn the owner */
		while (1) {
			if (path->slots[0] >= btrfs_header_nritems(leaf)) {
				ret = btrfs_next_leaf(root, path);
				if (ret < 0)
					return ret;
				BUG_ON(ret > 0);
				leaf = path->nodes[0];
			}
			btrfs_item_key_to_cpu(leaf, &found_key,
					      path->slots[0]);
			/* refs for this extent must share its objectid */
			BUG_ON(key.objectid != found_key.objectid);
			if (found_key.type != BTRFS_EXTENT_REF_V0_KEY) {
				path->slots[0]++;
				continue;
			}
			ref0 = btrfs_item_ptr(leaf, path->slots[0],
					      struct btrfs_extent_ref_v0);
			owner = btrfs_ref_objectid_v0(leaf, ref0);
			break;
		}
	}
	btrfs_release_path(root, path);

	/* tree blocks carry an extra tree_block_info header */
	if (owner < BTRFS_FIRST_FREE_OBJECTID)
		new_size += sizeof(*bi);

	new_size -= sizeof(*ei0);
	ret = btrfs_search_slot(trans, root, &key, path,
				new_size + extra_size, 1);
	if (ret < 0)
		return ret;
	BUG_ON(ret);

	ret = btrfs_extend_item(trans, root, path, new_size);
	BUG_ON(ret);

	leaf = path->nodes[0];
	item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
	btrfs_set_extent_refs(leaf, item, refs);
	/* FIXME: get real generation */
	btrfs_set_extent_generation(leaf, item, 0);
	if (owner < BTRFS_FIRST_FREE_OBJECTID) {
		/* v0 items never carried per-parent refs: full backref */
		btrfs_set_extent_flags(leaf, item,
				       BTRFS_EXTENT_FLAG_TREE_BLOCK |
				       BTRFS_BLOCK_FLAG_FULL_BACKREF);
		bi = (struct btrfs_tree_block_info *)(item + 1);
		/* FIXME: get first key of the block */
		memset_extent_buffer(leaf, 0, (unsigned long)bi, sizeof(*bi));
		/* for tree blocks, the owner field encodes the level */
		btrfs_set_tree_block_level(leaf, bi, (int)owner);
	} else {
		btrfs_set_extent_flags(leaf, item, BTRFS_EXTENT_FLAG_DATA);
	}
	btrfs_mark_buffer_dirty(leaf);
	return 0;
}
#endif
803
/*
 * Compute the key offset used for an EXTENT_DATA_REF item.
 *
 * Two independent crc32c streams are combined: one over the root
 * objectid, one over the inode objectid and the file offset.  The
 * result is stored as an on-disk key offset, so the exact algorithm
 * must stay stable.  NOTE: the shift by 31 (not 32) makes the two
 * crcs overlap by one bit; it is kept as-is for compatibility with
 * existing filesystems.
 */
static u64 hash_extent_data_ref(u64 root_objectid, u64 owner, u64 offset)
{
	u32 high_crc = ~(u32)0;
	u32 low_crc = ~(u32)0;
	__le64 lenum;

	lenum = cpu_to_le64(root_objectid);
	high_crc = crc32c(high_crc, &lenum, sizeof(lenum));
	lenum = cpu_to_le64(owner);
	low_crc = crc32c(low_crc, &lenum, sizeof(lenum));
	lenum = cpu_to_le64(offset);
	low_crc = crc32c(low_crc, &lenum, sizeof(lenum));

	return ((u64)high_crc << 31) ^ (u64)low_crc;
}
819
820static u64 hash_extent_data_ref_item(struct extent_buffer *leaf,
821 struct btrfs_extent_data_ref *ref)
822{
823 return hash_extent_data_ref(btrfs_extent_data_ref_root(leaf, ref),
824 btrfs_extent_data_ref_objectid(leaf, ref),
825 btrfs_extent_data_ref_offset(leaf, ref));
826}
827
828static int match_extent_data_ref(struct extent_buffer *leaf,
829 struct btrfs_extent_data_ref *ref,
830 u64 root_objectid, u64 owner, u64 offset)
831{
832 if (btrfs_extent_data_ref_root(leaf, ref) != root_objectid ||
833 btrfs_extent_data_ref_objectid(leaf, ref) != owner ||
834 btrfs_extent_data_ref_offset(leaf, ref) != offset)
835 return 0;
836 return 1;
837}
838
/*
 * Find the separate (non-inline) back ref item for a data extent.
 *
 * If @parent is non-zero the ref is a SHARED_DATA_REF keyed exactly by
 * the parent block.  Otherwise it is an EXTENT_DATA_REF keyed by the
 * hash of (root_objectid, owner, offset); since hashes can collide, all
 * items with that objectid/type are walked until the fields match.
 *
 * Returns 0 with the path positioned at the matching item, -ENOENT if
 * no matching ref exists, or another negative errno on error.
 */
static noinline int lookup_extent_data_ref(struct btrfs_trans_handle *trans,
					   struct btrfs_root *root,
					   struct btrfs_path *path,
					   u64 bytenr, u64 parent,
					   u64 root_objectid,
					   u64 owner, u64 offset)
{
	struct btrfs_key key;
	struct btrfs_extent_data_ref *ref;
	struct extent_buffer *leaf;
	u32 nritems;
	int ret;
	int recow;
	int err = -ENOENT;

	key.objectid = bytenr;
	if (parent) {
		key.type = BTRFS_SHARED_DATA_REF_KEY;
		key.offset = parent;
	} else {
		key.type = BTRFS_EXTENT_DATA_REF_KEY;
		key.offset = hash_extent_data_ref(root_objectid,
						  owner, offset);
	}
again:
	recow = 0;
	ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
	if (ret < 0) {
		err = ret;
		goto fail;
	}

	if (parent) {
		/* shared refs are keyed exactly; a search miss is final */
		if (!ret)
			return 0;
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
		/* old filesystems may still hold the ref in v0 format */
		key.type = BTRFS_EXTENT_REF_V0_KEY;
		btrfs_release_path(root, path);
		ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
		if (ret < 0) {
			err = ret;
			goto fail;
		}
		if (!ret)
			return 0;
#endif
		goto fail;
	}

	/* hashed refs: scan forward over possible hash collisions */
	leaf = path->nodes[0];
	nritems = btrfs_header_nritems(leaf);
	while (1) {
		if (path->slots[0] >= nritems) {
			ret = btrfs_next_leaf(root, path);
			if (ret < 0)
				err = ret;
			if (ret)
				goto fail;

			leaf = path->nodes[0];
			nritems = btrfs_header_nritems(leaf);
			/* crossed a leaf: must re-search to COW the item */
			recow = 1;
		}

		btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
		if (key.objectid != bytenr ||
		    key.type != BTRFS_EXTENT_DATA_REF_KEY)
			goto fail;

		ref = btrfs_item_ptr(leaf, path->slots[0],
				     struct btrfs_extent_data_ref);

		if (match_extent_data_ref(leaf, ref, root_objectid,
					  owner, offset)) {
			if (recow) {
				btrfs_release_path(root, path);
				goto again;
			}
			err = 0;
			break;
		}
		path->slots[0]++;
	}
fail:
	return err;
}
925
/*
 * Insert a new separate back ref item for a data extent, or add
 * @refs_to_add to the count of an existing one.
 *
 * Shared refs (@parent != 0) use a single SHARED_DATA_REF item keyed
 * by the parent block.  Normal refs use an EXTENT_DATA_REF item keyed
 * by the hash of (root_objectid, owner, offset); on hash collision the
 * key offset is incremented until the matching item or a free slot is
 * found.
 *
 * Returns 0 on success or a negative errno; the path is released.
 */
static noinline int insert_extent_data_ref(struct btrfs_trans_handle *trans,
					   struct btrfs_root *root,
					   struct btrfs_path *path,
					   u64 bytenr, u64 parent,
					   u64 root_objectid, u64 owner,
					   u64 offset, int refs_to_add)
{
	struct btrfs_key key;
	struct extent_buffer *leaf;
	u32 size;
	u32 num_refs;
	int ret;

	key.objectid = bytenr;
	if (parent) {
		key.type = BTRFS_SHARED_DATA_REF_KEY;
		key.offset = parent;
		size = sizeof(struct btrfs_shared_data_ref);
	} else {
		key.type = BTRFS_EXTENT_DATA_REF_KEY;
		key.offset = hash_extent_data_ref(root_objectid,
						  owner, offset);
		size = sizeof(struct btrfs_extent_data_ref);
	}

	ret = btrfs_insert_empty_item(trans, root, path, &key, size);
	if (ret && ret != -EEXIST)
		goto fail;

	leaf = path->nodes[0];
	if (parent) {
		struct btrfs_shared_data_ref *ref;
		ref = btrfs_item_ptr(leaf, path->slots[0],
				     struct btrfs_shared_data_ref);
		if (ret == 0) {
			/* freshly inserted item: set the initial count */
			btrfs_set_shared_data_ref_count(leaf, ref, refs_to_add);
		} else {
			/* -EEXIST: bump the count on the existing item */
			num_refs = btrfs_shared_data_ref_count(leaf, ref);
			num_refs += refs_to_add;
			btrfs_set_shared_data_ref_count(leaf, ref, num_refs);
		}
	} else {
		struct btrfs_extent_data_ref *ref;
		/*
		 * -EEXIST may be a true match or just a hash collision;
		 * probe successive key offsets until we find the matching
		 * ref or manage to insert a new item.
		 */
		while (ret == -EEXIST) {
			ref = btrfs_item_ptr(leaf, path->slots[0],
					     struct btrfs_extent_data_ref);
			if (match_extent_data_ref(leaf, ref, root_objectid,
						  owner, offset))
				break;
			btrfs_release_path(root, path);
			key.offset++;
			ret = btrfs_insert_empty_item(trans, root, path, &key,
						      size);
			if (ret && ret != -EEXIST)
				goto fail;

			leaf = path->nodes[0];
		}
		ref = btrfs_item_ptr(leaf, path->slots[0],
				     struct btrfs_extent_data_ref);
		if (ret == 0) {
			btrfs_set_extent_data_ref_root(leaf, ref,
						       root_objectid);
			btrfs_set_extent_data_ref_objectid(leaf, ref, owner);
			btrfs_set_extent_data_ref_offset(leaf, ref, offset);
			btrfs_set_extent_data_ref_count(leaf, ref, refs_to_add);
		} else {
			num_refs = btrfs_extent_data_ref_count(leaf, ref);
			num_refs += refs_to_add;
			btrfs_set_extent_data_ref_count(leaf, ref, num_refs);
		}
	}
	btrfs_mark_buffer_dirty(leaf);
	ret = 0;
fail:
	btrfs_release_path(root, path);
	return ret;
}
1004
/*
 * Drop @refs_to_drop references from the data back ref item the path
 * currently points at.  The item may be an EXTENT_DATA_REF, a
 * SHARED_DATA_REF, or (on old filesystems) a v0 ref.  The item is
 * deleted outright once its count reaches zero.
 *
 * Returns 0 on success or a negative errno from item deletion.
 */
static noinline int remove_extent_data_ref(struct btrfs_trans_handle *trans,
					   struct btrfs_root *root,
					   struct btrfs_path *path,
					   int refs_to_drop)
{
	struct btrfs_key key;
	struct btrfs_extent_data_ref *ref1 = NULL;
	struct btrfs_shared_data_ref *ref2 = NULL;
	struct extent_buffer *leaf;
	u32 num_refs = 0;
	int ret = 0;

	leaf = path->nodes[0];
	btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);

	if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
		ref1 = btrfs_item_ptr(leaf, path->slots[0],
				      struct btrfs_extent_data_ref);
		num_refs = btrfs_extent_data_ref_count(leaf, ref1);
	} else if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
		ref2 = btrfs_item_ptr(leaf, path->slots[0],
				      struct btrfs_shared_data_ref);
		num_refs = btrfs_shared_data_ref_count(leaf, ref2);
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
	} else if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
		struct btrfs_extent_ref_v0 *ref0;
		ref0 = btrfs_item_ptr(leaf, path->slots[0],
				      struct btrfs_extent_ref_v0);
		num_refs = btrfs_ref_count_v0(leaf, ref0);
#endif
	} else {
		BUG();
	}

	/* dropping more refs than the item holds means corruption */
	BUG_ON(num_refs < refs_to_drop);
	num_refs -= refs_to_drop;

	if (num_refs == 0) {
		ret = btrfs_del_item(trans, root, path);
	} else {
		if (key.type == BTRFS_EXTENT_DATA_REF_KEY)
			btrfs_set_extent_data_ref_count(leaf, ref1, num_refs);
		else if (key.type == BTRFS_SHARED_DATA_REF_KEY)
			btrfs_set_shared_data_ref_count(leaf, ref2, num_refs);
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
		else {
			struct btrfs_extent_ref_v0 *ref0;
			ref0 = btrfs_item_ptr(leaf, path->slots[0],
					      struct btrfs_extent_ref_v0);
			btrfs_set_ref_count_v0(leaf, ref0, num_refs);
		}
#endif
		btrfs_mark_buffer_dirty(leaf);
	}
	return ret;
}
1061
/*
 * Read the reference count out of a data back ref.
 *
 * If @iref is non-NULL it points at an inline ref within the extent
 * item; otherwise the count comes from the separate back ref item the
 * path points at (EXTENT_DATA_REF, SHARED_DATA_REF, or v0 ref).
 * Returns 0 (with a WARN) for an unrecognized item type.
 */
static noinline u32 extent_data_ref_count(struct btrfs_root *root,
					  struct btrfs_path *path,
					  struct btrfs_extent_inline_ref *iref)
{
	struct btrfs_key key;
	struct extent_buffer *leaf;
	struct btrfs_extent_data_ref *ref1;
	struct btrfs_shared_data_ref *ref2;
	u32 num_refs = 0;

	leaf = path->nodes[0];
	btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
	if (iref) {
		if (btrfs_extent_inline_ref_type(leaf, iref) ==
		    BTRFS_EXTENT_DATA_REF_KEY) {
			/* the data ref is embedded at the inline offset */
			ref1 = (struct btrfs_extent_data_ref *)(&iref->offset);
			num_refs = btrfs_extent_data_ref_count(leaf, ref1);
		} else {
			/* shared ref payload follows the inline ref header */
			ref2 = (struct btrfs_shared_data_ref *)(iref + 1);
			num_refs = btrfs_shared_data_ref_count(leaf, ref2);
		}
	} else if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
		ref1 = btrfs_item_ptr(leaf, path->slots[0],
				      struct btrfs_extent_data_ref);
		num_refs = btrfs_extent_data_ref_count(leaf, ref1);
	} else if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
		ref2 = btrfs_item_ptr(leaf, path->slots[0],
				      struct btrfs_shared_data_ref);
		num_refs = btrfs_shared_data_ref_count(leaf, ref2);
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
	} else if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
		struct btrfs_extent_ref_v0 *ref0;
		ref0 = btrfs_item_ptr(leaf, path->slots[0],
				      struct btrfs_extent_ref_v0);
		num_refs = btrfs_ref_count_v0(leaf, ref0);
#endif
	} else {
		WARN_ON(1);
	}
	return num_refs;
}
1103
/*
 * Find the separate back ref item for a tree block.
 *
 * Shared refs (@parent != 0) are keyed by the parent block; otherwise
 * the ref is keyed by the owning root.  For old filesystems, a shared
 * ref miss falls back to the v0 ref key.
 *
 * Returns 0 with the path at the item, -ENOENT if not found, or a
 * negative errno on search error.
 */
static noinline int lookup_tree_block_ref(struct btrfs_trans_handle *trans,
					  struct btrfs_root *root,
					  struct btrfs_path *path,
					  u64 bytenr, u64 parent,
					  u64 root_objectid)
{
	struct btrfs_key key;
	int ret;

	key.objectid = bytenr;
	if (parent) {
		key.type = BTRFS_SHARED_BLOCK_REF_KEY;
		key.offset = parent;
	} else {
		key.type = BTRFS_TREE_BLOCK_REF_KEY;
		key.offset = root_objectid;
	}

	ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
	if (ret > 0)
		ret = -ENOENT;
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
	if (ret == -ENOENT && parent) {
		btrfs_release_path(root, path);
		key.type = BTRFS_EXTENT_REF_V0_KEY;
		ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
		if (ret > 0)
			ret = -ENOENT;
	}
#endif
	return ret;
}
1136
1137static noinline int insert_tree_block_ref(struct btrfs_trans_handle *trans,
1138 struct btrfs_root *root,
1139 struct btrfs_path *path,
1140 u64 bytenr, u64 parent,
1141 u64 root_objectid)
1142{
1143 struct btrfs_key key;
1144 int ret;
1145
1146 key.objectid = bytenr;
1147 if (parent) {
1148 key.type = BTRFS_SHARED_BLOCK_REF_KEY;
1149 key.offset = parent;
1150 } else {
1151 key.type = BTRFS_TREE_BLOCK_REF_KEY;
1152 key.offset = root_objectid;
1153 }
1154
1155 ret = btrfs_insert_empty_item(trans, root, path, &key, 0);
Zheng Yan31840ae2008-09-23 13:14:14 -04001156 btrfs_release_path(root, path);
1157 return ret;
1158}
1159
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001160static inline int extent_ref_type(u64 parent, u64 owner)
1161{
1162 int type;
1163 if (owner < BTRFS_FIRST_FREE_OBJECTID) {
1164 if (parent > 0)
1165 type = BTRFS_SHARED_BLOCK_REF_KEY;
1166 else
1167 type = BTRFS_TREE_BLOCK_REF_KEY;
1168 } else {
1169 if (parent > 0)
1170 type = BTRFS_SHARED_DATA_REF_KEY;
1171 else
1172 type = BTRFS_EXTENT_DATA_REF_KEY;
1173 }
1174 return type;
1175}
1176
Yan Zheng2c47e6052009-06-27 21:07:35 -04001177static int find_next_key(struct btrfs_path *path, int level,
1178 struct btrfs_key *key)
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001179
1180{
Yan Zheng2c47e6052009-06-27 21:07:35 -04001181 for (; level < BTRFS_MAX_LEVEL; level++) {
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001182 if (!path->nodes[level])
1183 break;
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001184 if (path->slots[level] + 1 >=
1185 btrfs_header_nritems(path->nodes[level]))
1186 continue;
1187 if (level == 0)
1188 btrfs_item_key_to_cpu(path->nodes[level], key,
1189 path->slots[level] + 1);
1190 else
1191 btrfs_node_key_to_cpu(path->nodes[level], key,
1192 path->slots[level] + 1);
1193 return 0;
1194 }
1195 return 1;
1196}
1197
/*
 * look for inline back ref. if back ref is found, *ref_ret is set
 * to the address of inline back ref, and 0 is returned.
 *
 * if back ref isn't found, *ref_ret is set to the address where it
 * should be inserted, and -ENOENT is returned.
 *
 * if insert is true and there are too many inline back refs, the path
 * points to the extent item, and -EAGAIN is returned.
 *
 * NOTE: inline back refs are ordered in the same way that back ref
 * items in the tree are ordered.
 */
static noinline_for_stack
int lookup_inline_extent_backref(struct btrfs_trans_handle *trans,
				 struct btrfs_root *root,
				 struct btrfs_path *path,
				 struct btrfs_extent_inline_ref **ref_ret,
				 u64 bytenr, u64 num_bytes,
				 u64 parent, u64 root_objectid,
				 u64 owner, u64 offset, int insert)
{
	struct btrfs_key key;
	struct extent_buffer *leaf;
	struct btrfs_extent_item *ei;
	struct btrfs_extent_inline_ref *iref;
	u64 flags;
	u64 item_size;
	unsigned long ptr;
	unsigned long end;
	int extra_size;
	int type;
	int want;
	int ret;
	int err = 0;

	key.objectid = bytenr;
	key.type = BTRFS_EXTENT_ITEM_KEY;
	key.offset = num_bytes;

	want = extent_ref_type(parent, owner);
	if (insert) {
		/* reserve room in the leaf for the ref we may add */
		extra_size = btrfs_extent_inline_ref_size(want);
		path->keep_locks = 1;
	} else
		extra_size = -1;
	ret = btrfs_search_slot(trans, root, &key, path, extra_size, 1);
	if (ret < 0) {
		err = ret;
		goto out;
	}
	/* the extent item must exist */
	BUG_ON(ret);

	leaf = path->nodes[0];
	item_size = btrfs_item_size_nr(leaf, path->slots[0]);
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
	/* undersized items are in the v0 format; convert before use */
	if (item_size < sizeof(*ei)) {
		if (!insert) {
			err = -ENOENT;
			goto out;
		}
		ret = convert_extent_item_v0(trans, root, path, owner,
					     extra_size);
		if (ret < 0) {
			err = ret;
			goto out;
		}
		leaf = path->nodes[0];
		item_size = btrfs_item_size_nr(leaf, path->slots[0]);
	}
#endif
	BUG_ON(item_size < sizeof(*ei));

	ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
	flags = btrfs_extent_flags(leaf, ei);

	ptr = (unsigned long)(ei + 1);
	end = (unsigned long)ei + item_size;

	if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
		/* inline refs start after the tree block info header */
		ptr += sizeof(struct btrfs_tree_block_info);
		BUG_ON(ptr > end);
	} else {
		BUG_ON(!(flags & BTRFS_EXTENT_FLAG_DATA));
	}

	err = -ENOENT;
	while (1) {
		if (ptr >= end) {
			WARN_ON(ptr > end);
			break;
		}
		iref = (struct btrfs_extent_inline_ref *)ptr;
		type = btrfs_extent_inline_ref_type(leaf, iref);
		/* refs are sorted by type first; past it means not found */
		if (want < type)
			break;
		if (want > type) {
			ptr += btrfs_extent_inline_ref_size(type);
			continue;
		}

		if (type == BTRFS_EXTENT_DATA_REF_KEY) {
			struct btrfs_extent_data_ref *dref;
			dref = (struct btrfs_extent_data_ref *)(&iref->offset);
			if (match_extent_data_ref(leaf, dref, root_objectid,
						  owner, offset)) {
				err = 0;
				break;
			}
			/* data refs of one type are ordered by their hash */
			if (hash_extent_data_ref_item(leaf, dref) <
			    hash_extent_data_ref(root_objectid, owner, offset))
				break;
		} else {
			u64 ref_offset;
			ref_offset = btrfs_extent_inline_ref_offset(leaf, iref);
			if (parent > 0) {
				if (parent == ref_offset) {
					err = 0;
					break;
				}
				if (ref_offset < parent)
					break;
			} else {
				if (root_objectid == ref_offset) {
					err = 0;
					break;
				}
				if (ref_offset < root_objectid)
					break;
			}
		}
		ptr += btrfs_extent_inline_ref_size(type);
	}
	if (err == -ENOENT && insert) {
		if (item_size + extra_size >=
		    BTRFS_MAX_EXTENT_ITEM_SIZE(root)) {
			err = -EAGAIN;
			goto out;
		}
		/*
		 * To add new inline back ref, we have to make sure
		 * there is no corresponding back ref item.
		 * For simplicity, we just do not add new inline back
		 * ref if there is any kind of item for this block
		 */
		if (find_next_key(path, 0, &key) == 0 &&
		    key.objectid == bytenr &&
		    key.type < BTRFS_BLOCK_GROUP_ITEM_KEY) {
			err = -EAGAIN;
			goto out;
		}
	}
	/* on -ENOENT this is where the new ref should be inserted */
	*ref_ret = (struct btrfs_extent_inline_ref *)ptr;
out:
	if (insert) {
		path->keep_locks = 0;
		btrfs_unlock_up_safe(path, 1);
	}
	return err;
}
1358
/*
 * helper to add new inline back ref
 *
 * The path must point at the extent item and @iref at the position
 * inside it where the new ref belongs (as set up by
 * lookup_inline_extent_backref with insert == 1).  The item is grown
 * by the ref size, any later inline refs are shifted up, and the new
 * ref is written in place.  The extent's total ref count is increased
 * by @refs_to_add and any pending @extent_op is applied.
 */
static noinline_for_stack
int setup_inline_extent_backref(struct btrfs_trans_handle *trans,
				struct btrfs_root *root,
				struct btrfs_path *path,
				struct btrfs_extent_inline_ref *iref,
				u64 parent, u64 root_objectid,
				u64 owner, u64 offset, int refs_to_add,
				struct btrfs_delayed_extent_op *extent_op)
{
	struct extent_buffer *leaf;
	struct btrfs_extent_item *ei;
	unsigned long ptr;
	unsigned long end;
	unsigned long item_offset;
	u64 refs;
	int size;
	int type;
	int ret;

	leaf = path->nodes[0];
	ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
	/* remember the insert position; the item is about to move/grow */
	item_offset = (unsigned long)iref - (unsigned long)ei;

	type = extent_ref_type(parent, owner);
	size = btrfs_extent_inline_ref_size(type);

	ret = btrfs_extend_item(trans, root, path, size);
	BUG_ON(ret);

	ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
	refs = btrfs_extent_refs(leaf, ei);
	refs += refs_to_add;
	btrfs_set_extent_refs(leaf, ei, refs);
	if (extent_op)
		__run_delayed_extent_op(extent_op, leaf, ei);

	ptr = (unsigned long)ei + item_offset;
	end = (unsigned long)ei + btrfs_item_size_nr(leaf, path->slots[0]);
	/* shift later refs up to open a gap at the insert point */
	if (ptr < end - size)
		memmove_extent_buffer(leaf, ptr + size, ptr,
				      end - size - ptr);

	iref = (struct btrfs_extent_inline_ref *)ptr;
	btrfs_set_extent_inline_ref_type(leaf, iref, type);
	if (type == BTRFS_EXTENT_DATA_REF_KEY) {
		struct btrfs_extent_data_ref *dref;
		dref = (struct btrfs_extent_data_ref *)(&iref->offset);
		btrfs_set_extent_data_ref_root(leaf, dref, root_objectid);
		btrfs_set_extent_data_ref_objectid(leaf, dref, owner);
		btrfs_set_extent_data_ref_offset(leaf, dref, offset);
		btrfs_set_extent_data_ref_count(leaf, dref, refs_to_add);
	} else if (type == BTRFS_SHARED_DATA_REF_KEY) {
		struct btrfs_shared_data_ref *sref;
		sref = (struct btrfs_shared_data_ref *)(iref + 1);
		btrfs_set_shared_data_ref_count(leaf, sref, refs_to_add);
		btrfs_set_extent_inline_ref_offset(leaf, iref, parent);
	} else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
		btrfs_set_extent_inline_ref_offset(leaf, iref, parent);
	} else {
		/* tree block ref: keyed by the owning root */
		btrfs_set_extent_inline_ref_offset(leaf, iref, root_objectid);
	}
	btrfs_mark_buffer_dirty(leaf);
	return 0;
}
1426
1427static int lookup_extent_backref(struct btrfs_trans_handle *trans,
1428 struct btrfs_root *root,
1429 struct btrfs_path *path,
1430 struct btrfs_extent_inline_ref **ref_ret,
1431 u64 bytenr, u64 num_bytes, u64 parent,
1432 u64 root_objectid, u64 owner, u64 offset)
1433{
1434 int ret;
1435
1436 ret = lookup_inline_extent_backref(trans, root, path, ref_ret,
1437 bytenr, num_bytes, parent,
1438 root_objectid, owner, offset, 0);
1439 if (ret != -ENOENT)
1440 return ret;
1441
1442 btrfs_release_path(root, path);
1443 *ref_ret = NULL;
1444
1445 if (owner < BTRFS_FIRST_FREE_OBJECTID) {
1446 ret = lookup_tree_block_ref(trans, root, path, bytenr, parent,
1447 root_objectid);
1448 } else {
1449 ret = lookup_extent_data_ref(trans, root, path, bytenr, parent,
1450 root_objectid, owner, offset);
1451 }
1452 return ret;
1453}
1454
/*
 * helper to update/remove inline back ref
 *
 * Adjust the count of the inline ref at @iref by @refs_to_mod
 * (negative to drop refs) and adjust the extent item's total ref
 * count by the same amount.  When the ref's own count reaches zero
 * the inline ref is cut out of the item and the item is shrunk.
 * Tree block refs carry no count field (implicit count of 1), so for
 * them @refs_to_mod must be exactly -1.
 */
static noinline_for_stack
int update_inline_extent_backref(struct btrfs_trans_handle *trans,
				 struct btrfs_root *root,
				 struct btrfs_path *path,
				 struct btrfs_extent_inline_ref *iref,
				 int refs_to_mod,
				 struct btrfs_delayed_extent_op *extent_op)
{
	struct extent_buffer *leaf;
	struct btrfs_extent_item *ei;
	struct btrfs_extent_data_ref *dref = NULL;
	struct btrfs_shared_data_ref *sref = NULL;
	unsigned long ptr;
	unsigned long end;
	u32 item_size;
	int size;
	int type;
	int ret;
	u64 refs;

	leaf = path->nodes[0];
	ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
	refs = btrfs_extent_refs(leaf, ei);
	WARN_ON(refs_to_mod < 0 && refs + refs_to_mod <= 0);
	refs += refs_to_mod;
	btrfs_set_extent_refs(leaf, ei, refs);
	if (extent_op)
		__run_delayed_extent_op(extent_op, leaf, ei);

	type = btrfs_extent_inline_ref_type(leaf, iref);

	if (type == BTRFS_EXTENT_DATA_REF_KEY) {
		dref = (struct btrfs_extent_data_ref *)(&iref->offset);
		refs = btrfs_extent_data_ref_count(leaf, dref);
	} else if (type == BTRFS_SHARED_DATA_REF_KEY) {
		sref = (struct btrfs_shared_data_ref *)(iref + 1);
		refs = btrfs_shared_data_ref_count(leaf, sref);
	} else {
		/* tree block refs have no count; only full removal works */
		refs = 1;
		BUG_ON(refs_to_mod != -1);
	}

	BUG_ON(refs_to_mod < 0 && refs < -refs_to_mod);
	refs += refs_to_mod;

	if (refs > 0) {
		if (type == BTRFS_EXTENT_DATA_REF_KEY)
			btrfs_set_extent_data_ref_count(leaf, dref, refs);
		else
			btrfs_set_shared_data_ref_count(leaf, sref, refs);
	} else {
		/* count hit zero: splice this ref out and shrink the item */
		size = btrfs_extent_inline_ref_size(type);
		item_size = btrfs_item_size_nr(leaf, path->slots[0]);
		ptr = (unsigned long)iref;
		end = (unsigned long)ei + item_size;
		if (ptr + size < end)
			memmove_extent_buffer(leaf, ptr, ptr + size,
					      end - ptr - size);
		item_size -= size;
		ret = btrfs_truncate_item(trans, root, path, item_size, 1);
		BUG_ON(ret);
	}
	btrfs_mark_buffer_dirty(leaf);
	return 0;
}
1523
1524static noinline_for_stack
1525int insert_inline_extent_backref(struct btrfs_trans_handle *trans,
1526 struct btrfs_root *root,
1527 struct btrfs_path *path,
1528 u64 bytenr, u64 num_bytes, u64 parent,
1529 u64 root_objectid, u64 owner,
1530 u64 offset, int refs_to_add,
1531 struct btrfs_delayed_extent_op *extent_op)
1532{
1533 struct btrfs_extent_inline_ref *iref;
1534 int ret;
1535
1536 ret = lookup_inline_extent_backref(trans, root, path, &iref,
1537 bytenr, num_bytes, parent,
1538 root_objectid, owner, offset, 1);
1539 if (ret == 0) {
1540 BUG_ON(owner < BTRFS_FIRST_FREE_OBJECTID);
1541 ret = update_inline_extent_backref(trans, root, path, iref,
1542 refs_to_add, extent_op);
1543 } else if (ret == -ENOENT) {
1544 ret = setup_inline_extent_backref(trans, root, path, iref,
1545 parent, root_objectid,
1546 owner, offset, refs_to_add,
1547 extent_op);
1548 }
1549 return ret;
1550}
1551
1552static int insert_extent_backref(struct btrfs_trans_handle *trans,
1553 struct btrfs_root *root,
1554 struct btrfs_path *path,
1555 u64 bytenr, u64 parent, u64 root_objectid,
1556 u64 owner, u64 offset, int refs_to_add)
1557{
1558 int ret;
1559 if (owner < BTRFS_FIRST_FREE_OBJECTID) {
1560 BUG_ON(refs_to_add != 1);
1561 ret = insert_tree_block_ref(trans, root, path, bytenr,
1562 parent, root_objectid);
1563 } else {
1564 ret = insert_extent_data_ref(trans, root, path, bytenr,
1565 parent, root_objectid,
1566 owner, offset, refs_to_add);
1567 }
1568 return ret;
1569}
1570
1571static int remove_extent_backref(struct btrfs_trans_handle *trans,
1572 struct btrfs_root *root,
1573 struct btrfs_path *path,
1574 struct btrfs_extent_inline_ref *iref,
1575 int refs_to_drop, int is_data)
1576{
1577 int ret;
1578
1579 BUG_ON(!is_data && refs_to_drop != 1);
1580 if (iref) {
1581 ret = update_inline_extent_backref(trans, root, path, iref,
1582 -refs_to_drop, NULL);
1583 } else if (is_data) {
1584 ret = remove_extent_data_ref(trans, root, path, refs_to_drop);
1585 } else {
1586 ret = btrfs_del_item(trans, root, path);
1587 }
1588 return ret;
1589}
1590
/*
 * Issue a discard (TRIM) for a byte range on one block device.
 * Byte offsets are converted to 512-byte sector units for the block
 * layer.
 */
static void btrfs_issue_discard(struct block_device *bdev,
				u64 start, u64 len)
{
	blkdev_issue_discard(bdev, start >> 9, len >> 9, GFP_KERNEL,
			     DISCARD_FL_BARRIER);
}
Chris Mason15916de2008-11-19 21:17:22 -05001597
/*
 * Tell the underlying device(s) that the given logical byte range is
 * no longer in use.  A no-op unless the filesystem is mounted with
 * the DISCARD option.
 *
 * The logical range is mapped to physical stripes and each stripe is
 * discarded individually.  Returns 0 or a negative errno from the
 * block mapping.
 */
static int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr,
				u64 num_bytes)
{
	int ret;
	u64 map_length = num_bytes;
	struct btrfs_multi_bio *multi = NULL;

	if (!btrfs_test_opt(root, DISCARD))
		return 0;

	/* Tell the block device(s) that the sectors can be discarded */
	ret = btrfs_map_block(&root->fs_info->mapping_tree, READ,
			      bytenr, &map_length, &multi, 0);
	if (!ret) {
		struct btrfs_bio_stripe *stripe = multi->stripes;
		int i;

		/* the mapping may extend past the requested range */
		if (map_length > num_bytes)
			map_length = num_bytes;

		for (i = 0; i < multi->num_stripes; i++, stripe++) {
			btrfs_issue_discard(stripe->dev->bdev,
					    stripe->physical,
					    map_length);
		}
		kfree(multi);
	}

	return ret;
}
1628
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001629int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
1630 struct btrfs_root *root,
1631 u64 bytenr, u64 num_bytes, u64 parent,
1632 u64 root_objectid, u64 owner, u64 offset)
Zheng Yan31840ae2008-09-23 13:14:14 -04001633{
1634 int ret;
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001635 BUG_ON(owner < BTRFS_FIRST_FREE_OBJECTID &&
1636 root_objectid == BTRFS_TREE_LOG_OBJECTID);
Zheng Yan31840ae2008-09-23 13:14:14 -04001637
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001638 if (owner < BTRFS_FIRST_FREE_OBJECTID) {
1639 ret = btrfs_add_delayed_tree_ref(trans, bytenr, num_bytes,
1640 parent, root_objectid, (int)owner,
1641 BTRFS_ADD_DELAYED_REF, NULL);
1642 } else {
1643 ret = btrfs_add_delayed_data_ref(trans, bytenr, num_bytes,
1644 parent, root_objectid, owner, offset,
1645 BTRFS_ADD_DELAYED_REF, NULL);
1646 }
Zheng Yan31840ae2008-09-23 13:14:14 -04001647 return ret;
1648}
1649
Chris Mason925baed2008-06-25 16:01:30 -04001650static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001651 struct btrfs_root *root,
1652 u64 bytenr, u64 num_bytes,
1653 u64 parent, u64 root_objectid,
1654 u64 owner, u64 offset, int refs_to_add,
1655 struct btrfs_delayed_extent_op *extent_op)
Chris Mason56bec292009-03-13 10:10:06 -04001656{
Chris Mason5caf2a02007-04-02 11:20:42 -04001657 struct btrfs_path *path;
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001658 struct extent_buffer *leaf;
Chris Mason234b63a2007-03-13 10:46:10 -04001659 struct btrfs_extent_item *item;
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001660 u64 refs;
1661 int ret;
1662 int err = 0;
Chris Mason037e6392007-03-07 11:50:24 -05001663
Chris Mason5caf2a02007-04-02 11:20:42 -04001664 path = btrfs_alloc_path();
Chris Mason54aa1f42007-06-22 14:16:25 -04001665 if (!path)
1666 return -ENOMEM;
Chris Mason26b80032007-08-08 20:17:12 -04001667
Chris Mason3c12ac72008-04-21 12:01:38 -04001668 path->reada = 1;
Chris Masonb9473432009-03-13 11:00:37 -04001669 path->leave_spinning = 1;
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001670 /* this will setup the path even if it fails to insert the back ref */
1671 ret = insert_inline_extent_backref(trans, root->fs_info->extent_root,
1672 path, bytenr, num_bytes, parent,
1673 root_objectid, owner, offset,
1674 refs_to_add, extent_op);
1675 if (ret == 0)
1676 goto out;
Zheng Yan31840ae2008-09-23 13:14:14 -04001677
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001678 if (ret != -EAGAIN) {
1679 err = ret;
1680 goto out;
Chris Masonb9473432009-03-13 11:00:37 -04001681 }
Zheng Yan31840ae2008-09-23 13:14:14 -04001682
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001683 leaf = path->nodes[0];
1684 item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
1685 refs = btrfs_extent_refs(leaf, item);
1686 btrfs_set_extent_refs(leaf, item, refs + refs_to_add);
1687 if (extent_op)
1688 __run_delayed_extent_op(extent_op, leaf, item);
Zheng Yan31840ae2008-09-23 13:14:14 -04001689
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001690 btrfs_mark_buffer_dirty(leaf);
Chris Mason5caf2a02007-04-02 11:20:42 -04001691 btrfs_release_path(root->fs_info->extent_root, path);
Chris Mason7bb86312007-12-11 09:25:06 -05001692
Chris Mason3c12ac72008-04-21 12:01:38 -04001693 path->reada = 1;
Chris Masonb9473432009-03-13 11:00:37 -04001694 path->leave_spinning = 1;
1695
Chris Mason56bec292009-03-13 10:10:06 -04001696 /* now insert the actual backref */
Zheng Yan31840ae2008-09-23 13:14:14 -04001697 ret = insert_extent_backref(trans, root->fs_info->extent_root,
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001698 path, bytenr, parent, root_objectid,
1699 owner, offset, refs_to_add);
Chris Mason7bb86312007-12-11 09:25:06 -05001700 BUG_ON(ret);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001701out:
Chris Mason74493f72007-12-11 09:25:06 -05001702 btrfs_free_path(path);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001703 return err;
Chris Mason02217ed2007-03-02 16:08:05 -05001704}
1705
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001706static int run_delayed_data_ref(struct btrfs_trans_handle *trans,
1707 struct btrfs_root *root,
1708 struct btrfs_delayed_ref_node *node,
1709 struct btrfs_delayed_extent_op *extent_op,
1710 int insert_reserved)
Chris Masone9d0b132007-08-10 14:06:19 -04001711{
Chris Mason56bec292009-03-13 10:10:06 -04001712 int ret = 0;
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001713 struct btrfs_delayed_data_ref *ref;
1714 struct btrfs_key ins;
1715 u64 parent = 0;
1716 u64 ref_root = 0;
1717 u64 flags = 0;
Chris Mason56bec292009-03-13 10:10:06 -04001718
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001719 ins.objectid = node->bytenr;
1720 ins.offset = node->num_bytes;
1721 ins.type = BTRFS_EXTENT_ITEM_KEY;
Chris Mason56bec292009-03-13 10:10:06 -04001722
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001723 ref = btrfs_delayed_node_to_data_ref(node);
1724 if (node->type == BTRFS_SHARED_DATA_REF_KEY)
1725 parent = ref->parent;
1726 else
1727 ref_root = ref->root;
1728
1729 if (node->action == BTRFS_ADD_DELAYED_REF && insert_reserved) {
1730 if (extent_op) {
1731 BUG_ON(extent_op->update_key);
1732 flags |= extent_op->flags_to_set;
1733 }
1734 ret = alloc_reserved_file_extent(trans, root,
1735 parent, ref_root, flags,
1736 ref->objectid, ref->offset,
1737 &ins, node->ref_mod);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001738 } else if (node->action == BTRFS_ADD_DELAYED_REF) {
1739 ret = __btrfs_inc_extent_ref(trans, root, node->bytenr,
1740 node->num_bytes, parent,
1741 ref_root, ref->objectid,
1742 ref->offset, node->ref_mod,
1743 extent_op);
1744 } else if (node->action == BTRFS_DROP_DELAYED_REF) {
1745 ret = __btrfs_free_extent(trans, root, node->bytenr,
1746 node->num_bytes, parent,
1747 ref_root, ref->objectid,
1748 ref->offset, node->ref_mod,
1749 extent_op);
1750 } else {
1751 BUG();
1752 }
Chris Mason56bec292009-03-13 10:10:06 -04001753 return ret;
1754}
1755
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001756static void __run_delayed_extent_op(struct btrfs_delayed_extent_op *extent_op,
1757 struct extent_buffer *leaf,
1758 struct btrfs_extent_item *ei)
1759{
1760 u64 flags = btrfs_extent_flags(leaf, ei);
1761 if (extent_op->update_flags) {
1762 flags |= extent_op->flags_to_set;
1763 btrfs_set_extent_flags(leaf, ei, flags);
1764 }
1765
1766 if (extent_op->update_key) {
1767 struct btrfs_tree_block_info *bi;
1768 BUG_ON(!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK));
1769 bi = (struct btrfs_tree_block_info *)(ei + 1);
1770 btrfs_set_tree_block_key(leaf, bi, &extent_op->key);
1771 }
1772}
1773
1774static int run_delayed_extent_op(struct btrfs_trans_handle *trans,
1775 struct btrfs_root *root,
1776 struct btrfs_delayed_ref_node *node,
1777 struct btrfs_delayed_extent_op *extent_op)
1778{
1779 struct btrfs_key key;
1780 struct btrfs_path *path;
1781 struct btrfs_extent_item *ei;
1782 struct extent_buffer *leaf;
1783 u32 item_size;
1784 int ret;
1785 int err = 0;
1786
1787 path = btrfs_alloc_path();
1788 if (!path)
1789 return -ENOMEM;
1790
1791 key.objectid = node->bytenr;
1792 key.type = BTRFS_EXTENT_ITEM_KEY;
1793 key.offset = node->num_bytes;
1794
1795 path->reada = 1;
1796 path->leave_spinning = 1;
1797 ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key,
1798 path, 0, 1);
1799 if (ret < 0) {
1800 err = ret;
1801 goto out;
1802 }
1803 if (ret > 0) {
1804 err = -EIO;
1805 goto out;
1806 }
1807
1808 leaf = path->nodes[0];
1809 item_size = btrfs_item_size_nr(leaf, path->slots[0]);
1810#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
1811 if (item_size < sizeof(*ei)) {
1812 ret = convert_extent_item_v0(trans, root->fs_info->extent_root,
1813 path, (u64)-1, 0);
1814 if (ret < 0) {
1815 err = ret;
1816 goto out;
1817 }
1818 leaf = path->nodes[0];
1819 item_size = btrfs_item_size_nr(leaf, path->slots[0]);
1820 }
1821#endif
1822 BUG_ON(item_size < sizeof(*ei));
1823 ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
1824 __run_delayed_extent_op(extent_op, leaf, ei);
1825
1826 btrfs_mark_buffer_dirty(leaf);
1827out:
1828 btrfs_free_path(path);
1829 return err;
1830}
1831
1832static int run_delayed_tree_ref(struct btrfs_trans_handle *trans,
1833 struct btrfs_root *root,
1834 struct btrfs_delayed_ref_node *node,
1835 struct btrfs_delayed_extent_op *extent_op,
1836 int insert_reserved)
1837{
1838 int ret = 0;
1839 struct btrfs_delayed_tree_ref *ref;
1840 struct btrfs_key ins;
1841 u64 parent = 0;
1842 u64 ref_root = 0;
1843
1844 ins.objectid = node->bytenr;
1845 ins.offset = node->num_bytes;
1846 ins.type = BTRFS_EXTENT_ITEM_KEY;
1847
1848 ref = btrfs_delayed_node_to_tree_ref(node);
1849 if (node->type == BTRFS_SHARED_BLOCK_REF_KEY)
1850 parent = ref->parent;
1851 else
1852 ref_root = ref->root;
1853
1854 BUG_ON(node->ref_mod != 1);
1855 if (node->action == BTRFS_ADD_DELAYED_REF && insert_reserved) {
1856 BUG_ON(!extent_op || !extent_op->update_flags ||
1857 !extent_op->update_key);
1858 ret = alloc_reserved_tree_block(trans, root,
1859 parent, ref_root,
1860 extent_op->flags_to_set,
1861 &extent_op->key,
1862 ref->level, &ins);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001863 } else if (node->action == BTRFS_ADD_DELAYED_REF) {
1864 ret = __btrfs_inc_extent_ref(trans, root, node->bytenr,
1865 node->num_bytes, parent, ref_root,
1866 ref->level, 0, 1, extent_op);
1867 } else if (node->action == BTRFS_DROP_DELAYED_REF) {
1868 ret = __btrfs_free_extent(trans, root, node->bytenr,
1869 node->num_bytes, parent, ref_root,
1870 ref->level, 0, 1, extent_op);
1871 } else {
1872 BUG();
1873 }
1874 return ret;
1875}
1876
1877
Chris Mason56bec292009-03-13 10:10:06 -04001878/* helper function to actually process a single delayed ref entry */
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001879static int run_one_delayed_ref(struct btrfs_trans_handle *trans,
1880 struct btrfs_root *root,
1881 struct btrfs_delayed_ref_node *node,
1882 struct btrfs_delayed_extent_op *extent_op,
1883 int insert_reserved)
Chris Mason56bec292009-03-13 10:10:06 -04001884{
Josef Bacikeb099672009-02-12 09:27:38 -05001885 int ret;
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001886 if (btrfs_delayed_ref_is_head(node)) {
Chris Mason56bec292009-03-13 10:10:06 -04001887 struct btrfs_delayed_ref_head *head;
1888 /*
1889 * we've hit the end of the chain and we were supposed
1890 * to insert this extent into the tree. But, it got
1891 * deleted before we ever needed to insert it, so all
1892 * we have to do is clean up the accounting
1893 */
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001894 BUG_ON(extent_op);
1895 head = btrfs_delayed_node_to_head(node);
Chris Mason56bec292009-03-13 10:10:06 -04001896 if (insert_reserved) {
Yan Zheng11833d62009-09-11 16:11:19 -04001897 int mark_free = 0;
1898 struct extent_buffer *must_clean = NULL;
1899
1900 ret = pin_down_bytes(trans, root, NULL,
1901 node->bytenr, node->num_bytes,
1902 head->is_data, 1, &must_clean);
1903 if (ret > 0)
1904 mark_free = 1;
1905
1906 if (must_clean) {
1907 clean_tree_block(NULL, root, must_clean);
1908 btrfs_tree_unlock(must_clean);
1909 free_extent_buffer(must_clean);
1910 }
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001911 if (head->is_data) {
1912 ret = btrfs_del_csums(trans, root,
1913 node->bytenr,
1914 node->num_bytes);
1915 BUG_ON(ret);
1916 }
Yan Zheng11833d62009-09-11 16:11:19 -04001917 if (mark_free) {
1918 ret = btrfs_free_reserved_extent(root,
1919 node->bytenr,
1920 node->num_bytes);
1921 BUG_ON(ret);
1922 }
Chris Mason56bec292009-03-13 10:10:06 -04001923 }
Chris Mason56bec292009-03-13 10:10:06 -04001924 mutex_unlock(&head->mutex);
1925 return 0;
1926 }
Josef Bacikeb099672009-02-12 09:27:38 -05001927
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001928 if (node->type == BTRFS_TREE_BLOCK_REF_KEY ||
1929 node->type == BTRFS_SHARED_BLOCK_REF_KEY)
1930 ret = run_delayed_tree_ref(trans, root, node, extent_op,
1931 insert_reserved);
1932 else if (node->type == BTRFS_EXTENT_DATA_REF_KEY ||
1933 node->type == BTRFS_SHARED_DATA_REF_KEY)
1934 ret = run_delayed_data_ref(trans, root, node, extent_op,
1935 insert_reserved);
1936 else
1937 BUG();
1938 return ret;
Chris Masone9d0b132007-08-10 14:06:19 -04001939}
1940
Chris Mason56bec292009-03-13 10:10:06 -04001941static noinline struct btrfs_delayed_ref_node *
1942select_delayed_ref(struct btrfs_delayed_ref_head *head)
Chris Masona28ec192007-03-06 20:08:01 -05001943{
Chris Mason56bec292009-03-13 10:10:06 -04001944 struct rb_node *node;
1945 struct btrfs_delayed_ref_node *ref;
1946 int action = BTRFS_ADD_DELAYED_REF;
1947again:
1948 /*
1949 * select delayed ref of type BTRFS_ADD_DELAYED_REF first.
1950 * this prevents ref count from going down to zero when
1951 * there still are pending delayed ref.
1952 */
1953 node = rb_prev(&head->node.rb_node);
1954 while (1) {
1955 if (!node)
1956 break;
1957 ref = rb_entry(node, struct btrfs_delayed_ref_node,
1958 rb_node);
1959 if (ref->bytenr != head->node.bytenr)
1960 break;
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001961 if (ref->action == action)
Chris Mason56bec292009-03-13 10:10:06 -04001962 return ref;
1963 node = rb_prev(node);
Chris Mason5f39d392007-10-15 16:14:19 -04001964 }
Chris Mason56bec292009-03-13 10:10:06 -04001965 if (action == BTRFS_ADD_DELAYED_REF) {
1966 action = BTRFS_DROP_DELAYED_REF;
1967 goto again;
1968 }
1969 return NULL;
1970}
1971
Chris Masonc3e69d52009-03-13 10:17:05 -04001972static noinline int run_clustered_refs(struct btrfs_trans_handle *trans,
1973 struct btrfs_root *root,
1974 struct list_head *cluster)
Chris Mason56bec292009-03-13 10:10:06 -04001975{
Chris Mason56bec292009-03-13 10:10:06 -04001976 struct btrfs_delayed_ref_root *delayed_refs;
1977 struct btrfs_delayed_ref_node *ref;
1978 struct btrfs_delayed_ref_head *locked_ref = NULL;
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001979 struct btrfs_delayed_extent_op *extent_op;
Chris Mason56bec292009-03-13 10:10:06 -04001980 int ret;
Chris Masonc3e69d52009-03-13 10:17:05 -04001981 int count = 0;
Chris Mason56bec292009-03-13 10:10:06 -04001982 int must_insert_reserved = 0;
Chris Mason56bec292009-03-13 10:10:06 -04001983
1984 delayed_refs = &trans->transaction->delayed_refs;
Chris Mason56bec292009-03-13 10:10:06 -04001985 while (1) {
1986 if (!locked_ref) {
Chris Masonc3e69d52009-03-13 10:17:05 -04001987 /* pick a new head ref from the cluster list */
1988 if (list_empty(cluster))
Chris Mason56bec292009-03-13 10:10:06 -04001989 break;
Chris Mason56bec292009-03-13 10:10:06 -04001990
Chris Masonc3e69d52009-03-13 10:17:05 -04001991 locked_ref = list_entry(cluster->next,
1992 struct btrfs_delayed_ref_head, cluster);
1993
1994 /* grab the lock that says we are going to process
1995 * all the refs for this head */
1996 ret = btrfs_delayed_ref_lock(trans, locked_ref);
1997
1998 /*
1999 * we may have dropped the spin lock to get the head
2000 * mutex lock, and that might have given someone else
2001 * time to free the head. If that's true, it has been
2002 * removed from our list and we can move on.
2003 */
2004 if (ret == -EAGAIN) {
2005 locked_ref = NULL;
2006 count++;
2007 continue;
Chris Mason56bec292009-03-13 10:10:06 -04002008 }
2009 }
2010
2011 /*
2012 * record the must insert reserved flag before we
2013 * drop the spin lock.
2014 */
2015 must_insert_reserved = locked_ref->must_insert_reserved;
2016 locked_ref->must_insert_reserved = 0;
2017
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002018 extent_op = locked_ref->extent_op;
2019 locked_ref->extent_op = NULL;
2020
Chris Mason56bec292009-03-13 10:10:06 -04002021 /*
2022 * locked_ref is the head node, so we have to go one
2023 * node back for any delayed ref updates
2024 */
Chris Mason56bec292009-03-13 10:10:06 -04002025 ref = select_delayed_ref(locked_ref);
2026 if (!ref) {
2027 /* All delayed refs have been processed, Go ahead
2028 * and send the head node to run_one_delayed_ref,
2029 * so that any accounting fixes can happen
2030 */
2031 ref = &locked_ref->node;
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002032
2033 if (extent_op && must_insert_reserved) {
2034 kfree(extent_op);
2035 extent_op = NULL;
2036 }
2037
2038 if (extent_op) {
2039 spin_unlock(&delayed_refs->lock);
2040
2041 ret = run_delayed_extent_op(trans, root,
2042 ref, extent_op);
2043 BUG_ON(ret);
2044 kfree(extent_op);
2045
2046 cond_resched();
2047 spin_lock(&delayed_refs->lock);
2048 continue;
2049 }
2050
Chris Masonc3e69d52009-03-13 10:17:05 -04002051 list_del_init(&locked_ref->cluster);
Chris Mason56bec292009-03-13 10:10:06 -04002052 locked_ref = NULL;
2053 }
2054
2055 ref->in_tree = 0;
2056 rb_erase(&ref->rb_node, &delayed_refs->root);
2057 delayed_refs->num_entries--;
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002058
Chris Mason56bec292009-03-13 10:10:06 -04002059 spin_unlock(&delayed_refs->lock);
2060
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002061 ret = run_one_delayed_ref(trans, root, ref, extent_op,
Chris Mason56bec292009-03-13 10:10:06 -04002062 must_insert_reserved);
2063 BUG_ON(ret);
Chris Mason56bec292009-03-13 10:10:06 -04002064
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002065 btrfs_put_delayed_ref(ref);
2066 kfree(extent_op);
Chris Masonc3e69d52009-03-13 10:17:05 -04002067 count++;
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002068
Chris Mason1887be62009-03-13 10:11:24 -04002069 cond_resched();
Chris Mason56bec292009-03-13 10:10:06 -04002070 spin_lock(&delayed_refs->lock);
2071 }
Chris Masonc3e69d52009-03-13 10:17:05 -04002072 return count;
2073}
2074
2075/*
2076 * this starts processing the delayed reference count updates and
2077 * extent insertions we have queued up so far. count can be
2078 * 0, which means to process everything in the tree at the start
2079 * of the run (but not newly added entries), or it can be some target
2080 * number you'd like to process.
2081 */
2082int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
2083 struct btrfs_root *root, unsigned long count)
2084{
2085 struct rb_node *node;
2086 struct btrfs_delayed_ref_root *delayed_refs;
2087 struct btrfs_delayed_ref_node *ref;
2088 struct list_head cluster;
2089 int ret;
2090 int run_all = count == (unsigned long)-1;
2091 int run_most = 0;
2092
2093 if (root == root->fs_info->extent_root)
2094 root = root->fs_info->tree_root;
2095
2096 delayed_refs = &trans->transaction->delayed_refs;
2097 INIT_LIST_HEAD(&cluster);
2098again:
2099 spin_lock(&delayed_refs->lock);
2100 if (count == 0) {
2101 count = delayed_refs->num_entries * 2;
2102 run_most = 1;
2103 }
2104 while (1) {
2105 if (!(run_all || run_most) &&
2106 delayed_refs->num_heads_ready < 64)
2107 break;
2108
2109 /*
2110 * go find something we can process in the rbtree. We start at
2111 * the beginning of the tree, and then build a cluster
2112 * of refs to process starting at the first one we are able to
2113 * lock
2114 */
2115 ret = btrfs_find_ref_cluster(trans, &cluster,
2116 delayed_refs->run_delayed_start);
2117 if (ret)
2118 break;
2119
2120 ret = run_clustered_refs(trans, root, &cluster);
2121 BUG_ON(ret < 0);
2122
2123 count -= min_t(unsigned long, ret, count);
2124
2125 if (count == 0)
2126 break;
2127 }
2128
Chris Mason56bec292009-03-13 10:10:06 -04002129 if (run_all) {
Chris Mason56bec292009-03-13 10:10:06 -04002130 node = rb_first(&delayed_refs->root);
Chris Masonc3e69d52009-03-13 10:17:05 -04002131 if (!node)
Chris Mason56bec292009-03-13 10:10:06 -04002132 goto out;
Chris Masonc3e69d52009-03-13 10:17:05 -04002133 count = (unsigned long)-1;
Chris Mason56bec292009-03-13 10:10:06 -04002134
2135 while (node) {
2136 ref = rb_entry(node, struct btrfs_delayed_ref_node,
2137 rb_node);
2138 if (btrfs_delayed_ref_is_head(ref)) {
2139 struct btrfs_delayed_ref_head *head;
2140
2141 head = btrfs_delayed_node_to_head(ref);
2142 atomic_inc(&ref->refs);
2143
2144 spin_unlock(&delayed_refs->lock);
2145 mutex_lock(&head->mutex);
2146 mutex_unlock(&head->mutex);
2147
2148 btrfs_put_delayed_ref(ref);
Chris Mason1887be62009-03-13 10:11:24 -04002149 cond_resched();
Chris Mason56bec292009-03-13 10:10:06 -04002150 goto again;
2151 }
2152 node = rb_next(node);
2153 }
2154 spin_unlock(&delayed_refs->lock);
Chris Mason56bec292009-03-13 10:10:06 -04002155 schedule_timeout(1);
2156 goto again;
2157 }
Chris Mason54aa1f42007-06-22 14:16:25 -04002158out:
Chris Masonc3e69d52009-03-13 10:17:05 -04002159 spin_unlock(&delayed_refs->lock);
Chris Masona28ec192007-03-06 20:08:01 -05002160 return 0;
2161}
2162
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002163int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans,
2164 struct btrfs_root *root,
2165 u64 bytenr, u64 num_bytes, u64 flags,
2166 int is_data)
2167{
2168 struct btrfs_delayed_extent_op *extent_op;
2169 int ret;
2170
2171 extent_op = kmalloc(sizeof(*extent_op), GFP_NOFS);
2172 if (!extent_op)
2173 return -ENOMEM;
2174
2175 extent_op->flags_to_set = flags;
2176 extent_op->update_flags = 1;
2177 extent_op->update_key = 0;
2178 extent_op->is_data = is_data ? 1 : 0;
2179
2180 ret = btrfs_add_delayed_extent_op(trans, bytenr, num_bytes, extent_op);
2181 if (ret)
2182 kfree(extent_op);
2183 return ret;
2184}
2185
2186static noinline int check_delayed_ref(struct btrfs_trans_handle *trans,
2187 struct btrfs_root *root,
2188 struct btrfs_path *path,
2189 u64 objectid, u64 offset, u64 bytenr)
2190{
2191 struct btrfs_delayed_ref_head *head;
2192 struct btrfs_delayed_ref_node *ref;
2193 struct btrfs_delayed_data_ref *data_ref;
2194 struct btrfs_delayed_ref_root *delayed_refs;
2195 struct rb_node *node;
2196 int ret = 0;
2197
2198 ret = -ENOENT;
2199 delayed_refs = &trans->transaction->delayed_refs;
2200 spin_lock(&delayed_refs->lock);
2201 head = btrfs_find_delayed_ref_head(trans, bytenr);
2202 if (!head)
2203 goto out;
2204
2205 if (!mutex_trylock(&head->mutex)) {
2206 atomic_inc(&head->node.refs);
2207 spin_unlock(&delayed_refs->lock);
2208
2209 btrfs_release_path(root->fs_info->extent_root, path);
2210
2211 mutex_lock(&head->mutex);
2212 mutex_unlock(&head->mutex);
2213 btrfs_put_delayed_ref(&head->node);
2214 return -EAGAIN;
2215 }
2216
2217 node = rb_prev(&head->node.rb_node);
2218 if (!node)
2219 goto out_unlock;
2220
2221 ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node);
2222
2223 if (ref->bytenr != bytenr)
2224 goto out_unlock;
2225
2226 ret = 1;
2227 if (ref->type != BTRFS_EXTENT_DATA_REF_KEY)
2228 goto out_unlock;
2229
2230 data_ref = btrfs_delayed_node_to_data_ref(ref);
2231
2232 node = rb_prev(node);
2233 if (node) {
2234 ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node);
2235 if (ref->bytenr == bytenr)
2236 goto out_unlock;
2237 }
2238
2239 if (data_ref->root != root->root_key.objectid ||
2240 data_ref->objectid != objectid || data_ref->offset != offset)
2241 goto out_unlock;
2242
2243 ret = 0;
2244out_unlock:
2245 mutex_unlock(&head->mutex);
2246out:
2247 spin_unlock(&delayed_refs->lock);
2248 return ret;
2249}
2250
2251static noinline int check_committed_ref(struct btrfs_trans_handle *trans,
2252 struct btrfs_root *root,
2253 struct btrfs_path *path,
2254 u64 objectid, u64 offset, u64 bytenr)
Chris Masonbe20aa92007-12-17 20:14:01 -05002255{
2256 struct btrfs_root *extent_root = root->fs_info->extent_root;
Yan Zhengf321e492008-07-30 09:26:11 -04002257 struct extent_buffer *leaf;
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002258 struct btrfs_extent_data_ref *ref;
2259 struct btrfs_extent_inline_ref *iref;
2260 struct btrfs_extent_item *ei;
Chris Masonbe20aa92007-12-17 20:14:01 -05002261 struct btrfs_key key;
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002262 u32 item_size;
Yan Zhengf321e492008-07-30 09:26:11 -04002263 int ret;
Chris Masonbe20aa92007-12-17 20:14:01 -05002264
Chris Masonbe20aa92007-12-17 20:14:01 -05002265 key.objectid = bytenr;
Zheng Yan31840ae2008-09-23 13:14:14 -04002266 key.offset = (u64)-1;
Yan Zhengf321e492008-07-30 09:26:11 -04002267 key.type = BTRFS_EXTENT_ITEM_KEY;
Chris Masonbe20aa92007-12-17 20:14:01 -05002268
Chris Masonbe20aa92007-12-17 20:14:01 -05002269 ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
2270 if (ret < 0)
2271 goto out;
2272 BUG_ON(ret == 0);
Yan Zheng80ff3852008-10-30 14:20:02 -04002273
2274 ret = -ENOENT;
2275 if (path->slots[0] == 0)
Zheng Yan31840ae2008-09-23 13:14:14 -04002276 goto out;
Chris Masonbe20aa92007-12-17 20:14:01 -05002277
Zheng Yan31840ae2008-09-23 13:14:14 -04002278 path->slots[0]--;
Yan Zhengf321e492008-07-30 09:26:11 -04002279 leaf = path->nodes[0];
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002280 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
Chris Masonbe20aa92007-12-17 20:14:01 -05002281
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002282 if (key.objectid != bytenr || key.type != BTRFS_EXTENT_ITEM_KEY)
Chris Masonbe20aa92007-12-17 20:14:01 -05002283 goto out;
Chris Masonbe20aa92007-12-17 20:14:01 -05002284
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002285 ret = 1;
2286 item_size = btrfs_item_size_nr(leaf, path->slots[0]);
2287#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
2288 if (item_size < sizeof(*ei)) {
2289 WARN_ON(item_size != sizeof(struct btrfs_extent_item_v0));
2290 goto out;
Chris Masonbe20aa92007-12-17 20:14:01 -05002291 }
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002292#endif
2293 ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
2294
2295 if (item_size != sizeof(*ei) +
2296 btrfs_extent_inline_ref_size(BTRFS_EXTENT_DATA_REF_KEY))
2297 goto out;
2298
2299 if (btrfs_extent_generation(leaf, ei) <=
2300 btrfs_root_last_snapshot(&root->root_item))
2301 goto out;
2302
2303 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
2304 if (btrfs_extent_inline_ref_type(leaf, iref) !=
2305 BTRFS_EXTENT_DATA_REF_KEY)
2306 goto out;
2307
2308 ref = (struct btrfs_extent_data_ref *)(&iref->offset);
2309 if (btrfs_extent_refs(leaf, ei) !=
2310 btrfs_extent_data_ref_count(leaf, ref) ||
2311 btrfs_extent_data_ref_root(leaf, ref) !=
2312 root->root_key.objectid ||
2313 btrfs_extent_data_ref_objectid(leaf, ref) != objectid ||
2314 btrfs_extent_data_ref_offset(leaf, ref) != offset)
2315 goto out;
2316
Yan Zhengf321e492008-07-30 09:26:11 -04002317 ret = 0;
Chris Masonbe20aa92007-12-17 20:14:01 -05002318out:
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002319 return ret;
2320}
2321
2322int btrfs_cross_ref_exist(struct btrfs_trans_handle *trans,
2323 struct btrfs_root *root,
2324 u64 objectid, u64 offset, u64 bytenr)
2325{
2326 struct btrfs_path *path;
2327 int ret;
2328 int ret2;
2329
2330 path = btrfs_alloc_path();
2331 if (!path)
2332 return -ENOENT;
2333
2334 do {
2335 ret = check_committed_ref(trans, root, path, objectid,
2336 offset, bytenr);
2337 if (ret && ret != -ENOENT)
2338 goto out;
2339
2340 ret2 = check_delayed_ref(trans, root, path, objectid,
2341 offset, bytenr);
2342 } while (ret2 == -EAGAIN);
2343
2344 if (ret2 && ret2 != -ENOENT) {
2345 ret = ret2;
2346 goto out;
2347 }
2348
2349 if (ret != -ENOENT || ret2 != -ENOENT)
2350 ret = 0;
2351out:
Yan Zhengf321e492008-07-30 09:26:11 -04002352 btrfs_free_path(path);
2353 return ret;
2354}
2355
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002356#if 0
Zheng Yan31840ae2008-09-23 13:14:14 -04002357int btrfs_cache_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
2358 struct extent_buffer *buf, u32 nr_extents)
Chris Mason02217ed2007-03-02 16:08:05 -05002359{
Chris Mason5f39d392007-10-15 16:14:19 -04002360 struct btrfs_key key;
Chris Mason6407bf62007-03-27 06:33:00 -04002361 struct btrfs_file_extent_item *fi;
Zheng Yane4657682008-09-26 10:04:53 -04002362 u64 root_gen;
2363 u32 nritems;
Chris Mason02217ed2007-03-02 16:08:05 -05002364 int i;
Chris Masondb945352007-10-15 16:15:53 -04002365 int level;
Zheng Yan31840ae2008-09-23 13:14:14 -04002366 int ret = 0;
Zheng Yane4657682008-09-26 10:04:53 -04002367 int shared = 0;
Chris Masona28ec192007-03-06 20:08:01 -05002368
Chris Mason3768f362007-03-13 16:47:54 -04002369 if (!root->ref_cows)
Chris Masona28ec192007-03-06 20:08:01 -05002370 return 0;
Chris Mason5f39d392007-10-15 16:14:19 -04002371
Zheng Yane4657682008-09-26 10:04:53 -04002372 if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
2373 shared = 0;
2374 root_gen = root->root_key.offset;
2375 } else {
2376 shared = 1;
2377 root_gen = trans->transid - 1;
2378 }
2379
Chris Masondb945352007-10-15 16:15:53 -04002380 level = btrfs_header_level(buf);
Chris Mason5f39d392007-10-15 16:14:19 -04002381 nritems = btrfs_header_nritems(buf);
Chris Mason4a096752008-07-21 10:29:44 -04002382
Zheng Yan31840ae2008-09-23 13:14:14 -04002383 if (level == 0) {
Yan Zheng31153d82008-07-28 15:32:19 -04002384 struct btrfs_leaf_ref *ref;
2385 struct btrfs_extent_info *info;
2386
Zheng Yan31840ae2008-09-23 13:14:14 -04002387 ref = btrfs_alloc_leaf_ref(root, nr_extents);
Yan Zheng31153d82008-07-28 15:32:19 -04002388 if (!ref) {
Zheng Yan31840ae2008-09-23 13:14:14 -04002389 ret = -ENOMEM;
Yan Zheng31153d82008-07-28 15:32:19 -04002390 goto out;
2391 }
2392
Zheng Yane4657682008-09-26 10:04:53 -04002393 ref->root_gen = root_gen;
Yan Zheng31153d82008-07-28 15:32:19 -04002394 ref->bytenr = buf->start;
2395 ref->owner = btrfs_header_owner(buf);
2396 ref->generation = btrfs_header_generation(buf);
Zheng Yan31840ae2008-09-23 13:14:14 -04002397 ref->nritems = nr_extents;
Yan Zheng31153d82008-07-28 15:32:19 -04002398 info = ref->extents;
Yanbcc63ab2008-07-30 16:29:20 -04002399
Zheng Yan31840ae2008-09-23 13:14:14 -04002400 for (i = 0; nr_extents > 0 && i < nritems; i++) {
Yan Zheng31153d82008-07-28 15:32:19 -04002401 u64 disk_bytenr;
2402 btrfs_item_key_to_cpu(buf, &key, i);
2403 if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
2404 continue;
2405 fi = btrfs_item_ptr(buf, i,
2406 struct btrfs_file_extent_item);
2407 if (btrfs_file_extent_type(buf, fi) ==
2408 BTRFS_FILE_EXTENT_INLINE)
2409 continue;
2410 disk_bytenr = btrfs_file_extent_disk_bytenr(buf, fi);
2411 if (disk_bytenr == 0)
2412 continue;
2413
2414 info->bytenr = disk_bytenr;
2415 info->num_bytes =
2416 btrfs_file_extent_disk_num_bytes(buf, fi);
2417 info->objectid = key.objectid;
2418 info->offset = key.offset;
2419 info++;
2420 }
2421
Zheng Yane4657682008-09-26 10:04:53 -04002422 ret = btrfs_add_leaf_ref(root, ref, shared);
Yan Zheng5b84e8d2008-10-09 11:46:19 -04002423 if (ret == -EEXIST && shared) {
2424 struct btrfs_leaf_ref *old;
2425 old = btrfs_lookup_leaf_ref(root, ref->bytenr);
2426 BUG_ON(!old);
2427 btrfs_remove_leaf_ref(root, old);
2428 btrfs_free_leaf_ref(root, old);
2429 ret = btrfs_add_leaf_ref(root, ref, shared);
2430 }
Yan Zheng31153d82008-07-28 15:32:19 -04002431 WARN_ON(ret);
Yanbcc63ab2008-07-30 16:29:20 -04002432 btrfs_free_leaf_ref(root, ref);
Yan Zheng31153d82008-07-28 15:32:19 -04002433 }
2434out:
Zheng Yan31840ae2008-09-23 13:14:14 -04002435 return ret;
2436}
2437
Chris Masonb7a9f292009-02-04 09:23:45 -05002438/* when a block goes through cow, we update the reference counts of
2439 * everything that block points to. The internal pointers of the block
2440 * can be in just about any order, and it is likely to have clusters of
2441 * things that are close together and clusters of things that are not.
2442 *
2443 * To help reduce the seeks that come with updating all of these reference
2444 * counts, sort them by byte number before actual updates are done.
2445 *
2446 * struct refsort is used to match byte number to slot in the btree block.
2447 * we sort based on the byte number and then use the slot to actually
2448 * find the item.
Chris Masonbd56b302009-02-04 09:27:02 -05002449 *
2450 * struct refsort is smaller than strcut btrfs_item and smaller than
2451 * struct btrfs_key_ptr. Since we're currently limited to the page size
2452 * for a btree block, there's no way for a kmalloc of refsorts for a
2453 * single node to be bigger than a page.
Chris Masonb7a9f292009-02-04 09:23:45 -05002454 */
2455struct refsort {
2456 u64 bytenr;
2457 u32 slot;
2458};
2459
2460/*
2461 * for passing into sort()
2462 */
2463static int refsort_cmp(const void *a_void, const void *b_void)
2464{
2465 const struct refsort *a = a_void;
2466 const struct refsort *b = b_void;
2467
2468 if (a->bytenr < b->bytenr)
2469 return -1;
2470 if (a->bytenr > b->bytenr)
2471 return 1;
2472 return 0;
2473}
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002474#endif
Chris Masonb7a9f292009-02-04 09:23:45 -05002475
/*
 * Walk every pointer held in @buf and either take or drop one reference
 * on the extent it points to, depending on @inc.
 *
 * For a leaf (level 0) the regular file extents are processed: inline
 * extents and holes (disk_bytenr == 0) are skipped.  For a node, one
 * reference per child block pointer is modified.
 *
 * @full_backref: when set, the references are recorded against this
 *                buffer's own start offset (parent != 0) instead of
 *                against the owning root.
 * @inc:          nonzero to add references, zero to drop them.
 *
 * Returns 0 on success; any failure from the per-extent callback
 * triggers BUG().
 */
static int __btrfs_mod_ref(struct btrfs_trans_handle *trans,
			   struct btrfs_root *root,
			   struct extent_buffer *buf,
			   int full_backref, int inc)
{
	u64 bytenr;
	u64 num_bytes;
	u64 parent;
	u64 ref_root;
	u32 nritems;
	struct btrfs_key key;
	struct btrfs_file_extent_item *fi;
	int i;
	int level;
	int ret = 0;
	/* either btrfs_inc_extent_ref or btrfs_free_extent, chosen below */
	int (*process_func)(struct btrfs_trans_handle *, struct btrfs_root *,
			    u64, u64, u64, u64, u64, u64);

	ref_root = btrfs_header_owner(buf);
	nritems = btrfs_header_nritems(buf);
	level = btrfs_header_level(buf);

	/* non-cow roots do not track per-file-extent references on leaves */
	if (!root->ref_cows && level == 0)
		return 0;

	if (inc)
		process_func = btrfs_inc_extent_ref;
	else
		process_func = btrfs_free_extent;

	if (full_backref)
		parent = buf->start;
	else
		parent = 0;

	for (i = 0; i < nritems; i++) {
		if (level == 0) {
			/* leaf: only regular, on-disk file extents count */
			btrfs_item_key_to_cpu(buf, &key, i);
			if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
				continue;
			fi = btrfs_item_ptr(buf, i,
					    struct btrfs_file_extent_item);
			if (btrfs_file_extent_type(buf, fi) ==
			    BTRFS_FILE_EXTENT_INLINE)
				continue;
			bytenr = btrfs_file_extent_disk_bytenr(buf, fi);
			if (bytenr == 0)
				continue;

			num_bytes = btrfs_file_extent_disk_num_bytes(buf, fi);
			/*
			 * back-references record the logical file offset of
			 * the extent's start, so undo the in-extent offset
			 */
			key.offset -= btrfs_file_extent_offset(buf, fi);
			ret = process_func(trans, root, bytenr, num_bytes,
					   parent, ref_root, key.objectid,
					   key.offset);
			if (ret)
				goto fail;
		} else {
			/* node: one ref per child tree block */
			bytenr = btrfs_node_blockptr(buf, i);
			num_bytes = btrfs_level_size(root, level - 1);
			ret = process_func(trans, root, bytenr, num_bytes,
					   parent, ref_root, level - 1, 0);
			if (ret)
				goto fail;
		}
	}
	return 0;
fail:
	/* a failed ref update leaves the trees inconsistent; give up hard */
	BUG();
	return ret;
}
2546
/* add one reference for every extent pointer held in @buf */
int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
		  struct extent_buffer *buf, int full_backref)
{
	int ret;

	ret = __btrfs_mod_ref(trans, root, buf, full_backref, 1);
	return ret;
}
Zheng Yan31840ae2008-09-23 13:14:14 -04002552
/* drop one reference for every extent pointer held in @buf */
int btrfs_dec_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
		  struct extent_buffer *buf, int full_backref)
{
	int ret;

	ret = __btrfs_mod_ref(trans, root, buf, full_backref, 0);
	return ret;
}
2558
Chris Mason9078a3e2007-04-26 16:46:15 -04002559static int write_one_cache_group(struct btrfs_trans_handle *trans,
2560 struct btrfs_root *root,
2561 struct btrfs_path *path,
2562 struct btrfs_block_group_cache *cache)
2563{
2564 int ret;
Chris Mason9078a3e2007-04-26 16:46:15 -04002565 struct btrfs_root *extent_root = root->fs_info->extent_root;
Chris Mason5f39d392007-10-15 16:14:19 -04002566 unsigned long bi;
2567 struct extent_buffer *leaf;
Chris Mason9078a3e2007-04-26 16:46:15 -04002568
Chris Mason9078a3e2007-04-26 16:46:15 -04002569 ret = btrfs_search_slot(trans, extent_root, &cache->key, path, 0, 1);
Chris Mason54aa1f42007-06-22 14:16:25 -04002570 if (ret < 0)
2571 goto fail;
Chris Mason9078a3e2007-04-26 16:46:15 -04002572 BUG_ON(ret);
Chris Mason5f39d392007-10-15 16:14:19 -04002573
2574 leaf = path->nodes[0];
2575 bi = btrfs_item_ptr_offset(leaf, path->slots[0]);
2576 write_extent_buffer(leaf, &cache->item, bi, sizeof(cache->item));
2577 btrfs_mark_buffer_dirty(leaf);
Chris Mason9078a3e2007-04-26 16:46:15 -04002578 btrfs_release_path(extent_root, path);
Chris Mason54aa1f42007-06-22 14:16:25 -04002579fail:
Chris Mason9078a3e2007-04-26 16:46:15 -04002580 if (ret)
2581 return ret;
Chris Mason9078a3e2007-04-26 16:46:15 -04002582 return 0;
2583
2584}
2585
Yan Zheng4a8c9a62009-07-22 10:07:05 -04002586static struct btrfs_block_group_cache *
2587next_block_group(struct btrfs_root *root,
2588 struct btrfs_block_group_cache *cache)
2589{
2590 struct rb_node *node;
2591 spin_lock(&root->fs_info->block_group_cache_lock);
2592 node = rb_next(&cache->cache_node);
2593 btrfs_put_block_group(cache);
2594 if (node) {
2595 cache = rb_entry(node, struct btrfs_block_group_cache,
2596 cache_node);
Josef Bacik11dfe352009-11-13 20:12:59 +00002597 btrfs_get_block_group(cache);
Yan Zheng4a8c9a62009-07-22 10:07:05 -04002598 } else
2599 cache = NULL;
2600 spin_unlock(&root->fs_info->block_group_cache_lock);
2601 return cache;
2602}
2603
/*
 * Write every dirty block group item back into the extent tree.
 *
 * Runs in passes keyed by @last: each pass scans from @last for the
 * next dirty group, writes it, and advances.  When a pass reaches the
 * end, @last is reset to 0 and the delayed refs are run again, because
 * writing the items can generate new delayed refs that dirty more
 * groups.  The loop ends on a clean scan that started from 0.
 */
int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
				   struct btrfs_root *root)
{
	struct btrfs_block_group_cache *cache;
	int err = 0;
	struct btrfs_path *path;
	u64 last = 0;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	while (1) {
		if (last == 0) {
			/* flush all pending ref updates before scanning */
			err = btrfs_run_delayed_refs(trans, root,
						     (unsigned long)-1);
			BUG_ON(err);
		}

		/* find the first dirty group at or after @last */
		cache = btrfs_lookup_first_block_group(root->fs_info, last);
		while (cache) {
			if (cache->dirty)
				break;
			cache = next_block_group(root, cache);
		}
		if (!cache) {
			/* clean scan from the start: we are done */
			if (last == 0)
				break;
			last = 0;
			continue;
		}

		cache->dirty = 0;
		last = cache->key.objectid + cache->key.offset;

		err = write_one_cache_group(trans, root, path, cache);
		BUG_ON(err);
		btrfs_put_block_group(cache);
	}

	btrfs_free_path(path);
	return 0;
}
2647
Yan Zhengd2fb3432008-12-11 16:30:39 -05002648int btrfs_extent_readonly(struct btrfs_root *root, u64 bytenr)
2649{
2650 struct btrfs_block_group_cache *block_group;
2651 int readonly = 0;
2652
2653 block_group = btrfs_lookup_block_group(root->fs_info, bytenr);
2654 if (!block_group || block_group->ro)
2655 readonly = 1;
2656 if (block_group)
Chris Masonfa9c0d792009-04-03 09:47:43 -04002657 btrfs_put_block_group(block_group);
Yan Zhengd2fb3432008-12-11 16:30:39 -05002658 return readonly;
2659}
2660
/*
 * Account @total_bytes/@bytes_used of new block group space against the
 * space_info matching @flags, creating the space_info if it does not
 * exist yet.  On success *@space_info points at the (possibly new)
 * structure.  Returns 0 or -ENOMEM.
 */
static int update_space_info(struct btrfs_fs_info *info, u64 flags,
			     u64 total_bytes, u64 bytes_used,
			     struct btrfs_space_info **space_info)
{
	struct btrfs_space_info *found;
	int i;
	int factor;

	/* mirrored profiles consume twice the raw disk space */
	if (flags & (BTRFS_BLOCK_GROUP_DUP | BTRFS_BLOCK_GROUP_RAID1 |
		     BTRFS_BLOCK_GROUP_RAID10))
		factor = 2;
	else
		factor = 1;

	found = __find_space_info(info, flags);
	if (found) {
		/* existing space_info: just fold the new bytes in */
		spin_lock(&found->lock);
		found->total_bytes += total_bytes;
		found->bytes_used += bytes_used;
		found->disk_used += bytes_used * factor;
		found->full = 0;
		spin_unlock(&found->lock);
		*space_info = found;
		return 0;
	}
	found = kzalloc(sizeof(*found), GFP_NOFS);
	if (!found)
		return -ENOMEM;

	for (i = 0; i < BTRFS_NR_RAID_TYPES; i++)
		INIT_LIST_HEAD(&found->block_groups[i]);
	init_rwsem(&found->groups_sem);
	init_waitqueue_head(&found->flush_wait);
	init_waitqueue_head(&found->allocate_wait);
	spin_lock_init(&found->lock);
	/* only keep the type bits; RAID profile bits are tracked elsewhere */
	found->flags = flags & (BTRFS_BLOCK_GROUP_DATA |
				BTRFS_BLOCK_GROUP_SYSTEM |
				BTRFS_BLOCK_GROUP_METADATA);
	found->total_bytes = total_bytes;
	found->bytes_used = bytes_used;
	found->disk_used = bytes_used * factor;
	found->bytes_pinned = 0;
	found->bytes_reserved = 0;
	found->bytes_readonly = 0;
	found->bytes_delalloc = 0;
	found->full = 0;
	found->force_alloc = 0;
	*space_info = found;
	/* readers walk info->space_info under RCU */
	list_add_rcu(&found->list, &info->space_info);
	atomic_set(&found->caching_threads, 0);
	return 0;
}
2713
Chris Mason8790d502008-04-03 16:29:03 -04002714static void set_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags)
2715{
2716 u64 extra_flags = flags & (BTRFS_BLOCK_GROUP_RAID0 |
Chris Mason611f0e02008-04-03 16:29:03 -04002717 BTRFS_BLOCK_GROUP_RAID1 |
Chris Mason321aecc2008-04-16 10:49:51 -04002718 BTRFS_BLOCK_GROUP_RAID10 |
Chris Mason611f0e02008-04-03 16:29:03 -04002719 BTRFS_BLOCK_GROUP_DUP);
Chris Mason8790d502008-04-03 16:29:03 -04002720 if (extra_flags) {
2721 if (flags & BTRFS_BLOCK_GROUP_DATA)
2722 fs_info->avail_data_alloc_bits |= extra_flags;
2723 if (flags & BTRFS_BLOCK_GROUP_METADATA)
2724 fs_info->avail_metadata_alloc_bits |= extra_flags;
2725 if (flags & BTRFS_BLOCK_GROUP_SYSTEM)
2726 fs_info->avail_system_alloc_bits |= extra_flags;
2727 }
2728}
Chris Mason593060d2008-03-25 16:50:33 -04002729
Yan Zhengc146afa2008-11-12 14:34:12 -05002730static void set_block_group_readonly(struct btrfs_block_group_cache *cache)
2731{
2732 spin_lock(&cache->space_info->lock);
2733 spin_lock(&cache->lock);
2734 if (!cache->ro) {
2735 cache->space_info->bytes_readonly += cache->key.offset -
2736 btrfs_block_group_used(&cache->item);
2737 cache->ro = 1;
2738 }
2739 spin_unlock(&cache->lock);
2740 spin_unlock(&cache->space_info->lock);
2741}
2742
Yan Zheng2b820322008-11-17 21:11:30 -05002743u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags)
Chris Masonec44a352008-04-28 15:29:52 -04002744{
Yan Zheng2b820322008-11-17 21:11:30 -05002745 u64 num_devices = root->fs_info->fs_devices->rw_devices;
Chris Masona061fc82008-05-07 11:43:44 -04002746
2747 if (num_devices == 1)
2748 flags &= ~(BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID0);
2749 if (num_devices < 4)
2750 flags &= ~BTRFS_BLOCK_GROUP_RAID10;
2751
Chris Masonec44a352008-04-28 15:29:52 -04002752 if ((flags & BTRFS_BLOCK_GROUP_DUP) &&
2753 (flags & (BTRFS_BLOCK_GROUP_RAID1 |
Chris Masona061fc82008-05-07 11:43:44 -04002754 BTRFS_BLOCK_GROUP_RAID10))) {
Chris Masonec44a352008-04-28 15:29:52 -04002755 flags &= ~BTRFS_BLOCK_GROUP_DUP;
Chris Masona061fc82008-05-07 11:43:44 -04002756 }
Chris Masonec44a352008-04-28 15:29:52 -04002757
2758 if ((flags & BTRFS_BLOCK_GROUP_RAID1) &&
Chris Masona061fc82008-05-07 11:43:44 -04002759 (flags & BTRFS_BLOCK_GROUP_RAID10)) {
Chris Masonec44a352008-04-28 15:29:52 -04002760 flags &= ~BTRFS_BLOCK_GROUP_RAID1;
Chris Masona061fc82008-05-07 11:43:44 -04002761 }
Chris Masonec44a352008-04-28 15:29:52 -04002762
2763 if ((flags & BTRFS_BLOCK_GROUP_RAID0) &&
2764 ((flags & BTRFS_BLOCK_GROUP_RAID1) |
2765 (flags & BTRFS_BLOCK_GROUP_RAID10) |
2766 (flags & BTRFS_BLOCK_GROUP_DUP)))
2767 flags &= ~BTRFS_BLOCK_GROUP_RAID0;
2768 return flags;
2769}
2770
Yan, Zhengb742bb822010-05-16 10:46:24 -04002771static u64 get_alloc_profile(struct btrfs_root *root, u64 flags)
Josef Bacik6a632092009-02-20 11:00:09 -05002772{
Yan, Zhengb742bb822010-05-16 10:46:24 -04002773 if (flags & BTRFS_BLOCK_GROUP_DATA)
2774 flags |= root->fs_info->avail_data_alloc_bits &
2775 root->fs_info->data_alloc_profile;
2776 else if (flags & BTRFS_BLOCK_GROUP_SYSTEM)
2777 flags |= root->fs_info->avail_system_alloc_bits &
2778 root->fs_info->system_alloc_profile;
2779 else if (flags & BTRFS_BLOCK_GROUP_METADATA)
2780 flags |= root->fs_info->avail_metadata_alloc_bits &
2781 root->fs_info->metadata_alloc_profile;
2782 return btrfs_reduce_alloc_profile(root, flags);
2783}
Josef Bacik6a632092009-02-20 11:00:09 -05002784
Yan, Zhengb742bb822010-05-16 10:46:24 -04002785static u64 btrfs_get_alloc_profile(struct btrfs_root *root, int data)
2786{
2787 u64 flags;
Josef Bacik6a632092009-02-20 11:00:09 -05002788
Yan, Zhengb742bb822010-05-16 10:46:24 -04002789 if (data)
2790 flags = BTRFS_BLOCK_GROUP_DATA;
2791 else if (root == root->fs_info->chunk_root)
2792 flags = BTRFS_BLOCK_GROUP_SYSTEM;
2793 else
2794 flags = BTRFS_BLOCK_GROUP_METADATA;
2795
2796 return get_alloc_profile(root, flags);
Josef Bacik6a632092009-02-20 11:00:09 -05002797}
2798
2799void btrfs_set_inode_space_info(struct btrfs_root *root, struct inode *inode)
2800{
2801 u64 alloc_target;
2802
2803 alloc_target = btrfs_get_alloc_profile(root, 1);
2804 BTRFS_I(inode)->space_info = __find_space_info(root->fs_info,
2805 alloc_target);
2806}
2807
Josef Bacik9ed74f22009-09-11 16:12:44 -04002808static u64 calculate_bytes_needed(struct btrfs_root *root, int num_items)
2809{
2810 u64 num_bytes;
2811 int level;
2812
2813 level = BTRFS_MAX_LEVEL - 2;
2814 /*
2815 * NOTE: these calculations are absolutely the worst possible case.
2816 * This assumes that _every_ item we insert will require a new leaf, and
2817 * that the tree has grown to its maximum level size.
2818 */
2819
2820 /*
2821 * for every item we insert we could insert both an extent item and a
2822 * extent ref item. Then for ever item we insert, we will need to cow
2823 * both the original leaf, plus the leaf to the left and right of it.
2824 *
2825 * Unless we are talking about the extent root, then we just want the
2826 * number of items * 2, since we just need the extent item plus its ref.
2827 */
2828 if (root == root->fs_info->extent_root)
2829 num_bytes = num_items * 2;
2830 else
2831 num_bytes = (num_items + (2 * num_items)) * 3;
2832
2833 /*
2834 * num_bytes is total number of leaves we could need times the leaf
2835 * size, and then for every leaf we could end up cow'ing 2 nodes per
2836 * level, down to the leaf level.
2837 */
2838 num_bytes = (num_bytes * root->leafsize) +
2839 (num_bytes * (level * 2)) * root->nodesize;
2840
2841 return num_bytes;
2842}
2843
/*
 * Unreserve metadata space for delalloc. If we have less reserved credits than
 * we have extents, this function does nothing.
 *
 * Drops @num_items worth of per-inode reserved extents and the matching
 * bytes from the metadata space_info's bytes_delalloc.  Always returns
 * 0; an accounting underflow trips BUG_ON instead.
 */
int btrfs_unreserve_metadata_for_delalloc(struct btrfs_root *root,
					  struct inode *inode, int num_items)
{
	struct btrfs_fs_info *info = root->fs_info;
	struct btrfs_space_info *meta_sinfo;
	u64 num_bytes;
	u64 alloc_target;
	bool bug = false;

	/* get the space info for where the metadata will live */
	alloc_target = btrfs_get_alloc_profile(root, 0);
	meta_sinfo = __find_space_info(info, alloc_target);

	num_bytes = calculate_bytes_needed(root->fs_info->extent_root,
					   num_items);

	spin_lock(&meta_sinfo->lock);
	spin_lock(&BTRFS_I(inode)->accounting_lock);
	/*
	 * every outstanding extent still needs its reservation; only
	 * release credits beyond that count
	 */
	if (BTRFS_I(inode)->reserved_extents <=
	    BTRFS_I(inode)->outstanding_extents) {
		spin_unlock(&BTRFS_I(inode)->accounting_lock);
		spin_unlock(&meta_sinfo->lock);
		return 0;
	}
	spin_unlock(&BTRFS_I(inode)->accounting_lock);

	BTRFS_I(inode)->reserved_extents -= num_items;
	BUG_ON(BTRFS_I(inode)->reserved_extents < 0);

	if (meta_sinfo->bytes_delalloc < num_bytes) {
		/* underflow: remember it and clamp, BUG after unlocking */
		bug = true;
		meta_sinfo->bytes_delalloc = 0;
	} else {
		meta_sinfo->bytes_delalloc -= num_bytes;
	}
	spin_unlock(&meta_sinfo->lock);

	BUG_ON(bug);

	return 0;
}
2889
2890static void check_force_delalloc(struct btrfs_space_info *meta_sinfo)
2891{
2892 u64 thresh;
2893
2894 thresh = meta_sinfo->bytes_used + meta_sinfo->bytes_reserved +
2895 meta_sinfo->bytes_pinned + meta_sinfo->bytes_readonly +
2896 meta_sinfo->bytes_super + meta_sinfo->bytes_root +
2897 meta_sinfo->bytes_may_use;
2898
2899 thresh = meta_sinfo->total_bytes - thresh;
2900 thresh *= 80;
2901 do_div(thresh, 100);
2902 if (thresh <= meta_sinfo->bytes_delalloc)
2903 meta_sinfo->force_delalloc = 1;
2904 else
2905 meta_sinfo->force_delalloc = 0;
2906}
2907
/*
 * work item handed to the enospc worker pool to flush delalloc for one
 * space_info in the background (see flush_delalloc_async).
 */
struct async_flush {
	struct btrfs_root *root;	/* root to flush delalloc for */
	struct btrfs_space_info *info;	/* space_info being waited on */
	struct btrfs_work work;		/* worker-thread hook */
};
2913
/*
 * Worker-thread side of flush_delalloc(): start delalloc writeback,
 * wait for the ordered extents to finish, then clear info->flushing and
 * wake anyone blocked in wait_on_flush().  Frees the async_flush that
 * carried it.
 */
static noinline void flush_delalloc_async(struct btrfs_work *work)
{
	struct async_flush *async;
	struct btrfs_root *root;
	struct btrfs_space_info *info;

	async = container_of(work, struct async_flush, work);
	root = async->root;
	info = async->info;

	btrfs_start_delalloc_inodes(root, 0);
	/*
	 * wake early: writeback has started, so waiters can recheck
	 * whether enough space has already been freed
	 */
	wake_up(&info->flush_wait);
	btrfs_wait_ordered_extents(root, 0, 0);

	spin_lock(&info->lock);
	info->flushing = 0;
	spin_unlock(&info->lock);
	/* second wakeup: the flush is fully complete */
	wake_up(&info->flush_wait);

	kfree(async);
}
2935
/*
 * Block until the in-progress delalloc flush on @info finishes, or
 * until enough space frees up that the totals drop below the space_info
 * size — whichever happens first.
 */
static void wait_on_flush(struct btrfs_space_info *info)
{
	DEFINE_WAIT(wait);
	u64 used;

	while (1) {
		prepare_to_wait(&info->flush_wait, &wait,
				TASK_UNINTERRUPTIBLE);
		spin_lock(&info->lock);
		if (!info->flushing) {
			/* flush finished */
			spin_unlock(&info->lock);
			break;
		}

		/* enough space already reclaimed? stop waiting early */
		used = info->bytes_used + info->bytes_reserved +
			info->bytes_pinned + info->bytes_readonly +
			info->bytes_super + info->bytes_root +
			info->bytes_may_use + info->bytes_delalloc;
		if (used < info->total_bytes) {
			spin_unlock(&info->lock);
			break;
		}
		spin_unlock(&info->lock);
		schedule();
	}
	finish_wait(&info->flush_wait, &wait);
}
2963
/*
 * Flush delalloc data for @root to free space in @info.
 *
 * Only one flush runs at a time per space_info: if another task is
 * already flushing, just wait for it.  Otherwise hand the work to the
 * enospc worker pool and wait; if the work item cannot be allocated,
 * fall back to flushing synchronously in this context.
 */
static void flush_delalloc(struct btrfs_root *root,
			   struct btrfs_space_info *info)
{
	struct async_flush *async;
	bool wait = false;

	spin_lock(&info->lock);

	/* claim the flusher role, or note that someone else has it */
	if (!info->flushing)
		info->flushing = 1;
	else
		wait = true;

	spin_unlock(&info->lock);

	if (wait) {
		wait_on_flush(info);
		return;
	}

	async = kzalloc(sizeof(*async), GFP_NOFS);
	if (!async)
		goto flush;

	async->root = root;
	async->info = info;
	async->work.func = flush_delalloc_async;

	btrfs_queue_worker(&root->fs_info->enospc_workers,
			   &async->work);
	wait_on_flush(info);
	return;

flush:
	/* no memory for the work item: flush inline instead */
	btrfs_start_delalloc_inodes(root, 0);
	btrfs_wait_ordered_extents(root, 0, 0);

	spin_lock(&info->lock);
	info->flushing = 0;
	spin_unlock(&info->lock);
	wake_up(&info->flush_wait);
}
3006
/*
 * Try to allocate a new chunk for @info if it is still allowed to grow.
 *
 * Entered with info->lock held (every path below releases it; the lock
 * is never taken first) and returns with it released.
 *
 * Returns 1 when the caller should retry its reservation (a chunk was
 * allocated, or another task finished allocating one), 0 when growing
 * is not possible or the allocation failed.
 */
static int maybe_allocate_chunk(struct btrfs_root *root,
				struct btrfs_space_info *info)
{
	struct btrfs_super_block *disk_super = &root->fs_info->super_copy;
	struct btrfs_trans_handle *trans;
	bool wait = false;
	int ret = 0;
	u64 min_metadata;
	u64 free_space;

	free_space = btrfs_super_total_bytes(disk_super);
	/*
	 * we allow the metadata to grow to a max of either 10gb or 5% of the
	 * space in the volume.
	 */
	min_metadata = min((u64)10 * 1024 * 1024 * 1024,
			   div64_u64(free_space * 5, 100));
	if (info->total_bytes >= min_metadata) {
		spin_unlock(&info->lock);
		return 0;
	}

	if (info->full) {
		spin_unlock(&info->lock);
		return 0;
	}

	/* become the allocator, or wait for whoever already is */
	if (!info->allocating_chunk) {
		info->force_alloc = 1;
		info->allocating_chunk = 1;
	} else {
		wait = true;
	}

	spin_unlock(&info->lock);

	if (wait) {
		wait_event(info->allocate_wait,
			   !info->allocating_chunk);
		/* someone else allocated; tell the caller to retry */
		return 1;
	}

	trans = btrfs_start_transaction(root, 1);
	if (!trans) {
		ret = -ENOMEM;
		goto out;
	}

	ret = do_chunk_alloc(trans, root->fs_info->extent_root,
			     4096 + 2 * 1024 * 1024,
			     info->flags, 0);
	btrfs_end_transaction(trans, root);
	if (ret)
		goto out;
out:
	/* drop the allocator role and wake any waiters */
	spin_lock(&info->lock);
	info->allocating_chunk = 0;
	spin_unlock(&info->lock);
	wake_up(&info->allocate_wait);

	if (ret)
		return 0;
	return 1;
}
3071
/*
 * Reserve metadata space for delalloc.
 *
 * Optimistically adds the worst-case byte cost of @num_items to
 * bytes_delalloc, then checks the space_info totals.  If over-committed
 * it escalates through three recovery steps, retrying after each:
 *   1. allocate a new metadata chunk (maybe_allocate_chunk)
 *   2. flush this inode's dirty pages (filemap_flush)
 *   3. flush all delalloc for the space_info (flush_delalloc)
 * If all three fail the reservation is backed out and -ENOSPC returned.
 */
int btrfs_reserve_metadata_for_delalloc(struct btrfs_root *root,
					struct inode *inode, int num_items)
{
	struct btrfs_fs_info *info = root->fs_info;
	struct btrfs_space_info *meta_sinfo;
	u64 num_bytes;
	u64 used;
	u64 alloc_target;
	int flushed = 0;
	int force_delalloc;

	/* get the space info for where the metadata will live */
	alloc_target = btrfs_get_alloc_profile(root, 0);
	meta_sinfo = __find_space_info(info, alloc_target);

	num_bytes = calculate_bytes_needed(root->fs_info->extent_root,
					   num_items);
again:
	spin_lock(&meta_sinfo->lock);

	force_delalloc = meta_sinfo->force_delalloc;

	/* lazily set aside a worst-case reserve for the tree roots */
	if (unlikely(!meta_sinfo->bytes_root))
		meta_sinfo->bytes_root = calculate_bytes_needed(root, 6);

	/* charge only once; retries keep the charge from the first pass */
	if (!flushed)
		meta_sinfo->bytes_delalloc += num_bytes;

	used = meta_sinfo->bytes_used + meta_sinfo->bytes_reserved +
		meta_sinfo->bytes_pinned + meta_sinfo->bytes_readonly +
		meta_sinfo->bytes_super + meta_sinfo->bytes_root +
		meta_sinfo->bytes_may_use + meta_sinfo->bytes_delalloc;

	if (used > meta_sinfo->total_bytes) {
		flushed++;

		if (flushed == 1) {
			/* maybe_allocate_chunk drops meta_sinfo->lock */
			if (maybe_allocate_chunk(root, meta_sinfo))
				goto again;
			flushed++;
		} else {
			spin_unlock(&meta_sinfo->lock);
		}

		if (flushed == 2) {
			filemap_flush(inode->i_mapping);
			goto again;
		} else if (flushed == 3) {
			flush_delalloc(root, meta_sinfo);
			goto again;
		}

		/* out of options: undo the charge and give up */
		spin_lock(&meta_sinfo->lock);
		meta_sinfo->bytes_delalloc -= num_bytes;
		spin_unlock(&meta_sinfo->lock);
		printk(KERN_ERR "enospc, has %d, reserved %d\n",
		       BTRFS_I(inode)->outstanding_extents,
		       BTRFS_I(inode)->reserved_extents);
		dump_space_info(meta_sinfo, 0, 0);
		return -ENOSPC;
	}

	BTRFS_I(inode)->reserved_extents += num_items;
	check_force_delalloc(meta_sinfo);
	spin_unlock(&meta_sinfo->lock);

	/* under delalloc pressure: push this inode's pages out now */
	if (!flushed && force_delalloc)
		filemap_flush(inode->i_mapping);

	return 0;
}
3145
3146/*
3147 * unreserve num_items number of items worth of metadata space. This needs to
3148 * be paired with btrfs_reserve_metadata_space.
3149 *
3150 * NOTE: if you have the option, run this _AFTER_ you do a
3151 * btrfs_end_transaction, since btrfs_end_transaction will run delayed ref
3152 * oprations which will result in more used metadata, so we want to make sure we
3153 * can do that without issue.
3154 */
3155int btrfs_unreserve_metadata_space(struct btrfs_root *root, int num_items)
3156{
3157 struct btrfs_fs_info *info = root->fs_info;
3158 struct btrfs_space_info *meta_sinfo;
3159 u64 num_bytes;
3160 u64 alloc_target;
3161 bool bug = false;
3162
3163 /* get the space info for where the metadata will live */
3164 alloc_target = btrfs_get_alloc_profile(root, 0);
3165 meta_sinfo = __find_space_info(info, alloc_target);
3166
3167 num_bytes = calculate_bytes_needed(root, num_items);
3168
3169 spin_lock(&meta_sinfo->lock);
3170 if (meta_sinfo->bytes_may_use < num_bytes) {
3171 bug = true;
3172 meta_sinfo->bytes_may_use = 0;
3173 } else {
3174 meta_sinfo->bytes_may_use -= num_bytes;
3175 }
3176 spin_unlock(&meta_sinfo->lock);
3177
3178 BUG_ON(bug);
3179
3180 return 0;
3181}
3182
/*
 * Reserve some metadata space for use. We'll calculate the worst case
 * number of bytes that would be needed to modify num_items number of
 * items. If we have space, fantastic, if not, you get -ENOSPC. Please
 * call btrfs_unreserve_metadata_space when you are done for the _SAME_
 * number of items you reserved, since whatever metadata you needed
 * should have already been allocated.
 *
 * This will commit the transaction to make more space if we don't have
 * enough metadata space. The only time we don't do this is if we're
 * reserving space inside of a transaction, then we will just return
 * -ENOSPC and it is the caller's responsibility to handle it properly.
 */
int btrfs_reserve_metadata_space(struct btrfs_root *root, int num_items)
{
	struct btrfs_fs_info *info = root->fs_info;
	struct btrfs_space_info *meta_sinfo;
	u64 num_bytes;
	u64 used;
	u64 alloc_target;
	int retries = 0;

	/* get the space info for where the metadata will live */
	alloc_target = btrfs_get_alloc_profile(root, 0);
	meta_sinfo = __find_space_info(info, alloc_target);

	num_bytes = calculate_bytes_needed(root, num_items);
again:
	spin_lock(&meta_sinfo->lock);

	/* lazily set aside a worst-case reserve for the tree roots */
	if (unlikely(!meta_sinfo->bytes_root))
		meta_sinfo->bytes_root = calculate_bytes_needed(root, 6);

	/* charge only on the first pass; retries keep that charge */
	if (!retries)
		meta_sinfo->bytes_may_use += num_bytes;

	used = meta_sinfo->bytes_used + meta_sinfo->bytes_reserved +
		meta_sinfo->bytes_pinned + meta_sinfo->bytes_readonly +
		meta_sinfo->bytes_super + meta_sinfo->bytes_root +
		meta_sinfo->bytes_may_use + meta_sinfo->bytes_delalloc;

	if (used > meta_sinfo->total_bytes) {
		retries++;
		if (retries == 1) {
			/* maybe_allocate_chunk drops meta_sinfo->lock */
			if (maybe_allocate_chunk(root, meta_sinfo))
				goto again;
			retries++;
		} else {
			spin_unlock(&meta_sinfo->lock);
		}

		if (retries == 2) {
			flush_delalloc(root, meta_sinfo);
			goto again;
		}

		/* nothing helped: undo the charge and report ENOSPC */
		spin_lock(&meta_sinfo->lock);
		meta_sinfo->bytes_may_use -= num_bytes;
		spin_unlock(&meta_sinfo->lock);

		dump_space_info(meta_sinfo, 0, 0);
		return -ENOSPC;
	}

	check_force_delalloc(meta_sinfo);
	spin_unlock(&meta_sinfo->lock);

	return 0;
}
3251
/*
 * This will check the space that the inode allocates from to make sure we have
 * enough space for bytes.
 *
 * On shortage it escalates and retries: first flush delalloc, then
 * allocate a new data chunk if the space_info is not full, and finally
 * commit the running transaction (unless an ioctl-controlled
 * transaction is open).  On success the bytes are added to
 * bytes_may_use and to the inode's reserved_bytes; undo with
 * btrfs_free_reserved_data_space().
 */
int btrfs_check_data_free_space(struct btrfs_root *root, struct inode *inode,
				u64 bytes)
{
	struct btrfs_space_info *data_sinfo;
	u64 used;
	int ret = 0, committed = 0, flushed = 0;

	/* make sure bytes are sectorsize aligned */
	bytes = (bytes + root->sectorsize - 1) & ~((u64)root->sectorsize - 1);

	data_sinfo = BTRFS_I(inode)->space_info;
	if (!data_sinfo)
		goto alloc;

again:
	/* make sure we have enough space to handle the data first */
	spin_lock(&data_sinfo->lock);
	used = data_sinfo->bytes_used + data_sinfo->bytes_delalloc +
		data_sinfo->bytes_reserved + data_sinfo->bytes_pinned +
		data_sinfo->bytes_readonly + data_sinfo->bytes_may_use +
		data_sinfo->bytes_super;

	if (used + bytes > data_sinfo->total_bytes) {
		struct btrfs_trans_handle *trans;

		/* first resort: flush delalloc and retry once */
		if (!flushed) {
			spin_unlock(&data_sinfo->lock);
			flush_delalloc(root, data_sinfo);
			flushed = 1;
			goto again;
		}

		/*
		 * if we don't have enough free bytes in this space then we need
		 * to alloc a new chunk.
		 */
		if (!data_sinfo->full) {
			u64 alloc_target;

			data_sinfo->force_alloc = 1;
			spin_unlock(&data_sinfo->lock);
alloc:
			/*
			 * also reached directly when the inode has no
			 * space_info yet: allocate, then look it up below
			 */
			alloc_target = btrfs_get_alloc_profile(root, 1);
			trans = btrfs_start_transaction(root, 1);
			if (!trans)
				return -ENOMEM;

			ret = do_chunk_alloc(trans, root->fs_info->extent_root,
					     bytes + 2 * 1024 * 1024,
					     alloc_target, 0);
			btrfs_end_transaction(trans, root);
			if (ret)
				return ret;

			if (!data_sinfo) {
				btrfs_set_inode_space_info(root, inode);
				data_sinfo = BTRFS_I(inode)->space_info;
			}
			goto again;
		}
		spin_unlock(&data_sinfo->lock);

		/* commit the current transaction and try again */
		if (!committed && !root->fs_info->open_ioctl_trans) {
			committed = 1;
			trans = btrfs_join_transaction(root, 1);
			if (!trans)
				return -ENOMEM;
			ret = btrfs_commit_transaction(trans, root);
			if (ret)
				return ret;
			goto again;
		}

		printk(KERN_ERR "no space left, need %llu, %llu delalloc bytes"
		       ", %llu bytes_used, %llu bytes_reserved, "
		       "%llu bytes_pinned, %llu bytes_readonly, %llu may use "
		       "%llu total\n", (unsigned long long)bytes,
		       (unsigned long long)data_sinfo->bytes_delalloc,
		       (unsigned long long)data_sinfo->bytes_used,
		       (unsigned long long)data_sinfo->bytes_reserved,
		       (unsigned long long)data_sinfo->bytes_pinned,
		       (unsigned long long)data_sinfo->bytes_readonly,
		       (unsigned long long)data_sinfo->bytes_may_use,
		       (unsigned long long)data_sinfo->total_bytes);
		return -ENOSPC;
	}
	data_sinfo->bytes_may_use += bytes;
	BTRFS_I(inode)->reserved_bytes += bytes;
	spin_unlock(&data_sinfo->lock);

	return 0;
}
3349
3350/*
3351 * if there was an error for whatever reason after calling
3352 * btrfs_check_data_free_space, call this so we can cleanup the counters.
3353 */
3354void btrfs_free_reserved_data_space(struct btrfs_root *root,
3355 struct inode *inode, u64 bytes)
3356{
3357 struct btrfs_space_info *data_sinfo;
3358
3359 /* make sure bytes are sectorsize aligned */
3360 bytes = (bytes + root->sectorsize - 1) & ~((u64)root->sectorsize - 1);
3361
3362 data_sinfo = BTRFS_I(inode)->space_info;
3363 spin_lock(&data_sinfo->lock);
3364 data_sinfo->bytes_may_use -= bytes;
3365 BTRFS_I(inode)->reserved_bytes -= bytes;
3366 spin_unlock(&data_sinfo->lock);
3367}
3368
3369/* called when we are adding a delalloc extent to the inode's io_tree */
3370void btrfs_delalloc_reserve_space(struct btrfs_root *root, struct inode *inode,
3371 u64 bytes)
3372{
3373 struct btrfs_space_info *data_sinfo;
3374
3375 /* get the space info for where this inode will be storing its data */
3376 data_sinfo = BTRFS_I(inode)->space_info;
3377
3378 /* make sure we have enough space to handle the data first */
3379 spin_lock(&data_sinfo->lock);
3380 data_sinfo->bytes_delalloc += bytes;
3381
3382 /*
3383 * we are adding a delalloc extent without calling
3384 * btrfs_check_data_free_space first. This happens on a weird
3385 * writepage condition, but shouldn't hurt our accounting
3386 */
3387 if (unlikely(bytes > BTRFS_I(inode)->reserved_bytes)) {
3388 data_sinfo->bytes_may_use -= BTRFS_I(inode)->reserved_bytes;
3389 BTRFS_I(inode)->reserved_bytes = 0;
3390 } else {
3391 data_sinfo->bytes_may_use -= bytes;
3392 BTRFS_I(inode)->reserved_bytes -= bytes;
3393 }
3394
3395 spin_unlock(&data_sinfo->lock);
3396}
3397
3398/* called when we are clearing an delalloc extent from the inode's io_tree */
3399void btrfs_delalloc_free_space(struct btrfs_root *root, struct inode *inode,
3400 u64 bytes)
3401{
3402 struct btrfs_space_info *info;
3403
3404 info = BTRFS_I(inode)->space_info;
3405
3406 spin_lock(&info->lock);
3407 info->bytes_delalloc -= bytes;
3408 spin_unlock(&info->lock);
3409}
3410
Josef Bacik97e728d2009-04-21 17:40:57 -04003411static void force_metadata_allocation(struct btrfs_fs_info *info)
3412{
3413 struct list_head *head = &info->space_info;
3414 struct btrfs_space_info *found;
3415
3416 rcu_read_lock();
3417 list_for_each_entry_rcu(found, head, list) {
3418 if (found->flags & BTRFS_BLOCK_GROUP_METADATA)
3419 found->force_alloc = 1;
3420 }
3421 rcu_read_unlock();
3422}
3423
Chris Mason6324fbf2008-03-24 15:01:59 -04003424static int do_chunk_alloc(struct btrfs_trans_handle *trans,
3425 struct btrfs_root *extent_root, u64 alloc_bytes,
Chris Mason0ef3e662008-05-24 14:04:53 -04003426 u64 flags, int force)
Chris Mason6324fbf2008-03-24 15:01:59 -04003427{
3428 struct btrfs_space_info *space_info;
Josef Bacik97e728d2009-04-21 17:40:57 -04003429 struct btrfs_fs_info *fs_info = extent_root->fs_info;
Chris Mason6324fbf2008-03-24 15:01:59 -04003430 u64 thresh;
Yan Zhengc146afa2008-11-12 14:34:12 -05003431 int ret = 0;
3432
Josef Bacik97e728d2009-04-21 17:40:57 -04003433 mutex_lock(&fs_info->chunk_mutex);
Chris Mason6324fbf2008-03-24 15:01:59 -04003434
Yan Zheng2b820322008-11-17 21:11:30 -05003435 flags = btrfs_reduce_alloc_profile(extent_root, flags);
Chris Masonec44a352008-04-28 15:29:52 -04003436
Chris Mason6324fbf2008-03-24 15:01:59 -04003437 space_info = __find_space_info(extent_root->fs_info, flags);
Chris Mason593060d2008-03-25 16:50:33 -04003438 if (!space_info) {
3439 ret = update_space_info(extent_root->fs_info, flags,
3440 0, 0, &space_info);
3441 BUG_ON(ret);
3442 }
Chris Mason6324fbf2008-03-24 15:01:59 -04003443 BUG_ON(!space_info);
3444
Josef Bacik25179202008-10-29 14:49:05 -04003445 spin_lock(&space_info->lock);
Josef Bacik9ed74f22009-09-11 16:12:44 -04003446 if (space_info->force_alloc)
Chris Mason0ef3e662008-05-24 14:04:53 -04003447 force = 1;
Josef Bacik25179202008-10-29 14:49:05 -04003448 if (space_info->full) {
3449 spin_unlock(&space_info->lock);
Chris Mason925baed2008-06-25 16:01:30 -04003450 goto out;
Josef Bacik25179202008-10-29 14:49:05 -04003451 }
Chris Mason6324fbf2008-03-24 15:01:59 -04003452
Yan Zhengc146afa2008-11-12 14:34:12 -05003453 thresh = space_info->total_bytes - space_info->bytes_readonly;
Josef Bacik9ed74f22009-09-11 16:12:44 -04003454 thresh = div_factor(thresh, 8);
Chris Mason0ef3e662008-05-24 14:04:53 -04003455 if (!force &&
Zheng Yane8569812008-09-26 10:05:48 -04003456 (space_info->bytes_used + space_info->bytes_pinned +
Josef Bacik25179202008-10-29 14:49:05 -04003457 space_info->bytes_reserved + alloc_bytes) < thresh) {
3458 spin_unlock(&space_info->lock);
Chris Mason925baed2008-06-25 16:01:30 -04003459 goto out;
Josef Bacik25179202008-10-29 14:49:05 -04003460 }
Josef Bacik25179202008-10-29 14:49:05 -04003461 spin_unlock(&space_info->lock);
3462
Josef Bacik97e728d2009-04-21 17:40:57 -04003463 /*
3464 * if we're doing a data chunk, go ahead and make sure that
3465 * we keep a reasonable number of metadata chunks allocated in the
3466 * FS as well.
3467 */
Josef Bacik9ed74f22009-09-11 16:12:44 -04003468 if (flags & BTRFS_BLOCK_GROUP_DATA && fs_info->metadata_ratio) {
Josef Bacik97e728d2009-04-21 17:40:57 -04003469 fs_info->data_chunk_allocations++;
3470 if (!(fs_info->data_chunk_allocations %
3471 fs_info->metadata_ratio))
3472 force_metadata_allocation(fs_info);
3473 }
3474
Yan Zheng2b820322008-11-17 21:11:30 -05003475 ret = btrfs_alloc_chunk(trans, extent_root, flags);
Josef Bacik9ed74f22009-09-11 16:12:44 -04003476 spin_lock(&space_info->lock);
Chris Masond3977122009-01-05 21:25:51 -05003477 if (ret)
Chris Mason6324fbf2008-03-24 15:01:59 -04003478 space_info->full = 1;
Josef Bacik9ed74f22009-09-11 16:12:44 -04003479 space_info->force_alloc = 0;
3480 spin_unlock(&space_info->lock);
Chris Masona74a4b92008-06-25 16:01:31 -04003481out:
Yan Zhengc146afa2008-11-12 14:34:12 -05003482 mutex_unlock(&extent_root->fs_info->chunk_mutex);
Josef Bacik0f9dd462008-09-23 13:14:11 -04003483 return ret;
Chris Mason6324fbf2008-03-24 15:01:59 -04003484}
3485
/*
 * Adjust on-disk usage accounting for [bytenr, bytenr + num_bytes) after an
 * allocation (@alloc != 0) or a free (@alloc == 0).  Updates the superblock
 * bytes_used, each affected block group's item and dirty flag, and the
 * owning space_info's bytes_used/disk_used (and bytes_reserved on alloc).
 * The range may span multiple block groups; the loop walks them in order.
 *
 * On free, @mark_free additionally discards the range and returns it to the
 * block group's free space cache.
 *
 * Returns 0 on success, -1 if a covering block group cannot be found.
 */
static int update_block_group(struct btrfs_trans_handle *trans,
			      struct btrfs_root *root,
			      u64 bytenr, u64 num_bytes, int alloc,
			      int mark_free)
{
	struct btrfs_block_group_cache *cache;
	struct btrfs_fs_info *info = root->fs_info;
	int factor;
	u64 total = num_bytes;
	u64 old_val;
	u64 byte_in_group;

	/* block accounting for super block */
	spin_lock(&info->delalloc_lock);
	old_val = btrfs_super_bytes_used(&info->super_copy);
	if (alloc)
		old_val += num_bytes;
	else
		old_val -= num_bytes;
	btrfs_set_super_bytes_used(&info->super_copy, old_val);
	spin_unlock(&info->delalloc_lock);

	while (total) {
		cache = btrfs_lookup_block_group(info, bytenr);
		if (!cache)
			return -1;
		/*
		 * disk_used is counted at twice the logical size for these
		 * profiles (presumably because they keep two copies of each
		 * block -- NOTE(review): confirm against the chunk layout)
		 */
		if (cache->flags & (BTRFS_BLOCK_GROUP_DUP |
				    BTRFS_BLOCK_GROUP_RAID1 |
				    BTRFS_BLOCK_GROUP_RAID10))
			factor = 2;
		else
			factor = 1;
		byte_in_group = bytenr - cache->key.objectid;
		WARN_ON(byte_in_group > cache->key.offset);

		/* lock order: space_info lock first, then the block group */
		spin_lock(&cache->space_info->lock);
		spin_lock(&cache->lock);
		cache->dirty = 1;
		old_val = btrfs_block_group_used(&cache->item);
		/* only the part of [bytenr, +total) inside this group */
		num_bytes = min(total, cache->key.offset - byte_in_group);
		if (alloc) {
			old_val += num_bytes;
			btrfs_set_block_group_used(&cache->item, old_val);
			/* bytes move from reserved into used */
			cache->reserved -= num_bytes;
			cache->space_info->bytes_reserved -= num_bytes;
			cache->space_info->bytes_used += num_bytes;
			cache->space_info->disk_used += num_bytes * factor;
			if (cache->ro)
				cache->space_info->bytes_readonly -= num_bytes;
			spin_unlock(&cache->lock);
			spin_unlock(&cache->space_info->lock);
		} else {
			old_val -= num_bytes;
			btrfs_set_block_group_used(&cache->item, old_val);
			cache->space_info->bytes_used -= num_bytes;
			cache->space_info->disk_used -= num_bytes * factor;
			if (cache->ro)
				cache->space_info->bytes_readonly += num_bytes;
			spin_unlock(&cache->lock);
			spin_unlock(&cache->space_info->lock);
			if (mark_free) {
				int ret;

				/* best effort: failures only WARN */
				ret = btrfs_discard_extent(root, bytenr,
							   num_bytes);
				WARN_ON(ret);

				ret = btrfs_add_free_space(cache, bytenr,
							   num_bytes);
				WARN_ON(ret);
			}
		}
		/* drop the ref taken by btrfs_lookup_block_group */
		btrfs_put_block_group(cache);
		total -= num_bytes;
		bytenr += num_bytes;
	}
	return 0;
}
Chris Mason6324fbf2008-03-24 15:01:59 -04003564
Chris Masona061fc82008-05-07 11:43:44 -04003565static u64 first_logical_byte(struct btrfs_root *root, u64 search_start)
3566{
Josef Bacik0f9dd462008-09-23 13:14:11 -04003567 struct btrfs_block_group_cache *cache;
Yan Zhengd2fb3432008-12-11 16:30:39 -05003568 u64 bytenr;
Josef Bacik0f9dd462008-09-23 13:14:11 -04003569
3570 cache = btrfs_lookup_first_block_group(root->fs_info, search_start);
3571 if (!cache)
Chris Masona061fc82008-05-07 11:43:44 -04003572 return 0;
Josef Bacik0f9dd462008-09-23 13:14:11 -04003573
Yan Zhengd2fb3432008-12-11 16:30:39 -05003574 bytenr = cache->key.objectid;
Chris Masonfa9c0d792009-04-03 09:47:43 -04003575 btrfs_put_block_group(cache);
Yan Zhengd2fb3432008-12-11 16:30:39 -05003576
3577 return bytenr;
Chris Masona061fc82008-05-07 11:43:44 -04003578}
3579
Yan Zheng11833d62009-09-11 16:11:19 -04003580/*
3581 * this function must be called within transaction
3582 */
3583int btrfs_pin_extent(struct btrfs_root *root,
3584 u64 bytenr, u64 num_bytes, int reserved)
Yan324ae4d2007-11-16 14:57:08 -05003585{
Yan324ae4d2007-11-16 14:57:08 -05003586 struct btrfs_fs_info *fs_info = root->fs_info;
Yan Zheng11833d62009-09-11 16:11:19 -04003587 struct btrfs_block_group_cache *cache;
Yan324ae4d2007-11-16 14:57:08 -05003588
Yan Zheng11833d62009-09-11 16:11:19 -04003589 cache = btrfs_lookup_block_group(fs_info, bytenr);
3590 BUG_ON(!cache);
Chris Masonb9473432009-03-13 11:00:37 -04003591
Yan Zheng11833d62009-09-11 16:11:19 -04003592 spin_lock(&cache->space_info->lock);
3593 spin_lock(&cache->lock);
3594 cache->pinned += num_bytes;
3595 cache->space_info->bytes_pinned += num_bytes;
3596 if (reserved) {
3597 cache->reserved -= num_bytes;
3598 cache->space_info->bytes_reserved -= num_bytes;
Yan324ae4d2007-11-16 14:57:08 -05003599 }
Yan Zheng11833d62009-09-11 16:11:19 -04003600 spin_unlock(&cache->lock);
3601 spin_unlock(&cache->space_info->lock);
3602
3603 btrfs_put_block_group(cache);
3604
3605 set_extent_dirty(fs_info->pinned_extents,
3606 bytenr, bytenr + num_bytes - 1, GFP_NOFS);
Yan324ae4d2007-11-16 14:57:08 -05003607 return 0;
3608}
Chris Mason9078a3e2007-04-26 16:46:15 -04003609
Yan Zheng11833d62009-09-11 16:11:19 -04003610static int update_reserved_extents(struct btrfs_block_group_cache *cache,
3611 u64 num_bytes, int reserve)
Zheng Yane8569812008-09-26 10:05:48 -04003612{
Yan Zheng11833d62009-09-11 16:11:19 -04003613 spin_lock(&cache->space_info->lock);
3614 spin_lock(&cache->lock);
3615 if (reserve) {
3616 cache->reserved += num_bytes;
3617 cache->space_info->bytes_reserved += num_bytes;
3618 } else {
3619 cache->reserved -= num_bytes;
3620 cache->space_info->bytes_reserved -= num_bytes;
3621 }
3622 spin_unlock(&cache->lock);
3623 spin_unlock(&cache->space_info->lock);
3624 return 0;
3625}
Zheng Yane8569812008-09-26 10:05:48 -04003626
Yan Zheng11833d62009-09-11 16:11:19 -04003627int btrfs_prepare_extent_commit(struct btrfs_trans_handle *trans,
3628 struct btrfs_root *root)
3629{
3630 struct btrfs_fs_info *fs_info = root->fs_info;
3631 struct btrfs_caching_control *next;
3632 struct btrfs_caching_control *caching_ctl;
3633 struct btrfs_block_group_cache *cache;
3634
3635 down_write(&fs_info->extent_commit_sem);
3636
3637 list_for_each_entry_safe(caching_ctl, next,
3638 &fs_info->caching_block_groups, list) {
3639 cache = caching_ctl->block_group;
3640 if (block_group_cache_done(cache)) {
3641 cache->last_byte_to_unpin = (u64)-1;
3642 list_del_init(&caching_ctl->list);
3643 put_caching_control(caching_ctl);
3644 } else {
3645 cache->last_byte_to_unpin = caching_ctl->progress;
3646 }
3647 }
3648
3649 if (fs_info->pinned_extents == &fs_info->freed_extents[0])
3650 fs_info->pinned_extents = &fs_info->freed_extents[1];
3651 else
3652 fs_info->pinned_extents = &fs_info->freed_extents[0];
3653
3654 up_write(&fs_info->extent_commit_sem);
3655 return 0;
3656}
3657
/*
 * Return the byte range [start, end] from the pinned state back to the
 * per-block-group and space_info accounting, crossing block group
 * boundaries as needed.  Bytes below a group's last_byte_to_unpin are
 * also handed back to the free space cache (bytes above it are left for
 * the in-flight caching thread -- presumably it will discover them
 * itself; see last_byte_to_unpin handling in btrfs_prepare_extent_commit).
 * Always returns 0.
 */
static int unpin_extent_range(struct btrfs_root *root, u64 start, u64 end)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct btrfs_block_group_cache *cache = NULL;
	u64 len;

	while (start <= end) {
		/* refresh the cached group once we walk past its end */
		if (!cache ||
		    start >= cache->key.objectid + cache->key.offset) {
			if (cache)
				btrfs_put_block_group(cache);
			cache = btrfs_lookup_block_group(fs_info, start);
			BUG_ON(!cache);
		}

		/* clamp len to both the group end and the requested end */
		len = cache->key.objectid + cache->key.offset - start;
		len = min(len, end + 1 - start);

		if (start < cache->last_byte_to_unpin) {
			len = min(len, cache->last_byte_to_unpin - start);
			btrfs_add_free_space(cache, start, len);
		}

		/* lock order: space_info lock first, then the group lock */
		spin_lock(&cache->space_info->lock);
		spin_lock(&cache->lock);
		cache->pinned -= len;
		cache->space_info->bytes_pinned -= len;
		spin_unlock(&cache->lock);
		spin_unlock(&cache->space_info->lock);

		start += len;
	}

	/* drop the ref on the last group we looked at */
	if (cache)
		btrfs_put_block_group(cache);
	return 0;
}
3695
3696int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
Yan Zheng11833d62009-09-11 16:11:19 -04003697 struct btrfs_root *root)
Chris Masona28ec192007-03-06 20:08:01 -05003698{
Yan Zheng11833d62009-09-11 16:11:19 -04003699 struct btrfs_fs_info *fs_info = root->fs_info;
3700 struct extent_io_tree *unpin;
Chris Mason1a5bc162007-10-15 16:15:26 -04003701 u64 start;
3702 u64 end;
Chris Masona28ec192007-03-06 20:08:01 -05003703 int ret;
Chris Masona28ec192007-03-06 20:08:01 -05003704
Yan Zheng11833d62009-09-11 16:11:19 -04003705 if (fs_info->pinned_extents == &fs_info->freed_extents[0])
3706 unpin = &fs_info->freed_extents[1];
3707 else
3708 unpin = &fs_info->freed_extents[0];
3709
Chris Masond3977122009-01-05 21:25:51 -05003710 while (1) {
Chris Mason1a5bc162007-10-15 16:15:26 -04003711 ret = find_first_extent_bit(unpin, 0, &start, &end,
3712 EXTENT_DIRTY);
3713 if (ret)
Chris Masona28ec192007-03-06 20:08:01 -05003714 break;
Liu Hui1f3c79a2009-01-05 15:57:51 -05003715
3716 ret = btrfs_discard_extent(root, start, end + 1 - start);
3717
Chris Mason1a5bc162007-10-15 16:15:26 -04003718 clear_extent_dirty(unpin, start, end, GFP_NOFS);
Yan Zheng11833d62009-09-11 16:11:19 -04003719 unpin_extent_range(root, start, end);
Chris Masonb9473432009-03-13 11:00:37 -04003720 cond_resched();
Chris Masona28ec192007-03-06 20:08:01 -05003721 }
Josef Bacik817d52f2009-07-13 21:29:25 -04003722
Liu Hui1f3c79a2009-01-05 15:57:51 -05003723 return ret;
Chris Masona28ec192007-03-06 20:08:01 -05003724}
3725
Zheng Yan31840ae2008-09-23 13:14:14 -04003726static int pin_down_bytes(struct btrfs_trans_handle *trans,
3727 struct btrfs_root *root,
Chris Masonb9473432009-03-13 11:00:37 -04003728 struct btrfs_path *path,
Yan Zheng11833d62009-09-11 16:11:19 -04003729 u64 bytenr, u64 num_bytes,
3730 int is_data, int reserved,
Chris Masonb9473432009-03-13 11:00:37 -04003731 struct extent_buffer **must_clean)
Chris Masone20d96d2007-03-22 12:13:20 -04003732{
Chris Mason1a5bc162007-10-15 16:15:26 -04003733 int err = 0;
Zheng Yan31840ae2008-09-23 13:14:14 -04003734 struct extent_buffer *buf;
Chris Mason78fae272007-03-25 11:35:08 -04003735
Zheng Yan31840ae2008-09-23 13:14:14 -04003736 if (is_data)
3737 goto pinit;
Chris Mason4bef0842008-09-08 11:18:08 -04003738
Chris Mason444528b2009-10-14 09:38:28 -04003739 /*
3740 * discard is sloooow, and so triggering discards on
3741 * individual btree blocks isn't a good plan. Just
3742 * pin everything in discard mode.
3743 */
3744 if (btrfs_test_opt(root, DISCARD))
3745 goto pinit;
3746
Zheng Yan31840ae2008-09-23 13:14:14 -04003747 buf = btrfs_find_tree_block(root, bytenr, num_bytes);
3748 if (!buf)
3749 goto pinit;
Chris Mason4bef0842008-09-08 11:18:08 -04003750
Zheng Yan31840ae2008-09-23 13:14:14 -04003751 /* we can reuse a block if it hasn't been written
3752 * and it is from this transaction. We can't
3753 * reuse anything from the tree log root because
3754 * it has tiny sub-transactions.
3755 */
3756 if (btrfs_buffer_uptodate(buf, 0) &&
3757 btrfs_try_tree_lock(buf)) {
3758 u64 header_owner = btrfs_header_owner(buf);
3759 u64 header_transid = btrfs_header_generation(buf);
3760 if (header_owner != BTRFS_TREE_LOG_OBJECTID &&
3761 header_transid == trans->transid &&
3762 !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) {
Chris Masonb9473432009-03-13 11:00:37 -04003763 *must_clean = buf;
Zheng Yan31840ae2008-09-23 13:14:14 -04003764 return 1;
Chris Mason8ef97622007-03-26 10:15:30 -04003765 }
Zheng Yan31840ae2008-09-23 13:14:14 -04003766 btrfs_tree_unlock(buf);
Chris Masonf4b9aa82007-03-27 11:05:53 -04003767 }
Zheng Yan31840ae2008-09-23 13:14:14 -04003768 free_extent_buffer(buf);
3769pinit:
Yan Zheng11833d62009-09-11 16:11:19 -04003770 if (path)
3771 btrfs_set_path_blocking(path);
Chris Masonb9473432009-03-13 11:00:37 -04003772 /* unlocks the pinned mutex */
Yan Zheng11833d62009-09-11 16:11:19 -04003773 btrfs_pin_extent(root, bytenr, num_bytes, reserved);
Zheng Yan31840ae2008-09-23 13:14:14 -04003774
Chris Masonbe744172007-05-06 10:15:01 -04003775 BUG_ON(err < 0);
Chris Masone20d96d2007-03-22 12:13:20 -04003776 return 0;
3777}
3778
/*
 * Drop @refs_to_drop references held by (@root_objectid, @owner_objectid,
 * @owner_offset, @parent) on the extent at [@bytenr, @bytenr + @num_bytes).
 * Removes the matching backref item; when the overall refcount reaches
 * zero, deletes the extent item itself, pins or reuses the freed bytes
 * (via pin_down_bytes) and updates the block group accounting.
 *
 * @extent_op, when set, is applied to the extent item if references
 * remain.  Tree blocks (owner below BTRFS_FIRST_FREE_OBJECTID) may only
 * drop one reference at a time.  Most failures are fatal (BUG_ON);
 * returns -ENOMEM if no path can be allocated, otherwise the last ret.
 */
static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
				struct btrfs_root *root,
				u64 bytenr, u64 num_bytes, u64 parent,
				u64 root_objectid, u64 owner_objectid,
				u64 owner_offset, int refs_to_drop,
				struct btrfs_delayed_extent_op *extent_op)
{
	struct btrfs_key key;
	struct btrfs_path *path;
	struct btrfs_fs_info *info = root->fs_info;
	struct btrfs_root *extent_root = info->extent_root;
	struct extent_buffer *leaf;
	struct btrfs_extent_item *ei;
	struct btrfs_extent_inline_ref *iref;
	int ret;
	int is_data;
	int extent_slot = 0;
	int found_extent = 0;
	int num_to_del = 1;
	u32 item_size;
	u64 refs;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	path->reada = 1;
	path->leave_spinning = 1;

	is_data = owner_objectid >= BTRFS_FIRST_FREE_OBJECTID;
	BUG_ON(!is_data && refs_to_drop != 1);

	ret = lookup_extent_backref(trans, extent_root, path, &iref,
				    bytenr, num_bytes, parent,
				    root_objectid, owner_objectid,
				    owner_offset);
	if (ret == 0) {
		/*
		 * the backref was found; scan a few slots to the left for
		 * the extent item itself so both can be deleted together
		 */
		extent_slot = path->slots[0];
		while (extent_slot >= 0) {
			btrfs_item_key_to_cpu(path->nodes[0], &key,
					      extent_slot);
			if (key.objectid != bytenr)
				break;
			if (key.type == BTRFS_EXTENT_ITEM_KEY &&
			    key.offset == num_bytes) {
				found_extent = 1;
				break;
			}
			/* give up after looking 5 slots back */
			if (path->slots[0] - extent_slot > 5)
				break;
			extent_slot--;
		}
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
		item_size = btrfs_item_size_nr(path->nodes[0], extent_slot);
		if (found_extent && item_size < sizeof(*ei))
			found_extent = 0;
#endif
		if (!found_extent) {
			/* an inline ref would live in the extent item */
			BUG_ON(iref);
			ret = remove_extent_backref(trans, extent_root, path,
						    NULL, refs_to_drop,
						    is_data);
			BUG_ON(ret);
			btrfs_release_path(extent_root, path);
			path->leave_spinning = 1;

			/* look up the extent item directly */
			key.objectid = bytenr;
			key.type = BTRFS_EXTENT_ITEM_KEY;
			key.offset = num_bytes;

			ret = btrfs_search_slot(trans, extent_root,
						&key, path, -1, 1);
			if (ret) {
				printk(KERN_ERR "umm, got %d back from search"
				       ", was looking for %llu\n", ret,
				       (unsigned long long)bytenr);
				btrfs_print_leaf(extent_root, path->nodes[0]);
			}
			BUG_ON(ret);
			extent_slot = path->slots[0];
		}
	} else {
		/* no backref matched: corruption, dump state and continue */
		btrfs_print_leaf(extent_root, path->nodes[0]);
		WARN_ON(1);
		printk(KERN_ERR "btrfs unable to find ref byte nr %llu "
		       "parent %llu root %llu owner %llu offset %llu\n",
		       (unsigned long long)bytenr,
		       (unsigned long long)parent,
		       (unsigned long long)root_objectid,
		       (unsigned long long)owner_objectid,
		       (unsigned long long)owner_offset);
	}

	leaf = path->nodes[0];
	item_size = btrfs_item_size_nr(leaf, extent_slot);
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
	/* convert a v0 extent item in place, then re-find it */
	if (item_size < sizeof(*ei)) {
		BUG_ON(found_extent || extent_slot != path->slots[0]);
		ret = convert_extent_item_v0(trans, extent_root, path,
					     owner_objectid, 0);
		BUG_ON(ret < 0);

		btrfs_release_path(extent_root, path);
		path->leave_spinning = 1;

		key.objectid = bytenr;
		key.type = BTRFS_EXTENT_ITEM_KEY;
		key.offset = num_bytes;

		ret = btrfs_search_slot(trans, extent_root, &key, path,
					-1, 1);
		if (ret) {
			printk(KERN_ERR "umm, got %d back from search"
			       ", was looking for %llu\n", ret,
			       (unsigned long long)bytenr);
			btrfs_print_leaf(extent_root, path->nodes[0]);
		}
		BUG_ON(ret);
		extent_slot = path->slots[0];
		leaf = path->nodes[0];
		item_size = btrfs_item_size_nr(leaf, extent_slot);
	}
#endif
	BUG_ON(item_size < sizeof(*ei));
	ei = btrfs_item_ptr(leaf, extent_slot,
			    struct btrfs_extent_item);
	if (owner_objectid < BTRFS_FIRST_FREE_OBJECTID) {
		/* tree blocks carry a btrfs_tree_block_info after the item */
		struct btrfs_tree_block_info *bi;
		BUG_ON(item_size < sizeof(*ei) + sizeof(*bi));
		bi = (struct btrfs_tree_block_info *)(ei + 1);
		WARN_ON(owner_objectid != btrfs_tree_block_level(leaf, bi));
	}

	refs = btrfs_extent_refs(leaf, ei);
	BUG_ON(refs < refs_to_drop);
	refs -= refs_to_drop;

	if (refs > 0) {
		/* other references remain: just update the refcount */
		if (extent_op)
			__run_delayed_extent_op(extent_op, leaf, ei);
		/*
		 * In the case of inline back ref, reference count will
		 * be updated by remove_extent_backref
		 */
		if (iref) {
			BUG_ON(!found_extent);
		} else {
			btrfs_set_extent_refs(leaf, ei, refs);
			btrfs_mark_buffer_dirty(leaf);
		}
		if (found_extent) {
			ret = remove_extent_backref(trans, extent_root, path,
						    iref, refs_to_drop,
						    is_data);
			BUG_ON(ret);
		}
	} else {
		/* last reference gone: delete the extent item entirely */
		int mark_free = 0;
		struct extent_buffer *must_clean = NULL;

		if (found_extent) {
			BUG_ON(is_data && refs_to_drop !=
			       extent_data_ref_count(root, path, iref));
			if (iref) {
				/* inline ref lives inside the extent item */
				BUG_ON(path->slots[0] != extent_slot);
			} else {
				/* delete backref and extent item together */
				BUG_ON(path->slots[0] != extent_slot + 1);
				path->slots[0] = extent_slot;
				num_to_del = 2;
			}
		}

		/* returns 1 when the block can be reused via must_clean */
		ret = pin_down_bytes(trans, root, path, bytenr,
				     num_bytes, is_data, 0, &must_clean);
		if (ret > 0)
			mark_free = 1;
		BUG_ON(ret < 0);
		/*
		 * it is going to be very rare for someone to be waiting
		 * on the block we're freeing.  del_items might need to
		 * schedule, so rather than get fancy, just force it
		 * to blocking here
		 */
		if (must_clean)
			btrfs_set_lock_blocking(must_clean);

		ret = btrfs_del_items(trans, extent_root, path, path->slots[0],
				      num_to_del);
		BUG_ON(ret);
		btrfs_release_path(extent_root, path);

		if (must_clean) {
			clean_tree_block(NULL, root, must_clean);
			btrfs_tree_unlock(must_clean);
			free_extent_buffer(must_clean);
		}

		if (is_data) {
			/* data extents also drop their checksum items */
			ret = btrfs_del_csums(trans, root, bytenr, num_bytes);
			BUG_ON(ret);
		} else {
			/* drop stale btree pages cached for this range */
			invalidate_mapping_pages(info->btree_inode->i_mapping,
			     bytenr >> PAGE_CACHE_SHIFT,
			     (bytenr + num_bytes - 1) >> PAGE_CACHE_SHIFT);
		}

		ret = update_block_group(trans, root, bytenr, num_bytes, 0,
					 mark_free);
		BUG_ON(ret);
	}
	btrfs_free_path(path);
	return ret;
}
3992
/*
 * when we free an extent, it is possible (and likely) that we free the last
 * delayed ref for that extent as well.  This searches the delayed ref tree for
 * a given extent, and if there are no other delayed refs to be processed, it
 * removes it from the tree.
 *
 * Always returns 0: not being able to clean up here (ref still in use,
 * head locked by someone else) just means the delayed ref machinery will
 * process the head normally later.
 */
static noinline int check_ref_cleanup(struct btrfs_trans_handle *trans,
				      struct btrfs_root *root, u64 bytenr)
{
	struct btrfs_delayed_ref_head *head;
	struct btrfs_delayed_ref_root *delayed_refs;
	struct btrfs_delayed_ref_node *ref;
	struct rb_node *node;
	int ret;

	delayed_refs = &trans->transaction->delayed_refs;
	spin_lock(&delayed_refs->lock);
	head = btrfs_find_delayed_ref_head(trans, bytenr);
	if (!head)
		goto out;

	/* refs for the same bytenr sort directly before their head */
	node = rb_prev(&head->node.rb_node);
	if (!node)
		goto out;

	ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node);

	/* there are still entries for this ref, we can't drop it */
	if (ref->bytenr == bytenr)
		goto out;

	if (head->extent_op) {
		/* a pending extent_op only matters for an existing item */
		if (!head->must_insert_reserved)
			goto out;
		kfree(head->extent_op);
		head->extent_op = NULL;
	}

	/*
	 * waiting for the lock here would deadlock.  If someone else has it
	 * locked they are already in the process of dropping it anyway
	 */
	if (!mutex_trylock(&head->mutex))
		goto out;

	/*
	 * at this point we have a head with no other entries.  Go
	 * ahead and process it.
	 */
	head->node.in_tree = 0;
	rb_erase(&head->node.rb_node, &delayed_refs->root);

	delayed_refs->num_entries--;

	/*
	 * we don't take a ref on the node because we're removing it from the
	 * tree, so we just steal the ref the tree was holding.
	 */
	delayed_refs->num_heads--;
	if (list_empty(&head->cluster))
		delayed_refs->num_heads_ready--;

	list_del_init(&head->cluster);
	spin_unlock(&delayed_refs->lock);

	/* run the head immediately instead of waiting for a cluster */
	ret = run_one_delayed_ref(trans, root->fs_info->tree_root,
				  &head->node, head->extent_op,
				  head->must_insert_reserved);
	BUG_ON(ret);
	btrfs_put_delayed_ref(&head->node);
	return 0;
out:
	spin_unlock(&delayed_refs->lock);
	return 0;
}
4068
Chris Mason925baed2008-06-25 16:01:30 -04004069int btrfs_free_extent(struct btrfs_trans_handle *trans,
Zheng Yan31840ae2008-09-23 13:14:14 -04004070 struct btrfs_root *root,
4071 u64 bytenr, u64 num_bytes, u64 parent,
Yan Zheng5d4f98a2009-06-10 10:45:14 -04004072 u64 root_objectid, u64 owner, u64 offset)
Chris Mason925baed2008-06-25 16:01:30 -04004073{
4074 int ret;
4075
Chris Mason56bec292009-03-13 10:10:06 -04004076 /*
4077 * tree log blocks never actually go into the extent allocation
4078 * tree, just update pinning info and exit early.
Chris Mason56bec292009-03-13 10:10:06 -04004079 */
Yan Zheng5d4f98a2009-06-10 10:45:14 -04004080 if (root_objectid == BTRFS_TREE_LOG_OBJECTID) {
4081 WARN_ON(owner >= BTRFS_FIRST_FREE_OBJECTID);
Chris Masonb9473432009-03-13 11:00:37 -04004082 /* unlocks the pinned mutex */
Yan Zheng11833d62009-09-11 16:11:19 -04004083 btrfs_pin_extent(root, bytenr, num_bytes, 1);
Chris Mason56bec292009-03-13 10:10:06 -04004084 ret = 0;
Yan Zheng5d4f98a2009-06-10 10:45:14 -04004085 } else if (owner < BTRFS_FIRST_FREE_OBJECTID) {
4086 ret = btrfs_add_delayed_tree_ref(trans, bytenr, num_bytes,
4087 parent, root_objectid, (int)owner,
4088 BTRFS_DROP_DELAYED_REF, NULL);
Chris Mason1887be62009-03-13 10:11:24 -04004089 BUG_ON(ret);
4090 ret = check_ref_cleanup(trans, root, bytenr);
4091 BUG_ON(ret);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04004092 } else {
4093 ret = btrfs_add_delayed_data_ref(trans, bytenr, num_bytes,
4094 parent, root_objectid, owner,
4095 offset, BTRFS_DROP_DELAYED_REF, NULL);
4096 BUG_ON(ret);
Chris Mason56bec292009-03-13 10:10:06 -04004097 }
Chris Mason925baed2008-06-25 16:01:30 -04004098 return ret;
4099}
4100
Yan, Zheng86b9f2e2009-11-12 09:36:50 +00004101int btrfs_free_tree_block(struct btrfs_trans_handle *trans,
4102 struct btrfs_root *root,
4103 u64 bytenr, u32 blocksize,
4104 u64 parent, u64 root_objectid, int level)
4105{
4106 u64 used;
4107 spin_lock(&root->node_lock);
4108 used = btrfs_root_used(&root->root_item) - blocksize;
4109 btrfs_set_root_used(&root->root_item, used);
4110 spin_unlock(&root->node_lock);
4111
4112 return btrfs_free_extent(trans, root, bytenr, blocksize,
4113 parent, root_objectid, level, 0);
4114}
4115
Chris Mason87ee04e2007-11-30 11:30:34 -05004116static u64 stripe_align(struct btrfs_root *root, u64 val)
4117{
4118 u64 mask = ((u64)root->stripesize - 1);
4119 u64 ret = (val + mask) & ~mask;
4120 return ret;
4121}
4122
Chris Masonfec577f2007-02-26 10:40:21 -05004123/*
Josef Bacik817d52f2009-07-13 21:29:25 -04004124 * when we wait for progress in the block group caching, its because
4125 * our allocation attempt failed at least once. So, we must sleep
4126 * and let some progress happen before we try again.
4127 *
4128 * This function will sleep at least once waiting for new free space to
4129 * show up, and then it will check the block group free space numbers
4130 * for our min num_bytes. Another option is to have it go ahead
4131 * and look in the rbtree for a free extent of a given size, but this
4132 * is a good start.
4133 */
4134static noinline int
4135wait_block_group_cache_progress(struct btrfs_block_group_cache *cache,
4136 u64 num_bytes)
4137{
Yan Zheng11833d62009-09-11 16:11:19 -04004138 struct btrfs_caching_control *caching_ctl;
Josef Bacik817d52f2009-07-13 21:29:25 -04004139 DEFINE_WAIT(wait);
4140
Yan Zheng11833d62009-09-11 16:11:19 -04004141 caching_ctl = get_caching_control(cache);
4142 if (!caching_ctl)
Josef Bacik817d52f2009-07-13 21:29:25 -04004143 return 0;
Josef Bacik817d52f2009-07-13 21:29:25 -04004144
Yan Zheng11833d62009-09-11 16:11:19 -04004145 wait_event(caching_ctl->wait, block_group_cache_done(cache) ||
Josef Bacik817d52f2009-07-13 21:29:25 -04004146 (cache->free_space >= num_bytes));
Yan Zheng11833d62009-09-11 16:11:19 -04004147
4148 put_caching_control(caching_ctl);
4149 return 0;
4150}
4151
4152static noinline int
4153wait_block_group_cache_done(struct btrfs_block_group_cache *cache)
4154{
4155 struct btrfs_caching_control *caching_ctl;
4156 DEFINE_WAIT(wait);
4157
4158 caching_ctl = get_caching_control(cache);
4159 if (!caching_ctl)
4160 return 0;
4161
4162 wait_event(caching_ctl->wait, block_group_cache_done(cache));
4163
4164 put_caching_control(caching_ctl);
Josef Bacik817d52f2009-07-13 21:29:25 -04004165 return 0;
4166}
4167
Yan, Zhengb742bb822010-05-16 10:46:24 -04004168static int get_block_group_index(struct btrfs_block_group_cache *cache)
4169{
4170 int index;
4171 if (cache->flags & BTRFS_BLOCK_GROUP_RAID10)
4172 index = 0;
4173 else if (cache->flags & BTRFS_BLOCK_GROUP_RAID1)
4174 index = 1;
4175 else if (cache->flags & BTRFS_BLOCK_GROUP_DUP)
4176 index = 2;
4177 else if (cache->flags & BTRFS_BLOCK_GROUP_RAID0)
4178 index = 3;
4179 else
4180 index = 4;
4181 return index;
4182}
4183
Josef Bacik817d52f2009-07-13 21:29:25 -04004184enum btrfs_loop_type {
Josef Bacikccf0e722009-11-10 21:23:48 -05004185 LOOP_FIND_IDEAL = 0,
Josef Bacik817d52f2009-07-13 21:29:25 -04004186 LOOP_CACHING_NOWAIT = 1,
4187 LOOP_CACHING_WAIT = 2,
4188 LOOP_ALLOC_CHUNK = 3,
4189 LOOP_NO_EMPTY_SIZE = 4,
4190};
4191
4192/*
Chris Masonfec577f2007-02-26 10:40:21 -05004193 * walks the btree of allocated extents and find a hole of a given size.
4194 * The key ins is changed to record the hole:
4195 * ins->objectid == block start
Chris Mason62e27492007-03-15 12:56:47 -04004196 * ins->flags = BTRFS_EXTENT_ITEM_KEY
Chris Masonfec577f2007-02-26 10:40:21 -05004197 * ins->offset == number of blocks
4198 * Any available blocks before search_start are skipped.
4199 */
Chris Masond3977122009-01-05 21:25:51 -05004200static noinline int find_free_extent(struct btrfs_trans_handle *trans,
Chris Mason98ed5172008-01-03 10:01:48 -05004201 struct btrfs_root *orig_root,
4202 u64 num_bytes, u64 empty_size,
4203 u64 search_start, u64 search_end,
4204 u64 hint_byte, struct btrfs_key *ins,
4205 u64 exclude_start, u64 exclude_nr,
4206 int data)
Chris Masonfec577f2007-02-26 10:40:21 -05004207{
Josef Bacik80eb2342008-10-29 14:49:05 -04004208 int ret = 0;
Chris Masond3977122009-01-05 21:25:51 -05004209 struct btrfs_root *root = orig_root->fs_info->extent_root;
Chris Masonfa9c0d792009-04-03 09:47:43 -04004210 struct btrfs_free_cluster *last_ptr = NULL;
Josef Bacik80eb2342008-10-29 14:49:05 -04004211 struct btrfs_block_group_cache *block_group = NULL;
Chris Mason239b14b2008-03-24 15:02:07 -04004212 int empty_cluster = 2 * 1024 * 1024;
Chris Mason0ef3e662008-05-24 14:04:53 -04004213 int allowed_chunk_alloc = 0;
Josef Bacikccf0e722009-11-10 21:23:48 -05004214 int done_chunk_alloc = 0;
Josef Bacik80eb2342008-10-29 14:49:05 -04004215 struct btrfs_space_info *space_info;
Chris Masonfa9c0d792009-04-03 09:47:43 -04004216 int last_ptr_loop = 0;
Yan, Zhengb742bb822010-05-16 10:46:24 -04004217 int index = 0;
Chris Masonfa9c0d792009-04-03 09:47:43 -04004218 int loop = 0;
Josef Bacik817d52f2009-07-13 21:29:25 -04004219 bool found_uncached_bg = false;
Josef Bacik0a243252009-09-11 16:11:20 -04004220 bool failed_cluster_refill = false;
Josef Bacik1cdda9b2009-10-06 10:04:28 -04004221 bool failed_alloc = false;
Josef Bacikccf0e722009-11-10 21:23:48 -05004222 u64 ideal_cache_percent = 0;
4223 u64 ideal_cache_offset = 0;
Chris Masonfec577f2007-02-26 10:40:21 -05004224
Chris Masondb945352007-10-15 16:15:53 -04004225 WARN_ON(num_bytes < root->sectorsize);
Chris Masonb1a4d962007-04-04 15:27:52 -04004226 btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY);
Josef Bacik80eb2342008-10-29 14:49:05 -04004227 ins->objectid = 0;
4228 ins->offset = 0;
Chris Masonb1a4d962007-04-04 15:27:52 -04004229
Josef Bacik2552d172009-04-03 10:14:19 -04004230 space_info = __find_space_info(root->fs_info, data);
Josef Bacik1b1d1f62010-03-19 20:49:55 +00004231 if (!space_info) {
4232 printk(KERN_ERR "No space info for %d\n", data);
4233 return -ENOSPC;
4234 }
Josef Bacik2552d172009-04-03 10:14:19 -04004235
Chris Mason0ef3e662008-05-24 14:04:53 -04004236 if (orig_root->ref_cows || empty_size)
4237 allowed_chunk_alloc = 1;
4238
Chris Mason239b14b2008-03-24 15:02:07 -04004239 if (data & BTRFS_BLOCK_GROUP_METADATA) {
Chris Masonfa9c0d792009-04-03 09:47:43 -04004240 last_ptr = &root->fs_info->meta_alloc_cluster;
Chris Mason536ac8a2009-02-12 09:41:38 -05004241 if (!btrfs_test_opt(root, SSD))
4242 empty_cluster = 64 * 1024;
Chris Mason239b14b2008-03-24 15:02:07 -04004243 }
4244
Chris Masonfa9c0d792009-04-03 09:47:43 -04004245 if ((data & BTRFS_BLOCK_GROUP_DATA) && btrfs_test_opt(root, SSD)) {
4246 last_ptr = &root->fs_info->data_alloc_cluster;
4247 }
Josef Bacik0f9dd462008-09-23 13:14:11 -04004248
Chris Mason239b14b2008-03-24 15:02:07 -04004249 if (last_ptr) {
Chris Masonfa9c0d792009-04-03 09:47:43 -04004250 spin_lock(&last_ptr->lock);
4251 if (last_ptr->block_group)
4252 hint_byte = last_ptr->window_start;
4253 spin_unlock(&last_ptr->lock);
Chris Mason239b14b2008-03-24 15:02:07 -04004254 }
Chris Masonfa9c0d792009-04-03 09:47:43 -04004255
Chris Masona061fc82008-05-07 11:43:44 -04004256 search_start = max(search_start, first_logical_byte(root, 0));
Chris Mason239b14b2008-03-24 15:02:07 -04004257 search_start = max(search_start, hint_byte);
Chris Mason0b86a832008-03-24 15:01:56 -04004258
Josef Bacik817d52f2009-07-13 21:29:25 -04004259 if (!last_ptr)
Chris Masonfa9c0d792009-04-03 09:47:43 -04004260 empty_cluster = 0;
Chris Masonfa9c0d792009-04-03 09:47:43 -04004261
Josef Bacik2552d172009-04-03 10:14:19 -04004262 if (search_start == hint_byte) {
Josef Bacikccf0e722009-11-10 21:23:48 -05004263ideal_cache:
Josef Bacik2552d172009-04-03 10:14:19 -04004264 block_group = btrfs_lookup_block_group(root->fs_info,
4265 search_start);
Josef Bacik817d52f2009-07-13 21:29:25 -04004266 /*
4267 * we don't want to use the block group if it doesn't match our
4268 * allocation bits, or if its not cached.
Josef Bacikccf0e722009-11-10 21:23:48 -05004269 *
4270 * However if we are re-searching with an ideal block group
4271 * picked out then we don't care that the block group is cached.
Josef Bacik817d52f2009-07-13 21:29:25 -04004272 */
4273 if (block_group && block_group_bits(block_group, data) &&
Josef Bacikccf0e722009-11-10 21:23:48 -05004274 (block_group->cached != BTRFS_CACHE_NO ||
4275 search_start == ideal_cache_offset)) {
Josef Bacik2552d172009-04-03 10:14:19 -04004276 down_read(&space_info->groups_sem);
Chris Mason44fb5512009-06-04 15:34:51 -04004277 if (list_empty(&block_group->list) ||
4278 block_group->ro) {
4279 /*
4280 * someone is removing this block group,
4281 * we can't jump into the have_block_group
4282 * target because our list pointers are not
4283 * valid
4284 */
4285 btrfs_put_block_group(block_group);
4286 up_read(&space_info->groups_sem);
Josef Bacikccf0e722009-11-10 21:23:48 -05004287 } else {
Yan, Zhengb742bb822010-05-16 10:46:24 -04004288 index = get_block_group_index(block_group);
Chris Mason44fb5512009-06-04 15:34:51 -04004289 goto have_block_group;
Josef Bacikccf0e722009-11-10 21:23:48 -05004290 }
Josef Bacik2552d172009-04-03 10:14:19 -04004291 } else if (block_group) {
Chris Masonfa9c0d792009-04-03 09:47:43 -04004292 btrfs_put_block_group(block_group);
Josef Bacik2552d172009-04-03 10:14:19 -04004293 }
Chris Mason42e70e72008-11-07 18:17:11 -05004294 }
Josef Bacik2552d172009-04-03 10:14:19 -04004295search:
Josef Bacik80eb2342008-10-29 14:49:05 -04004296 down_read(&space_info->groups_sem);
Yan, Zhengb742bb822010-05-16 10:46:24 -04004297 list_for_each_entry(block_group, &space_info->block_groups[index],
4298 list) {
Josef Bacik6226cb02009-04-03 10:14:18 -04004299 u64 offset;
Josef Bacik817d52f2009-07-13 21:29:25 -04004300 int cached;
Chris Mason8a1413a22008-11-10 16:13:54 -05004301
Josef Bacik11dfe352009-11-13 20:12:59 +00004302 btrfs_get_block_group(block_group);
Josef Bacik2552d172009-04-03 10:14:19 -04004303 search_start = block_group->key.objectid;
Chris Mason42e70e72008-11-07 18:17:11 -05004304
Josef Bacik2552d172009-04-03 10:14:19 -04004305have_block_group:
Josef Bacik817d52f2009-07-13 21:29:25 -04004306 if (unlikely(block_group->cached == BTRFS_CACHE_NO)) {
Josef Bacikccf0e722009-11-10 21:23:48 -05004307 u64 free_percent;
4308
4309 free_percent = btrfs_block_group_used(&block_group->item);
4310 free_percent *= 100;
4311 free_percent = div64_u64(free_percent,
4312 block_group->key.offset);
4313 free_percent = 100 - free_percent;
4314 if (free_percent > ideal_cache_percent &&
4315 likely(!block_group->ro)) {
4316 ideal_cache_offset = block_group->key.objectid;
4317 ideal_cache_percent = free_percent;
4318 }
4319
Josef Bacik817d52f2009-07-13 21:29:25 -04004320 /*
Josef Bacikccf0e722009-11-10 21:23:48 -05004321 * We only want to start kthread caching if we are at
4322 * the point where we will wait for caching to make
4323 * progress, or if our ideal search is over and we've
4324 * found somebody to start caching.
Josef Bacik817d52f2009-07-13 21:29:25 -04004325 */
4326 if (loop > LOOP_CACHING_NOWAIT ||
Josef Bacikccf0e722009-11-10 21:23:48 -05004327 (loop > LOOP_FIND_IDEAL &&
4328 atomic_read(&space_info->caching_threads) < 2)) {
Josef Bacik817d52f2009-07-13 21:29:25 -04004329 ret = cache_block_group(block_group);
4330 BUG_ON(ret);
Josef Bacik2552d172009-04-03 10:14:19 -04004331 }
Josef Bacikccf0e722009-11-10 21:23:48 -05004332 found_uncached_bg = true;
4333
4334 /*
4335 * If loop is set for cached only, try the next block
4336 * group.
4337 */
4338 if (loop == LOOP_FIND_IDEAL)
4339 goto loop;
Josef Bacikea6a4782008-11-20 12:16:16 -05004340 }
4341
Josef Bacik817d52f2009-07-13 21:29:25 -04004342 cached = block_group_cache_done(block_group);
Josef Bacikccf0e722009-11-10 21:23:48 -05004343 if (unlikely(!cached))
Josef Bacik817d52f2009-07-13 21:29:25 -04004344 found_uncached_bg = true;
4345
Josef Bacikea6a4782008-11-20 12:16:16 -05004346 if (unlikely(block_group->ro))
Josef Bacik2552d172009-04-03 10:14:19 -04004347 goto loop;
Josef Bacik0f9dd462008-09-23 13:14:11 -04004348
Josef Bacik0a243252009-09-11 16:11:20 -04004349 /*
4350 * Ok we want to try and use the cluster allocator, so lets look
4351 * there, unless we are on LOOP_NO_EMPTY_SIZE, since we will
4352 * have tried the cluster allocator plenty of times at this
4353 * point and not have found anything, so we are likely way too
4354 * fragmented for the clustering stuff to find anything, so lets
4355 * just skip it and let the allocator find whatever block it can
4356 * find
4357 */
4358 if (last_ptr && loop < LOOP_NO_EMPTY_SIZE) {
Chris Masonfa9c0d792009-04-03 09:47:43 -04004359 /*
4360 * the refill lock keeps out other
4361 * people trying to start a new cluster
4362 */
4363 spin_lock(&last_ptr->refill_lock);
Chris Mason44fb5512009-06-04 15:34:51 -04004364 if (last_ptr->block_group &&
4365 (last_ptr->block_group->ro ||
4366 !block_group_bits(last_ptr->block_group, data))) {
4367 offset = 0;
4368 goto refill_cluster;
4369 }
4370
Chris Masonfa9c0d792009-04-03 09:47:43 -04004371 offset = btrfs_alloc_from_cluster(block_group, last_ptr,
4372 num_bytes, search_start);
4373 if (offset) {
4374 /* we have a block, we're done */
4375 spin_unlock(&last_ptr->refill_lock);
4376 goto checks;
4377 }
4378
4379 spin_lock(&last_ptr->lock);
4380 /*
4381 * whoops, this cluster doesn't actually point to
4382 * this block group. Get a ref on the block
4383 * group is does point to and try again
4384 */
4385 if (!last_ptr_loop && last_ptr->block_group &&
4386 last_ptr->block_group != block_group) {
4387
4388 btrfs_put_block_group(block_group);
4389 block_group = last_ptr->block_group;
Josef Bacik11dfe352009-11-13 20:12:59 +00004390 btrfs_get_block_group(block_group);
Chris Masonfa9c0d792009-04-03 09:47:43 -04004391 spin_unlock(&last_ptr->lock);
4392 spin_unlock(&last_ptr->refill_lock);
4393
4394 last_ptr_loop = 1;
4395 search_start = block_group->key.objectid;
Chris Mason44fb5512009-06-04 15:34:51 -04004396 /*
4397 * we know this block group is properly
4398 * in the list because
4399 * btrfs_remove_block_group, drops the
4400 * cluster before it removes the block
4401 * group from the list
4402 */
Chris Masonfa9c0d792009-04-03 09:47:43 -04004403 goto have_block_group;
4404 }
4405 spin_unlock(&last_ptr->lock);
Chris Mason44fb5512009-06-04 15:34:51 -04004406refill_cluster:
Chris Masonfa9c0d792009-04-03 09:47:43 -04004407 /*
4408 * this cluster didn't work out, free it and
4409 * start over
4410 */
4411 btrfs_return_cluster_to_free_space(NULL, last_ptr);
4412
4413 last_ptr_loop = 0;
4414
4415 /* allocate a cluster in this block group */
Chris Mason451d7582009-06-09 20:28:34 -04004416 ret = btrfs_find_space_cluster(trans, root,
Chris Masonfa9c0d792009-04-03 09:47:43 -04004417 block_group, last_ptr,
4418 offset, num_bytes,
4419 empty_cluster + empty_size);
4420 if (ret == 0) {
4421 /*
4422 * now pull our allocation out of this
4423 * cluster
4424 */
4425 offset = btrfs_alloc_from_cluster(block_group,
4426 last_ptr, num_bytes,
4427 search_start);
4428 if (offset) {
4429 /* we found one, proceed */
4430 spin_unlock(&last_ptr->refill_lock);
4431 goto checks;
4432 }
Josef Bacik0a243252009-09-11 16:11:20 -04004433 } else if (!cached && loop > LOOP_CACHING_NOWAIT
4434 && !failed_cluster_refill) {
Josef Bacik817d52f2009-07-13 21:29:25 -04004435 spin_unlock(&last_ptr->refill_lock);
4436
Josef Bacik0a243252009-09-11 16:11:20 -04004437 failed_cluster_refill = true;
Josef Bacik817d52f2009-07-13 21:29:25 -04004438 wait_block_group_cache_progress(block_group,
4439 num_bytes + empty_cluster + empty_size);
4440 goto have_block_group;
Chris Masonfa9c0d792009-04-03 09:47:43 -04004441 }
Josef Bacik817d52f2009-07-13 21:29:25 -04004442
Chris Masonfa9c0d792009-04-03 09:47:43 -04004443 /*
4444 * at this point we either didn't find a cluster
4445 * or we weren't able to allocate a block from our
4446 * cluster. Free the cluster we've been trying
4447 * to use, and go to the next block group
4448 */
Josef Bacik0a243252009-09-11 16:11:20 -04004449 btrfs_return_cluster_to_free_space(NULL, last_ptr);
Chris Masonfa9c0d792009-04-03 09:47:43 -04004450 spin_unlock(&last_ptr->refill_lock);
Josef Bacik0a243252009-09-11 16:11:20 -04004451 goto loop;
Chris Masonfa9c0d792009-04-03 09:47:43 -04004452 }
4453
Josef Bacik6226cb02009-04-03 10:14:18 -04004454 offset = btrfs_find_space_for_alloc(block_group, search_start,
4455 num_bytes, empty_size);
Josef Bacik1cdda9b2009-10-06 10:04:28 -04004456 /*
4457 * If we didn't find a chunk, and we haven't failed on this
4458 * block group before, and this block group is in the middle of
4459 * caching and we are ok with waiting, then go ahead and wait
4460 * for progress to be made, and set failed_alloc to true.
4461 *
4462 * If failed_alloc is true then we've already waited on this
4463 * block group once and should move on to the next block group.
4464 */
4465 if (!offset && !failed_alloc && !cached &&
4466 loop > LOOP_CACHING_NOWAIT) {
Josef Bacik817d52f2009-07-13 21:29:25 -04004467 wait_block_group_cache_progress(block_group,
Josef Bacik1cdda9b2009-10-06 10:04:28 -04004468 num_bytes + empty_size);
4469 failed_alloc = true;
Josef Bacik817d52f2009-07-13 21:29:25 -04004470 goto have_block_group;
Josef Bacik1cdda9b2009-10-06 10:04:28 -04004471 } else if (!offset) {
4472 goto loop;
Josef Bacik817d52f2009-07-13 21:29:25 -04004473 }
Chris Masonfa9c0d792009-04-03 09:47:43 -04004474checks:
Josef Bacik6226cb02009-04-03 10:14:18 -04004475 search_start = stripe_align(root, offset);
Josef Bacik2552d172009-04-03 10:14:19 -04004476 /* move on to the next group */
Josef Bacik6226cb02009-04-03 10:14:18 -04004477 if (search_start + num_bytes >= search_end) {
4478 btrfs_add_free_space(block_group, offset, num_bytes);
Josef Bacik2552d172009-04-03 10:14:19 -04004479 goto loop;
Josef Bacik6226cb02009-04-03 10:14:18 -04004480 }
Chris Masone37c9e62007-05-09 20:13:14 -04004481
Josef Bacik2552d172009-04-03 10:14:19 -04004482 /* move on to the next group */
4483 if (search_start + num_bytes >
Josef Bacik6226cb02009-04-03 10:14:18 -04004484 block_group->key.objectid + block_group->key.offset) {
4485 btrfs_add_free_space(block_group, offset, num_bytes);
Josef Bacik2552d172009-04-03 10:14:19 -04004486 goto loop;
Josef Bacik6226cb02009-04-03 10:14:18 -04004487 }
Josef Bacik80eb2342008-10-29 14:49:05 -04004488
Josef Bacik2552d172009-04-03 10:14:19 -04004489 if (exclude_nr > 0 &&
4490 (search_start + num_bytes > exclude_start &&
4491 search_start < exclude_start + exclude_nr)) {
4492 search_start = exclude_start + exclude_nr;
4493
Josef Bacik6226cb02009-04-03 10:14:18 -04004494 btrfs_add_free_space(block_group, offset, num_bytes);
Josef Bacik2552d172009-04-03 10:14:19 -04004495 /*
4496 * if search_start is still in this block group
4497 * then we just re-search this block group
4498 */
4499 if (search_start >= block_group->key.objectid &&
4500 search_start < (block_group->key.objectid +
Josef Bacik6226cb02009-04-03 10:14:18 -04004501 block_group->key.offset))
Josef Bacik2552d172009-04-03 10:14:19 -04004502 goto have_block_group;
Josef Bacik2552d172009-04-03 10:14:19 -04004503 goto loop;
Josef Bacik0f9dd462008-09-23 13:14:11 -04004504 }
Josef Bacik2552d172009-04-03 10:14:19 -04004505
4506 ins->objectid = search_start;
4507 ins->offset = num_bytes;
4508
Josef Bacik6226cb02009-04-03 10:14:18 -04004509 if (offset < search_start)
4510 btrfs_add_free_space(block_group, offset,
4511 search_start - offset);
4512 BUG_ON(offset > search_start);
4513
Yan Zheng11833d62009-09-11 16:11:19 -04004514 update_reserved_extents(block_group, num_bytes, 1);
4515
Josef Bacik2552d172009-04-03 10:14:19 -04004516 /* we are all good, lets return */
Josef Bacik2552d172009-04-03 10:14:19 -04004517 break;
4518loop:
Josef Bacik0a243252009-09-11 16:11:20 -04004519 failed_cluster_refill = false;
Josef Bacik1cdda9b2009-10-06 10:04:28 -04004520 failed_alloc = false;
Yan, Zhengb742bb822010-05-16 10:46:24 -04004521 BUG_ON(index != get_block_group_index(block_group));
Chris Masonfa9c0d792009-04-03 09:47:43 -04004522 btrfs_put_block_group(block_group);
Josef Bacik2552d172009-04-03 10:14:19 -04004523 }
4524 up_read(&space_info->groups_sem);
Chris Masonf5a31e12008-11-10 11:47:09 -05004525
Yan, Zhengb742bb822010-05-16 10:46:24 -04004526 if (!ins->objectid && ++index < BTRFS_NR_RAID_TYPES)
4527 goto search;
4528
Josef Bacikccf0e722009-11-10 21:23:48 -05004529 /* LOOP_FIND_IDEAL, only search caching/cached bg's, and don't wait for
4530 * for them to make caching progress. Also
4531 * determine the best possible bg to cache
4532 * LOOP_CACHING_NOWAIT, search partially cached block groups, kicking
4533 * caching kthreads as we move along
Josef Bacik817d52f2009-07-13 21:29:25 -04004534 * LOOP_CACHING_WAIT, search everything, and wait if our bg is caching
4535 * LOOP_ALLOC_CHUNK, force a chunk allocation and try again
4536 * LOOP_NO_EMPTY_SIZE, set empty_size and empty_cluster to 0 and try
4537 * again
Chris Masonfa9c0d792009-04-03 09:47:43 -04004538 */
Josef Bacik817d52f2009-07-13 21:29:25 -04004539 if (!ins->objectid && loop < LOOP_NO_EMPTY_SIZE &&
4540 (found_uncached_bg || empty_size || empty_cluster ||
4541 allowed_chunk_alloc)) {
Yan, Zhengb742bb822010-05-16 10:46:24 -04004542 index = 0;
Josef Bacikccf0e722009-11-10 21:23:48 -05004543 if (loop == LOOP_FIND_IDEAL && found_uncached_bg) {
Josef Bacik817d52f2009-07-13 21:29:25 -04004544 found_uncached_bg = false;
Josef Bacikccf0e722009-11-10 21:23:48 -05004545 loop++;
4546 if (!ideal_cache_percent &&
4547 atomic_read(&space_info->caching_threads))
Josef Bacik817d52f2009-07-13 21:29:25 -04004548 goto search;
Josef Bacikccf0e722009-11-10 21:23:48 -05004549
4550 /*
4551 * 1 of the following 2 things have happened so far
4552 *
4553 * 1) We found an ideal block group for caching that
4554 * is mostly full and will cache quickly, so we might
4555 * as well wait for it.
4556 *
4557 * 2) We searched for cached only and we didn't find
4558 * anything, and we didn't start any caching kthreads
4559 * either, so chances are we will loop through and
4560 * start a couple caching kthreads, and then come back
4561 * around and just wait for them. This will be slower
4562 * because we will have 2 caching kthreads reading at
4563 * the same time when we could have just started one
4564 * and waited for it to get far enough to give us an
4565 * allocation, so go ahead and go to the wait caching
4566 * loop.
4567 */
4568 loop = LOOP_CACHING_WAIT;
4569 search_start = ideal_cache_offset;
4570 ideal_cache_percent = 0;
4571 goto ideal_cache;
4572 } else if (loop == LOOP_FIND_IDEAL) {
4573 /*
4574 * Didn't find a uncached bg, wait on anything we find
4575 * next.
4576 */
4577 loop = LOOP_CACHING_WAIT;
4578 goto search;
4579 }
4580
4581 if (loop < LOOP_CACHING_WAIT) {
4582 loop++;
4583 goto search;
Josef Bacik817d52f2009-07-13 21:29:25 -04004584 }
4585
4586 if (loop == LOOP_ALLOC_CHUNK) {
Chris Masonfa9c0d792009-04-03 09:47:43 -04004587 empty_size = 0;
4588 empty_cluster = 0;
4589 }
Chris Mason42e70e72008-11-07 18:17:11 -05004590
Josef Bacik2552d172009-04-03 10:14:19 -04004591 if (allowed_chunk_alloc) {
4592 ret = do_chunk_alloc(trans, root, num_bytes +
4593 2 * 1024 * 1024, data, 1);
Josef Bacik2552d172009-04-03 10:14:19 -04004594 allowed_chunk_alloc = 0;
Josef Bacikccf0e722009-11-10 21:23:48 -05004595 done_chunk_alloc = 1;
4596 } else if (!done_chunk_alloc) {
Josef Bacik2552d172009-04-03 10:14:19 -04004597 space_info->force_alloc = 1;
Josef Bacik0f9dd462008-09-23 13:14:11 -04004598 }
Josef Bacik80eb2342008-10-29 14:49:05 -04004599
Josef Bacik817d52f2009-07-13 21:29:25 -04004600 if (loop < LOOP_NO_EMPTY_SIZE) {
Chris Masonfa9c0d792009-04-03 09:47:43 -04004601 loop++;
Josef Bacik2552d172009-04-03 10:14:19 -04004602 goto search;
Chris Masonfa9c0d792009-04-03 09:47:43 -04004603 }
Josef Bacik2552d172009-04-03 10:14:19 -04004604 ret = -ENOSPC;
4605 } else if (!ins->objectid) {
4606 ret = -ENOSPC;
Chris Masone19caa52007-10-15 16:17:44 -04004607 }
Chris Mason0b86a832008-03-24 15:01:56 -04004608
Josef Bacik80eb2342008-10-29 14:49:05 -04004609 /* we found what we needed */
4610 if (ins->objectid) {
4611 if (!(data & BTRFS_BLOCK_GROUP_DATA))
Yan Zhengd2fb3432008-12-11 16:30:39 -05004612 trans->block_group = block_group->key.objectid;
Josef Bacik80eb2342008-10-29 14:49:05 -04004613
Chris Masonfa9c0d792009-04-03 09:47:43 -04004614 btrfs_put_block_group(block_group);
Josef Bacik2552d172009-04-03 10:14:19 -04004615 ret = 0;
4616 }
Chris Mason0b86a832008-03-24 15:01:56 -04004617
Chris Mason0f70abe2007-02-28 16:46:22 -05004618 return ret;
Chris Masonfec577f2007-02-26 10:40:21 -05004619}
Chris Masonec44a352008-04-28 15:29:52 -04004620
Josef Bacik9ed74f22009-09-11 16:12:44 -04004621static void dump_space_info(struct btrfs_space_info *info, u64 bytes,
4622 int dump_block_groups)
Josef Bacik0f9dd462008-09-23 13:14:11 -04004623{
4624 struct btrfs_block_group_cache *cache;
Yan, Zhengb742bb822010-05-16 10:46:24 -04004625 int index = 0;
Josef Bacik0f9dd462008-09-23 13:14:11 -04004626
Josef Bacik9ed74f22009-09-11 16:12:44 -04004627 spin_lock(&info->lock);
Chris Masond3977122009-01-05 21:25:51 -05004628 printk(KERN_INFO "space_info has %llu free, is %sfull\n",
4629 (unsigned long long)(info->total_bytes - info->bytes_used -
Josef Bacik9ed74f22009-09-11 16:12:44 -04004630 info->bytes_pinned - info->bytes_reserved -
4631 info->bytes_super),
Chris Masond3977122009-01-05 21:25:51 -05004632 (info->full) ? "" : "not ");
Josef Bacik6a632092009-02-20 11:00:09 -05004633 printk(KERN_INFO "space_info total=%llu, pinned=%llu, delalloc=%llu,"
Josef Bacik9ed74f22009-09-11 16:12:44 -04004634 " may_use=%llu, used=%llu, root=%llu, super=%llu, reserved=%llu"
4635 "\n",
Joel Becker21380932009-04-21 12:38:29 -07004636 (unsigned long long)info->total_bytes,
4637 (unsigned long long)info->bytes_pinned,
4638 (unsigned long long)info->bytes_delalloc,
4639 (unsigned long long)info->bytes_may_use,
Josef Bacik9ed74f22009-09-11 16:12:44 -04004640 (unsigned long long)info->bytes_used,
4641 (unsigned long long)info->bytes_root,
4642 (unsigned long long)info->bytes_super,
4643 (unsigned long long)info->bytes_reserved);
4644 spin_unlock(&info->lock);
4645
4646 if (!dump_block_groups)
4647 return;
Josef Bacik0f9dd462008-09-23 13:14:11 -04004648
Josef Bacik80eb2342008-10-29 14:49:05 -04004649 down_read(&info->groups_sem);
Yan, Zhengb742bb822010-05-16 10:46:24 -04004650again:
4651 list_for_each_entry(cache, &info->block_groups[index], list) {
Josef Bacik0f9dd462008-09-23 13:14:11 -04004652 spin_lock(&cache->lock);
Chris Masond3977122009-01-05 21:25:51 -05004653 printk(KERN_INFO "block group %llu has %llu bytes, %llu used "
4654 "%llu pinned %llu reserved\n",
4655 (unsigned long long)cache->key.objectid,
4656 (unsigned long long)cache->key.offset,
4657 (unsigned long long)btrfs_block_group_used(&cache->item),
4658 (unsigned long long)cache->pinned,
4659 (unsigned long long)cache->reserved);
Josef Bacik0f9dd462008-09-23 13:14:11 -04004660 btrfs_dump_free_space(cache, bytes);
4661 spin_unlock(&cache->lock);
4662 }
Yan, Zhengb742bb822010-05-16 10:46:24 -04004663 if (++index < BTRFS_NR_RAID_TYPES)
4664 goto again;
Josef Bacik80eb2342008-10-29 14:49:05 -04004665 up_read(&info->groups_sem);
Josef Bacik0f9dd462008-09-23 13:14:11 -04004666}
Zheng Yane8569812008-09-26 10:05:48 -04004667
/*
 * Reserve an extent of at least min_alloc_size bytes (ideally num_bytes)
 * for the given allocation profile.  On success the reserved extent is
 * returned through *ins.
 *
 * On -ENOSPC the request is halved (rounded down to a sectorsize multiple
 * but never below min_alloc_size), a chunk allocation is forced, and the
 * search is retried until the minimum size also fails.
 */
int btrfs_reserve_extent(struct btrfs_trans_handle *trans,
			 struct btrfs_root *root,
			 u64 num_bytes, u64 min_alloc_size,
			 u64 empty_size, u64 hint_byte,
			 u64 search_end, struct btrfs_key *ins,
			 u64 data)
{
	int ret;
	u64 search_start = 0;

	/* map the requested type to the mount's allocation profile */
	data = btrfs_get_alloc_profile(root, data);
again:
	/*
	 * the only place that sets empty_size is btrfs_realloc_node, which
	 * is not called recursively on allocations
	 */
	if (empty_size || root->ref_cows)
		ret = do_chunk_alloc(trans, root->fs_info->extent_root,
				     num_bytes + 2 * 1024 * 1024, data, 0);

	WARN_ON(num_bytes < root->sectorsize);
	ret = find_free_extent(trans, root, num_bytes, empty_size,
			       search_start, search_end, hint_byte, ins,
			       trans->alloc_exclude_start,
			       trans->alloc_exclude_nr, data);

	if (ret == -ENOSPC && num_bytes > min_alloc_size) {
		/* halve the request, keep it sector aligned and >= minimum */
		num_bytes = num_bytes >> 1;
		num_bytes = num_bytes & ~(root->sectorsize - 1);
		num_bytes = max(num_bytes, min_alloc_size);
		/* force a chunk allocation before retrying the search */
		do_chunk_alloc(trans, root->fs_info->extent_root,
			       num_bytes, data, 1);
		goto again;
	}
	if (ret == -ENOSPC) {
		/* even min_alloc_size failed: dump space info for debugging */
		struct btrfs_space_info *sinfo;

		sinfo = __find_space_info(root->fs_info, data);
		printk(KERN_ERR "btrfs allocation failed flags %llu, "
		       "wanted %llu\n", (unsigned long long)data,
		       (unsigned long long)num_bytes);
		dump_space_info(sinfo, num_bytes, 1);
	}

	return ret;
}
4714
Chris Mason65b51a02008-08-01 15:11:20 -04004715int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len)
4716{
Josef Bacik0f9dd462008-09-23 13:14:11 -04004717 struct btrfs_block_group_cache *cache;
Liu Hui1f3c79a2009-01-05 15:57:51 -05004718 int ret = 0;
Josef Bacik0f9dd462008-09-23 13:14:11 -04004719
Josef Bacik0f9dd462008-09-23 13:14:11 -04004720 cache = btrfs_lookup_block_group(root->fs_info, start);
4721 if (!cache) {
Chris Masond3977122009-01-05 21:25:51 -05004722 printk(KERN_ERR "Unable to find block group for %llu\n",
4723 (unsigned long long)start);
Josef Bacik0f9dd462008-09-23 13:14:11 -04004724 return -ENOSPC;
4725 }
Liu Hui1f3c79a2009-01-05 15:57:51 -05004726
4727 ret = btrfs_discard_extent(root, start, len);
4728
Josef Bacik0f9dd462008-09-23 13:14:11 -04004729 btrfs_add_free_space(cache, start, len);
Yan Zheng11833d62009-09-11 16:11:19 -04004730 update_reserved_extents(cache, len, 0);
Chris Masonfa9c0d792009-04-03 09:47:43 -04004731 btrfs_put_block_group(cache);
Josef Bacik817d52f2009-07-13 21:29:25 -04004732
Chris Masone6dcd2d2008-07-17 12:53:50 -04004733 return ret;
4734}
4735
Yan Zheng5d4f98a2009-06-10 10:45:14 -04004736static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
4737 struct btrfs_root *root,
4738 u64 parent, u64 root_objectid,
4739 u64 flags, u64 owner, u64 offset,
4740 struct btrfs_key *ins, int ref_mod)
Chris Masone6dcd2d2008-07-17 12:53:50 -04004741{
4742 int ret;
Yan Zheng5d4f98a2009-06-10 10:45:14 -04004743 struct btrfs_fs_info *fs_info = root->fs_info;
Chris Masone6dcd2d2008-07-17 12:53:50 -04004744 struct btrfs_extent_item *extent_item;
Yan Zheng5d4f98a2009-06-10 10:45:14 -04004745 struct btrfs_extent_inline_ref *iref;
Chris Masone6dcd2d2008-07-17 12:53:50 -04004746 struct btrfs_path *path;
Yan Zheng5d4f98a2009-06-10 10:45:14 -04004747 struct extent_buffer *leaf;
4748 int type;
4749 u32 size;
Chris Masonf2654de2007-06-26 12:20:46 -04004750
Yan Zheng5d4f98a2009-06-10 10:45:14 -04004751 if (parent > 0)
4752 type = BTRFS_SHARED_DATA_REF_KEY;
4753 else
4754 type = BTRFS_EXTENT_DATA_REF_KEY;
Zheng Yan31840ae2008-09-23 13:14:14 -04004755
Yan Zheng5d4f98a2009-06-10 10:45:14 -04004756 size = sizeof(*extent_item) + btrfs_extent_inline_ref_size(type);
Chris Mason7bb86312007-12-11 09:25:06 -05004757
4758 path = btrfs_alloc_path();
4759 BUG_ON(!path);
Chris Mason47e4bb92008-02-01 14:51:59 -05004760
Chris Masonb9473432009-03-13 11:00:37 -04004761 path->leave_spinning = 1;
Yan Zheng5d4f98a2009-06-10 10:45:14 -04004762 ret = btrfs_insert_empty_item(trans, fs_info->extent_root, path,
4763 ins, size);
Chris Masonccd467d2007-06-28 15:57:36 -04004764 BUG_ON(ret);
Josef Bacik0f9dd462008-09-23 13:14:11 -04004765
Yan Zheng5d4f98a2009-06-10 10:45:14 -04004766 leaf = path->nodes[0];
4767 extent_item = btrfs_item_ptr(leaf, path->slots[0],
Chris Mason47e4bb92008-02-01 14:51:59 -05004768 struct btrfs_extent_item);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04004769 btrfs_set_extent_refs(leaf, extent_item, ref_mod);
4770 btrfs_set_extent_generation(leaf, extent_item, trans->transid);
4771 btrfs_set_extent_flags(leaf, extent_item,
4772 flags | BTRFS_EXTENT_FLAG_DATA);
Chris Mason47e4bb92008-02-01 14:51:59 -05004773
Yan Zheng5d4f98a2009-06-10 10:45:14 -04004774 iref = (struct btrfs_extent_inline_ref *)(extent_item + 1);
4775 btrfs_set_extent_inline_ref_type(leaf, iref, type);
4776 if (parent > 0) {
4777 struct btrfs_shared_data_ref *ref;
4778 ref = (struct btrfs_shared_data_ref *)(iref + 1);
4779 btrfs_set_extent_inline_ref_offset(leaf, iref, parent);
4780 btrfs_set_shared_data_ref_count(leaf, ref, ref_mod);
4781 } else {
4782 struct btrfs_extent_data_ref *ref;
4783 ref = (struct btrfs_extent_data_ref *)(&iref->offset);
4784 btrfs_set_extent_data_ref_root(leaf, ref, root_objectid);
4785 btrfs_set_extent_data_ref_objectid(leaf, ref, owner);
4786 btrfs_set_extent_data_ref_offset(leaf, ref, offset);
4787 btrfs_set_extent_data_ref_count(leaf, ref, ref_mod);
4788 }
Chris Mason47e4bb92008-02-01 14:51:59 -05004789
4790 btrfs_mark_buffer_dirty(path->nodes[0]);
Chris Mason7bb86312007-12-11 09:25:06 -05004791 btrfs_free_path(path);
Chris Masonf510cfe2007-10-15 16:14:48 -04004792
Yan Zheng5d4f98a2009-06-10 10:45:14 -04004793 ret = update_block_group(trans, root, ins->objectid, ins->offset,
4794 1, 0);
Chris Masonf5947062008-02-04 10:10:13 -05004795 if (ret) {
Chris Masond3977122009-01-05 21:25:51 -05004796 printk(KERN_ERR "btrfs update block group failed for %llu "
4797 "%llu\n", (unsigned long long)ins->objectid,
4798 (unsigned long long)ins->offset);
Chris Masonf5947062008-02-04 10:10:13 -05004799 BUG();
4800 }
Chris Masone6dcd2d2008-07-17 12:53:50 -04004801 return ret;
4802}
4803
Yan Zheng5d4f98a2009-06-10 10:45:14 -04004804static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
4805 struct btrfs_root *root,
4806 u64 parent, u64 root_objectid,
4807 u64 flags, struct btrfs_disk_key *key,
4808 int level, struct btrfs_key *ins)
4809{
4810 int ret;
4811 struct btrfs_fs_info *fs_info = root->fs_info;
4812 struct btrfs_extent_item *extent_item;
4813 struct btrfs_tree_block_info *block_info;
4814 struct btrfs_extent_inline_ref *iref;
4815 struct btrfs_path *path;
4816 struct extent_buffer *leaf;
4817 u32 size = sizeof(*extent_item) + sizeof(*block_info) + sizeof(*iref);
4818
4819 path = btrfs_alloc_path();
4820 BUG_ON(!path);
4821
4822 path->leave_spinning = 1;
4823 ret = btrfs_insert_empty_item(trans, fs_info->extent_root, path,
4824 ins, size);
4825 BUG_ON(ret);
4826
4827 leaf = path->nodes[0];
4828 extent_item = btrfs_item_ptr(leaf, path->slots[0],
4829 struct btrfs_extent_item);
4830 btrfs_set_extent_refs(leaf, extent_item, 1);
4831 btrfs_set_extent_generation(leaf, extent_item, trans->transid);
4832 btrfs_set_extent_flags(leaf, extent_item,
4833 flags | BTRFS_EXTENT_FLAG_TREE_BLOCK);
4834 block_info = (struct btrfs_tree_block_info *)(extent_item + 1);
4835
4836 btrfs_set_tree_block_key(leaf, block_info, key);
4837 btrfs_set_tree_block_level(leaf, block_info, level);
4838
4839 iref = (struct btrfs_extent_inline_ref *)(block_info + 1);
4840 if (parent > 0) {
4841 BUG_ON(!(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF));
4842 btrfs_set_extent_inline_ref_type(leaf, iref,
4843 BTRFS_SHARED_BLOCK_REF_KEY);
4844 btrfs_set_extent_inline_ref_offset(leaf, iref, parent);
4845 } else {
4846 btrfs_set_extent_inline_ref_type(leaf, iref,
4847 BTRFS_TREE_BLOCK_REF_KEY);
4848 btrfs_set_extent_inline_ref_offset(leaf, iref, root_objectid);
4849 }
4850
4851 btrfs_mark_buffer_dirty(leaf);
4852 btrfs_free_path(path);
4853
4854 ret = update_block_group(trans, root, ins->objectid, ins->offset,
4855 1, 0);
4856 if (ret) {
4857 printk(KERN_ERR "btrfs update block group failed for %llu "
4858 "%llu\n", (unsigned long long)ins->objectid,
4859 (unsigned long long)ins->offset);
4860 BUG();
4861 }
4862 return ret;
4863}
4864
4865int btrfs_alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
4866 struct btrfs_root *root,
4867 u64 root_objectid, u64 owner,
4868 u64 offset, struct btrfs_key *ins)
Chris Masone6dcd2d2008-07-17 12:53:50 -04004869{
4870 int ret;
Chris Mason1c2308f82008-09-23 13:14:13 -04004871
Yan Zheng5d4f98a2009-06-10 10:45:14 -04004872 BUG_ON(root_objectid == BTRFS_TREE_LOG_OBJECTID);
Chris Mason56bec292009-03-13 10:10:06 -04004873
Yan Zheng5d4f98a2009-06-10 10:45:14 -04004874 ret = btrfs_add_delayed_data_ref(trans, ins->objectid, ins->offset,
4875 0, root_objectid, owner, offset,
4876 BTRFS_ADD_DELAYED_EXTENT, NULL);
Chris Masone6dcd2d2008-07-17 12:53:50 -04004877 return ret;
4878}
Chris Masone02119d2008-09-05 16:13:11 -04004879
/*
 * this is used by the tree logging recovery code.  It records that
 * an extent has been allocated and makes sure to clear the free
 * space cache bits as well
 */
int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans,
				   struct btrfs_root *root,
				   u64 root_objectid, u64 owner, u64 offset,
				   struct btrfs_key *ins)
{
	int ret;
	struct btrfs_block_group_cache *block_group;
	struct btrfs_caching_control *caching_ctl;
	u64 start = ins->objectid;
	u64 num_bytes = ins->offset;

	block_group = btrfs_lookup_block_group(root->fs_info, ins->objectid);
	cache_block_group(block_group);
	caching_ctl = get_caching_control(block_group);

	if (!caching_ctl) {
		/* fully cached: just carve the range out of the cache */
		BUG_ON(!block_group_cache_done(block_group));
		ret = btrfs_remove_free_space(block_group, start, num_bytes);
		BUG_ON(ret);
	} else {
		/*
		 * caching is still in flight; handle the range depending on
		 * how much of it lies before/after caching_ctl->progress
		 */
		mutex_lock(&caching_ctl->mutex);

		if (start >= caching_ctl->progress) {
			/* entirely uncached: exclude it from caching */
			ret = add_excluded_extent(root, start, num_bytes);
			BUG_ON(ret);
		} else if (start + num_bytes <= caching_ctl->progress) {
			/* entirely cached: remove it from the cache */
			ret = btrfs_remove_free_space(block_group,
						      start, num_bytes);
			BUG_ON(ret);
		} else {
			/* straddles the progress point: split the range */
			num_bytes = caching_ctl->progress - start;
			ret = btrfs_remove_free_space(block_group,
						      start, num_bytes);
			BUG_ON(ret);

			start = caching_ctl->progress;
			num_bytes = ins->objectid + ins->offset -
				    caching_ctl->progress;
			ret = add_excluded_extent(root, start, num_bytes);
			BUG_ON(ret);
		}

		mutex_unlock(&caching_ctl->mutex);
		put_caching_control(caching_ctl);
	}

	update_reserved_extents(block_group, ins->offset, 1);
	btrfs_put_block_group(block_group);
	/* insert the extent item/backref directly, bypassing delayed refs */
	ret = alloc_reserved_file_extent(trans, root, 0, root_objectid,
					 0, owner, offset, ins, 1);
	return ret;
}
4937
/*
 * reserve an extent for a new tree block and queue the delayed ref work
 * needed to insert its extent item.  The key of the reserved extent is
 * returned through ins.
 *
 * returns 0 if everything worked, non-zero otherwise.
 */
static int alloc_tree_block(struct btrfs_trans_handle *trans,
			    struct btrfs_root *root,
			    u64 num_bytes, u64 parent, u64 root_objectid,
			    struct btrfs_disk_key *key, int level,
			    u64 empty_size, u64 hint_byte, u64 search_end,
			    struct btrfs_key *ins)
{
	int ret;
	u64 flags = 0;

	ret = btrfs_reserve_extent(trans, root, num_bytes, num_bytes,
				   empty_size, hint_byte, search_end,
				   ins, 0);
	if (ret)
		return ret;

	/* relocation tree blocks always carry full backrefs */
	if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
		if (parent == 0)
			parent = ins->objectid;
		flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
	} else
		BUG_ON(parent > 0);

	/* the log tree does not use delayed refs */
	if (root_objectid != BTRFS_TREE_LOG_OBJECTID) {
		struct btrfs_delayed_extent_op *extent_op;
		extent_op = kmalloc(sizeof(*extent_op), GFP_NOFS);
		BUG_ON(!extent_op);
		if (key)
			memcpy(&extent_op->key, key, sizeof(extent_op->key));
		else
			memset(&extent_op->key, 0, sizeof(extent_op->key));
		extent_op->flags_to_set = flags;
		extent_op->update_key = 1;
		extent_op->update_flags = 1;
		extent_op->is_data = 0;

		ret = btrfs_add_delayed_tree_ref(trans, ins->objectid,
					ins->offset, parent, root_objectid,
					level, BTRFS_ADD_DELAYED_EXTENT,
					extent_op);
		BUG_ON(ret);
	}

	/* account the new block against the root's used byte count */
	if (root_objectid == root->root_key.objectid) {
		u64 used;
		spin_lock(&root->node_lock);
		used = btrfs_root_used(&root->root_item) + num_bytes;
		btrfs_set_root_used(&root->root_item, used);
		spin_unlock(&root->node_lock);
	}
	return ret;
}
Chris Mason65b51a02008-08-01 15:11:20 -04004997
/*
 * set up the extent buffer for a freshly allocated tree block: stamp the
 * generation, lock it, wipe the old contents and mark the range dirty in
 * the appropriate page tree.  Returns the buffer locked for blocking, or
 * ERR_PTR(-ENOMEM).
 */
struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans,
					    struct btrfs_root *root,
					    u64 bytenr, u32 blocksize,
					    int level)
{
	struct extent_buffer *buf;

	buf = btrfs_find_create_tree_block(root, bytenr, blocksize);
	if (!buf)
		return ERR_PTR(-ENOMEM);
	btrfs_set_header_generation(buf, trans->transid);
	btrfs_set_buffer_lockdep_class(buf, level);
	btrfs_tree_lock(buf);
	clean_tree_block(trans, root, buf);

	btrfs_set_lock_blocking(buf);
	btrfs_set_buffer_uptodate(buf);

	if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID) {
		/*
		 * we allow two log transactions at a time, use different
		 * EXTENT bit to differentiate dirty pages.
		 */
		if (root->log_transid % 2 == 0)
			set_extent_dirty(&root->dirty_log_pages, buf->start,
					buf->start + buf->len - 1, GFP_NOFS);
		else
			set_extent_new(&root->dirty_log_pages, buf->start,
					buf->start + buf->len - 1, GFP_NOFS);
	} else {
		set_extent_dirty(&trans->transaction->dirty_pages, buf->start,
			 buf->start + buf->len - 1, GFP_NOFS);
	}
	trans->blocks_used++;
	/* this returns a buffer locked for blocking */
	return buf;
}
5035
Chris Masonfec577f2007-02-26 10:40:21 -05005036/*
5037 * helper function to allocate a block for a given tree
5038 * returns the tree buffer or NULL.
5039 */
Chris Mason5f39d392007-10-15 16:14:19 -04005040struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
Yan Zheng5d4f98a2009-06-10 10:45:14 -04005041 struct btrfs_root *root, u32 blocksize,
5042 u64 parent, u64 root_objectid,
5043 struct btrfs_disk_key *key, int level,
5044 u64 hint, u64 empty_size)
Chris Masonfec577f2007-02-26 10:40:21 -05005045{
Chris Masone2fa7222007-03-12 16:22:34 -04005046 struct btrfs_key ins;
Chris Masonfec577f2007-02-26 10:40:21 -05005047 int ret;
Chris Mason5f39d392007-10-15 16:14:19 -04005048 struct extent_buffer *buf;
Chris Masonfec577f2007-02-26 10:40:21 -05005049
Yan Zheng5d4f98a2009-06-10 10:45:14 -04005050 ret = alloc_tree_block(trans, root, blocksize, parent, root_objectid,
5051 key, level, empty_size, hint, (u64)-1, &ins);
Chris Masonfec577f2007-02-26 10:40:21 -05005052 if (ret) {
Chris Mason54aa1f42007-06-22 14:16:25 -04005053 BUG_ON(ret > 0);
5054 return ERR_PTR(ret);
Chris Masonfec577f2007-02-26 10:40:21 -05005055 }
Chris Mason55c69072008-01-09 15:55:33 -05005056
Chris Mason4008c042009-02-12 14:09:45 -05005057 buf = btrfs_init_new_buffer(trans, root, ins.objectid,
5058 blocksize, level);
Chris Masonfec577f2007-02-26 10:40:21 -05005059 return buf;
5060}
Chris Masona28ec192007-03-06 20:08:01 -05005061
/*
 * state carried while walking a subvolume tree to drop or update its
 * references (see walk_down_tree/walk_up_tree and their *_proc helpers)
 */
struct walk_control {
	u64 refs[BTRFS_MAX_LEVEL];	/* extent refcount of the block at each level */
	u64 flags[BTRFS_MAX_LEVEL];	/* extent flags (e.g. FULL_BACKREF) at each level */
	struct btrfs_key update_progress; /* key bounding how far backref updates got */
	int stage;		/* DROP_REFERENCE or UPDATE_BACKREF */
	int level;		/* level currently being processed */
	int shared_level;	/* level where the walk switched to UPDATE_BACKREF */
	int update_ref;		/* nonzero: update backrefs of shared subtrees */
	int keep_locks;		/* nonzero: keep tree locks while walking down */
	int reada_slot;		/* slot readahead should resume from */
	int reada_count;	/* adaptive readahead batch size */
};

#define DROP_REFERENCE	1
#define UPDATE_BACKREF	2
5077
Yan, Zheng1c4850e2009-09-21 15:55:59 -04005078static noinline void reada_walk_down(struct btrfs_trans_handle *trans,
5079 struct btrfs_root *root,
5080 struct walk_control *wc,
5081 struct btrfs_path *path)
5082{
5083 u64 bytenr;
5084 u64 generation;
5085 u64 refs;
Yan, Zheng94fcca92009-10-09 09:25:16 -04005086 u64 flags;
Yan, Zheng1c4850e2009-09-21 15:55:59 -04005087 u64 last = 0;
5088 u32 nritems;
5089 u32 blocksize;
5090 struct btrfs_key key;
5091 struct extent_buffer *eb;
5092 int ret;
5093 int slot;
5094 int nread = 0;
5095
5096 if (path->slots[wc->level] < wc->reada_slot) {
5097 wc->reada_count = wc->reada_count * 2 / 3;
5098 wc->reada_count = max(wc->reada_count, 2);
5099 } else {
5100 wc->reada_count = wc->reada_count * 3 / 2;
5101 wc->reada_count = min_t(int, wc->reada_count,
5102 BTRFS_NODEPTRS_PER_BLOCK(root));
5103 }
5104
5105 eb = path->nodes[wc->level];
5106 nritems = btrfs_header_nritems(eb);
5107 blocksize = btrfs_level_size(root, wc->level - 1);
5108
5109 for (slot = path->slots[wc->level]; slot < nritems; slot++) {
5110 if (nread >= wc->reada_count)
5111 break;
5112
5113 cond_resched();
5114 bytenr = btrfs_node_blockptr(eb, slot);
5115 generation = btrfs_node_ptr_generation(eb, slot);
5116
5117 if (slot == path->slots[wc->level])
5118 goto reada;
5119
5120 if (wc->stage == UPDATE_BACKREF &&
5121 generation <= root->root_key.offset)
5122 continue;
5123
Yan, Zheng94fcca92009-10-09 09:25:16 -04005124 /* We don't lock the tree block, it's OK to be racy here */
5125 ret = btrfs_lookup_extent_info(trans, root, bytenr, blocksize,
5126 &refs, &flags);
5127 BUG_ON(ret);
5128 BUG_ON(refs == 0);
5129
Yan, Zheng1c4850e2009-09-21 15:55:59 -04005130 if (wc->stage == DROP_REFERENCE) {
Yan, Zheng1c4850e2009-09-21 15:55:59 -04005131 if (refs == 1)
5132 goto reada;
5133
Yan, Zheng94fcca92009-10-09 09:25:16 -04005134 if (wc->level == 1 &&
5135 (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF))
5136 continue;
Yan, Zheng1c4850e2009-09-21 15:55:59 -04005137 if (!wc->update_ref ||
5138 generation <= root->root_key.offset)
5139 continue;
5140 btrfs_node_key_to_cpu(eb, &key, slot);
5141 ret = btrfs_comp_cpu_keys(&key,
5142 &wc->update_progress);
5143 if (ret < 0)
5144 continue;
Yan, Zheng94fcca92009-10-09 09:25:16 -04005145 } else {
5146 if (wc->level == 1 &&
5147 (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF))
5148 continue;
Yan, Zheng1c4850e2009-09-21 15:55:59 -04005149 }
5150reada:
5151 ret = readahead_tree_block(root, bytenr, blocksize,
5152 generation);
5153 if (ret)
5154 break;
5155 last = bytenr + blocksize;
5156 nread++;
5157 }
5158 wc->reada_slot = slot;
5159}
5160
/*
 * helper to process tree block while walking down the tree.
 *
 * when wc->stage == UPDATE_BACKREF, this function updates
 * back refs for pointers in the block.
 *
 * NOTE: return value 1 means we should stop walking down.
 */
static noinline int walk_down_proc(struct btrfs_trans_handle *trans,
				   struct btrfs_root *root,
				   struct btrfs_path *path,
				   struct walk_control *wc, int lookup_info)
{
	int level = wc->level;
	struct extent_buffer *eb = path->nodes[level];
	u64 flag = BTRFS_BLOCK_FLAG_FULL_BACKREF;
	int ret;

	/* blocks not owned by this root need no backref updates */
	if (wc->stage == UPDATE_BACKREF &&
	    btrfs_header_owner(eb) != root->root_key.objectid)
		return 1;

	/*
	 * when reference count of tree block is 1, it won't increase
	 * again. once full backref flag is set, we never clear it.
	 */
	if (lookup_info &&
	    ((wc->stage == DROP_REFERENCE && wc->refs[level] != 1) ||
	     (wc->stage == UPDATE_BACKREF && !(wc->flags[level] & flag)))) {
		BUG_ON(!path->locks[level]);
		ret = btrfs_lookup_extent_info(trans, root,
					       eb->start, eb->len,
					       &wc->refs[level],
					       &wc->flags[level]);
		BUG_ON(ret);
		BUG_ON(wc->refs[level] == 0);
	}

	if (wc->stage == DROP_REFERENCE) {
		/* shared block: stop walking down, walk_up_proc handles it */
		if (wc->refs[level] > 1)
			return 1;

		if (path->locks[level] && !wc->keep_locks) {
			btrfs_tree_unlock(eb);
			path->locks[level] = 0;
		}
		return 0;
	}

	/* wc->stage == UPDATE_BACKREF */
	if (!(wc->flags[level] & flag)) {
		/* convert the block's backrefs to full backrefs */
		BUG_ON(!path->locks[level]);
		ret = btrfs_inc_ref(trans, root, eb, 1);
		BUG_ON(ret);
		ret = btrfs_dec_ref(trans, root, eb, 0);
		BUG_ON(ret);
		ret = btrfs_set_disk_extent_flags(trans, root, eb->start,
						  eb->len, flag, 0);
		BUG_ON(ret);
		wc->flags[level] |= flag;
	}

	/*
	 * the block is shared by multiple trees, so it's not good to
	 * keep the tree lock
	 */
	if (path->locks[level] && level > 0) {
		btrfs_tree_unlock(eb);
		path->locks[level] = 0;
	}
	return 0;
}
5233
5234/*
Yan, Zheng1c4850e2009-09-21 15:55:59 -04005235 * hepler to process tree block pointer.
5236 *
5237 * when wc->stage == DROP_REFERENCE, this function checks
5238 * reference count of the block pointed to. if the block
5239 * is shared and we need update back refs for the subtree
5240 * rooted at the block, this function changes wc->stage to
5241 * UPDATE_BACKREF. if the block is shared and there is no
5242 * need to update back, this function drops the reference
5243 * to the block.
5244 *
5245 * NOTE: return value 1 means we should stop walking down.
5246 */
5247static noinline int do_walk_down(struct btrfs_trans_handle *trans,
5248 struct btrfs_root *root,
5249 struct btrfs_path *path,
Yan, Zheng94fcca92009-10-09 09:25:16 -04005250 struct walk_control *wc, int *lookup_info)
Yan, Zheng1c4850e2009-09-21 15:55:59 -04005251{
5252 u64 bytenr;
5253 u64 generation;
5254 u64 parent;
5255 u32 blocksize;
5256 struct btrfs_key key;
5257 struct extent_buffer *next;
5258 int level = wc->level;
5259 int reada = 0;
5260 int ret = 0;
5261
5262 generation = btrfs_node_ptr_generation(path->nodes[level],
5263 path->slots[level]);
5264 /*
5265 * if the lower level block was created before the snapshot
5266 * was created, we know there is no need to update back refs
5267 * for the subtree
5268 */
5269 if (wc->stage == UPDATE_BACKREF &&
Yan, Zheng94fcca92009-10-09 09:25:16 -04005270 generation <= root->root_key.offset) {
5271 *lookup_info = 1;
Yan, Zheng1c4850e2009-09-21 15:55:59 -04005272 return 1;
Yan, Zheng94fcca92009-10-09 09:25:16 -04005273 }
Yan, Zheng1c4850e2009-09-21 15:55:59 -04005274
5275 bytenr = btrfs_node_blockptr(path->nodes[level], path->slots[level]);
5276 blocksize = btrfs_level_size(root, level - 1);
5277
5278 next = btrfs_find_tree_block(root, bytenr, blocksize);
5279 if (!next) {
5280 next = btrfs_find_create_tree_block(root, bytenr, blocksize);
Miao Xie90d2c51d2010-03-25 12:37:12 +00005281 if (!next)
5282 return -ENOMEM;
Yan, Zheng1c4850e2009-09-21 15:55:59 -04005283 reada = 1;
5284 }
5285 btrfs_tree_lock(next);
5286 btrfs_set_lock_blocking(next);
5287
Yan, Zheng94fcca92009-10-09 09:25:16 -04005288 ret = btrfs_lookup_extent_info(trans, root, bytenr, blocksize,
5289 &wc->refs[level - 1],
5290 &wc->flags[level - 1]);
5291 BUG_ON(ret);
5292 BUG_ON(wc->refs[level - 1] == 0);
5293 *lookup_info = 0;
Yan, Zheng1c4850e2009-09-21 15:55:59 -04005294
Yan, Zheng94fcca92009-10-09 09:25:16 -04005295 if (wc->stage == DROP_REFERENCE) {
Yan, Zheng1c4850e2009-09-21 15:55:59 -04005296 if (wc->refs[level - 1] > 1) {
Yan, Zheng94fcca92009-10-09 09:25:16 -04005297 if (level == 1 &&
5298 (wc->flags[0] & BTRFS_BLOCK_FLAG_FULL_BACKREF))
5299 goto skip;
5300
Yan, Zheng1c4850e2009-09-21 15:55:59 -04005301 if (!wc->update_ref ||
5302 generation <= root->root_key.offset)
5303 goto skip;
5304
5305 btrfs_node_key_to_cpu(path->nodes[level], &key,
5306 path->slots[level]);
5307 ret = btrfs_comp_cpu_keys(&key, &wc->update_progress);
5308 if (ret < 0)
5309 goto skip;
5310
5311 wc->stage = UPDATE_BACKREF;
5312 wc->shared_level = level - 1;
5313 }
Yan, Zheng94fcca92009-10-09 09:25:16 -04005314 } else {
5315 if (level == 1 &&
5316 (wc->flags[0] & BTRFS_BLOCK_FLAG_FULL_BACKREF))
5317 goto skip;
Yan, Zheng1c4850e2009-09-21 15:55:59 -04005318 }
5319
5320 if (!btrfs_buffer_uptodate(next, generation)) {
5321 btrfs_tree_unlock(next);
5322 free_extent_buffer(next);
5323 next = NULL;
Yan, Zheng94fcca92009-10-09 09:25:16 -04005324 *lookup_info = 1;
Yan, Zheng1c4850e2009-09-21 15:55:59 -04005325 }
5326
5327 if (!next) {
5328 if (reada && level == 1)
5329 reada_walk_down(trans, root, wc, path);
5330 next = read_tree_block(root, bytenr, blocksize, generation);
5331 btrfs_tree_lock(next);
5332 btrfs_set_lock_blocking(next);
5333 }
5334
5335 level--;
5336 BUG_ON(level != btrfs_header_level(next));
5337 path->nodes[level] = next;
5338 path->slots[level] = 0;
5339 path->locks[level] = 1;
5340 wc->level = level;
5341 if (wc->level == 1)
5342 wc->reada_slot = 0;
5343 return 0;
5344skip:
5345 wc->refs[level - 1] = 0;
5346 wc->flags[level - 1] = 0;
Yan, Zheng94fcca92009-10-09 09:25:16 -04005347 if (wc->stage == DROP_REFERENCE) {
5348 if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
5349 parent = path->nodes[level]->start;
5350 } else {
5351 BUG_ON(root->root_key.objectid !=
5352 btrfs_header_owner(path->nodes[level]));
5353 parent = 0;
5354 }
Yan, Zheng1c4850e2009-09-21 15:55:59 -04005355
Yan, Zheng94fcca92009-10-09 09:25:16 -04005356 ret = btrfs_free_extent(trans, root, bytenr, blocksize, parent,
5357 root->root_key.objectid, level - 1, 0);
5358 BUG_ON(ret);
Yan, Zheng1c4850e2009-09-21 15:55:59 -04005359 }
Yan, Zheng1c4850e2009-09-21 15:55:59 -04005360 btrfs_tree_unlock(next);
5361 free_extent_buffer(next);
Yan, Zheng94fcca92009-10-09 09:25:16 -04005362 *lookup_info = 1;
Yan, Zheng1c4850e2009-09-21 15:55:59 -04005363 return 1;
5364}
5365
/*
 * helper to process tree block while walking up the tree.
 *
 * when wc->stage == DROP_REFERENCE, this function drops
 * reference count on the block.
 *
 * when wc->stage == UPDATE_BACKREF, this function changes
 * wc->stage back to DROP_REFERENCE if we changed wc->stage
 * to UPDATE_BACKREF previously while processing the block.
 *
 * NOTE: return value 1 means we should stop walking up.
 */
static noinline int walk_up_proc(struct btrfs_trans_handle *trans,
				 struct btrfs_root *root,
				 struct btrfs_path *path,
				 struct walk_control *wc)
{
	int ret = 0;
	int level = wc->level;
	struct extent_buffer *eb = path->nodes[level];
	u64 parent = 0;

	if (wc->stage == UPDATE_BACKREF) {
		BUG_ON(wc->shared_level < level);
		if (level < wc->shared_level)
			goto out;

		/* no more keys to update: disable further backref updates */
		ret = find_next_key(path, level + 1, &wc->update_progress);
		if (ret > 0)
			wc->update_ref = 0;

		wc->stage = DROP_REFERENCE;
		wc->shared_level = -1;
		path->slots[level] = 0;

		/*
		 * check reference count again if the block isn't locked.
		 * we should start walking down the tree again if reference
		 * count is one.
		 */
		if (!path->locks[level]) {
			BUG_ON(level == 0);
			btrfs_tree_lock(eb);
			btrfs_set_lock_blocking(eb);
			path->locks[level] = 1;

			ret = btrfs_lookup_extent_info(trans, root,
						       eb->start, eb->len,
						       &wc->refs[level],
						       &wc->flags[level]);
			BUG_ON(ret);
			BUG_ON(wc->refs[level] == 0);
			if (wc->refs[level] == 1) {
				btrfs_tree_unlock(eb);
				path->locks[level] = 0;
				return 1;
			}
		}
	}

	/* wc->stage == DROP_REFERENCE */
	BUG_ON(wc->refs[level] > 1 && !path->locks[level]);

	if (wc->refs[level] == 1) {
		if (level == 0) {
			/* drop refs on everything the leaf points to */
			if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF)
				ret = btrfs_dec_ref(trans, root, eb, 1);
			else
				ret = btrfs_dec_ref(trans, root, eb, 0);
			BUG_ON(ret);
		}
		/* make block locked assertion in clean_tree_block happy */
		if (!path->locks[level] &&
		    btrfs_header_generation(eb) == trans->transid) {
			btrfs_tree_lock(eb);
			btrfs_set_lock_blocking(eb);
			path->locks[level] = 1;
		}
		clean_tree_block(trans, root, eb);
	}

	/* work out the parent to pass to btrfs_free_extent */
	if (eb == root->node) {
		if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF)
			parent = eb->start;
		else
			BUG_ON(root->root_key.objectid !=
			       btrfs_header_owner(eb));
	} else {
		if (wc->flags[level + 1] & BTRFS_BLOCK_FLAG_FULL_BACKREF)
			parent = path->nodes[level + 1]->start;
		else
			BUG_ON(root->root_key.objectid !=
			       btrfs_header_owner(path->nodes[level + 1]));
	}

	ret = btrfs_free_extent(trans, root, eb->start, eb->len, parent,
				root->root_key.objectid, level, 0);
	BUG_ON(ret);
out:
	wc->refs[level] = 0;
	wc->flags[level] = 0;
	return ret;
}
5469
Yan Zheng5d4f98a2009-06-10 10:45:14 -04005470static noinline int walk_down_tree(struct btrfs_trans_handle *trans,
5471 struct btrfs_root *root,
Yan Zheng2c47e6052009-06-27 21:07:35 -04005472 struct btrfs_path *path,
5473 struct walk_control *wc)
Yan Zhengf82d02d2008-10-29 14:49:05 -04005474{
Yan Zheng2c47e6052009-06-27 21:07:35 -04005475 int level = wc->level;
Yan, Zheng94fcca92009-10-09 09:25:16 -04005476 int lookup_info = 1;
Yan Zhengf82d02d2008-10-29 14:49:05 -04005477 int ret;
5478
Yan Zheng2c47e6052009-06-27 21:07:35 -04005479 while (level >= 0) {
Yan, Zheng94fcca92009-10-09 09:25:16 -04005480 ret = walk_down_proc(trans, root, path, wc, lookup_info);
Yan Zheng2c47e6052009-06-27 21:07:35 -04005481 if (ret > 0)
Yan Zhengf82d02d2008-10-29 14:49:05 -04005482 break;
Yan Zhengf82d02d2008-10-29 14:49:05 -04005483
Yan Zheng2c47e6052009-06-27 21:07:35 -04005484 if (level == 0)
5485 break;
5486
Yan, Zheng7a7965f2010-02-01 02:41:17 +00005487 if (path->slots[level] >=
5488 btrfs_header_nritems(path->nodes[level]))
5489 break;
5490
Yan, Zheng94fcca92009-10-09 09:25:16 -04005491 ret = do_walk_down(trans, root, path, wc, &lookup_info);
Yan, Zheng1c4850e2009-09-21 15:55:59 -04005492 if (ret > 0) {
5493 path->slots[level]++;
5494 continue;
Miao Xie90d2c51d2010-03-25 12:37:12 +00005495 } else if (ret < 0)
5496 return ret;
Yan, Zheng1c4850e2009-09-21 15:55:59 -04005497 level = wc->level;
Yan Zhengf82d02d2008-10-29 14:49:05 -04005498 }
Yan Zhengf82d02d2008-10-29 14:49:05 -04005499 return 0;
5500}
5501
Chris Masond3977122009-01-05 21:25:51 -05005502static noinline int walk_up_tree(struct btrfs_trans_handle *trans,
Chris Mason98ed5172008-01-03 10:01:48 -05005503 struct btrfs_root *root,
Yan Zhengf82d02d2008-10-29 14:49:05 -04005504 struct btrfs_path *path,
Yan Zheng2c47e6052009-06-27 21:07:35 -04005505 struct walk_control *wc, int max_level)
Chris Mason20524f02007-03-10 06:35:47 -05005506{
Yan Zheng2c47e6052009-06-27 21:07:35 -04005507 int level = wc->level;
Chris Mason20524f02007-03-10 06:35:47 -05005508 int ret;
Chris Mason9f3a7422007-08-07 15:52:19 -04005509
Yan Zheng2c47e6052009-06-27 21:07:35 -04005510 path->slots[level] = btrfs_header_nritems(path->nodes[level]);
5511 while (level < max_level && path->nodes[level]) {
5512 wc->level = level;
5513 if (path->slots[level] + 1 <
5514 btrfs_header_nritems(path->nodes[level])) {
5515 path->slots[level]++;
Chris Mason20524f02007-03-10 06:35:47 -05005516 return 0;
5517 } else {
Yan Zheng2c47e6052009-06-27 21:07:35 -04005518 ret = walk_up_proc(trans, root, path, wc);
5519 if (ret > 0)
5520 return 0;
Chris Masonbd56b302009-02-04 09:27:02 -05005521
Yan Zheng2c47e6052009-06-27 21:07:35 -04005522 if (path->locks[level]) {
5523 btrfs_tree_unlock(path->nodes[level]);
5524 path->locks[level] = 0;
Yan Zhengf82d02d2008-10-29 14:49:05 -04005525 }
Yan Zheng2c47e6052009-06-27 21:07:35 -04005526 free_extent_buffer(path->nodes[level]);
5527 path->nodes[level] = NULL;
5528 level++;
Chris Mason20524f02007-03-10 06:35:47 -05005529 }
5530 }
5531 return 1;
5532}
5533
/*
 * drop a subvolume tree.
 *
 * this function traverses the tree freeing any blocks that are only
 * referenced by the tree.
 *
 * when a shared tree block is found, this function decreases its
 * reference count by one. if update_ref is true, this function
 * also makes sure backrefs for the shared block and all lower level
 * blocks are properly updated.
 */
int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref)
{
	struct btrfs_path *path;
	struct btrfs_trans_handle *trans;
	struct btrfs_root *tree_root = root->fs_info->tree_root;
	struct btrfs_root_item *root_item = &root->root_item;
	struct walk_control *wc;
	struct btrfs_key key;
	int err = 0;
	int ret;
	int level;

	path = btrfs_alloc_path();
	BUG_ON(!path);

	wc = kzalloc(sizeof(*wc), GFP_NOFS);
	BUG_ON(!wc);

	trans = btrfs_start_transaction(tree_root, 1);

	if (btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
		/* first attempt: start the walk at the root node */
		level = btrfs_header_level(root->node);
		path->nodes[level] = btrfs_lock_root_node(root);
		btrfs_set_lock_blocking(path->nodes[level]);
		path->slots[level] = 0;
		path->locks[level] = 1;
		memset(&wc->update_progress, 0,
		       sizeof(wc->update_progress));
	} else {
		/*
		 * a previous drop was interrupted; resume from the key
		 * and level recorded in the root item
		 */
		btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
		memcpy(&wc->update_progress, &key,
		       sizeof(wc->update_progress));

		level = root_item->drop_level;
		BUG_ON(level == 0);
		path->lowest_level = level;
		ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
		path->lowest_level = 0;
		if (ret < 0) {
			err = ret;
			goto out;
		}
		WARN_ON(ret > 0);

		/*
		 * unlock our path, this is safe because only this
		 * function is allowed to delete this snapshot
		 */
		btrfs_unlock_up_safe(path, 0);

		/*
		 * re-lock the nodes above drop_level and reload their
		 * refcount/flags into wc so the walk can continue
		 */
		level = btrfs_header_level(root->node);
		while (1) {
			btrfs_tree_lock(path->nodes[level]);
			btrfs_set_lock_blocking(path->nodes[level]);

			ret = btrfs_lookup_extent_info(trans, root,
						path->nodes[level]->start,
						path->nodes[level]->len,
						&wc->refs[level],
						&wc->flags[level]);
			BUG_ON(ret);
			BUG_ON(wc->refs[level] == 0);

			if (level == root_item->drop_level)
				break;

			btrfs_tree_unlock(path->nodes[level]);
			WARN_ON(wc->refs[level] != 1);
			level--;
		}
	}

	wc->level = level;
	wc->shared_level = -1;
	wc->stage = DROP_REFERENCE;
	wc->update_ref = update_ref;
	wc->keep_locks = 0;
	wc->reada_count = BTRFS_NODEPTRS_PER_BLOCK(root);

	while (1) {
		ret = walk_down_tree(trans, root, path, wc);
		if (ret < 0) {
			err = ret;
			break;
		}

		ret = walk_up_tree(trans, root, path, wc, BTRFS_MAX_LEVEL);
		if (ret < 0) {
			err = ret;
			break;
		}

		if (ret > 0) {
			/* walk_up_tree returned 1: the whole tree is done */
			BUG_ON(wc->stage != DROP_REFERENCE);
			break;
		}

		if (wc->stage == DROP_REFERENCE) {
			/* record progress so an interrupted drop can resume */
			level = wc->level;
			btrfs_node_key(path->nodes[level],
				       &root_item->drop_progress,
				       path->slots[level]);
			root_item->drop_level = level;
		}

		BUG_ON(wc->level == 0);
		if (trans->transaction->in_commit ||
		    trans->transaction->delayed_refs.flushing) {
			/*
			 * a commit is pending: persist progress and start a
			 * fresh transaction so we don't block the commit
			 */
			ret = btrfs_update_root(trans, tree_root,
						&root->root_key,
						root_item);
			BUG_ON(ret);

			btrfs_end_transaction(trans, tree_root);
			trans = btrfs_start_transaction(tree_root, 1);
		} else {
			unsigned long update;
			update = trans->delayed_ref_updates;
			trans->delayed_ref_updates = 0;
			if (update)
				btrfs_run_delayed_refs(trans, tree_root,
						       update);
		}
	}
	btrfs_release_path(root, path);
	BUG_ON(err);

	/* the tree is empty; remove its root item */
	ret = btrfs_del_root(trans, tree_root, &root->root_key);
	BUG_ON(ret);

	if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
		/* drop the orphan item recorded for this snapshot, if any */
		ret = btrfs_find_last_root(tree_root, root->root_key.objectid,
					   NULL, NULL);
		BUG_ON(ret < 0);
		if (ret > 0) {
			ret = btrfs_del_orphan_item(trans, tree_root,
						    root->root_key.objectid);
			BUG_ON(ret);
		}
	}

	if (root->in_radix) {
		btrfs_free_fs_root(tree_root->fs_info, root);
	} else {
		free_extent_buffer(root->node);
		free_extent_buffer(root->commit_root);
		kfree(root);
	}
out:
	btrfs_end_transaction(trans, tree_root);
	kfree(wc);
	btrfs_free_path(path);
	return err;
}
Chris Mason9078a3e2007-04-26 16:46:15 -04005699
Yan Zheng2c47e6052009-06-27 21:07:35 -04005700/*
5701 * drop subtree rooted at tree block 'node'.
5702 *
5703 * NOTE: this function will unlock and release tree block 'node'
5704 */
Yan Zhengf82d02d2008-10-29 14:49:05 -04005705int btrfs_drop_subtree(struct btrfs_trans_handle *trans,
5706 struct btrfs_root *root,
5707 struct extent_buffer *node,
5708 struct extent_buffer *parent)
5709{
5710 struct btrfs_path *path;
Yan Zheng2c47e6052009-06-27 21:07:35 -04005711 struct walk_control *wc;
Yan Zhengf82d02d2008-10-29 14:49:05 -04005712 int level;
5713 int parent_level;
5714 int ret = 0;
5715 int wret;
5716
Yan Zheng2c47e6052009-06-27 21:07:35 -04005717 BUG_ON(root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID);
5718
Yan Zhengf82d02d2008-10-29 14:49:05 -04005719 path = btrfs_alloc_path();
5720 BUG_ON(!path);
5721
Yan Zheng2c47e6052009-06-27 21:07:35 -04005722 wc = kzalloc(sizeof(*wc), GFP_NOFS);
5723 BUG_ON(!wc);
5724
Chris Masonb9447ef82009-03-09 11:45:38 -04005725 btrfs_assert_tree_locked(parent);
Yan Zhengf82d02d2008-10-29 14:49:05 -04005726 parent_level = btrfs_header_level(parent);
5727 extent_buffer_get(parent);
5728 path->nodes[parent_level] = parent;
5729 path->slots[parent_level] = btrfs_header_nritems(parent);
5730
Chris Masonb9447ef82009-03-09 11:45:38 -04005731 btrfs_assert_tree_locked(node);
Yan Zhengf82d02d2008-10-29 14:49:05 -04005732 level = btrfs_header_level(node);
Yan Zhengf82d02d2008-10-29 14:49:05 -04005733 path->nodes[level] = node;
5734 path->slots[level] = 0;
Yan Zheng2c47e6052009-06-27 21:07:35 -04005735 path->locks[level] = 1;
5736
5737 wc->refs[parent_level] = 1;
5738 wc->flags[parent_level] = BTRFS_BLOCK_FLAG_FULL_BACKREF;
5739 wc->level = level;
5740 wc->shared_level = -1;
5741 wc->stage = DROP_REFERENCE;
5742 wc->update_ref = 0;
5743 wc->keep_locks = 1;
Yan, Zheng1c4850e2009-09-21 15:55:59 -04005744 wc->reada_count = BTRFS_NODEPTRS_PER_BLOCK(root);
Yan Zhengf82d02d2008-10-29 14:49:05 -04005745
5746 while (1) {
Yan Zheng2c47e6052009-06-27 21:07:35 -04005747 wret = walk_down_tree(trans, root, path, wc);
5748 if (wret < 0) {
Yan Zhengf82d02d2008-10-29 14:49:05 -04005749 ret = wret;
Yan Zhengf82d02d2008-10-29 14:49:05 -04005750 break;
Yan Zheng2c47e6052009-06-27 21:07:35 -04005751 }
Yan Zhengf82d02d2008-10-29 14:49:05 -04005752
Yan Zheng2c47e6052009-06-27 21:07:35 -04005753 wret = walk_up_tree(trans, root, path, wc, parent_level);
Yan Zhengf82d02d2008-10-29 14:49:05 -04005754 if (wret < 0)
5755 ret = wret;
5756 if (wret != 0)
5757 break;
5758 }
5759
Yan Zheng2c47e6052009-06-27 21:07:35 -04005760 kfree(wc);
Yan Zhengf82d02d2008-10-29 14:49:05 -04005761 btrfs_free_path(path);
5762 return ret;
5763}
5764
Yan Zheng5d4f98a2009-06-10 10:45:14 -04005765#if 0
/*
 * Last page index to read ahead: 'nr' pages starting at 'start',
 * clamped so we never read past 'last'.
 */
static unsigned long calc_ra(unsigned long start, unsigned long last,
			     unsigned long nr)
{
	unsigned long end = start + nr - 1;

	return end < last ? end : last;
}
5771
Chris Masond3977122009-01-05 21:25:51 -05005772static noinline int relocate_inode_pages(struct inode *inode, u64 start,
Chris Mason98ed5172008-01-03 10:01:48 -05005773 u64 len)
Chris Masonedbd8d42007-12-21 16:27:24 -05005774{
5775 u64 page_start;
5776 u64 page_end;
Zheng Yan1a40e232008-09-26 10:09:34 -04005777 unsigned long first_index;
Chris Masonedbd8d42007-12-21 16:27:24 -05005778 unsigned long last_index;
Chris Masonedbd8d42007-12-21 16:27:24 -05005779 unsigned long i;
5780 struct page *page;
Chris Masond1310b22008-01-24 16:13:08 -05005781 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
Chris Mason4313b392008-01-03 09:08:48 -05005782 struct file_ra_state *ra;
Chris Mason3eaa2882008-07-24 11:57:52 -04005783 struct btrfs_ordered_extent *ordered;
Zheng Yan1a40e232008-09-26 10:09:34 -04005784 unsigned int total_read = 0;
5785 unsigned int total_dirty = 0;
5786 int ret = 0;
Chris Mason4313b392008-01-03 09:08:48 -05005787
5788 ra = kzalloc(sizeof(*ra), GFP_NOFS);
Chris Masonedbd8d42007-12-21 16:27:24 -05005789
5790 mutex_lock(&inode->i_mutex);
Zheng Yan1a40e232008-09-26 10:09:34 -04005791 first_index = start >> PAGE_CACHE_SHIFT;
Chris Masonedbd8d42007-12-21 16:27:24 -05005792 last_index = (start + len - 1) >> PAGE_CACHE_SHIFT;
5793
Zheng Yan1a40e232008-09-26 10:09:34 -04005794 /* make sure the dirty trick played by the caller work */
5795 ret = invalidate_inode_pages2_range(inode->i_mapping,
5796 first_index, last_index);
5797 if (ret)
5798 goto out_unlock;
Chris Mason8e7bf942008-04-28 09:02:36 -04005799
Chris Mason4313b392008-01-03 09:08:48 -05005800 file_ra_state_init(ra, inode->i_mapping);
Chris Masonedbd8d42007-12-21 16:27:24 -05005801
Zheng Yan1a40e232008-09-26 10:09:34 -04005802 for (i = first_index ; i <= last_index; i++) {
5803 if (total_read % ra->ra_pages == 0) {
Chris Mason8e7bf942008-04-28 09:02:36 -04005804 btrfs_force_ra(inode->i_mapping, ra, NULL, i,
Zheng Yan1a40e232008-09-26 10:09:34 -04005805 calc_ra(i, last_index, ra->ra_pages));
Chris Mason8e7bf942008-04-28 09:02:36 -04005806 }
5807 total_read++;
Chris Mason3eaa2882008-07-24 11:57:52 -04005808again:
5809 if (((u64)i << PAGE_CACHE_SHIFT) > i_size_read(inode))
Zheng Yan1a40e232008-09-26 10:09:34 -04005810 BUG_ON(1);
Chris Masonedbd8d42007-12-21 16:27:24 -05005811 page = grab_cache_page(inode->i_mapping, i);
Chris Masona061fc82008-05-07 11:43:44 -04005812 if (!page) {
Zheng Yan1a40e232008-09-26 10:09:34 -04005813 ret = -ENOMEM;
Chris Masonedbd8d42007-12-21 16:27:24 -05005814 goto out_unlock;
Chris Masona061fc82008-05-07 11:43:44 -04005815 }
Chris Masonedbd8d42007-12-21 16:27:24 -05005816 if (!PageUptodate(page)) {
5817 btrfs_readpage(NULL, page);
5818 lock_page(page);
5819 if (!PageUptodate(page)) {
5820 unlock_page(page);
5821 page_cache_release(page);
Zheng Yan1a40e232008-09-26 10:09:34 -04005822 ret = -EIO;
Chris Masonedbd8d42007-12-21 16:27:24 -05005823 goto out_unlock;
5824 }
5825 }
Chris Masonec44a352008-04-28 15:29:52 -04005826 wait_on_page_writeback(page);
Chris Mason3eaa2882008-07-24 11:57:52 -04005827
Chris Masonedbd8d42007-12-21 16:27:24 -05005828 page_start = (u64)page->index << PAGE_CACHE_SHIFT;
5829 page_end = page_start + PAGE_CACHE_SIZE - 1;
Chris Masond1310b22008-01-24 16:13:08 -05005830 lock_extent(io_tree, page_start, page_end, GFP_NOFS);
Chris Masonedbd8d42007-12-21 16:27:24 -05005831
Chris Mason3eaa2882008-07-24 11:57:52 -04005832 ordered = btrfs_lookup_ordered_extent(inode, page_start);
5833 if (ordered) {
5834 unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
5835 unlock_page(page);
5836 page_cache_release(page);
5837 btrfs_start_ordered_extent(inode, ordered, 1);
5838 btrfs_put_ordered_extent(ordered);
5839 goto again;
5840 }
5841 set_page_extent_mapped(page);
5842
Zheng Yan1a40e232008-09-26 10:09:34 -04005843 if (i == first_index)
5844 set_extent_bits(io_tree, page_start, page_end,
5845 EXTENT_BOUNDARY, GFP_NOFS);
Yan Zheng1f80e4d2008-12-19 10:59:04 -05005846 btrfs_set_extent_delalloc(inode, page_start, page_end);
Zheng Yan1a40e232008-09-26 10:09:34 -04005847
Chris Masona061fc82008-05-07 11:43:44 -04005848 set_page_dirty(page);
Zheng Yan1a40e232008-09-26 10:09:34 -04005849 total_dirty++;
Chris Masonedbd8d42007-12-21 16:27:24 -05005850
Chris Masond1310b22008-01-24 16:13:08 -05005851 unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
Chris Masonedbd8d42007-12-21 16:27:24 -05005852 unlock_page(page);
5853 page_cache_release(page);
5854 }
5855
5856out_unlock:
Chris Masonec44a352008-04-28 15:29:52 -04005857 kfree(ra);
Chris Masonedbd8d42007-12-21 16:27:24 -05005858 mutex_unlock(&inode->i_mutex);
Zheng Yan1a40e232008-09-26 10:09:34 -04005859 balance_dirty_pages_ratelimited_nr(inode->i_mapping, total_dirty);
Chris Masonbf4ef672008-05-08 13:26:18 -04005860 return ret;
5861}
5862
/*
 * Insert a pinned extent mapping for the relocated range of reloc_inode
 * so btrfs_readpage() reads the data from the old extent location, then
 * dirty the pages via relocate_inode_pages().  Returns the result of
 * relocate_inode_pages().
 */
static noinline int relocate_data_extent(struct inode *reloc_inode,
					 struct btrfs_key *extent_key,
					 u64 offset)
{
	struct btrfs_root *root = BTRFS_I(reloc_inode)->root;
	struct extent_map_tree *em_tree = &BTRFS_I(reloc_inode)->extent_tree;
	struct extent_map *em;
	u64 start = extent_key->objectid - offset;
	u64 end = start + extent_key->offset - 1;

	em = alloc_extent_map(GFP_NOFS);
	BUG_ON(!em || IS_ERR(em));

	/* map [start, end] of the inode to the extent's current bytenr */
	em->start = start;
	em->len = extent_key->offset;
	em->block_len = extent_key->offset;
	em->block_start = extent_key->objectid;
	em->bdev = root->fs_info->fs_devices->latest_bdev;
	set_bit(EXTENT_FLAG_PINNED, &em->flags);

	/* setup extent map to cheat btrfs_readpage */
	lock_extent(&BTRFS_I(reloc_inode)->io_tree, start, end, GFP_NOFS);
	while (1) {
		int ret;
		write_lock(&em_tree->lock);
		ret = add_extent_mapping(em_tree, em);
		write_unlock(&em_tree->lock);
		if (ret != -EEXIST) {
			free_extent_map(em);
			break;
		}
		/* an old mapping is in the way; drop it and retry */
		btrfs_drop_extent_cache(reloc_inode, start, end, 0);
	}
	unlock_extent(&BTRFS_I(reloc_inode)->io_tree, start, end, GFP_NOFS);

	return relocate_inode_pages(reloc_inode, start, extent_key->offset);
}
5900
/*
 * State for walking the backrefs of one extent from the extent itself
 * up towards the tree root that references it (see __next_ref_path()).
 */
struct btrfs_ref_path {
	u64 extent_start;	/* bytenr of the extent being traced */
	u64 nodes[BTRFS_MAX_LEVEL];	/* bytenr of the referencing block at each level */
	u64 root_objectid;	/* root the current path ends at */
	u64 root_generation;	/* generation of the backref into that root */
	u64 owner_objectid;	/* owner from the lowest-level backref item */
	u32 num_refs;		/* ref count from the lowest-level backref item */
	int lowest_level;	/* level the walk starts from (-1 for data extents) */
	int current_level;	/* level the walk is currently at */
	int shared_level;	/* lowest level where an extra backref was found */

	/* NOTE(review): the two fields below are not touched in this file
	 * chunk; presumably filled in by the relocation code — verify. */
	struct btrfs_key node_keys[BTRFS_MAX_LEVEL];
	u64 new_nodes[BTRFS_MAX_LEVEL];
};
5915
/*
 * In-memory copy of a btrfs_file_extent_item's fields; each field
 * mirrors the btrfs_file_extent_*() accessor of the same name
 * (see get_new_locations()).
 */
struct disk_extent {
	u64 ram_bytes;
	u64 disk_bytenr;
	u64 disk_num_bytes;
	u64 offset;
	u64 num_bytes;
	u8 compression;
	u8 encryption;
	u16 other_encoding;
};
5926
5927static int is_cowonly_root(u64 root_objectid)
5928{
5929 if (root_objectid == BTRFS_ROOT_TREE_OBJECTID ||
5930 root_objectid == BTRFS_EXTENT_TREE_OBJECTID ||
5931 root_objectid == BTRFS_CHUNK_TREE_OBJECTID ||
5932 root_objectid == BTRFS_DEV_TREE_OBJECTID ||
Yan Zheng0403e472008-12-10 20:32:51 -05005933 root_objectid == BTRFS_TREE_LOG_OBJECTID ||
5934 root_objectid == BTRFS_CSUM_TREE_OBJECTID)
Zheng Yan1a40e232008-09-26 10:09:34 -04005935 return 1;
5936 return 0;
5937}
5938
/*
 * Advance a btrfs_ref_path by one step through the extent backrefs of
 * ref_path->extent_start.
 *
 * With first_time set, the walk is initialized and climbs from the
 * extent up towards a tree root, recording the chain of referencing
 * block bytenrs in ref_path->nodes[].  On later calls it backtracks
 * (walk_down) to the next unvisited backref and climbs up again.
 *
 * Returns 0 when a path to a root was found (root_objectid and
 * root_generation are filled in), 1 when all paths are exhausted,
 * negative errno on error.
 */
static noinline int __next_ref_path(struct btrfs_trans_handle *trans,
				    struct btrfs_root *extent_root,
				    struct btrfs_ref_path *ref_path,
				    int first_time)
{
	struct extent_buffer *leaf;
	struct btrfs_path *path;
	struct btrfs_extent_ref *ref;
	struct btrfs_key key;
	struct btrfs_key found_key;
	u64 bytenr;
	u32 nritems;
	int level;
	int ret = 1;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	if (first_time) {
		ref_path->lowest_level = -1;
		ref_path->current_level = -1;
		ref_path->shared_level = -1;
		goto walk_up;
	}
walk_down:
	/* backtrack towards the extent looking for another backref */
	level = ref_path->current_level - 1;
	while (level >= -1) {
		u64 parent;
		if (level < ref_path->lowest_level)
			break;

		/* level -1 stands for the extent itself */
		if (level >= 0)
			bytenr = ref_path->nodes[level];
		else
			bytenr = ref_path->extent_start;
		BUG_ON(bytenr == 0);

		parent = ref_path->nodes[level + 1];
		ref_path->nodes[level + 1] = 0;
		ref_path->current_level = level;
		BUG_ON(parent == 0);

		/* search for a backref with offset > parent */
		key.objectid = bytenr;
		key.offset = parent + 1;
		key.type = BTRFS_EXTENT_REF_KEY;

		ret = btrfs_search_slot(trans, extent_root, &key, path, 0, 0);
		if (ret < 0)
			goto out;
		BUG_ON(ret == 0);

		leaf = path->nodes[0];
		nritems = btrfs_header_nritems(leaf);
		if (path->slots[0] >= nritems) {
			ret = btrfs_next_leaf(extent_root, path);
			if (ret < 0)
				goto out;
			if (ret > 0)
				goto next;
			leaf = path->nodes[0];
		}

		btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
		if (found_key.objectid == bytenr &&
		    found_key.type == BTRFS_EXTENT_REF_KEY) {
			/* another backref for this block; resume climbing */
			if (level < ref_path->shared_level)
				ref_path->shared_level = level;
			goto found;
		}
next:
		level--;
		btrfs_release_path(extent_root, path);
		cond_resched();
	}
	/* reached lowest level */
	ret = 1;
	goto out;
walk_up:
	/* climb from current_level towards a tree root */
	level = ref_path->current_level;
	while (level < BTRFS_MAX_LEVEL - 1) {
		u64 ref_objectid;

		if (level >= 0)
			bytenr = ref_path->nodes[level];
		else
			bytenr = ref_path->extent_start;

		BUG_ON(bytenr == 0);

		/* find the first backref of this block */
		key.objectid = bytenr;
		key.offset = 0;
		key.type = BTRFS_EXTENT_REF_KEY;

		ret = btrfs_search_slot(trans, extent_root, &key, path, 0, 0);
		if (ret < 0)
			goto out;

		leaf = path->nodes[0];
		nritems = btrfs_header_nritems(leaf);
		if (path->slots[0] >= nritems) {
			ret = btrfs_next_leaf(extent_root, path);
			if (ret < 0)
				goto out;
			if (ret > 0) {
				/* the extent was freed by someone */
				if (ref_path->lowest_level == level)
					goto out;
				btrfs_release_path(extent_root, path);
				goto walk_down;
			}
			leaf = path->nodes[0];
		}

		btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
		if (found_key.objectid != bytenr ||
		    found_key.type != BTRFS_EXTENT_REF_KEY) {
			/* the extent was freed by someone */
			if (ref_path->lowest_level == level) {
				ret = 1;
				goto out;
			}
			btrfs_release_path(extent_root, path);
			goto walk_down;
		}
found:
		ref = btrfs_item_ptr(leaf, path->slots[0],
				     struct btrfs_extent_ref);
		ref_objectid = btrfs_ref_objectid(leaf, ref);
		if (ref_objectid < BTRFS_FIRST_FREE_OBJECTID) {
			/* owner below FIRST_FREE is a tree block level */
			if (first_time) {
				level = (int)ref_objectid;
				BUG_ON(level >= BTRFS_MAX_LEVEL);
				ref_path->lowest_level = level;
				ref_path->current_level = level;
				ref_path->nodes[level] = bytenr;
			} else {
				WARN_ON(ref_objectid != level);
			}
		} else {
			WARN_ON(level != -1);
		}
		first_time = 0;

		if (ref_path->lowest_level == level) {
			ref_path->owner_objectid = ref_objectid;
			ref_path->num_refs = btrfs_ref_num_refs(leaf, ref);
		}

		/*
		 * the block is tree root or the block isn't in reference
		 * counted tree.
		 */
		if (found_key.objectid == found_key.offset ||
		    is_cowonly_root(btrfs_ref_root(leaf, ref))) {
			ref_path->root_objectid = btrfs_ref_root(leaf, ref);
			ref_path->root_generation =
				btrfs_ref_generation(leaf, ref);
			if (level < 0) {
				/* special reference from the tree log */
				ref_path->nodes[0] = found_key.offset;
				ref_path->current_level = 0;
			}
			ret = 0;
			goto out;
		}

		level++;
		BUG_ON(ref_path->nodes[level] != 0);
		ref_path->nodes[level] = found_key.offset;
		ref_path->current_level = level;

		/*
		 * the reference was created in the running transaction,
		 * no need to continue walking up.
		 */
		if (btrfs_ref_generation(leaf, ref) == trans->transid) {
			ref_path->root_objectid = btrfs_ref_root(leaf, ref);
			ref_path->root_generation =
				btrfs_ref_generation(leaf, ref);
			ret = 0;
			goto out;
		}

		btrfs_release_path(extent_root, path);
		cond_resched();
	}
	/* reached max tree level, but no tree root found. */
	BUG();
out:
	btrfs_free_path(path);
	return ret;
}
6132
/*
 * Initialize ref_path for the extent at extent_start and return the
 * first backref path via __next_ref_path() (first_time == 1).
 */
static int btrfs_first_ref_path(struct btrfs_trans_handle *trans,
			       struct btrfs_root *extent_root,
			       struct btrfs_ref_path *ref_path,
			       u64 extent_start)
{
	memset(ref_path, 0, sizeof(*ref_path));
	ref_path->extent_start = extent_start;

	return __next_ref_path(trans, extent_root, ref_path, 1);
}
6143
/*
 * Advance an already-initialized ref_path to the next backref path
 * (first_time == 0).  Same return convention as __next_ref_path().
 */
static int btrfs_next_ref_path(struct btrfs_trans_handle *trans,
			       struct btrfs_root *extent_root,
			       struct btrfs_ref_path *ref_path)
{
	return __next_ref_path(trans, extent_root, ref_path, 0);
}
6150
Chris Masond3977122009-01-05 21:25:51 -05006151static noinline int get_new_locations(struct inode *reloc_inode,
Zheng Yan1a40e232008-09-26 10:09:34 -04006152 struct btrfs_key *extent_key,
6153 u64 offset, int no_fragment,
6154 struct disk_extent **extents,
6155 int *nr_extents)
6156{
6157 struct btrfs_root *root = BTRFS_I(reloc_inode)->root;
6158 struct btrfs_path *path;
6159 struct btrfs_file_extent_item *fi;
6160 struct extent_buffer *leaf;
6161 struct disk_extent *exts = *extents;
6162 struct btrfs_key found_key;
6163 u64 cur_pos;
6164 u64 last_byte;
6165 u32 nritems;
6166 int nr = 0;
6167 int max = *nr_extents;
6168 int ret;
6169
6170 WARN_ON(!no_fragment && *extents);
6171 if (!exts) {
6172 max = 1;
6173 exts = kmalloc(sizeof(*exts) * max, GFP_NOFS);
6174 if (!exts)
6175 return -ENOMEM;
6176 }
6177
6178 path = btrfs_alloc_path();
6179 BUG_ON(!path);
6180
6181 cur_pos = extent_key->objectid - offset;
6182 last_byte = extent_key->objectid + extent_key->offset;
6183 ret = btrfs_lookup_file_extent(NULL, root, path, reloc_inode->i_ino,
6184 cur_pos, 0);
6185 if (ret < 0)
6186 goto out;
6187 if (ret > 0) {
6188 ret = -ENOENT;
6189 goto out;
6190 }
6191
6192 while (1) {
6193 leaf = path->nodes[0];
6194 nritems = btrfs_header_nritems(leaf);
6195 if (path->slots[0] >= nritems) {
6196 ret = btrfs_next_leaf(root, path);
6197 if (ret < 0)
6198 goto out;
6199 if (ret > 0)
6200 break;
6201 leaf = path->nodes[0];
6202 }
6203
6204 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
6205 if (found_key.offset != cur_pos ||
6206 found_key.type != BTRFS_EXTENT_DATA_KEY ||
6207 found_key.objectid != reloc_inode->i_ino)
6208 break;
6209
6210 fi = btrfs_item_ptr(leaf, path->slots[0],
6211 struct btrfs_file_extent_item);
6212 if (btrfs_file_extent_type(leaf, fi) !=
6213 BTRFS_FILE_EXTENT_REG ||
6214 btrfs_file_extent_disk_bytenr(leaf, fi) == 0)
6215 break;
6216
6217 if (nr == max) {
6218 struct disk_extent *old = exts;
6219 max *= 2;
6220 exts = kzalloc(sizeof(*exts) * max, GFP_NOFS);
6221 memcpy(exts, old, sizeof(*exts) * nr);
6222 if (old != *extents)
6223 kfree(old);
6224 }
6225
6226 exts[nr].disk_bytenr =
6227 btrfs_file_extent_disk_bytenr(leaf, fi);
6228 exts[nr].disk_num_bytes =
6229 btrfs_file_extent_disk_num_bytes(leaf, fi);
6230 exts[nr].offset = btrfs_file_extent_offset(leaf, fi);
6231 exts[nr].num_bytes = btrfs_file_extent_num_bytes(leaf, fi);
Chris Masonc8b97812008-10-29 14:49:59 -04006232 exts[nr].ram_bytes = btrfs_file_extent_ram_bytes(leaf, fi);
6233 exts[nr].compression = btrfs_file_extent_compression(leaf, fi);
6234 exts[nr].encryption = btrfs_file_extent_encryption(leaf, fi);
6235 exts[nr].other_encoding = btrfs_file_extent_other_encoding(leaf,
6236 fi);
Yan Zhengd899e052008-10-30 14:25:28 -04006237 BUG_ON(exts[nr].offset > 0);
6238 BUG_ON(exts[nr].compression || exts[nr].encryption);
6239 BUG_ON(exts[nr].num_bytes != exts[nr].disk_num_bytes);
Zheng Yan1a40e232008-09-26 10:09:34 -04006240
6241 cur_pos += exts[nr].num_bytes;
6242 nr++;
6243
6244 if (cur_pos + offset >= last_byte)
6245 break;
6246
6247 if (no_fragment) {
6248 ret = 1;
6249 goto out;
6250 }
6251 path->slots[0]++;
6252 }
6253
Yan Zheng1f80e4d2008-12-19 10:59:04 -05006254 BUG_ON(cur_pos + offset > last_byte);
Zheng Yan1a40e232008-09-26 10:09:34 -04006255 if (cur_pos + offset < last_byte) {
6256 ret = -ENOENT;
6257 goto out;
Chris Masonedbd8d42007-12-21 16:27:24 -05006258 }
6259 ret = 0;
6260out:
Zheng Yan1a40e232008-09-26 10:09:34 -04006261 btrfs_free_path(path);
6262 if (ret) {
6263 if (exts != *extents)
6264 kfree(exts);
6265 } else {
6266 *extents = exts;
6267 *nr_extents = nr;
6268 }
6269 return ret;
6270}
6271
/*
 * Walk the file extent items in 'root' that point at the extent described
 * by 'extent_key' and switch them over to the replacement location(s) in
 * 'new_extents'.
 *
 * Starting from 'leaf_key' (clamped to ref_path->owner_objectid when the
 * path is rooted at a single file), each matching BTRFS_EXTENT_DATA_KEY
 * item is processed under both the inode's i_mutex and an io_tree range
 * lock so that concurrent writes and ordered extents cannot race with the
 * pointer swap.  Only the nr_extents == 1 in-place update is supported;
 * the multi-extent split path is compiled out below.
 *
 * Returns 0 on success or a negative errno from the tree search.
 */
static noinline int replace_one_extent(struct btrfs_trans_handle *trans,
				struct btrfs_root *root,
				struct btrfs_path *path,
				struct btrfs_key *extent_key,
				struct btrfs_key *leaf_key,
				struct btrfs_ref_path *ref_path,
				struct disk_extent *new_extents,
				int nr_extents)
{
	struct extent_buffer *leaf;
	struct btrfs_file_extent_item *fi;
	struct inode *inode = NULL;
	struct btrfs_key key;
	u64 lock_start = 0;
	u64 lock_end = 0;
	u64 num_bytes;
	u64 ext_offset;
	u64 search_end = (u64)-1;	/* set from ram_bytes once known */
	u32 nritems;
	int nr_scaned = 0;		/* leaves scanned past the first */
	int extent_locked = 0;		/* io_tree range lock held? */
	int extent_type;
	int ret;

	memcpy(&key, leaf_key, sizeof(key));
	if (ref_path->owner_objectid != BTRFS_MULTIPLE_OBJECTIDS) {
		/*
		 * single-file reference path: never start the search
		 * before the owner's first file extent item.
		 */
		if (key.objectid < ref_path->owner_objectid ||
		    (key.objectid == ref_path->owner_objectid &&
		     key.type < BTRFS_EXTENT_DATA_KEY)) {
			key.objectid = ref_path->owner_objectid;
			key.type = BTRFS_EXTENT_DATA_KEY;
			key.offset = 0;
		}
	}

	while (1) {
		ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
		if (ret < 0)
			goto out;

		leaf = path->nodes[0];
		nritems = btrfs_header_nritems(leaf);
next:
		if (extent_locked && ret > 0) {
			/*
			 * the file extent item was modified by someone
			 * before the extent got locked.
			 */
			unlock_extent(&BTRFS_I(inode)->io_tree, lock_start,
				      lock_end, GFP_NOFS);
			extent_locked = 0;
		}

		if (path->slots[0] >= nritems) {
			/* give up after scanning a couple of extra leaves */
			if (++nr_scaned > 2)
				break;

			BUG_ON(extent_locked);
			ret = btrfs_next_leaf(root, path);
			if (ret < 0)
				goto out;
			if (ret > 0)
				break;
			leaf = path->nodes[0];
			nritems = btrfs_header_nritems(leaf);
		}

		btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);

		if (ref_path->owner_objectid != BTRFS_MULTIPLE_OBJECTIDS) {
			/* walked past the owner file or its extent range */
			if ((key.objectid > ref_path->owner_objectid) ||
			    (key.objectid == ref_path->owner_objectid &&
			     key.type > BTRFS_EXTENT_DATA_KEY) ||
			    key.offset >= search_end)
				break;
		}

		if (inode && key.objectid != inode->i_ino) {
			/* moved on to a different file: drop cached inode */
			BUG_ON(extent_locked);
			btrfs_release_path(root, path);
			mutex_unlock(&inode->i_mutex);
			iput(inode);
			inode = NULL;
			continue;
		}

		if (key.type != BTRFS_EXTENT_DATA_KEY) {
			path->slots[0]++;
			ret = 1;
			goto next;
		}
		fi = btrfs_item_ptr(leaf, path->slots[0],
				    struct btrfs_file_extent_item);
		extent_type = btrfs_file_extent_type(leaf, fi);
		/* only regular/prealloc extents pointing at our extent */
		if ((extent_type != BTRFS_FILE_EXTENT_REG &&
		     extent_type != BTRFS_FILE_EXTENT_PREALLOC) ||
		    (btrfs_file_extent_disk_bytenr(leaf, fi) !=
		     extent_key->objectid)) {
			path->slots[0]++;
			ret = 1;
			goto next;
		}

		num_bytes = btrfs_file_extent_num_bytes(leaf, fi);
		ext_offset = btrfs_file_extent_offset(leaf, fi);

		if (search_end == (u64)-1) {
			/*
			 * first match fixes the file range that can still
			 * reference this extent (full ram_bytes span).
			 */
			search_end = key.offset - ext_offset +
				btrfs_file_extent_ram_bytes(leaf, fi);
		}

		if (!extent_locked) {
			lock_start = key.offset;
			lock_end = lock_start + num_bytes - 1;
		} else {
			/* held lock does not cover this item: relock below */
			if (lock_start > key.offset ||
			    lock_end + 1 < key.offset + num_bytes) {
				unlock_extent(&BTRFS_I(inode)->io_tree,
					      lock_start, lock_end, GFP_NOFS);
				extent_locked = 0;
			}
		}

		if (!inode) {
			btrfs_release_path(root, path);

			inode = btrfs_iget_locked(root->fs_info->sb,
						  key.objectid, root);
			if (inode->i_state & I_NEW) {
				/* fill in a freshly allocated inode */
				BTRFS_I(inode)->root = root;
				BTRFS_I(inode)->location.objectid =
					key.objectid;
				BTRFS_I(inode)->location.type =
					BTRFS_INODE_ITEM_KEY;
				BTRFS_I(inode)->location.offset = 0;
				btrfs_read_locked_inode(inode);
				unlock_new_inode(inode);
			}
			/*
			 * some code call btrfs_commit_transaction while
			 * holding the i_mutex, so we can't use mutex_lock
			 * here.
			 */
			if (is_bad_inode(inode) ||
			    !mutex_trylock(&inode->i_mutex)) {
				iput(inode);
				inode = NULL;
				key.offset = (u64)-1;
				goto skip;
			}
		}

		if (!extent_locked) {
			struct btrfs_ordered_extent *ordered;

			btrfs_release_path(root, path);

			lock_extent(&BTRFS_I(inode)->io_tree, lock_start,
				    lock_end, GFP_NOFS);
			ordered = btrfs_lookup_first_ordered_extent(inode,
								    lock_end);
			if (ordered &&
			    ordered->file_offset <= lock_end &&
			    ordered->file_offset + ordered->len > lock_start) {
				/*
				 * in-flight writeback overlaps the range;
				 * wait for it and retry this file offset.
				 */
				unlock_extent(&BTRFS_I(inode)->io_tree,
					      lock_start, lock_end, GFP_NOFS);
				btrfs_start_ordered_extent(inode, ordered, 1);
				btrfs_put_ordered_extent(ordered);
				key.offset += num_bytes;
				goto skip;
			}
			if (ordered)
				btrfs_put_ordered_extent(ordered);

			extent_locked = 1;
			/* re-search with the lock held */
			continue;
		}

		if (nr_extents == 1) {
			/* update extent pointer in place */
			btrfs_set_file_extent_disk_bytenr(leaf, fi,
						new_extents[0].disk_bytenr);
			btrfs_set_file_extent_disk_num_bytes(leaf, fi,
						new_extents[0].disk_num_bytes);
			btrfs_mark_buffer_dirty(leaf);

			btrfs_drop_extent_cache(inode, key.offset,
						key.offset + num_bytes - 1, 0);

			/* take a ref on the new extent ... */
			ret = btrfs_inc_extent_ref(trans, root,
						new_extents[0].disk_bytenr,
						new_extents[0].disk_num_bytes,
						leaf->start,
						root->root_key.objectid,
						trans->transid,
						key.objectid);
			BUG_ON(ret);

			/* ... and drop the ref on the old one */
			ret = btrfs_free_extent(trans, root,
						extent_key->objectid,
						extent_key->offset,
						leaf->start,
						btrfs_header_owner(leaf),
						btrfs_header_generation(leaf),
						key.objectid, 0);
			BUG_ON(ret);

			btrfs_release_path(root, path);
			key.offset += num_bytes;
		} else {
			/* multi-extent replacement is not supported */
			BUG_ON(1);
#if 0
			u64 alloc_hint;
			u64 extent_len;
			int i;
			/*
			 * drop old extent pointer at first, then insert the
			 * new pointers one by one
			 */
			btrfs_release_path(root, path);
			ret = btrfs_drop_extents(trans, root, inode, key.offset,
						 key.offset + num_bytes,
						 key.offset, &alloc_hint);
			BUG_ON(ret);

			for (i = 0; i < nr_extents; i++) {
				if (ext_offset >= new_extents[i].num_bytes) {
					ext_offset -= new_extents[i].num_bytes;
					continue;
				}
				extent_len = min(new_extents[i].num_bytes -
						 ext_offset, num_bytes);

				ret = btrfs_insert_empty_item(trans, root,
							      path, &key,
							      sizeof(*fi));
				BUG_ON(ret);

				leaf = path->nodes[0];
				fi = btrfs_item_ptr(leaf, path->slots[0],
						struct btrfs_file_extent_item);
				btrfs_set_file_extent_generation(leaf, fi,
							trans->transid);
				btrfs_set_file_extent_type(leaf, fi,
							BTRFS_FILE_EXTENT_REG);
				btrfs_set_file_extent_disk_bytenr(leaf, fi,
						new_extents[i].disk_bytenr);
				btrfs_set_file_extent_disk_num_bytes(leaf, fi,
						new_extents[i].disk_num_bytes);
				btrfs_set_file_extent_ram_bytes(leaf, fi,
						new_extents[i].ram_bytes);

				btrfs_set_file_extent_compression(leaf, fi,
						new_extents[i].compression);
				btrfs_set_file_extent_encryption(leaf, fi,
						new_extents[i].encryption);
				btrfs_set_file_extent_other_encoding(leaf, fi,
						new_extents[i].other_encoding);

				btrfs_set_file_extent_num_bytes(leaf, fi,
							extent_len);
				ext_offset += new_extents[i].offset;
				btrfs_set_file_extent_offset(leaf, fi,
							ext_offset);
				btrfs_mark_buffer_dirty(leaf);

				btrfs_drop_extent_cache(inode, key.offset,
						key.offset + extent_len - 1, 0);

				ret = btrfs_inc_extent_ref(trans, root,
						new_extents[i].disk_bytenr,
						new_extents[i].disk_num_bytes,
						leaf->start,
						root->root_key.objectid,
						trans->transid, key.objectid);
				BUG_ON(ret);
				btrfs_release_path(root, path);

				inode_add_bytes(inode, extent_len);

				ext_offset = 0;
				num_bytes -= extent_len;
				key.offset += extent_len;

				if (num_bytes == 0)
					break;
			}
			BUG_ON(i >= nr_extents);
#endif
		}

		if (extent_locked) {
			unlock_extent(&BTRFS_I(inode)->io_tree, lock_start,
				      lock_end, GFP_NOFS);
			extent_locked = 0;
		}
skip:
		if (ref_path->owner_objectid != BTRFS_MULTIPLE_OBJECTIDS &&
		    key.offset >= search_end)
			break;

		cond_resched();
	}
	ret = 0;
out:
	btrfs_release_path(root, path);
	if (inode) {
		mutex_unlock(&inode->i_mutex);
		if (extent_locked) {
			unlock_extent(&BTRFS_I(inode)->io_tree, lock_start,
				      lock_end, GFP_NOFS);
		}
		iput(inode);
	}
	return ret;
}
6588
Zheng Yan1a40e232008-09-26 10:09:34 -04006589int btrfs_reloc_tree_cache_ref(struct btrfs_trans_handle *trans,
6590 struct btrfs_root *root,
6591 struct extent_buffer *buf, u64 orig_start)
6592{
6593 int level;
6594 int ret;
6595
6596 BUG_ON(btrfs_header_generation(buf) != trans->transid);
6597 BUG_ON(root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID);
6598
6599 level = btrfs_header_level(buf);
6600 if (level == 0) {
6601 struct btrfs_leaf_ref *ref;
6602 struct btrfs_leaf_ref *orig_ref;
6603
6604 orig_ref = btrfs_lookup_leaf_ref(root, orig_start);
6605 if (!orig_ref)
6606 return -ENOENT;
6607
6608 ref = btrfs_alloc_leaf_ref(root, orig_ref->nritems);
6609 if (!ref) {
6610 btrfs_free_leaf_ref(root, orig_ref);
6611 return -ENOMEM;
6612 }
6613
6614 ref->nritems = orig_ref->nritems;
6615 memcpy(ref->extents, orig_ref->extents,
6616 sizeof(ref->extents[0]) * ref->nritems);
6617
6618 btrfs_free_leaf_ref(root, orig_ref);
6619
6620 ref->root_gen = trans->transid;
6621 ref->bytenr = buf->start;
6622 ref->owner = btrfs_header_owner(buf);
6623 ref->generation = btrfs_header_generation(buf);
Chris Masonbd56b302009-02-04 09:27:02 -05006624
Zheng Yan1a40e232008-09-26 10:09:34 -04006625 ret = btrfs_add_leaf_ref(root, ref, 0);
6626 WARN_ON(ret);
6627 btrfs_free_leaf_ref(root, ref);
6628 }
6629 return 0;
6630}
6631
/*
 * Drop the in-memory extent mappings for every on-disk file extent
 * referenced by 'leaf', looking the owning inodes up in 'target_root'.
 * Each affected file range is locked in the inode's io_tree while its
 * cached mapping is dropped.
 *
 * NOTE(review): the 'group' parameter is not referenced in this body;
 * presumably kept for symmetry with the sibling helpers — confirm.
 *
 * Always returns 0; files whose inode cannot be found are skipped.
 */
static noinline int invalidate_extent_cache(struct btrfs_root *root,
					struct extent_buffer *leaf,
					struct btrfs_block_group_cache *group,
					struct btrfs_root *target_root)
{
	struct btrfs_key key;
	struct inode *inode = NULL;
	struct btrfs_file_extent_item *fi;
	struct extent_state *cached_state = NULL;
	u64 num_bytes;
	u64 skip_objectid = 0;	/* objectid whose inode lookup failed */
	u32 nritems;
	u32 i;

	nritems = btrfs_header_nritems(leaf);
	for (i = 0; i < nritems; i++) {
		btrfs_item_key_to_cpu(leaf, &key, i);
		if (key.objectid == skip_objectid ||
		    key.type != BTRFS_EXTENT_DATA_KEY)
			continue;
		fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item);
		/* inline extents and holes have no cached disk mapping */
		if (btrfs_file_extent_type(leaf, fi) ==
		    BTRFS_FILE_EXTENT_INLINE)
			continue;
		if (btrfs_file_extent_disk_bytenr(leaf, fi) == 0)
			continue;
		/* reuse the cached inode while items stay in the same file */
		if (!inode || inode->i_ino != key.objectid) {
			iput(inode);
			inode = btrfs_ilookup(target_root->fs_info->sb,
					      key.objectid, target_root, 1);
		}
		if (!inode) {
			/* remember the miss so later items skip quickly */
			skip_objectid = key.objectid;
			continue;
		}
		num_bytes = btrfs_file_extent_num_bytes(leaf, fi);

		/* lock the range so readers can't repopulate mid-drop */
		lock_extent_bits(&BTRFS_I(inode)->io_tree, key.offset,
				 key.offset + num_bytes - 1, 0, &cached_state,
				 GFP_NOFS);
		btrfs_drop_extent_cache(inode, key.offset,
					key.offset + num_bytes - 1, 1);
		unlock_extent_cached(&BTRFS_I(inode)->io_tree, key.offset,
				     key.offset + num_bytes - 1, &cached_state,
				     GFP_NOFS);
		cond_resched();
	}
	iput(inode);
	return 0;
}
6682
/*
 * Rewrite every file extent pointer in 'leaf' that falls inside block
 * group 'group' so it points at the relocated copy recorded in
 * 'reloc_inode', updating the cached leaf ref, bumping the ref count of
 * the new extent and dropping the ref on the old one.
 *
 * The leaf's cached ref must exist and its extent list must match the
 * leaf item-for-item (enforced by the BUG_ONs on ext_index).
 *
 * Returns 0; hard failures trip BUG_ON.
 */
static noinline int replace_extents_in_leaf(struct btrfs_trans_handle *trans,
				struct btrfs_root *root,
				struct extent_buffer *leaf,
				struct btrfs_block_group_cache *group,
				struct inode *reloc_inode)
{
	struct btrfs_key key;
	struct btrfs_key extent_key;
	struct btrfs_file_extent_item *fi;
	struct btrfs_leaf_ref *ref;
	struct disk_extent *new_extent;
	u64 bytenr;
	u64 num_bytes;
	u32 nritems;
	u32 i;
	int ext_index;		/* index into ref->extents, tracks non-hole items */
	int nr_extent;
	int ret;

	new_extent = kmalloc(sizeof(*new_extent), GFP_NOFS);
	BUG_ON(!new_extent);

	ref = btrfs_lookup_leaf_ref(root, leaf->start);
	BUG_ON(!ref);

	ext_index = -1;
	nritems = btrfs_header_nritems(leaf);
	for (i = 0; i < nritems; i++) {
		btrfs_item_key_to_cpu(leaf, &key, i);
		if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
			continue;
		fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item);
		if (btrfs_file_extent_type(leaf, fi) ==
		    BTRFS_FILE_EXTENT_INLINE)
			continue;
		bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
		num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
		if (bytenr == 0)
			continue;

		/* this item occupies a slot in the leaf ref's extent list */
		ext_index++;
		/* skip extents that lie outside the block group */
		if (bytenr >= group->key.objectid + group->key.offset ||
		    bytenr + num_bytes <= group->key.objectid)
			continue;

		extent_key.objectid = bytenr;
		extent_key.offset = num_bytes;
		extent_key.type = BTRFS_EXTENT_ITEM_KEY;
		nr_extent = 1;
		/* ret > 0 means no relocated copy exists; leave item alone */
		ret = get_new_locations(reloc_inode, &extent_key,
					group->key.objectid, 1,
					&new_extent, &nr_extent);
		if (ret > 0)
			continue;
		BUG_ON(ret < 0);

		/* cached ref must mirror the on-disk item exactly */
		BUG_ON(ref->extents[ext_index].bytenr != bytenr);
		BUG_ON(ref->extents[ext_index].num_bytes != num_bytes);
		ref->extents[ext_index].bytenr = new_extent->disk_bytenr;
		ref->extents[ext_index].num_bytes = new_extent->disk_num_bytes;

		btrfs_set_file_extent_disk_bytenr(leaf, fi,
						new_extent->disk_bytenr);
		btrfs_set_file_extent_disk_num_bytes(leaf, fi,
						new_extent->disk_num_bytes);
		btrfs_mark_buffer_dirty(leaf);

		/* take a ref on the new location ... */
		ret = btrfs_inc_extent_ref(trans, root,
					new_extent->disk_bytenr,
					new_extent->disk_num_bytes,
					leaf->start,
					root->root_key.objectid,
					trans->transid, key.objectid);
		BUG_ON(ret);

		/* ... and drop the ref on the old one */
		ret = btrfs_free_extent(trans, root,
					bytenr, num_bytes, leaf->start,
					btrfs_header_owner(leaf),
					btrfs_header_generation(leaf),
					key.objectid, 0);
		BUG_ON(ret);
		cond_resched();
	}
	kfree(new_extent);
	BUG_ON(ext_index + 1 != ref->nritems);
	btrfs_free_leaf_ref(root, ref);
	return 0;
}
6771
Yan Zhengf82d02d2008-10-29 14:49:05 -04006772int btrfs_free_reloc_root(struct btrfs_trans_handle *trans,
6773 struct btrfs_root *root)
Zheng Yan1a40e232008-09-26 10:09:34 -04006774{
6775 struct btrfs_root *reloc_root;
Yan Zhengf82d02d2008-10-29 14:49:05 -04006776 int ret;
Zheng Yan1a40e232008-09-26 10:09:34 -04006777
6778 if (root->reloc_root) {
6779 reloc_root = root->reloc_root;
6780 root->reloc_root = NULL;
6781 list_add(&reloc_root->dead_list,
6782 &root->fs_info->dead_reloc_roots);
Yan Zhengf82d02d2008-10-29 14:49:05 -04006783
6784 btrfs_set_root_bytenr(&reloc_root->root_item,
6785 reloc_root->node->start);
6786 btrfs_set_root_level(&root->root_item,
6787 btrfs_header_level(reloc_root->node));
6788 memset(&reloc_root->root_item.drop_progress, 0,
6789 sizeof(struct btrfs_disk_key));
6790 reloc_root->root_item.drop_level = 0;
6791
6792 ret = btrfs_update_root(trans, root->fs_info->tree_root,
6793 &reloc_root->root_key,
6794 &reloc_root->root_item);
6795 BUG_ON(ret);
Zheng Yan1a40e232008-09-26 10:09:34 -04006796 }
6797 return 0;
6798}
6799
/*
 * Drop every reloc tree queued on fs_info->dead_reloc_roots: splice the
 * list locally, then for each root repeatedly join a transaction and call
 * btrfs_drop_snapshot() until it stops returning -EAGAIN, delete the
 * root's item from the tree root, and free the structure.
 *
 * Locking/transaction flow is deliberate here: the inner loop *breaks*
 * with drop_mutex still held and the joined transaction still open —
 * btrfs_del_root() below runs under that mutex and inside that
 * transaction, which are released right after it.
 *
 * Freeing of each root struct is deferred by one iteration (prev_root)
 * so btrfs_remove_leaf_refs() can run on the last one after the loop.
 *
 * Returns 0.
 */
int btrfs_drop_dead_reloc_roots(struct btrfs_root *root)
{
	struct btrfs_trans_handle *trans;
	struct btrfs_root *reloc_root;
	struct btrfs_root *prev_root = NULL;
	struct list_head dead_roots;
	int ret;
	unsigned long nr;

	INIT_LIST_HEAD(&dead_roots);
	list_splice_init(&root->fs_info->dead_reloc_roots, &dead_roots);

	while (!list_empty(&dead_roots)) {
		reloc_root = list_entry(dead_roots.prev,
					struct btrfs_root, dead_list);
		list_del_init(&reloc_root->dead_list);

		BUG_ON(reloc_root->commit_root != NULL);
		while (1) {
			trans = btrfs_join_transaction(root, 1);
			BUG_ON(!trans);

			mutex_lock(&root->fs_info->drop_mutex);
			ret = btrfs_drop_snapshot(trans, reloc_root);
			/* done (or error): exit with mutex + trans held */
			if (ret != -EAGAIN)
				break;
			mutex_unlock(&root->fs_info->drop_mutex);

			/* -EAGAIN: end this transaction and go again */
			nr = trans->blocks_used;
			ret = btrfs_end_transaction(trans, root);
			BUG_ON(ret);
			btrfs_btree_balance_dirty(root, nr);
		}

		free_extent_buffer(reloc_root->node);

		/* still under drop_mutex / open trans from the break above */
		ret = btrfs_del_root(trans, root->fs_info->tree_root,
				     &reloc_root->root_key);
		BUG_ON(ret);
		mutex_unlock(&root->fs_info->drop_mutex);

		nr = trans->blocks_used;
		ret = btrfs_end_transaction(trans, root);
		BUG_ON(ret);
		btrfs_btree_balance_dirty(root, nr);

		/* free the previous root; keep this one for next round */
		kfree(prev_root);
		prev_root = reloc_root;
	}
	if (prev_root) {
		btrfs_remove_leaf_refs(prev_root, (u64)-1, 0);
		kfree(prev_root);
	}
	return 0;
}
6855
/*
 * Queue 'root' on the fs-wide dead reloc root list so it will be
 * dropped by btrfs_drop_dead_reloc_roots().  Always returns 0.
 */
int btrfs_add_dead_reloc_root(struct btrfs_root *root)
{
	list_add(&root->dead_list, &root->fs_info->dead_reloc_roots);
	return 0;
}
6861
/*
 * Mount-time cleanup for interrupted relocation: collect any reloc roots
 * left in the tree root onto the dead list, commit a transaction (which
 * drops them) if any were found, then run orphan cleanup on the data
 * relocation tree.
 *
 * Returns 0; unexpected failures trip BUG_ON.
 */
int btrfs_cleanup_reloc_trees(struct btrfs_root *root)
{
	struct btrfs_root *reloc_root;
	struct btrfs_trans_handle *trans;
	struct btrfs_key location;
	int found;
	int ret;

	mutex_lock(&root->fs_info->tree_reloc_mutex);
	/* queue leftover reloc roots onto the dead list */
	ret = btrfs_find_dead_roots(root, BTRFS_TREE_RELOC_OBJECTID, NULL);
	BUG_ON(ret);
	found = !list_empty(&root->fs_info->dead_reloc_roots);
	mutex_unlock(&root->fs_info->tree_reloc_mutex);

	if (found) {
		/* committing processes the dead reloc roots */
		trans = btrfs_start_transaction(root, 1);
		BUG_ON(!trans);
		ret = btrfs_commit_transaction(trans, root);
		BUG_ON(ret);
	}

	location.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
	location.offset = (u64)-1;
	location.type = BTRFS_ROOT_ITEM_KEY;

	reloc_root = btrfs_read_fs_root_no_name(root->fs_info, &location);
	BUG_ON(!reloc_root);
	btrfs_orphan_cleanup(reloc_root);
	return 0;
}
6892
Chris Masond3977122009-01-05 21:25:51 -05006893static noinline int init_reloc_tree(struct btrfs_trans_handle *trans,
Zheng Yan1a40e232008-09-26 10:09:34 -04006894 struct btrfs_root *root)
6895{
6896 struct btrfs_root *reloc_root;
6897 struct extent_buffer *eb;
6898 struct btrfs_root_item *root_item;
6899 struct btrfs_key root_key;
6900 int ret;
6901
6902 BUG_ON(!root->ref_cows);
6903 if (root->reloc_root)
6904 return 0;
6905
6906 root_item = kmalloc(sizeof(*root_item), GFP_NOFS);
6907 BUG_ON(!root_item);
6908
6909 ret = btrfs_copy_root(trans, root, root->commit_root,
6910 &eb, BTRFS_TREE_RELOC_OBJECTID);
6911 BUG_ON(ret);
6912
6913 root_key.objectid = BTRFS_TREE_RELOC_OBJECTID;
6914 root_key.offset = root->root_key.objectid;
6915 root_key.type = BTRFS_ROOT_ITEM_KEY;
6916
6917 memcpy(root_item, &root->root_item, sizeof(root_item));
6918 btrfs_set_root_refs(root_item, 0);
6919 btrfs_set_root_bytenr(root_item, eb->start);
6920 btrfs_set_root_level(root_item, btrfs_header_level(eb));
Yan Zheng84234f32008-10-29 14:49:05 -04006921 btrfs_set_root_generation(root_item, trans->transid);
Zheng Yan1a40e232008-09-26 10:09:34 -04006922
6923 btrfs_tree_unlock(eb);
6924 free_extent_buffer(eb);
6925
6926 ret = btrfs_insert_root(trans, root->fs_info->tree_root,
6927 &root_key, root_item);
6928 BUG_ON(ret);
6929 kfree(root_item);
6930
6931 reloc_root = btrfs_read_fs_root_no_radix(root->fs_info->tree_root,
6932 &root_key);
6933 BUG_ON(!reloc_root);
6934 reloc_root->last_trans = trans->transid;
6935 reloc_root->commit_root = NULL;
6936 reloc_root->ref_tree = &root->fs_info->reloc_ref_tree;
6937
6938 root->reloc_root = reloc_root;
6939 return 0;
6940}
6941
6942/*
6943 * Core function of space balance.
6944 *
6945 * The idea is using reloc trees to relocate tree blocks in reference
Yan Zhengf82d02d2008-10-29 14:49:05 -04006946 * counted roots. There is one reloc tree for each subvol, and all
6947 * reloc trees share same root key objectid. Reloc trees are snapshots
6948 * of the latest committed roots of subvols (root->commit_root).
6949 *
6950 * To relocate a tree block referenced by a subvol, there are two steps.
6951 * COW the block through subvol's reloc tree, then update block pointer
6952 * in the subvol to point to the new block. Since all reloc trees share
 * same root key objectid, doing special handling for tree blocks owned
6954 * by them is easy. Once a tree block has been COWed in one reloc tree,
6955 * we can use the resulting new block directly when the same block is
6956 * required to COW again through other reloc trees. By this way, relocated
6957 * tree blocks are shared between reloc trees, so they are also shared
6958 * between subvols.
Zheng Yan1a40e232008-09-26 10:09:34 -04006959 */
/*
 * Relocate one reference path (see the comment block above): COW the
 * blocks along 'ref_path' through the subvol's reloc tree, then merge
 * the resulting reloc-tree blocks back into the fs tree.
 *
 * For non-ref-counted roots a plain COWing search is enough.  Otherwise
 * the reloc tree is created on demand under tree_reloc_mutex, the path's
 * node/key caches above the shared level are cleared, and either a fresh
 * search through the reloc tree or btrfs_merge_path() produces the new
 * block chain recorded in ref_path->new_nodes.
 *
 * 'group'/'reloc_inode' are only used when the owner is a regular file
 * (owner_objectid >= BTRFS_FIRST_FREE_OBJECTID) to rewrite file extent
 * pointers and invalidate stale extent caches.
 *
 * Returns 0; internal failures trip BUG_ON.
 */
static noinline int relocate_one_path(struct btrfs_trans_handle *trans,
				struct btrfs_root *root,
				struct btrfs_path *path,
				struct btrfs_key *first_key,
				struct btrfs_ref_path *ref_path,
				struct btrfs_block_group_cache *group,
				struct inode *reloc_inode)
{
	struct btrfs_root *reloc_root;
	struct extent_buffer *eb = NULL;
	struct btrfs_key *keys;
	u64 *nodes;
	int level;
	int shared_level;
	int lowest_level = 0;
	int ret;

	/* for metadata owners, the owner objectid encodes the tree level */
	if (ref_path->owner_objectid < BTRFS_FIRST_FREE_OBJECTID)
		lowest_level = ref_path->owner_objectid;

	if (!root->ref_cows) {
		/* non-snapshotted tree: a COWing search relocates in place */
		path->lowest_level = lowest_level;
		ret = btrfs_search_slot(trans, root, first_key, path, 0, 1);
		BUG_ON(ret < 0);
		path->lowest_level = 0;
		btrfs_release_path(root, path);
		return 0;
	}

	mutex_lock(&root->fs_info->tree_reloc_mutex);
	ret = init_reloc_tree(trans, root);
	BUG_ON(ret);
	reloc_root = root->reloc_root;

	shared_level = ref_path->shared_level;
	ref_path->shared_level = BTRFS_MAX_LEVEL - 1;

	/* forget cached nodes/keys above the level shared with last path */
	keys = ref_path->node_keys;
	nodes = ref_path->new_nodes;
	memset(&keys[shared_level + 1], 0,
	       sizeof(*keys) * (BTRFS_MAX_LEVEL - shared_level - 1));
	memset(&nodes[shared_level + 1], 0,
	       sizeof(*nodes) * (BTRFS_MAX_LEVEL - shared_level - 1));

	if (nodes[lowest_level] == 0) {
		/* no cached chain: COW through the reloc tree and record it */
		path->lowest_level = lowest_level;
		ret = btrfs_search_slot(trans, reloc_root, first_key, path,
					0, 1);
		BUG_ON(ret);
		for (level = lowest_level; level < BTRFS_MAX_LEVEL; level++) {
			eb = path->nodes[level];
			if (!eb || eb == reloc_root->node)
				break;
			nodes[level] = eb->start;
			if (level == 0)
				btrfs_item_key_to_cpu(eb, &keys[level], 0);
			else
				btrfs_node_key_to_cpu(eb, &keys[level], 0);
		}
		if (nodes[0] &&
		    ref_path->owner_objectid >= BTRFS_FIRST_FREE_OBJECTID) {
			/* rewrite file extent pointers in the COWed leaf */
			eb = path->nodes[0];
			ret = replace_extents_in_leaf(trans, reloc_root, eb,
						      group, reloc_inode);
			BUG_ON(ret);
		}
		btrfs_release_path(reloc_root, path);
	} else {
		/* cached chain exists: merge it into the reloc tree */
		ret = btrfs_merge_path(trans, reloc_root, keys, nodes,
				       lowest_level);
		BUG_ON(ret);
	}

	/*
	 * replace tree blocks in the fs tree with tree blocks in
	 * the reloc tree.
	 */
	ret = btrfs_merge_path(trans, root, keys, nodes, lowest_level);
	BUG_ON(ret < 0);

	if (ref_path->owner_objectid >= BTRFS_FIRST_FREE_OBJECTID) {
		/* drop stale extent mappings for the rewritten leaf */
		ret = btrfs_search_slot(trans, reloc_root, first_key, path,
					0, 0);
		BUG_ON(ret);
		extent_buffer_get(path->nodes[0]);
		eb = path->nodes[0];
		btrfs_release_path(reloc_root, path);
		ret = invalidate_extent_cache(reloc_root, eb, group, root);
		BUG_ON(ret);
		free_extent_buffer(eb);
	}

	mutex_unlock(&root->fs_info->tree_reloc_mutex);
	path->lowest_level = 0;
	return 0;
}
7056
Chris Masond3977122009-01-05 21:25:51 -05007057static noinline int relocate_tree_block(struct btrfs_trans_handle *trans,
Zheng Yan1a40e232008-09-26 10:09:34 -04007058 struct btrfs_root *root,
7059 struct btrfs_path *path,
7060 struct btrfs_key *first_key,
7061 struct btrfs_ref_path *ref_path)
7062{
7063 int ret;
Zheng Yan1a40e232008-09-26 10:09:34 -04007064
7065 ret = relocate_one_path(trans, root, path, first_key,
7066 ref_path, NULL, NULL);
7067 BUG_ON(ret);
7068
Zheng Yan1a40e232008-09-26 10:09:34 -04007069 return 0;
7070}
7071
Chris Masond3977122009-01-05 21:25:51 -05007072static noinline int del_extent_zero(struct btrfs_trans_handle *trans,
Zheng Yan1a40e232008-09-26 10:09:34 -04007073 struct btrfs_root *extent_root,
7074 struct btrfs_path *path,
7075 struct btrfs_key *extent_key)
7076{
7077 int ret;
7078
Zheng Yan1a40e232008-09-26 10:09:34 -04007079 ret = btrfs_search_slot(trans, extent_root, extent_key, path, -1, 1);
7080 if (ret)
7081 goto out;
7082 ret = btrfs_del_item(trans, extent_root, path);
7083out:
Chris Masonedbd8d42007-12-21 16:27:24 -05007084 btrfs_release_path(extent_root, path);
Zheng Yan1a40e232008-09-26 10:09:34 -04007085 return ret;
7086}
7087
Chris Masond3977122009-01-05 21:25:51 -05007088static noinline struct btrfs_root *read_ref_root(struct btrfs_fs_info *fs_info,
Zheng Yan1a40e232008-09-26 10:09:34 -04007089 struct btrfs_ref_path *ref_path)
7090{
7091 struct btrfs_key root_key;
7092
7093 root_key.objectid = ref_path->root_objectid;
7094 root_key.type = BTRFS_ROOT_ITEM_KEY;
7095 if (is_cowonly_root(ref_path->root_objectid))
7096 root_key.offset = 0;
7097 else
7098 root_key.offset = (u64)-1;
7099
7100 return btrfs_read_fs_root_no_name(fs_info, &root_key);
7101}
7102
Chris Masond3977122009-01-05 21:25:51 -05007103static noinline int relocate_one_extent(struct btrfs_root *extent_root,
Zheng Yan1a40e232008-09-26 10:09:34 -04007104 struct btrfs_path *path,
7105 struct btrfs_key *extent_key,
7106 struct btrfs_block_group_cache *group,
7107 struct inode *reloc_inode, int pass)
7108{
7109 struct btrfs_trans_handle *trans;
7110 struct btrfs_root *found_root;
7111 struct btrfs_ref_path *ref_path = NULL;
7112 struct disk_extent *new_extents = NULL;
7113 int nr_extents = 0;
7114 int loops;
7115 int ret;
7116 int level;
7117 struct btrfs_key first_key;
7118 u64 prev_block = 0;
7119
Zheng Yan1a40e232008-09-26 10:09:34 -04007120
7121 trans = btrfs_start_transaction(extent_root, 1);
7122 BUG_ON(!trans);
7123
7124 if (extent_key->objectid == 0) {
7125 ret = del_extent_zero(trans, extent_root, path, extent_key);
7126 goto out;
7127 }
7128
7129 ref_path = kmalloc(sizeof(*ref_path), GFP_NOFS);
7130 if (!ref_path) {
Chris Masond3977122009-01-05 21:25:51 -05007131 ret = -ENOMEM;
7132 goto out;
Zheng Yan1a40e232008-09-26 10:09:34 -04007133 }
7134
7135 for (loops = 0; ; loops++) {
7136 if (loops == 0) {
7137 ret = btrfs_first_ref_path(trans, extent_root, ref_path,
7138 extent_key->objectid);
7139 } else {
7140 ret = btrfs_next_ref_path(trans, extent_root, ref_path);
7141 }
7142 if (ret < 0)
7143 goto out;
7144 if (ret > 0)
7145 break;
7146
7147 if (ref_path->root_objectid == BTRFS_TREE_LOG_OBJECTID ||
7148 ref_path->root_objectid == BTRFS_TREE_RELOC_OBJECTID)
7149 continue;
7150
7151 found_root = read_ref_root(extent_root->fs_info, ref_path);
7152 BUG_ON(!found_root);
7153 /*
7154 * for reference counted tree, only process reference paths
7155 * rooted at the latest committed root.
7156 */
7157 if (found_root->ref_cows &&
7158 ref_path->root_generation != found_root->root_key.offset)
7159 continue;
7160
7161 if (ref_path->owner_objectid >= BTRFS_FIRST_FREE_OBJECTID) {
7162 if (pass == 0) {
7163 /*
7164 * copy data extents to new locations
7165 */
7166 u64 group_start = group->key.objectid;
7167 ret = relocate_data_extent(reloc_inode,
7168 extent_key,
7169 group_start);
7170 if (ret < 0)
7171 goto out;
7172 break;
7173 }
7174 level = 0;
7175 } else {
7176 level = ref_path->owner_objectid;
7177 }
7178
7179 if (prev_block != ref_path->nodes[level]) {
7180 struct extent_buffer *eb;
7181 u64 block_start = ref_path->nodes[level];
7182 u64 block_size = btrfs_level_size(found_root, level);
7183
7184 eb = read_tree_block(found_root, block_start,
7185 block_size, 0);
7186 btrfs_tree_lock(eb);
7187 BUG_ON(level != btrfs_header_level(eb));
7188
7189 if (level == 0)
7190 btrfs_item_key_to_cpu(eb, &first_key, 0);
7191 else
7192 btrfs_node_key_to_cpu(eb, &first_key, 0);
7193
7194 btrfs_tree_unlock(eb);
7195 free_extent_buffer(eb);
7196 prev_block = block_start;
7197 }
7198
Yan Zheng24562422009-02-12 14:14:53 -05007199 mutex_lock(&extent_root->fs_info->trans_mutex);
Yan Zhenge4404d62008-12-12 10:03:26 -05007200 btrfs_record_root_in_trans(found_root);
Yan Zheng24562422009-02-12 14:14:53 -05007201 mutex_unlock(&extent_root->fs_info->trans_mutex);
Yan Zhenge4404d62008-12-12 10:03:26 -05007202 if (ref_path->owner_objectid >= BTRFS_FIRST_FREE_OBJECTID) {
7203 /*
7204 * try to update data extent references while
7205 * keeping metadata shared between snapshots.
7206 */
7207 if (pass == 1) {
7208 ret = relocate_one_path(trans, found_root,
7209 path, &first_key, ref_path,
7210 group, reloc_inode);
7211 if (ret < 0)
7212 goto out;
7213 continue;
7214 }
Zheng Yan1a40e232008-09-26 10:09:34 -04007215 /*
7216 * use fallback method to process the remaining
7217 * references.
7218 */
7219 if (!new_extents) {
7220 u64 group_start = group->key.objectid;
Yan Zhengd899e052008-10-30 14:25:28 -04007221 new_extents = kmalloc(sizeof(*new_extents),
7222 GFP_NOFS);
7223 nr_extents = 1;
Zheng Yan1a40e232008-09-26 10:09:34 -04007224 ret = get_new_locations(reloc_inode,
7225 extent_key,
Yan Zhengd899e052008-10-30 14:25:28 -04007226 group_start, 1,
Zheng Yan1a40e232008-09-26 10:09:34 -04007227 &new_extents,
7228 &nr_extents);
Yan Zhengd899e052008-10-30 14:25:28 -04007229 if (ret)
Zheng Yan1a40e232008-09-26 10:09:34 -04007230 goto out;
7231 }
Zheng Yan1a40e232008-09-26 10:09:34 -04007232 ret = replace_one_extent(trans, found_root,
7233 path, extent_key,
7234 &first_key, ref_path,
7235 new_extents, nr_extents);
Yan Zhenge4404d62008-12-12 10:03:26 -05007236 } else {
Zheng Yan1a40e232008-09-26 10:09:34 -04007237 ret = relocate_tree_block(trans, found_root, path,
7238 &first_key, ref_path);
Zheng Yan1a40e232008-09-26 10:09:34 -04007239 }
7240 if (ret < 0)
7241 goto out;
7242 }
7243 ret = 0;
7244out:
7245 btrfs_end_transaction(trans, extent_root);
7246 kfree(new_extents);
7247 kfree(ref_path);
Chris Masonedbd8d42007-12-21 16:27:24 -05007248 return ret;
7249}
Yan Zheng5d4f98a2009-06-10 10:45:14 -04007250#endif
Chris Masonedbd8d42007-12-21 16:27:24 -05007251
Chris Masonec44a352008-04-28 15:29:52 -04007252static u64 update_block_group_flags(struct btrfs_root *root, u64 flags)
7253{
7254 u64 num_devices;
7255 u64 stripped = BTRFS_BLOCK_GROUP_RAID0 |
7256 BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID10;
7257
Yan Zheng2b820322008-11-17 21:11:30 -05007258 num_devices = root->fs_info->fs_devices->rw_devices;
Chris Masonec44a352008-04-28 15:29:52 -04007259 if (num_devices == 1) {
7260 stripped |= BTRFS_BLOCK_GROUP_DUP;
7261 stripped = flags & ~stripped;
7262
7263 /* turn raid0 into single device chunks */
7264 if (flags & BTRFS_BLOCK_GROUP_RAID0)
7265 return stripped;
7266
7267 /* turn mirroring into duplication */
7268 if (flags & (BTRFS_BLOCK_GROUP_RAID1 |
7269 BTRFS_BLOCK_GROUP_RAID10))
7270 return stripped | BTRFS_BLOCK_GROUP_DUP;
7271 return flags;
7272 } else {
7273 /* they already had raid on here, just return */
Chris Masonec44a352008-04-28 15:29:52 -04007274 if (flags & stripped)
7275 return flags;
7276
7277 stripped |= BTRFS_BLOCK_GROUP_DUP;
7278 stripped = flags & ~stripped;
7279
7280 /* switch duplicated blocks with raid1 */
7281 if (flags & BTRFS_BLOCK_GROUP_DUP)
7282 return stripped | BTRFS_BLOCK_GROUP_RAID1;
7283
7284 /* turn single device chunks into raid0 */
7285 return stripped | BTRFS_BLOCK_GROUP_RAID0;
7286 }
7287 return flags;
7288}
7289
/*
 * Pre-allocate a replacement chunk before shrinking/relocating a block
 * group, so its live data has somewhere to go.
 *
 * @shrink_block_group: the group about to be emptied
 * @force:              passed through to do_chunk_alloc() to force
 *                      allocation even if existing space looks sufficient
 *
 * Only does work if the group still has used or reserved bytes;
 * otherwise nothing needs to be moved.  Always returns 0.
 */
static int __alloc_chunk_for_shrink(struct btrfs_root *root,
			struct btrfs_block_group_cache *shrink_block_group,
			int force)
{
	struct btrfs_trans_handle *trans;
	u64 new_alloc_flags;
	u64 calc;

	spin_lock(&shrink_block_group->lock);
	if (btrfs_block_group_used(&shrink_block_group->item) +
	    shrink_block_group->reserved > 0) {
		/*
		 * btrfs_start_transaction() can sleep, so the spinlock
		 * must be dropped around it and re-taken afterwards.
		 * NOTE(review): the group's usage may change in that
		 * window; the sizing below is a best-effort estimate.
		 */
		spin_unlock(&shrink_block_group->lock);

		trans = btrfs_start_transaction(root, 1);
		spin_lock(&shrink_block_group->lock);

		/*
		 * If the profile will change (e.g. fewer devices now),
		 * size the new chunk by bytes actually used; otherwise
		 * mirror the old group's full size (key.offset).
		 */
		new_alloc_flags = update_block_group_flags(root,
						   shrink_block_group->flags);
		if (new_alloc_flags != shrink_block_group->flags) {
			calc =
			     btrfs_block_group_used(&shrink_block_group->item);
		} else {
			calc = shrink_block_group->key.offset;
		}
		spin_unlock(&shrink_block_group->lock);

		/* 2MB slack on top of the computed size */
		do_chunk_alloc(trans, root->fs_info->extent_root,
			       calc + 2 * 1024 * 1024, new_alloc_flags, force);

		btrfs_end_transaction(trans, root);
	} else
		spin_unlock(&shrink_block_group->lock);
	return 0;
}
7324
Yan Zheng5d4f98a2009-06-10 10:45:14 -04007325
/*
 * Get a block group ready to be relocated: make sure there is a chunk
 * elsewhere for its data to move into (forced allocation), then mark
 * the group read-only so no new allocations land in it.
 *
 * Always returns 0.
 */
int btrfs_prepare_block_group_relocation(struct btrfs_root *root,
					 struct btrfs_block_group_cache *group)

{
	__alloc_chunk_for_shrink(root, group, 1);
	set_block_group_readonly(group);
	return 0;
}
7334
/*
 * Check whether it is even possible to relocate the block group that
 * contains @bytenr.
 *
 * @root:   any root; used to reach fs_info
 * @bytenr: a byte offset inside the candidate block group
 *
 * Returns 0 if it is OK to go ahead and try the relocation, -1 if it is
 * not a good idea (group not found, not enough room elsewhere in the
 * space_info, and no device has enough free space for a new chunk).
 */
int btrfs_can_relocate(struct btrfs_root *root, u64 bytenr)
{
	struct btrfs_block_group_cache *block_group;
	struct btrfs_space_info *space_info;
	struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices;
	struct btrfs_device *device;
	int full = 0;
	int ret = 0;

	block_group = btrfs_lookup_block_group(root->fs_info, bytenr);

	/* odd, couldn't find the block group, leave it alone */
	if (!block_group)
		return -1;

	/* no bytes used, we're good */
	if (!btrfs_block_group_used(&block_group->item))
		goto out;

	space_info = block_group->space_info;
	spin_lock(&space_info->lock);

	full = space_info->full;

	/*
	 * if this is the last block group we have in this space, we can't
	 * relocate it unless we're able to allocate a new chunk below.
	 *
	 * Otherwise, we need to make sure we have room in the space to handle
	 * all of the extents from this block group.  If we can, we're good
	 */
	if ((space_info->total_bytes != block_group->key.offset) &&
	    (space_info->bytes_used + space_info->bytes_reserved +
	     space_info->bytes_pinned + space_info->bytes_readonly +
	     btrfs_block_group_used(&block_group->item) <
	     space_info->total_bytes)) {
		spin_unlock(&space_info->lock);
		goto out;
	}
	spin_unlock(&space_info->lock);

	/*
	 * ok we don't have enough space, but maybe we have free space on our
	 * devices to allocate new chunks for relocation, so loop through our
	 * alloc devices and guess if we have enough space.  However, if we
	 * were marked as full, then we know there aren't enough chunks, and we
	 * can just return.
	 */
	ret = -1;
	if (full)
		goto out;

	/* chunk_mutex guards the device alloc_list we walk below */
	mutex_lock(&root->fs_info->chunk_mutex);
	list_for_each_entry(device, &fs_devices->alloc_list, dev_alloc_list) {
		u64 min_free = btrfs_block_group_used(&block_group->item);
		u64 dev_offset, max_avail;

		/*
		 * check to make sure we can actually find a chunk with enough
		 * space to fit our block group in.
		 */
		if (device->total_bytes > device->bytes_used + min_free) {
			ret = find_free_dev_extent(NULL, device, min_free,
						   &dev_offset, &max_avail);
			/* found a large-enough hole on this device: ret=0 */
			if (!ret)
				break;
			ret = -1;
		}
	}
	mutex_unlock(&root->fs_info->chunk_mutex);
out:
	/* drop the reference taken by btrfs_lookup_block_group() */
	btrfs_put_block_group(block_group);
	return ret;
}
7415
/*
 * Position @path at the first BLOCK_GROUP_ITEM whose objectid is
 * >= key->objectid in the extent tree.
 *
 * Returns 0 with the path pointing at the found item, > 0 if there is
 * no such item (end of tree reached), or < 0 on search error.  The
 * caller is responsible for releasing the path.
 */
static int find_first_block_group(struct btrfs_root *root,
		struct btrfs_path *path, struct btrfs_key *key)
{
	int ret = 0;
	struct btrfs_key found_key;
	struct extent_buffer *leaf;
	int slot;

	ret = btrfs_search_slot(NULL, root, key, path, 0, 0);
	if (ret < 0)
		goto out;

	while (1) {
		slot = path->slots[0];
		leaf = path->nodes[0];
		/* walked off the current leaf: advance to the next one */
		if (slot >= btrfs_header_nritems(leaf)) {
			ret = btrfs_next_leaf(root, path);
			if (ret == 0)
				continue;
			if (ret < 0)
				goto out;
			/* ret > 0: no more leaves, return "not found" */
			break;
		}
		btrfs_item_key_to_cpu(leaf, &found_key, slot);

		if (found_key.objectid >= key->objectid &&
		    found_key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
			ret = 0;
			goto out;
		}
		path->slots[0]++;
	}
out:
	return ret;
}
7451
/*
 * Tear down all in-memory block group and space_info structures.
 * Called during the final stages of unmount, so no other users are
 * expected.  Always returns 0.
 */
int btrfs_free_block_groups(struct btrfs_fs_info *info)
{
	struct btrfs_block_group_cache *block_group;
	struct btrfs_space_info *space_info;
	struct btrfs_caching_control *caching_ctl;
	struct rb_node *n;

	/* drop any caching controls that are still queued */
	down_write(&info->extent_commit_sem);
	while (!list_empty(&info->caching_block_groups)) {
		caching_ctl = list_entry(info->caching_block_groups.next,
					 struct btrfs_caching_control, list);
		list_del(&caching_ctl->list);
		put_caching_control(caching_ctl);
	}
	up_write(&info->extent_commit_sem);

	spin_lock(&info->block_group_cache_lock);
	while ((n = rb_last(&info->block_group_cache_tree)) != NULL) {
		block_group = rb_entry(n, struct btrfs_block_group_cache,
				       cache_node);
		rb_erase(&block_group->cache_node,
			 &info->block_group_cache_tree);
		/*
		 * the spinlock is dropped while we wait/free below and
		 * re-taken before the next rb_last(); erasing the node
		 * first keeps the loop making progress.
		 */
		spin_unlock(&info->block_group_cache_lock);

		down_write(&block_group->space_info->groups_sem);
		list_del(&block_group->list);
		up_write(&block_group->space_info->groups_sem);

		/* let an in-flight caching thread finish before freeing */
		if (block_group->cached == BTRFS_CACHE_STARTED)
			wait_block_group_cache_done(block_group);

		btrfs_remove_free_space_cache(block_group);
		btrfs_put_block_group(block_group);

		spin_lock(&info->block_group_cache_lock);
	}
	spin_unlock(&info->block_group_cache_lock);

	/* now that all the block groups are freed, go through and
	 * free all the space_info structs.  This is only called during
	 * the final stages of unmount, and so we know nobody is
	 * using them.  We call synchronize_rcu() once before we start,
	 * just to be on the safe side.
	 */
	synchronize_rcu();

	while(!list_empty(&info->space_info)) {
		space_info = list_entry(info->space_info.next,
					struct btrfs_space_info,
					list);

		list_del(&space_info->list);
		kfree(space_info);
	}
	return 0;
}
7508
/*
 * Add @cache to its space_info's per-profile block group list.
 * get_block_group_index() selects which of the block_groups[] lists
 * the group belongs on; groups_sem protects those lists.
 */
static void __link_block_group(struct btrfs_space_info *space_info,
			       struct btrfs_block_group_cache *cache)
{
	int index = get_block_group_index(cache);

	down_write(&space_info->groups_sem);
	list_add_tail(&cache->list, &space_info->block_groups[index]);
	up_write(&space_info->groups_sem);
}
7518
Chris Mason9078a3e2007-04-26 16:46:15 -04007519int btrfs_read_block_groups(struct btrfs_root *root)
7520{
7521 struct btrfs_path *path;
7522 int ret;
Chris Mason9078a3e2007-04-26 16:46:15 -04007523 struct btrfs_block_group_cache *cache;
Chris Masonbe744172007-05-06 10:15:01 -04007524 struct btrfs_fs_info *info = root->fs_info;
Chris Mason6324fbf2008-03-24 15:01:59 -04007525 struct btrfs_space_info *space_info;
Chris Mason9078a3e2007-04-26 16:46:15 -04007526 struct btrfs_key key;
7527 struct btrfs_key found_key;
Chris Mason5f39d392007-10-15 16:14:19 -04007528 struct extent_buffer *leaf;
Chris Mason96b51792007-10-15 16:15:19 -04007529
Chris Masonbe744172007-05-06 10:15:01 -04007530 root = info->extent_root;
Chris Mason9078a3e2007-04-26 16:46:15 -04007531 key.objectid = 0;
Chris Mason0b86a832008-03-24 15:01:56 -04007532 key.offset = 0;
Chris Mason9078a3e2007-04-26 16:46:15 -04007533 btrfs_set_key_type(&key, BTRFS_BLOCK_GROUP_ITEM_KEY);
Chris Mason9078a3e2007-04-26 16:46:15 -04007534 path = btrfs_alloc_path();
7535 if (!path)
7536 return -ENOMEM;
7537
Chris Masond3977122009-01-05 21:25:51 -05007538 while (1) {
Chris Mason0b86a832008-03-24 15:01:56 -04007539 ret = find_first_block_group(root, path, &key);
Yan, Zhengb742bb822010-05-16 10:46:24 -04007540 if (ret > 0)
7541 break;
Chris Mason0b86a832008-03-24 15:01:56 -04007542 if (ret != 0)
7543 goto error;
7544
Chris Mason5f39d392007-10-15 16:14:19 -04007545 leaf = path->nodes[0];
7546 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
Chris Mason8f18cf12008-04-25 16:53:30 -04007547 cache = kzalloc(sizeof(*cache), GFP_NOFS);
Chris Mason9078a3e2007-04-26 16:46:15 -04007548 if (!cache) {
Chris Mason0b86a832008-03-24 15:01:56 -04007549 ret = -ENOMEM;
Chris Mason9078a3e2007-04-26 16:46:15 -04007550 break;
7551 }
Chris Mason3e1ad542007-05-07 20:03:49 -04007552
Yan Zhengd2fb3432008-12-11 16:30:39 -05007553 atomic_set(&cache->count, 1);
Chris Masonc286ac42008-07-22 23:06:41 -04007554 spin_lock_init(&cache->lock);
Josef Bacik6226cb02009-04-03 10:14:18 -04007555 spin_lock_init(&cache->tree_lock);
Josef Bacik817d52f2009-07-13 21:29:25 -04007556 cache->fs_info = info;
Josef Bacik0f9dd462008-09-23 13:14:11 -04007557 INIT_LIST_HEAD(&cache->list);
Chris Masonfa9c0d792009-04-03 09:47:43 -04007558 INIT_LIST_HEAD(&cache->cluster_list);
Josef Bacik96303082009-07-13 21:29:25 -04007559
7560 /*
7561 * we only want to have 32k of ram per block group for keeping
7562 * track of free space, and if we pass 1/2 of that we want to
7563 * start converting things over to using bitmaps
7564 */
7565 cache->extents_thresh = ((1024 * 32) / 2) /
7566 sizeof(struct btrfs_free_space);
7567
Chris Mason5f39d392007-10-15 16:14:19 -04007568 read_extent_buffer(leaf, &cache->item,
7569 btrfs_item_ptr_offset(leaf, path->slots[0]),
7570 sizeof(cache->item));
Chris Mason9078a3e2007-04-26 16:46:15 -04007571 memcpy(&cache->key, &found_key, sizeof(found_key));
Chris Mason0b86a832008-03-24 15:01:56 -04007572
Chris Mason9078a3e2007-04-26 16:46:15 -04007573 key.objectid = found_key.objectid + found_key.offset;
7574 btrfs_release_path(root, path);
Chris Mason0b86a832008-03-24 15:01:56 -04007575 cache->flags = btrfs_block_group_flags(&cache->item);
Josef Bacik817d52f2009-07-13 21:29:25 -04007576 cache->sectorsize = root->sectorsize;
7577
Josef Bacik817d52f2009-07-13 21:29:25 -04007578 /*
7579 * check for two cases, either we are full, and therefore
7580 * don't need to bother with the caching work since we won't
7581 * find any space, or we are empty, and we can just add all
7582 * the space in and be done with it. This saves us _alot_ of
7583 * time, particularly in the full case.
7584 */
7585 if (found_key.offset == btrfs_block_group_used(&cache->item)) {
Josef Bacik1b2da372009-09-11 16:11:20 -04007586 exclude_super_stripes(root, cache);
Yan Zheng11833d62009-09-11 16:11:19 -04007587 cache->last_byte_to_unpin = (u64)-1;
Josef Bacik817d52f2009-07-13 21:29:25 -04007588 cache->cached = BTRFS_CACHE_FINISHED;
Josef Bacik1b2da372009-09-11 16:11:20 -04007589 free_excluded_extents(root, cache);
Josef Bacik817d52f2009-07-13 21:29:25 -04007590 } else if (btrfs_block_group_used(&cache->item) == 0) {
Yan Zheng11833d62009-09-11 16:11:19 -04007591 exclude_super_stripes(root, cache);
7592 cache->last_byte_to_unpin = (u64)-1;
Josef Bacik817d52f2009-07-13 21:29:25 -04007593 cache->cached = BTRFS_CACHE_FINISHED;
7594 add_new_free_space(cache, root->fs_info,
7595 found_key.objectid,
7596 found_key.objectid +
7597 found_key.offset);
Yan Zheng11833d62009-09-11 16:11:19 -04007598 free_excluded_extents(root, cache);
Josef Bacik817d52f2009-07-13 21:29:25 -04007599 }
Chris Mason96b51792007-10-15 16:15:19 -04007600
Chris Mason6324fbf2008-03-24 15:01:59 -04007601 ret = update_space_info(info, cache->flags, found_key.offset,
7602 btrfs_block_group_used(&cache->item),
7603 &space_info);
7604 BUG_ON(ret);
7605 cache->space_info = space_info;
Josef Bacik1b2da372009-09-11 16:11:20 -04007606 spin_lock(&cache->space_info->lock);
7607 cache->space_info->bytes_super += cache->bytes_super;
7608 spin_unlock(&cache->space_info->lock);
7609
Yan, Zhengb742bb822010-05-16 10:46:24 -04007610 __link_block_group(space_info, cache);
Chris Mason6324fbf2008-03-24 15:01:59 -04007611
Josef Bacik0f9dd462008-09-23 13:14:11 -04007612 ret = btrfs_add_block_group_cache(root->fs_info, cache);
7613 BUG_ON(ret);
Chris Mason75ccf472008-09-30 19:24:06 -04007614
7615 set_avail_alloc_bits(root->fs_info, cache->flags);
Yan Zheng2b820322008-11-17 21:11:30 -05007616 if (btrfs_chunk_readonly(root, cache->key.objectid))
7617 set_block_group_readonly(cache);
Chris Mason9078a3e2007-04-26 16:46:15 -04007618 }
Yan, Zhengb742bb822010-05-16 10:46:24 -04007619
7620 list_for_each_entry_rcu(space_info, &root->fs_info->space_info, list) {
7621 if (!(get_alloc_profile(root, space_info->flags) &
7622 (BTRFS_BLOCK_GROUP_RAID10 |
7623 BTRFS_BLOCK_GROUP_RAID1 |
7624 BTRFS_BLOCK_GROUP_DUP)))
7625 continue;
7626 /*
7627 * avoid allocating from un-mirrored block group if there are
7628 * mirrored block groups.
7629 */
7630 list_for_each_entry(cache, &space_info->block_groups[3], list)
7631 set_block_group_readonly(cache);
7632 list_for_each_entry(cache, &space_info->block_groups[4], list)
7633 set_block_group_readonly(cache);
7634 }
Chris Mason0b86a832008-03-24 15:01:56 -04007635 ret = 0;
7636error:
Chris Mason9078a3e2007-04-26 16:46:15 -04007637 btrfs_free_path(path);
Chris Mason0b86a832008-03-24 15:01:56 -04007638 return ret;
Chris Mason9078a3e2007-04-26 16:46:15 -04007639}
Chris Mason6324fbf2008-03-24 15:01:59 -04007640
/*
 * Create a brand-new block group for the chunk at @chunk_offset and
 * insert its BLOCK_GROUP_ITEM into the extent tree.
 *
 * @bytes_used:      bytes already accounted as used in the new group
 * @type:            BTRFS_BLOCK_GROUP_* profile/type flags
 * @chunk_objectid:  objectid recorded in the block group item
 * @chunk_offset:    logical start of the chunk (becomes key.objectid)
 * @size:            chunk length (becomes key.offset)
 *
 * Returns 0 on success or -ENOMEM; internal setup failures BUG().
 */
int btrfs_make_block_group(struct btrfs_trans_handle *trans,
			   struct btrfs_root *root, u64 bytes_used,
			   u64 type, u64 chunk_objectid, u64 chunk_offset,
			   u64 size)
{
	int ret;
	struct btrfs_root *extent_root;
	struct btrfs_block_group_cache *cache;

	extent_root = root->fs_info->extent_root;

	/* force a full tree-log commit; the log can't describe new groups */
	root->fs_info->last_trans_log_full_commit = trans->transid;

	cache = kzalloc(sizeof(*cache), GFP_NOFS);
	if (!cache)
		return -ENOMEM;

	cache->key.objectid = chunk_offset;
	cache->key.offset = size;
	cache->key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
	cache->sectorsize = root->sectorsize;

	/*
	 * we only want to have 32k of ram per block group for keeping track
	 * of free space, and if we pass 1/2 of that we want to start
	 * converting things over to using bitmaps
	 */
	cache->extents_thresh = ((1024 * 32) / 2) /
		sizeof(struct btrfs_free_space);
	atomic_set(&cache->count, 1);
	spin_lock_init(&cache->lock);
	spin_lock_init(&cache->tree_lock);
	INIT_LIST_HEAD(&cache->list);
	INIT_LIST_HEAD(&cache->cluster_list);

	btrfs_set_block_group_used(&cache->item, bytes_used);
	btrfs_set_block_group_chunk_objectid(&cache->item, chunk_objectid);
	cache->flags = type;
	btrfs_set_block_group_flags(&cache->item, type);

	/* brand new group: free space is fully known, no caching needed */
	cache->last_byte_to_unpin = (u64)-1;
	cache->cached = BTRFS_CACHE_FINISHED;
	exclude_super_stripes(root, cache);

	add_new_free_space(cache, root->fs_info, chunk_offset,
			   chunk_offset + size);

	free_excluded_extents(root, cache);

	ret = update_space_info(root->fs_info, cache->flags, size, bytes_used,
				&cache->space_info);
	BUG_ON(ret);

	spin_lock(&cache->space_info->lock);
	cache->space_info->bytes_super += cache->bytes_super;
	spin_unlock(&cache->space_info->lock);

	__link_block_group(cache->space_info, cache);

	ret = btrfs_add_block_group_cache(root->fs_info, cache);
	BUG_ON(ret);

	ret = btrfs_insert_item(trans, extent_root, &cache->key, &cache->item,
				sizeof(cache->item));
	BUG_ON(ret);

	set_avail_alloc_bits(extent_root->fs_info, type);

	return 0;
}
Zheng Yan1a40e232008-09-26 10:09:34 -04007711
/*
 * Remove an emptied, read-only block group: detach it from every
 * in-memory structure (allocation clusters, rbtree cache, space_info
 * lists, free space cache) and delete its BLOCK_GROUP_ITEM from the
 * extent tree.
 *
 * @group_start: logical start of the block group (its key.objectid)
 *
 * The group must already be read-only (BUG_ON otherwise).  Returns 0
 * on success, -EIO if the item is missing, or a search error.
 */
int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
			     struct btrfs_root *root, u64 group_start)
{
	struct btrfs_path *path;
	struct btrfs_block_group_cache *block_group;
	struct btrfs_free_cluster *cluster;
	struct btrfs_key key;
	int ret;

	root = root->fs_info->extent_root;

	block_group = btrfs_lookup_block_group(root->fs_info, group_start);
	BUG_ON(!block_group);
	BUG_ON(!block_group->ro);

	/* copy the key now; block_group is freed before the tree delete */
	memcpy(&key, &block_group->key, sizeof(key));

	/* make sure this block group isn't part of an allocation cluster */
	cluster = &root->fs_info->data_alloc_cluster;
	spin_lock(&cluster->refill_lock);
	btrfs_return_cluster_to_free_space(block_group, cluster);
	spin_unlock(&cluster->refill_lock);

	/*
	 * make sure this block group isn't part of a metadata
	 * allocation cluster
	 */
	cluster = &root->fs_info->meta_alloc_cluster;
	spin_lock(&cluster->refill_lock);
	btrfs_return_cluster_to_free_space(block_group, cluster);
	spin_unlock(&cluster->refill_lock);

	path = btrfs_alloc_path();
	BUG_ON(!path);

	spin_lock(&root->fs_info->block_group_cache_lock);
	rb_erase(&block_group->cache_node,
		 &root->fs_info->block_group_cache_tree);
	spin_unlock(&root->fs_info->block_group_cache_lock);

	down_write(&block_group->space_info->groups_sem);
	/*
	 * we must use list_del_init so people can check to see if they
	 * are still on the list after taking the semaphore
	 */
	list_del_init(&block_group->list);
	up_write(&block_group->space_info->groups_sem);

	/* wait out any caching thread still scanning this group */
	if (block_group->cached == BTRFS_CACHE_STARTED)
		wait_block_group_cache_done(block_group);

	btrfs_remove_free_space_cache(block_group);

	spin_lock(&block_group->space_info->lock);
	block_group->space_info->total_bytes -= block_group->key.offset;
	block_group->space_info->bytes_readonly -= block_group->key.offset;
	spin_unlock(&block_group->space_info->lock);

	btrfs_clear_space_info_full(root->fs_info);

	/*
	 * two puts: one drops the reference taken by the lookup above;
	 * the other presumably drops the cache's own reference now that
	 * the group is out of the rbtree — NOTE(review): confirm against
	 * btrfs_add_block_group_cache()'s refcounting.
	 */
	btrfs_put_block_group(block_group);
	btrfs_put_block_group(block_group);

	ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
	if (ret > 0)
		ret = -EIO;
	if (ret < 0)
		goto out;

	ret = btrfs_del_item(trans, root, path);
out:
	btrfs_free_path(path);
	return ret;
}