Btrfs: Fix extent_buffer and extent_state leaks
Signed-off-by: Chris Mason <chris.mason@oracle.com>
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 9427b79..50ef351 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -87,6 +87,7 @@
if (IS_ERR(cow))
return PTR_ERR(cow);
+ cow->alloc_addr = (unsigned long)__builtin_return_address(0);
if (buf->len != root->sectorsize || cow->len != root->sectorsize)
WARN_ON(1);
@@ -132,6 +133,7 @@
struct extent_buffer **cow_ret)
{
u64 search_start;
+ int ret;
if (trans->transaction != root->fs_info->running_transaction) {
printk(KERN_CRIT "trans %Lu running %Lu\n", trans->transid,
root->fs_info->running_transaction->transid);
@@ -148,8 +150,10 @@
}
search_start = extent_buffer_blocknr(buf) & ~((u64)65535);
- return __btrfs_cow_block(trans, root, buf, parent,
+ ret = __btrfs_cow_block(trans, root, buf, parent,
parent_slot, cow_ret, search_start, 0);
+ (*cow_ret)->alloc_addr = (unsigned long)__builtin_return_address(0);
+ return ret;
}
static int close_blocks(u64 blocknr, u64 other)
@@ -1013,8 +1017,10 @@
if (sret)
return sret;
b = p->nodes[level];
- if (!b)
+ if (!b) {
+ btrfs_release_path(NULL, p);
goto again;
+ }
slot = p->slots[level];
BUG_ON(btrfs_header_nritems(b) == 1);
}
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index aed0861..5262b28 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -303,8 +303,8 @@
struct radix_tree_root pinned_radix;
struct radix_tree_root block_group_radix;
struct radix_tree_root block_group_data_radix;
- struct radix_tree_root extent_map_radix;
struct radix_tree_root extent_ins_radix;
+ struct extent_map_tree free_space_cache;
u64 generation;
u64 last_trans_committed;
struct btrfs_transaction *running_transaction;
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 8242933..09f4e69 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -46,18 +46,25 @@
u64 blocknr)
{
struct inode *btree_inode = root->fs_info->btree_inode;
- return find_extent_buffer(&BTRFS_I(btree_inode)->extent_tree,
+ struct extent_buffer *eb;
+ eb = find_extent_buffer(&BTRFS_I(btree_inode)->extent_tree,
blocknr * root->sectorsize,
root->sectorsize, GFP_NOFS);
+ if (eb)
+ eb->alloc_addr = (unsigned long)__builtin_return_address(0);
+ return eb;
}
struct extent_buffer *btrfs_find_create_tree_block(struct btrfs_root *root,
u64 blocknr)
{
struct inode *btree_inode = root->fs_info->btree_inode;
- return alloc_extent_buffer(&BTRFS_I(btree_inode)->extent_tree,
+ struct extent_buffer *eb;
+ eb = alloc_extent_buffer(&BTRFS_I(btree_inode)->extent_tree,
blocknr * root->sectorsize,
root->sectorsize, GFP_NOFS);
+ eb->alloc_addr = (unsigned long)__builtin_return_address(0);
+ return eb;
}
struct extent_map *btree_get_extent(struct inode *inode, struct page *page,
@@ -226,6 +233,7 @@
return NULL;
read_extent_buffer_pages(&BTRFS_I(btree_inode)->extent_tree,
buf, 1);
+ buf->alloc_addr = (unsigned long)__builtin_return_address(0);
return buf;
}
@@ -426,7 +434,6 @@
}
init_bit_radix(&fs_info->pinned_radix);
init_bit_radix(&fs_info->pending_del_radix);
- init_bit_radix(&fs_info->extent_map_radix);
init_bit_radix(&fs_info->extent_ins_radix);
INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_NOFS);
INIT_RADIX_TREE(&fs_info->block_group_radix, GFP_KERNEL);
@@ -449,6 +456,8 @@
extent_map_tree_init(&BTRFS_I(fs_info->btree_inode)->extent_tree,
fs_info->btree_inode->i_mapping,
GFP_NOFS);
+ extent_map_tree_init(&fs_info->free_space_cache,
+ fs_info->btree_inode->i_mapping, GFP_NOFS);
fs_info->do_barriers = 1;
fs_info->closing = 0;
@@ -594,8 +603,10 @@
if (fs_info->extent_root->node)
free_extent_buffer(fs_info->extent_root->node);
+
if (fs_info->tree_root->node)
free_extent_buffer(fs_info->tree_root->node);
+
free_extent_buffer(fs_info->sb_buffer);
truncate_inode_pages(fs_info->btree_inode->i_mapping, 0);
iput(fs_info->btree_inode);
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 089c41c..74cfbee 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -34,21 +34,19 @@
int ret;
struct btrfs_key key;
struct extent_buffer *leaf;
- struct radix_tree_root *extent_radix;
+ struct extent_map_tree *free_space_cache;
int slot;
- u64 i;
u64 last = 0;
u64 hole_size;
u64 first_free;
int found = 0;
root = root->fs_info->extent_root;
- extent_radix = &root->fs_info->extent_map_radix;
+ free_space_cache = &root->fs_info->free_space_cache;
if (block_group->cached)
return 0;
- if (block_group->data)
- return 0;
+
path = btrfs_alloc_path();
if (!path)
return -ENOMEM;
@@ -98,9 +96,11 @@
last = first_free;
found = 1;
}
- hole_size = key.objectid - last;
- for (i = 0; i < hole_size; i++) {
- set_radix_bit(extent_radix, last + i);
+ if (key.objectid > last) {
+ hole_size = key.objectid - last;
+ set_extent_dirty(free_space_cache, last,
+ last + hole_size - 1,
+ GFP_NOFS);
}
last = key.objectid + key.offset;
}
@@ -114,9 +114,8 @@
block_group->key.offset > last) {
hole_size = block_group->key.objectid +
block_group->key.offset - last;
- for (i = 0; i < hole_size; i++) {
- set_radix_bit(extent_radix, last + i);
- }
+ set_extent_dirty(free_space_cache, last,
+ last + hole_size - 1, GFP_NOFS);
}
block_group->cached = 1;
err:
@@ -150,47 +149,33 @@
return NULL;
}
-static u64 leaf_range(struct btrfs_root *root)
-{
- u64 size = BTRFS_LEAF_DATA_SIZE(root);
- do_div(size, sizeof(struct btrfs_extent_item) +
- sizeof(struct btrfs_item));
- return size;
-}
-
static u64 find_search_start(struct btrfs_root *root,
struct btrfs_block_group_cache **cache_ret,
- u64 search_start, int num)
+ u64 search_start, int num, int data)
{
- unsigned long gang[8];
int ret;
struct btrfs_block_group_cache *cache = *cache_ret;
u64 last = max(search_start, cache->key.objectid);
+ u64 start = 0;
+ u64 end = 0;
- if (cache->data)
- goto out;
again:
ret = cache_block_group(root, cache);
if (ret)
goto out;
while(1) {
- ret = find_first_radix_bit(&root->fs_info->extent_map_radix,
- gang, last, ARRAY_SIZE(gang));
- if (!ret)
+ ret = find_first_extent_bit(&root->fs_info->free_space_cache,
+ last, &start, &end, EXTENT_DIRTY);
+ if (ret)
goto out;
- last = gang[ret-1] + 1;
- if (num > 1) {
- if (ret != ARRAY_SIZE(gang)) {
- goto new_group;
- }
- if (gang[ret-1] - gang[0] > leaf_range(root)) {
- continue;
- }
- }
- if (gang[0] >= cache->key.objectid + cache->key.offset) {
+
+ start = max(last, start);
+ last = end + 1;
+ if (end + 1 - start < num)
+ continue;
+ if (start + num > cache->key.objectid + cache->key.offset)
goto new_group;
- }
- return gang[0];
+ return start;
}
out:
return max(cache->last_alloc, search_start);
@@ -202,7 +187,7 @@
return max((*cache_ret)->last_alloc, search_start);
}
cache = btrfs_find_block_group(root, cache,
- last + cache->key.offset - 1, 0, 0);
+ last + cache->key.offset - 1, data, 0);
*cache_ret = cache;
goto again;
}
@@ -625,7 +610,6 @@
u64 total = num;
u64 old_val;
u64 block_in_group;
- u64 i;
int ret;
while(total) {
@@ -644,12 +628,6 @@
if (alloc) {
if (blocknr > cache->last_alloc)
cache->last_alloc = blocknr;
- if (!cache->data) {
- for (i = 0; i < num; i++) {
- clear_radix_bit(&info->extent_map_radix,
- blocknr + i);
- }
- }
if (cache->data != data &&
old_val < (cache->key.offset >> 1)) {
cache->data = data;
@@ -677,11 +655,10 @@
old_val -= num;
if (blocknr < cache->first_free)
cache->first_free = blocknr;
- if (!cache->data && mark_free) {
- for (i = 0; i < num; i++) {
- set_radix_bit(&info->extent_map_radix,
- blocknr + i);
- }
+ if (mark_free) {
+ set_extent_dirty(&info->free_space_cache,
+ blocknr, blocknr + num - 1,
+ GFP_NOFS);
}
if (old_val < (cache->key.offset >> 1) &&
old_val + num >= (cache->key.offset >> 1)) {
@@ -732,7 +709,9 @@
int ret;
int i;
struct radix_tree_root *pinned_radix = &root->fs_info->pinned_radix;
- struct radix_tree_root *extent_radix = &root->fs_info->extent_map_radix;
+ struct extent_map_tree *free_space_cache;
+
+ free_space_cache = &root->fs_info->free_space_cache;
while(1) {
ret = find_first_radix_bit(unpin_radix, gang, 0,
@@ -751,8 +730,11 @@
block_group->pinned--;
if (gang[i] < block_group->last_alloc)
block_group->last_alloc = gang[i];
- if (!block_group->data)
- set_radix_bit(extent_radix, gang[i]);
+ if (!block_group->data) {
+ set_extent_dirty(free_space_cache,
+ gang[i], gang[i],
+ GFP_NOFS);
+ }
}
}
}
@@ -995,6 +977,7 @@
struct btrfs_block_group_cache *block_group;
int full_scan = 0;
int wrapped = 0;
+ u64 cached_search_start = 0;
WARN_ON(num_blocks < 1);
btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY);
@@ -1017,11 +1000,9 @@
path = btrfs_alloc_path();
check_failed:
- if (!block_group->data)
- search_start = find_search_start(root, &block_group,
- search_start, total_needed);
- else if (!full_scan)
- search_start = max(block_group->last_alloc, search_start);
+ search_start = find_search_start(root, &block_group,
+ search_start, total_needed, data);
+ cached_search_start = search_start;
btrfs_init_path(path);
ins->objectid = search_start;
@@ -1097,6 +1078,7 @@
start_found = 1;
last_block = key.objectid + key.offset;
+
if (!full_scan && last_block >= block_group->key.objectid +
block_group->key.offset) {
btrfs_release_path(root, path);
@@ -1138,6 +1120,9 @@
}
ins->offset = num_blocks;
btrfs_free_path(path);
+ if (0 && ins->objectid != cached_search_start) {
+printk("\tcached was %Lu found %Lu\n", cached_search_start, ins->objectid);
+ }
return 0;
new_group:
@@ -1209,6 +1194,10 @@
btrfs_set_root_used(&root->root_item, root_blocks_used +
num_blocks);
+ clear_extent_dirty(&root->fs_info->free_space_cache,
+ ins->objectid, ins->objectid + ins->offset - 1,
+ GFP_NOFS);
+
if (root == extent_root) {
BUG_ON(num_blocks != 1);
set_radix_bit(&root->fs_info->extent_ins_radix, ins->objectid);
@@ -1227,6 +1216,7 @@
BUG_ON(ret);
finish_current_insert(trans, extent_root);
pending_ret = del_pending_extents(trans, extent_root);
+
if (ret) {
return ret;
}
@@ -1265,6 +1255,7 @@
return ERR_PTR(-ENOMEM);
}
btrfs_set_buffer_uptodate(buf);
+ buf->alloc_addr = (unsigned long)__builtin_return_address(0);
set_extent_dirty(&trans->transaction->dirty_pages, buf->start,
buf->start + buf->len - 1, GFP_NOFS);
/*
@@ -1492,6 +1483,7 @@
orig_level = level;
if (btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
path->nodes[level] = root->node;
+ extent_buffer_get(root->node);
path->slots[level] = 0;
} else {
struct btrfs_key key;
@@ -1524,7 +1516,6 @@
if (wret < 0)
ret = wret;
ret = -EAGAIN;
- extent_buffer_get(root->node);
break;
}
for (i = 0; i <= orig_level; i++) {
@@ -1562,8 +1553,8 @@
{
int ret;
int ret2;
- unsigned long gang[16];
- int i;
+ u64 start;
+ u64 end;
ret = free_block_group_radix(&info->block_group_radix);
ret2 = free_block_group_radix(&info->block_group_data_radix);
@@ -1573,13 +1564,12 @@
return ret2;
while(1) {
- ret = find_first_radix_bit(&info->extent_map_radix,
- gang, 0, ARRAY_SIZE(gang));
- if (!ret)
+ ret = find_first_extent_bit(&info->free_space_cache, 0,
+ &start, &end, EXTENT_DIRTY);
+ if (ret)
break;
- for (i = 0; i < ret; i++) {
- clear_radix_bit(&info->extent_map_radix, gang[i]);
- }
+ clear_extent_dirty(&info->free_space_cache, start,
+ end, GFP_NOFS);
}
return 0;
}
diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c
index f150188..5b7dbca 100644
--- a/fs/btrfs/extent_map.c
+++ b/fs/btrfs/extent_map.c
@@ -19,8 +19,13 @@
static struct kmem_cache *extent_map_cache;
static struct kmem_cache *extent_state_cache;
static struct kmem_cache *extent_buffer_cache;
+
static LIST_HEAD(extent_buffers);
+static LIST_HEAD(buffers);
+static LIST_HEAD(states);
+
static spinlock_t extent_buffers_lock;
+static spinlock_t state_lock = SPIN_LOCK_UNLOCKED;
static int nr_extent_buffers;
#define MAX_EXTENT_BUFFER_CACHE 128
@@ -48,6 +53,7 @@
void __exit extent_map_exit(void)
{
struct extent_buffer *eb;
+ struct extent_state *state;
while (!list_empty(&extent_buffers)) {
eb = list_entry(extent_buffers.next,
@@ -55,6 +61,22 @@
list_del(&eb->list);
kmem_cache_free(extent_buffer_cache, eb);
}
+ while (!list_empty(&states)) {
+ state = list_entry(states.next, struct extent_state, list);
+ printk("state leak: start %Lu end %Lu state %lu in tree %d refs %d\n", state->start, state->end, state->state, state->in_tree, atomic_read(&state->refs));
+ list_del(&state->list);
+ kmem_cache_free(extent_state_cache, state);
+
+ }
+ while (!list_empty(&buffers)) {
+ eb = list_entry(buffers.next,
+ struct extent_buffer, leak_list);
+ printk("buffer leak start %Lu len %lu return %lX\n", eb->start, eb->len, eb->alloc_addr);
+ list_del(&eb->leak_list);
+ kmem_cache_free(extent_buffer_cache, eb);
+ }
+
+
if (extent_map_cache)
kmem_cache_destroy(extent_map_cache);
if (extent_state_cache)
@@ -101,12 +123,19 @@
struct extent_state *alloc_extent_state(gfp_t mask)
{
struct extent_state *state;
+ unsigned long flags;
+
state = kmem_cache_alloc(extent_state_cache, mask);
if (!state || IS_ERR(state))
return state;
state->state = 0;
state->in_tree = 0;
state->private = 0;
+
+ spin_lock_irqsave(&state_lock, flags);
+ list_add(&state->list, &states);
+ spin_unlock_irqrestore(&state_lock, flags);
+
atomic_set(&state->refs, 1);
init_waitqueue_head(&state->wq);
return state;
@@ -115,10 +144,14 @@
void free_extent_state(struct extent_state *state)
{
+ unsigned long flags;
if (!state)
return;
if (atomic_dec_and_test(&state->refs)) {
WARN_ON(state->in_tree);
+ spin_lock_irqsave(&state_lock, flags);
+ list_del(&state->list);
+ spin_unlock_irqrestore(&state_lock, flags);
kmem_cache_free(extent_state_cache, state);
}
}
@@ -361,10 +394,6 @@
state->state |= bits;
state->start = start;
state->end = end;
- if ((end & 4095) == 0) {
- printk("insert state %Lu %Lu strange end\n", start, end);
- WARN_ON(1);
- }
node = tree_insert(&tree->state, end, &state->rb_node);
if (node) {
struct extent_state *found;
@@ -399,11 +428,7 @@
prealloc->end = split - 1;
prealloc->state = orig->state;
orig->start = split;
- if ((prealloc->end & 4095) == 0) {
- printk("insert state %Lu %Lu strange end\n", prealloc->start,
- prealloc->end);
- WARN_ON(1);
- }
+
node = tree_insert(&tree->state, prealloc->end, &prealloc->rb_node);
if (node) {
struct extent_state *found;
@@ -957,6 +982,7 @@
*start_ret = state->start;
*end_ret = state->end;
ret = 0;
+ break;
}
node = rb_next(node);
if (!node)
@@ -1877,6 +1903,7 @@
static struct extent_buffer *__alloc_extent_buffer(gfp_t mask)
{
struct extent_buffer *eb = NULL;
+
spin_lock(&extent_buffers_lock);
if (!list_empty(&extent_buffers)) {
eb = list_entry(extent_buffers.next, struct extent_buffer,
@@ -1886,15 +1913,26 @@
nr_extent_buffers--;
}
spin_unlock(&extent_buffers_lock);
+
if (eb) {
memset(eb, 0, sizeof(*eb));
- return eb;
+ } else {
+ eb = kmem_cache_zalloc(extent_buffer_cache, mask);
}
- return kmem_cache_zalloc(extent_buffer_cache, mask);
+ spin_lock(&extent_buffers_lock);
+ list_add(&eb->leak_list, &buffers);
+ spin_unlock(&extent_buffers_lock);
+
+ return eb;
}
static void __free_extent_buffer(struct extent_buffer *eb)
{
+
+ spin_lock(&extent_buffers_lock);
+ list_del_init(&eb->leak_list);
+ spin_unlock(&extent_buffers_lock);
+
if (nr_extent_buffers >= MAX_EXTENT_BUFFER_CACHE) {
kmem_cache_free(extent_buffer_cache, eb);
} else {
@@ -1933,6 +1971,7 @@
if (!eb || IS_ERR(eb))
return NULL;
+ eb->alloc_addr = __builtin_return_address(0);
eb->start = start;
eb->len = len;
atomic_set(&eb->refs, 1);
@@ -1947,6 +1986,7 @@
eb->start &= ~((u64)PAGE_CACHE_SIZE - 1);
goto fail;
}
+ set_page_extent_mapped(p);
if (i == 0)
eb->first_page = p;
if (!PageUptodate(p))
@@ -1978,6 +2018,7 @@
if (!eb || IS_ERR(eb))
return NULL;
+ eb->alloc_addr = __builtin_return_address(0);
eb->start = start;
eb->len = len;
atomic_set(&eb->refs, 1);
@@ -1992,6 +2033,7 @@
eb->start &= ~((u64)PAGE_CACHE_SIZE - 1);
goto fail;
}
+ set_page_extent_mapped(p);
if (i == 0)
eb->first_page = p;
}
diff --git a/fs/btrfs/extent_map.h b/fs/btrfs/extent_map.h
index 3b3abf3..d100f7c 100644
--- a/fs/btrfs/extent_map.h
+++ b/fs/btrfs/extent_map.h
@@ -68,7 +68,9 @@
atomic_t refs;
int flags;
struct list_head list;
+ struct list_head leak_list;
struct page *first_page;
+ unsigned long alloc_addr;
};
typedef struct extent_map *(get_extent_t)(struct inode *inode,
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 750f35a..372b61f 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -352,7 +352,7 @@
return 0;
trans = btrfs_start_transaction(root, 1);
- while (1) {
+ while (0) {
root->defrag_running = 1;
ret = btrfs_defrag_leaves(trans, root, cacheonly);
nr = trans->blocks_used;
@@ -394,7 +394,7 @@
for (i = 0; i < ret; i++) {
root = gang[i];
last = root->root_key.objectid + 1;
- // btrfs_defrag_root(root, 1);
+ btrfs_defrag_root(root, 1);
}
}
// btrfs_defrag_root(info->extent_root, 1);
@@ -462,6 +462,7 @@
ret = btrfs_end_transaction(trans, tree_root);
BUG_ON(ret);
+ free_extent_buffer(dirty->root->node);
kfree(dirty->root);
kfree(dirty);
mutex_unlock(&tree_root->fs_info->fs_mutex);