Btrfs: Cache free inode numbers in memory
Currently btrfs stores the highest objectid of the fs tree, and it always
returns (highest+1) inode number when we create a file, so inode numbers
won't be reclaimed when we delete files, so we'll run out of inode numbers
as we keep create/delete files in 32bits machines.
This fixes it, and it works similarly to how we cache free space in block
cgroups.
We start a kernel thread to read the file tree. By scanning inode items,
we know which chunks of inode numbers are free, and we cache them in
an rb-tree.
Because we are searching the commit root, we have to carefully handle the
cross-transaction case.
The rb-tree is a hybrid extent+bitmap tree, so if we have too many small
chunks of inode numbers, we'll use bitmaps. Initially we allow 16K ram
of extents, and a bitmap will be used if we exceed this threshold. The
extents threshold is adjusted in runtime.
Signed-off-by: Li Zefan <lizf@cn.fujitsu.com>
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index ef6865c..d02683b 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -41,6 +41,7 @@
#include "locking.h"
#include "tree-log.h"
#include "free-space-cache.h"
+#include "inode-map.h"
static struct extent_io_ops btree_extent_io_ops;
static void end_workqueue_fn(struct btrfs_work *work);
@@ -1327,6 +1328,19 @@
if (IS_ERR(root))
return root;
+ root->free_ino_ctl = kzalloc(sizeof(*root->free_ino_ctl), GFP_NOFS);
+ if (!root->free_ino_ctl)
+ goto fail;
+ root->free_ino_pinned = kzalloc(sizeof(*root->free_ino_pinned),
+ GFP_NOFS);
+ if (!root->free_ino_pinned)
+ goto fail;
+
+ btrfs_init_free_ino_ctl(root);
+ mutex_init(&root->fs_commit_mutex);
+ spin_lock_init(&root->cache_lock);
+ init_waitqueue_head(&root->cache_wait);
+
set_anon_super(&root->anon_super, NULL);
if (btrfs_root_refs(&root->root_item) == 0) {
@@ -2483,6 +2497,8 @@
if (btrfs_root_refs(&root->root_item) == 0)
synchronize_srcu(&fs_info->subvol_srcu);
+ __btrfs_remove_free_space_cache(root->free_ino_pinned);
+ __btrfs_remove_free_space_cache(root->free_ino_ctl);
free_fs_root(root);
return 0;
}
@@ -2496,6 +2512,8 @@
}
free_extent_buffer(root->node);
free_extent_buffer(root->commit_root);
+ kfree(root->free_ino_ctl);
+ kfree(root->free_ino_pinned);
kfree(root->name);
kfree(root);
}