Btrfs: Create a work queue for bio writes
This allows checksumming to happen in parallel across many CPUs, and
keeps the checksumming code from bogging down pdflush.
Signed-off-by: Chris Mason <chris.mason@oracle.com>
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index a22edcf..ff15b85 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -514,8 +514,11 @@
struct list_head hashers;
struct list_head dead_roots;
struct list_head end_io_work_list;
+ struct list_head async_submit_work_list;
struct work_struct end_io_work;
+ struct work_struct async_submit_work;
spinlock_t end_io_work_lock;
+ spinlock_t async_submit_work_lock;
#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18)
struct work_struct trans_work;
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 79c284c..9e41ea9 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -47,6 +47,8 @@
static struct extent_io_ops btree_extent_io_ops;
static struct workqueue_struct *end_io_workqueue;
+/* workqueue on which fs_info->async_submit_work is queued */
+static struct workqueue_struct *async_submit_workqueue;
struct end_io_wq {
struct bio *bio;
@@ -58,6 +59,20 @@
struct list_head list;
};
+/*
+ * One deferred bio submission.  btrfs_wq_submit_bio() fills this in and
+ * btrfs_async_submit_work() later calls submit_bio_hook with the saved
+ * inode, rw, bio and mirror_num, then frees it.
+ */
+struct async_submit_bio {
+ struct inode *inode;
+ struct bio *bio;
+ struct list_head list;
+ extent_submit_bio_hook_t *submit_bio_hook;
+ int rw;
+ int mirror_num;
+};
+
struct extent_map *btree_get_extent(struct inode *inode, struct page *page,
size_t page_offset, u64 start, u64 len,
int create)
@@ -365,7 +375,37 @@
return 0;
}
-static int btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
+/*
+ * Defer a bio submission to the async submit workqueue.  The arguments are
+ * saved in a freshly allocated async_submit_bio that is appended to
+ * fs_info->async_submit_work_list, and fs_info->async_submit_work is queued.
+ * Returns 0 once queued, or -ENOMEM if the allocation fails.
+ */
+int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode,
+			int rw, struct bio *bio, int mirror_num,
+			extent_submit_bio_hook_t *submit_bio_hook)
+{
+	struct async_submit_bio *pending = kmalloc(sizeof(*pending), GFP_NOFS);
+
+	if (pending == NULL)
+		return -ENOMEM;
+
+	/* stash everything the hook needs when it is finally called */
+	pending->submit_bio_hook = submit_bio_hook;
+	pending->inode = inode;
+	pending->bio = bio;
+	pending->rw = rw;
+	pending->mirror_num = mirror_num;
+
+	spin_lock(&fs_info->async_submit_work_lock);
+	list_add_tail(&pending->list, &fs_info->async_submit_work_list);
+	spin_unlock(&fs_info->async_submit_work_lock);
+
+	queue_work(async_submit_workqueue, &fs_info->async_submit_work);
+	return 0;
+}
+
+static int __btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
int mirror_num)
{
struct btrfs_root *root = BTRFS_I(inode)->root;
@@ -389,6 +423,23 @@
return btrfs_map_bio(BTRFS_I(inode)->root, rw, bio, mirror_num);
}
+/*
+ * Route a bio: when the BIO_RW bit is set in rw, queue it through
+ * btrfs_wq_submit_bio() with __btree_submit_bio_hook() as the deferred
+ * hook; otherwise call __btree_submit_bio_hook() directly and return its
+ * result.
+ */
+static int btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
+				 int mirror_num)
+{
+	if (rw & (1 << BIO_RW))
+		return btrfs_wq_submit_bio(BTRFS_I(inode)->root->fs_info,
+					   inode, rw, bio, mirror_num,
+					   __btree_submit_bio_hook);
+
+	return __btree_submit_bio_hook(inode, rw, bio, mirror_num);
+}
+
static int btree_writepage(struct page *page, struct writeback_control *wbc)
{
struct extent_io_tree *tree;
@@ -903,9 +948,9 @@
}
#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18)
-void btrfs_end_io_csum(void *p)
+static void btrfs_end_io_csum(void *p)
#else
-void btrfs_end_io_csum(struct work_struct *work)
+static void btrfs_end_io_csum(struct work_struct *work)
#endif
{
#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18)
@@ -959,6 +1004,47 @@
}
}
+/*
+ * Work handler for fs_info->async_submit_work.  Removes entries from
+ * fs_info->async_submit_work_list one at a time, calls each entry's
+ * submit_bio_hook with the saved arguments and frees the entry.  Returns
+ * once the list is found empty.
+ */
+#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18)
+static void btrfs_async_submit_work(void *p)
+#else
+static void btrfs_async_submit_work(struct work_struct *work)
+#endif
+{
+#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18)
+	struct btrfs_fs_info *fs_info = p;
+#else
+	struct btrfs_fs_info *fs_info = container_of(work,
+						     struct btrfs_fs_info,
+						     async_submit_work);
+#endif
+	struct async_submit_bio *entry;
+
+	for (;;) {
+		spin_lock(&fs_info->async_submit_work_lock);
+		if (list_empty(&fs_info->async_submit_work_list))
+			break;
+
+		/* detach the entry at the head while the lock is held */
+		entry = list_entry(fs_info->async_submit_work_list.next,
+				   struct async_submit_bio, list);
+		list_del(&entry->list);
+		spin_unlock(&fs_info->async_submit_work_lock);
+
+		/* the hook is called with the lock dropped */
+		entry->submit_bio_hook(entry->inode, entry->rw, entry->bio,
+				       entry->mirror_num);
+		kfree(entry);
+	}
+	/* only reached through the break above, so the lock is still held */
+	spin_unlock(&fs_info->async_submit_work_lock);
+}
+
struct btrfs_root *open_ctree(struct super_block *sb,
struct btrfs_fs_devices *fs_devices)
{
@@ -987,14 +1065,19 @@
}
end_io_workqueue = create_workqueue("btrfs-end-io");
BUG_ON(!end_io_workqueue);
+ async_submit_workqueue = create_workqueue("btrfs-async-submit");
+ /* queue_work() needs a valid workqueue; check it like end_io_workqueue */
+ BUG_ON(!async_submit_workqueue);
INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_NOFS);
INIT_LIST_HEAD(&fs_info->trans_list);
INIT_LIST_HEAD(&fs_info->dead_roots);
INIT_LIST_HEAD(&fs_info->hashers);
INIT_LIST_HEAD(&fs_info->end_io_work_list);
+ INIT_LIST_HEAD(&fs_info->async_submit_work_list);
spin_lock_init(&fs_info->hash_lock);
spin_lock_init(&fs_info->end_io_work_lock);
+ spin_lock_init(&fs_info->async_submit_work_lock);
spin_lock_init(&fs_info->delalloc_lock);
spin_lock_init(&fs_info->new_trans_lock);
@@ -1041,9 +1122,12 @@
#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18)
INIT_WORK(&fs_info->end_io_work, btrfs_end_io_csum, fs_info);
+ INIT_WORK(&fs_info->async_submit_work, btrfs_async_submit_work,
+ fs_info);
INIT_WORK(&fs_info->trans_work, btrfs_transaction_cleaner, fs_info);
#else
INIT_WORK(&fs_info->end_io_work, btrfs_end_io_csum);
+ INIT_WORK(&fs_info->async_submit_work, btrfs_async_submit_work);
INIT_DELAYED_WORK(&fs_info->trans_work, btrfs_transaction_cleaner);
#endif
BTRFS_I(fs_info->btree_inode)->root = tree_root;
@@ -1403,6 +1487,9 @@
flush_workqueue(end_io_workqueue);
destroy_workqueue(end_io_workqueue);
+ flush_workqueue(async_submit_workqueue);
+ destroy_workqueue(async_submit_workqueue);
+
iput(fs_info->btree_inode);
#if 0
while(!list_empty(&fs_info->hashers)) {
diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h
index 60b0190..5d5f697 100644
--- a/fs/btrfs/disk-io.h
+++ b/fs/btrfs/disk-io.h
@@ -74,4 +74,8 @@
struct extent_buffer *buf);
int btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio,
int metadata);
+/* queue a bio so that submit_bio_hook is called on it from a workqueue */
+int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode,
+ int rw, struct bio *bio, int mirror_num,
+ extent_submit_bio_hook_t *submit_bio_hook);
#endif
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index b47859c..232300d1 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -24,11 +24,16 @@
struct extent_state;
+/*
+ * Function type shared by extent_io_ops->submit_bio_hook and the hook
+ * argument of btrfs_wq_submit_bio().
+ */
+typedef int (extent_submit_bio_hook_t)(struct inode *inode, int rw,
+ struct bio *bio, int mirror_num);
struct extent_io_ops {
int (*fill_delalloc)(struct inode *inode, u64 start, u64 end);
int (*writepage_io_hook)(struct page *page, u64 start, u64 end);
- int (*submit_bio_hook)(struct inode *inode, int rw, struct bio *bio,
- int mirror_num);
+ extent_submit_bio_hook_t *submit_bio_hook;
int (*merge_bio_hook)(struct page *page, unsigned long offset,
size_t size, struct bio *bio);
int (*readpage_io_hook)(struct page *page, u64 start, u64 end);
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 7869c01..457daf72 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -324,13 +324,35 @@
return 0;
}
-int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
+/*
+ * Deferred half of btrfs_submit_bio_hook(): under fs_mutex, checksum the
+ * bio's file blocks inside a transaction, then map the bio.  It is only
+ * referenced in this file, as the hook handed to btrfs_wq_submit_bio(), so
+ * it is kept static.
+ */
+static int __btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
int mirror_num)
{
struct btrfs_root *root = BTRFS_I(inode)->root;
struct btrfs_trans_handle *trans;
int ret = 0;
+ mutex_lock(&root->fs_info->fs_mutex);
+ trans = btrfs_start_transaction(root, 1);
+ btrfs_set_trans_block_group(trans, inode);
+ btrfs_csum_file_blocks(trans, root, inode, bio);
+ ret = btrfs_end_transaction(trans, root);
+ BUG_ON(ret);
+ mutex_unlock(&root->fs_info->fs_mutex);
+ return btrfs_map_bio(root, rw, bio, mirror_num);
+}
+
+int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
+ int mirror_num)
+{
+ struct btrfs_root *root = BTRFS_I(inode)->root;
+ int ret = 0;
+
if (!(rw & (1 << BIO_RW))) {
ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0);
BUG_ON(ret);
@@ -342,13 +358,9 @@
goto mapit;
}
- mutex_lock(&root->fs_info->fs_mutex);
- trans = btrfs_start_transaction(root, 1);
- btrfs_set_trans_block_group(trans, inode);
- btrfs_csum_file_blocks(trans, root, inode, bio);
- ret = btrfs_end_transaction(trans, root);
- BUG_ON(ret);
- mutex_unlock(&root->fs_info->fs_mutex);
+ return btrfs_wq_submit_bio(BTRFS_I(inode)->root->fs_info,
+ inode, rw, bio, mirror_num,
+ __btrfs_submit_bio_hook);
mapit:
return btrfs_map_bio(root, rw, bio, mirror_num);
}