Btrfs: Create a work queue for bio writes

This allows checksumming to happen in parallel among many cpus, and
keeps us from bogging down pdflush with the checksumming code.

Signed-off-by: Chris Mason <chris.mason@oracle.com>
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index a22edcf..ff15b85 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -514,8 +514,11 @@
 	struct list_head hashers;
 	struct list_head dead_roots;
 	struct list_head end_io_work_list;
+	struct list_head async_submit_work_list;
 	struct work_struct end_io_work;
+	struct work_struct async_submit_work;
 	spinlock_t end_io_work_lock;
+	spinlock_t async_submit_work_lock;
 
 #if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18)
 	struct work_struct trans_work;
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 79c284c..9e41ea9 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -47,6 +47,7 @@
 
 static struct extent_io_ops btree_extent_io_ops;
 static struct workqueue_struct *end_io_workqueue;
+static struct workqueue_struct *async_submit_workqueue;
 
 struct end_io_wq {
 	struct bio *bio;
@@ -58,6 +59,15 @@
 	struct list_head list;
 };
 
+struct async_submit_bio {
+	struct inode *inode;
+	struct bio *bio;
+	struct list_head list;
+	extent_submit_bio_hook_t *submit_bio_hook;
+	int rw;
+	int mirror_num;
+};
+
 struct extent_map *btree_get_extent(struct inode *inode, struct page *page,
 				    size_t page_offset, u64 start, u64 len,
 				    int create)
@@ -365,7 +375,31 @@
 	return 0;
 }
 
-static int btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
+int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode,
+			int rw, struct bio *bio, int mirror_num,
+			extent_submit_bio_hook_t *submit_bio_hook)
+{
+	struct async_submit_bio *async;
+
+	async = kmalloc(sizeof(*async), GFP_NOFS);
+	if (!async)
+		return -ENOMEM;
+
+	async->inode = inode;
+	async->rw = rw;
+	async->bio = bio;
+	async->mirror_num = mirror_num;
+	async->submit_bio_hook = submit_bio_hook;
+
+	spin_lock(&fs_info->async_submit_work_lock);
+	list_add_tail(&async->list, &fs_info->async_submit_work_list);
+	spin_unlock(&fs_info->async_submit_work_lock);
+
+	queue_work(async_submit_workqueue, &fs_info->async_submit_work);
+	return 0;
+}
+
+static int __btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
 				 int mirror_num)
 {
 	struct btrfs_root *root = BTRFS_I(inode)->root;
@@ -389,6 +423,17 @@
 	return btrfs_map_bio(BTRFS_I(inode)->root, rw, bio, mirror_num);
 }
 
+static int btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
+				 int mirror_num)
+{
+	if (!(rw & (1 << BIO_RW))) {
+		return __btree_submit_bio_hook(inode, rw, bio, mirror_num);
+	}
+	return btrfs_wq_submit_bio(BTRFS_I(inode)->root->fs_info,
+				   inode, rw, bio, mirror_num,
+				   __btree_submit_bio_hook);
+}
+
 static int btree_writepage(struct page *page, struct writeback_control *wbc)
 {
 	struct extent_io_tree *tree;
@@ -903,9 +948,9 @@
 }
 
 #if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18)
-void btrfs_end_io_csum(void *p)
+static void btrfs_end_io_csum(void *p)
 #else
-void btrfs_end_io_csum(struct work_struct *work)
+static void btrfs_end_io_csum(struct work_struct *work)
 #endif
 {
 #if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18)
@@ -959,6 +1004,39 @@
 	}
 }
 
+#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18)
+static void btrfs_async_submit_work(void *p)
+#else
+static void btrfs_async_submit_work(struct work_struct *work)
+#endif
+{
+#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18)
+	struct btrfs_fs_info *fs_info = p;
+#else
+	struct btrfs_fs_info *fs_info = container_of(work,
+						     struct btrfs_fs_info,
+						     async_submit_work);
+#endif
+	struct async_submit_bio *async;
+	struct list_head *next;
+
+	while(1) {
+		spin_lock(&fs_info->async_submit_work_lock);
+		if (list_empty(&fs_info->async_submit_work_list)) {
+			spin_unlock(&fs_info->async_submit_work_lock);
+			return;
+		}
+		next = fs_info->async_submit_work_list.next;
+		list_del(next);
+		spin_unlock(&fs_info->async_submit_work_lock);
+
+		async = list_entry(next, struct async_submit_bio, list);
+		async->submit_bio_hook(async->inode, async->rw, async->bio,
+				       async->mirror_num);
+		kfree(async);
+	}
+}
+
 struct btrfs_root *open_ctree(struct super_block *sb,
 			      struct btrfs_fs_devices *fs_devices)
 {
@@ -987,14 +1065,17 @@
 	}
 	end_io_workqueue = create_workqueue("btrfs-end-io");
 	BUG_ON(!end_io_workqueue);
+	async_submit_workqueue = create_workqueue("btrfs-async-submit");
 
 	INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_NOFS);
 	INIT_LIST_HEAD(&fs_info->trans_list);
 	INIT_LIST_HEAD(&fs_info->dead_roots);
 	INIT_LIST_HEAD(&fs_info->hashers);
 	INIT_LIST_HEAD(&fs_info->end_io_work_list);
+	INIT_LIST_HEAD(&fs_info->async_submit_work_list);
 	spin_lock_init(&fs_info->hash_lock);
 	spin_lock_init(&fs_info->end_io_work_lock);
+	spin_lock_init(&fs_info->async_submit_work_lock);
 	spin_lock_init(&fs_info->delalloc_lock);
 	spin_lock_init(&fs_info->new_trans_lock);
 
@@ -1041,9 +1122,12 @@
 
 #if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18)
 	INIT_WORK(&fs_info->end_io_work, btrfs_end_io_csum, fs_info);
+	INIT_WORK(&fs_info->async_submit_work, btrfs_async_submit_work,
+		  fs_info);
 	INIT_WORK(&fs_info->trans_work, btrfs_transaction_cleaner, fs_info);
 #else
 	INIT_WORK(&fs_info->end_io_work, btrfs_end_io_csum);
+	INIT_WORK(&fs_info->async_submit_work, btrfs_async_submit_work);
 	INIT_DELAYED_WORK(&fs_info->trans_work, btrfs_transaction_cleaner);
 #endif
 	BTRFS_I(fs_info->btree_inode)->root = tree_root;
@@ -1403,6 +1487,9 @@
 	flush_workqueue(end_io_workqueue);
 	destroy_workqueue(end_io_workqueue);
 
+	flush_workqueue(async_submit_workqueue);
+	destroy_workqueue(async_submit_workqueue);
+
 	iput(fs_info->btree_inode);
 #if 0
 	while(!list_empty(&fs_info->hashers)) {
diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h
index 60b0190..5d5f697 100644
--- a/fs/btrfs/disk-io.h
+++ b/fs/btrfs/disk-io.h
@@ -74,4 +74,7 @@
 			    struct extent_buffer *buf);
 int btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio,
 			int metadata);
+int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode,
+			int rw, struct bio *bio, int mirror_num,
+			extent_submit_bio_hook_t *submit_bio_hook);
 #endif
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index b47859c..232300d1 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -24,11 +24,12 @@
 
 struct extent_state;
 
+typedef	int (extent_submit_bio_hook_t)(struct inode *inode, int rw,
+				       struct bio *bio, int mirror_num);
 struct extent_io_ops {
 	int (*fill_delalloc)(struct inode *inode, u64 start, u64 end);
 	int (*writepage_io_hook)(struct page *page, u64 start, u64 end);
-	int (*submit_bio_hook)(struct inode *inode, int rw, struct bio *bio,
-			       int mirror_num);
+	extent_submit_bio_hook_t *submit_bio_hook;
 	int (*merge_bio_hook)(struct page *page, unsigned long offset,
 			      size_t size, struct bio *bio);
 	int (*readpage_io_hook)(struct page *page, u64 start, u64 end);
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 7869c01..457daf72 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -324,13 +324,29 @@
 	return 0;
 }
 
-int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
+int __btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
 			  int mirror_num)
 {
 	struct btrfs_root *root = BTRFS_I(inode)->root;
 	struct btrfs_trans_handle *trans;
 	int ret = 0;
 
+	mutex_lock(&root->fs_info->fs_mutex);
+	trans = btrfs_start_transaction(root, 1);
+	btrfs_set_trans_block_group(trans, inode);
+	btrfs_csum_file_blocks(trans, root, inode, bio);
+	ret = btrfs_end_transaction(trans, root);
+	BUG_ON(ret);
+	mutex_unlock(&root->fs_info->fs_mutex);
+	return btrfs_map_bio(root, rw, bio, mirror_num);
+}
+
+int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
+			  int mirror_num)
+{
+	struct btrfs_root *root = BTRFS_I(inode)->root;
+	int ret = 0;
+
 	if (!(rw & (1 << BIO_RW))) {
 		ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0);
 		BUG_ON(ret);
@@ -342,13 +358,9 @@
 		goto mapit;
 	}
 
-	mutex_lock(&root->fs_info->fs_mutex);
-	trans = btrfs_start_transaction(root, 1);
-	btrfs_set_trans_block_group(trans, inode);
-	btrfs_csum_file_blocks(trans, root, inode, bio);
-	ret = btrfs_end_transaction(trans, root);
-	BUG_ON(ret);
-	mutex_unlock(&root->fs_info->fs_mutex);
+	return btrfs_wq_submit_bio(BTRFS_I(inode)->root->fs_info,
+				   inode, rw, bio, mirror_num,
+				   __btrfs_submit_bio_hook);
 mapit:
 	return btrfs_map_bio(root, rw, bio, mirror_num);
 }