Btrfs: Throttle operations if the reference cache gets too large

A large reference cache means a large amount of work is pending for the
cleaner thread.  Throttle new operations based on the size of the
reference cache so that the cleaner thread can keep up.

Overall, this actually makes the FS faster, because the cleaner thread is
more likely to find what it needs still in cache.

Signed-off-by: Chris Mason <chris.mason@oracle.com>
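
In outline, the mechanism is a generation-counter handshake: the cleaner
bumps fs_info->throttle_gen as it makes progress dropping snapshots, and
writers that end a throttled transaction sleep on the transaction_throttle
wait queue until the generation changes.  A minimal userspace rendering of
that handshake, with pthread primitives standing in for the kernel wait
queue (names borrowed from the patch; this is a sketch, not kernel code):

	#include <pthread.h>
	#include <stdatomic.h>

	static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
	static pthread_cond_t progress = PTHREAD_COND_INITIALIZER;
	static atomic_int throttle_gen;
	static atomic_int throttles;

	/* cleaner side: bump the generation, then wake parked writers */
	static void cleaner_made_progress(void)
	{
		atomic_fetch_add(&throttle_gen, 1);
		pthread_mutex_lock(&lock);
		pthread_cond_broadcast(&progress);
		pthread_mutex_unlock(&lock);
	}

	/* writer side: if throttling is on, sleep until the generation moves */
	static void throttle_writer(void)
	{
		int thr = atomic_load(&throttle_gen);

		pthread_mutex_lock(&lock);
		while (atomic_load(&throttles) &&
		       thr == atomic_load(&throttle_gen))
			pthread_cond_wait(&progress, &lock);
		pthread_mutex_unlock(&lock);
	}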
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 4eca0aa..5517dfc 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -580,6 +580,7 @@
 	int do_barriers;
 	int closing;
 	atomic_t throttles;
+	atomic_t throttle_gen;
 
 	u64 total_pinned;
 	struct list_head dirty_cowonly_roots;
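
throttle_gen joins the existing throttles counter in struct btrfs_fs_info:
throttles says whether anyone currently wants writers held back, while
throttle_gen is bumped each time the snapshot-dropping work advances.  A
waiter snapshots the generation before sleeping, so it can tell a spurious
wakeup from real progress (standalone illustration, hypothetical helper
name):

	#include <stdatomic.h>
	#include <stdbool.h>

	static atomic_int throttle_gen;

	/* any change since the snapshot proves the cleaner advanced */
	static bool cleaner_progressed_since(int snapshot)
	{
		return atomic_load(&throttle_gen) != snapshot;
	}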
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index eccdf13..27ffa9b 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -1171,8 +1171,10 @@
 		vfs_check_frozen(root->fs_info->sb, SB_FREEZE_WRITE);
 		mutex_lock(&root->fs_info->transaction_kthread_mutex);
 
-		printk("btrfs: total reference cache size %Lu\n",
-			root->fs_info->total_ref_cache_size);
+		if (root->fs_info->total_ref_cache_size > 20 * 1024 * 1024) {
+			printk("btrfs: total reference cache size %Lu\n",
+				root->fs_info->total_ref_cache_size);
+		}
 
 		mutex_lock(&root->fs_info->trans_mutex);
 		cur = root->fs_info->running_transaction;
@@ -1256,6 +1258,7 @@
 	btrfs_mapping_init(&fs_info->mapping_tree);
 	atomic_set(&fs_info->nr_async_submits, 0);
 	atomic_set(&fs_info->throttles, 0);
+	atomic_set(&fs_info->throttle_gen, 0);
 	fs_info->sb = sb;
 	fs_info->max_extent = (u64)-1;
 	fs_info->max_inline = 8192 * 1024;
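
The transaction kthread's printk is no longer unconditional: the total
reference cache size is only reported once it passes 20MB.  Counting the
thresholds btrfs_throttle() uses further down, the patch works with three
magic sizes; given names (purely hypothetical, the patch keeps them
inline) they would read:

	#define BTRFS_REF_CACHE_SOFT_LIMIT	( 5 * 1024 * 1024)	/* retry harder */
	#define BTRFS_REF_CACHE_HARD_LIMIT	(10 * 1024 * 1024)	/* retry hardest */
	#define BTRFS_REF_CACHE_NOISY_LIMIT	(20 * 1024 * 1024)	/* worth a printk */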
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 0e294cf..6290cf4 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -2650,6 +2650,7 @@
 		}
 	}
 	while(1) {
+		atomic_inc(&root->fs_info->throttle_gen);
 		wret = walk_down_tree(trans, root, path, &level);
 		if (wret > 0)
 			break;
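
This is the producer side of the handshake: each pass of walk_down_tree()
in btrfs_drop_snapshot() counts as one unit of cleaner progress, so the
generation is bumped once per pass.  The waiters are woken through the
transaction_throttle queue elsewhere; the counter is what lets them
conclude that real work happened in between.  Roughly (standalone sketch,
hypothetical stand-in for the tree walk):

	#include <stdatomic.h>

	static atomic_int throttle_gen;

	/* hypothetical stand-in for one walk_down_tree() pass */
	static int drop_one_level(int *levels_left)
	{
		return --(*levels_left) > 0;
	}

	/* one generation tick per unit of work, as in the loop above */
	static void drop_snapshot(int levels)
	{
		for (;;) {
			atomic_fetch_add(&throttle_gen, 1);
			if (!drop_one_level(&levels))
				break;
		}
	}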
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 3efec25..ded5281 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -974,6 +974,7 @@
 		balance_dirty_pages_ratelimited_nr(inode->i_mapping, num_pages);
 		if (num_pages < (root->leafsize >> PAGE_CACHE_SHIFT) + 1)
 			btrfs_btree_balance_dirty(root, 1);
+		btrfs_throttle(root);
 		cond_resched();
 	}
 out:
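
Buffered writes now hit the new throttle once per batch of pages, after
the usual dirty-page balancing and before cond_resched().  The ordering of
the backpressure points, as a standalone sketch with empty stand-in
helpers (only the call order matters):

	static void write_one_batch(void) {}	/* copy user data into pages */
	static void balance_dirty(void) {}	/* existing page-cache backpressure */
	static void throttle_ref_cache(void) {}	/* new: ref-cache backpressure */

	static void buffered_write(int batches)
	{
		while (batches--) {
			write_one_batch();
			balance_dirty();
			throttle_ref_cache();
		}
	}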
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 4f977ea..7c87f86 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -2482,7 +2482,7 @@
 	btrfs_update_inode_block_group(trans, dir);
 out_unlock:
 	nr = trans->blocks_used;
-	btrfs_end_transaction(trans, root);
+	btrfs_end_transaction_throttle(trans, root);
 fail:
 	if (drop_inode) {
 		inode_dec_link_count(inode);
@@ -2535,7 +2535,7 @@
 		drop_inode = 1;
 
 	nr = trans->blocks_used;
-	btrfs_end_transaction(trans, root);
+	btrfs_end_transaction_throttle(trans, root);
 fail:
 	if (drop_inode) {
 		inode_dec_link_count(inode);
@@ -2609,7 +2609,7 @@
 
 out_fail:
 	nr = trans->blocks_used;
-	btrfs_end_transaction(trans, root);
+	btrfs_end_transaction_throttle(trans, root);
 
 out_unlock:
 	if (drop_on_err)
@@ -3434,7 +3434,7 @@
 		goto out_fail;
 
 out_fail:
-	btrfs_end_transaction(trans, root);
+	btrfs_end_transaction_throttle(trans, root);
 out_unlock:
 	return ret;
 }
@@ -3548,7 +3548,7 @@
 
 out_unlock:
 	nr = trans->blocks_used;
-	btrfs_end_transaction(trans, root);
+	btrfs_end_transaction_throttle(trans, root);
 out_fail:
 	if (drop_inode) {
 		inode_dec_link_count(inode);
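
Five call sites in inode.c switch from btrfs_end_transaction() to the
throttled variant, so ordinary namespace operations (creates, links,
renames and the like) are the ones that pay when the reference cache
grows.  Given the __btrfs_end_transaction(trans, root, throttle) helper in
the next file, the two entry points presumably reduce to thin wrappers
along these lines (a sketch of existing code, not part of this patch):

	int btrfs_end_transaction(struct btrfs_trans_handle *trans,
				  struct btrfs_root *root)
	{
		return __btrfs_end_transaction(trans, root, 0);
	}

	int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans,
					   struct btrfs_root *root)
	{
		return __btrfs_end_transaction(trans, root, 1);
	}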
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index fcef3ca..b8be670 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -202,35 +202,64 @@
 	return 0;
 }
 
+void btrfs_throttle(struct btrfs_root *root)
+{
+	struct btrfs_fs_info *info = root->fs_info;
+	int harder_count = 0;
+
+harder:
+	if (atomic_read(&info->throttles)) {
+		DEFINE_WAIT(wait);
+		int thr;
+
+		/* snapshot the generation before sleeping */
+		thr = atomic_read(&info->throttle_gen);
+
+		do {
+			prepare_to_wait(&info->transaction_throttle,
+					&wait, TASK_UNINTERRUPTIBLE);
+			if (!atomic_read(&info->throttles)) {
+				finish_wait(&info->transaction_throttle, &wait);
+				break;
+			}
+			schedule();
+			finish_wait(&info->transaction_throttle, &wait);
+		} while (thr == atomic_read(&info->throttle_gen));
+
+		/* keep waiting while the reference cache stays large */
+		if (harder_count < 5 &&
+		    info->total_ref_cache_size > 5 * 1024 * 1024) {
+			harder_count++;
+			goto harder;
+		}
+
+		if (harder_count < 10 &&
+		    info->total_ref_cache_size > 10 * 1024 * 1024) {
+			harder_count++;
+			goto harder;
+		}
+	}
+}
+
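
Every throttled caller waits for at least one generation tick, and keeps
re-waiting, up to bounded counts, while the reference cache stays large;
note that harder_count must live above the harder: label for those bounds
to hold, since jumping back past its declaration would reset it each
round.  A standalone model of how many rounds a caller waits (the real
cache shrinks while we sleep; here it is a fixed input):

	#include <stddef.h>

	static int wait_rounds(size_t ref_cache_size)
	{
		int harder_count = 0;

		for (;;) {
			/* one generation-wait happens here in the real code */
			if (harder_count < 5 &&
			    ref_cache_size > 5 * 1024 * 1024) {
				harder_count++;
				continue;
			}
			if (harder_count < 10 &&
			    ref_cache_size > 10 * 1024 * 1024) {
				harder_count++;
				continue;
			}
			return harder_count + 1;	/* total rounds slept */
		}
	}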
 static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
 			  struct btrfs_root *root, int throttle)
 {
 	struct btrfs_transaction *cur_trans;
+	struct btrfs_fs_info *info = root->fs_info;
 
-	mutex_lock(&root->fs_info->trans_mutex);
-	cur_trans = root->fs_info->running_transaction;
+	mutex_lock(&info->trans_mutex);
+	cur_trans = info->running_transaction;
 	WARN_ON(cur_trans != trans->transaction);
 	WARN_ON(cur_trans->num_writers < 1);
 	cur_trans->num_writers--;
 
 	if (waitqueue_active(&cur_trans->writer_wait))
 		wake_up(&cur_trans->writer_wait);
-
-	if (throttle && atomic_read(&root->fs_info->throttles)) {
-		DEFINE_WAIT(wait);
-		mutex_unlock(&root->fs_info->trans_mutex);
-		prepare_to_wait(&root->fs_info->transaction_throttle, &wait,
-				TASK_UNINTERRUPTIBLE);
-		if (atomic_read(&root->fs_info->throttles))
-			schedule();
-		finish_wait(&root->fs_info->transaction_throttle, &wait);
-		mutex_lock(&root->fs_info->trans_mutex);
-	}
-
 	put_transaction(cur_trans);
-	mutex_unlock(&root->fs_info->trans_mutex);
+	mutex_unlock(&info->trans_mutex);
 	memset(trans, 0, sizeof(*trans));
 	kmem_cache_free(btrfs_trans_handle_cachep, trans);
+
+	if (throttle)
+		btrfs_throttle(root);
+
 	return 0;
 }
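
The restructuring here matters as much as the new function: the old code
slept on transaction_throttle between dropping and re-taking trans_mutex,
still holding its reference on the transaction and the not-yet-freed
handle.  Now the writer count is dropped, the committer woken, and the
handle freed before throttling, so a throttled writer holds no transaction
state while it sleeps.  Condensed into a standalone model (pthread
stand-ins, hypothetical names):

	#include <pthread.h>
	#include <stdlib.h>

	struct handle { int dummy; };

	static pthread_mutex_t trans_mutex = PTHREAD_MUTEX_INITIALIZER;

	static void throttle_writers(void)
	{
		/* may sleep for several cleaner generations */
	}

	static int end_transaction(struct handle *trans, int throttle)
	{
		pthread_mutex_lock(&trans_mutex);
		/* drop writer count, wake the committer, put the trans ref */
		pthread_mutex_unlock(&trans_mutex);
		free(trans);

		if (throttle)
			throttle_writers();	/* no state held while sleeping */
		return 0;
	}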
 
diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h
index 11fbdec..df2ca2a 100644
--- a/fs/btrfs/transaction.h
+++ b/fs/btrfs/transaction.h
@@ -91,4 +91,5 @@
 			     struct btrfs_root *root);
 int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans,
 				   struct btrfs_root *root);
+void btrfs_throttle(struct btrfs_root *root);
 #endif