Btrfs: make sure logged extents complete in the current transaction V3

Liu Bo pointed out that my previous fix would lose the generation update in the
scenario I described.  It is actually much worse than that: we could lose the
entire extent if we lose power right after the transaction commits.  Consider
the following sequence:

write extent 0-4k
log extent in log tree
commit transaction
	< power fail happens here
ordered extent completes

We would lose the 0-4k extent because the ordered extent hasn't updated the
actual fs tree yet, and the transaction commit resets the log so it isn't
replayed.  If we lose power before the transaction commit we are safe, since
the log replay restores the extent; otherwise we are not.

Fix this by keeping track of all the extents we logged in this transaction.
Then when we go to commit the transaction, make sure we wait for all of those
ordered extents to complete before proceeding.  This ensures that if we lose
power after the transaction commit we still have our data.  This also fixes
the problem of the improperly updated extent generation.  Thanks,

cc: stable@vger.kernel.org
Signed-off-by: Josef Bacik <jbacik@fb.com>
Signed-off-by: Chris Mason <clm@fb.com>
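
Note: the diff below only covers fs/btrfs/transaction.c; the list heads it
relies on are declared in the headers, which aren't included in this excerpt.
A minimal sketch of what those declarations would look like, assuming the
field names used in the hunks (other struct members elided):

	struct btrfs_transaction {
		/* ... other members ... */
		/* logged ordered extents the commit must wait on */
		struct list_head pending_ordered;
	};

	struct btrfs_trans_handle {
		/* ... other members ... */
		/* per-handle staging list, spliced into pending_ordered */
		struct list_head ordered;
	};

	struct btrfs_ordered_extent {
		/* ... other members ... */
		/* link into trans->ordered / cur_trans->pending_ordered */
		struct list_head trans_list;
	};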
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 16c704b..295a135 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -247,6 +247,7 @@
 	INIT_LIST_HEAD(&cur_trans->pending_snapshots);
 	INIT_LIST_HEAD(&cur_trans->pending_chunks);
 	INIT_LIST_HEAD(&cur_trans->switch_commits);
+	INIT_LIST_HEAD(&cur_trans->pending_ordered);
 	list_add_tail(&cur_trans->list, &fs_info->trans_list);
 	extent_io_tree_init(&cur_trans->dirty_pages,
 			     fs_info->btree_inode->i_mapping);
@@ -515,6 +516,7 @@
 	h->sync = false;
 	INIT_LIST_HEAD(&h->qgroup_ref_list);
 	INIT_LIST_HEAD(&h->new_bgs);
+	INIT_LIST_HEAD(&h->ordered);
 
 	smp_mb();
 	if (cur_trans->state >= TRANS_STATE_BLOCKED &&
@@ -746,6 +748,12 @@
 	if (!list_empty(&trans->new_bgs))
 		btrfs_create_pending_block_groups(trans, root);
 
+	if (!list_empty(&trans->ordered)) {
+		spin_lock(&info->trans_lock);
+		list_splice(&trans->ordered, &cur_trans->pending_ordered);
+		spin_unlock(&info->trans_lock);
+	}
+
 	trans->delayed_ref_updates = 0;
 	if (!trans->sync) {
 		must_run_delayed_refs =
@@ -1715,6 +1723,28 @@
 		btrfs_wait_ordered_roots(fs_info, -1);
 }
 
+static inline void
+btrfs_wait_pending_ordered(struct btrfs_transaction *cur_trans,
+			   struct btrfs_fs_info *fs_info)
+{
+	struct btrfs_ordered_extent *ordered;
+
+	spin_lock(&fs_info->trans_lock);
+	while (!list_empty(&cur_trans->pending_ordered)) {
+		ordered = list_first_entry(&cur_trans->pending_ordered,
+					   struct btrfs_ordered_extent,
+					   trans_list);
+		list_del_init(&ordered->trans_list);
+		spin_unlock(&fs_info->trans_lock);
+
+		wait_event(ordered->wait, test_bit(BTRFS_ORDERED_COMPLETE,
+						   &ordered->flags));
+		btrfs_put_ordered_extent(ordered);
+		spin_lock(&fs_info->trans_lock);
+	}
+	spin_unlock(&fs_info->trans_lock);
+}
+
 int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
 			     struct btrfs_root *root)
 {
@@ -1765,6 +1795,7 @@
 	}
 
 	spin_lock(&root->fs_info->trans_lock);
+	list_splice(&trans->ordered, &cur_trans->pending_ordered);
 	if (cur_trans->state >= TRANS_STATE_COMMIT_START) {
 		spin_unlock(&root->fs_info->trans_lock);
 		atomic_inc(&cur_trans->use_count);
@@ -1817,6 +1848,8 @@
 
 	btrfs_wait_delalloc_flush(root->fs_info);
 
+	btrfs_wait_pending_ordered(cur_trans, root->fs_info);
+
 	btrfs_scrub_pause(root);
 	/*
 	 * Ok now we need to make sure to block out any other joins while we
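
Note: the producer side is also not in this excerpt.  For the commit-time wait
to find anything, the tree-log writeout path has to take a reference on each
ordered extent it logs and queue it on the handle's list, roughly as follows
(a sketch under that assumption; the exact call site isn't shown here):

	/*
	 * When logging an ordered extent; the reference is dropped in
	 * btrfs_wait_pending_ordered() once BTRFS_ORDERED_COMPLETE is set.
	 */
	atomic_inc(&ordered->refs);
	list_add_tail(&ordered->trans_list, &trans->ordered);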