Blame - fs/btrfs/tree-log.c - SHIFTPHONES/mainline/linux

blob: f7efc26aa82a114e13e1816e40f881e840790108 [file] [log] [blame]

David Sterba	c1d7c51	2018-04-03 19:23:33 +0200	[diff] [blame]	1	// SPDX-License-Identifier: GPL-2.0
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	2	/*
				3	* Copyright (C) 2008 Oracle. All rights reserved.
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	4	*/
				5
				6	#include <linux/sched.h>
Tejun Heo	5a0e3ad	2010-03-24 17:04:11 +0900	[diff] [blame]	7	#include <linux/slab.h>
Miao Xie	c6adc9c	2013-05-28 10:05:39 +0000	[diff] [blame]	8	#include <linux/blkdev.h>
Josef Bacik	5dc562c	2012-08-17 13:14:17 -0400	[diff] [blame]	9	#include <linux/list_sort.h>
Jeff Layton	c7f88c4	2017-12-11 06:35:12 -0500	[diff] [blame]	10	#include <linux/iversion.h>
David Sterba	602cbe9	2019-08-21 18:48:25 +0200	[diff] [blame]	11	#include "misc.h"
Nikolay Borisov	9678c54	2018-01-08 11:45:05 +0200	[diff] [blame]	12	#include "ctree.h"
Miao Xie	995946d	2014-04-02 19:51:06 +0800	[diff] [blame]	13	#include "tree-log.h"
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	14	#include "disk-io.h"
				15	#include "locking.h"
				16	#include "print-tree.h"
Mark Fasheh	f186373	2012-08-08 11:32:27 -0700	[diff] [blame]	17	#include "backref.h"
Anand Jain	ebb8765	2016-03-10 17:26:59 +0800	[diff] [blame]	18	#include "compression.h"
Qu Wenruo	df2c95f	2016-08-15 10:36:52 +0800	[diff] [blame]	19	#include "qgroup.h"
Nikolay Borisov	6787bb9	2020-01-20 16:09:10 +0200	[diff] [blame]	20	#include "block-group.h"
				21	#include "space-info.h"
Naohiro Aota	d3575156	2021-02-04 19:21:54 +0900	[diff] [blame]	22	#include "zoned.h"
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	23
				24	/* magic values for the inode_only field in btrfs_log_inode:
				25	*
				26	* LOG_INODE_ALL means to log everything
				27	* LOG_INODE_EXISTS means to log just enough to recreate the inode
				28	* during log replay
				29	*/
/* Values accepted by the inode_only argument of btrfs_log_inode(). */
enum {
	/* Log everything. */
	LOG_INODE_ALL		= 0,
	/* Log just enough to recreate the inode during log replay. */
	LOG_INODE_EXISTS	= 1,
	/*
	 * NOTE(review): the two modes below are not described anywhere in
	 * this part of the file; presumably they apply when logging an inode
	 * other than the one being fsynced -- confirm against the callers.
	 */
	LOG_OTHER_INODE		= 2,
	LOG_OTHER_INODE_ALL	= 3,
};
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	36
				37	/*
Chris Mason	12fcfd2	2009-03-24 10:24:20 -0400	[diff] [blame]	38	* directory trouble cases
				39	*
				40	* 1) on rename or unlink, if the inode being unlinked isn't in the fsync
				41	* log, we must force a full commit before doing an fsync of the directory
				42	* where the unlink was done.
				43	* ---> record transid of last unlink/rename per directory
				44	*
				45	* mkdir foo/some_dir
				46	* normal commit
				47	* rename foo/some_dir foo2/some_dir
				48	* mkdir foo/some_dir
				49	* fsync foo/some_dir/some_file
				50	*
				51	* The fsync above will unlink the original some_dir without recording
				52	* it in its new location (foo2). After a crash, some_dir will be gone
				53	* unless the fsync of some_file forces a full commit
				54	*
				55	* 2) we must log any new names for any file or dir that is in the fsync
				56	* log. ---> check inode while renaming/linking.
				57	*
				58	* 2a) we must log any new names for any file or dir during rename
				59	* when the directory they are being removed from was logged.
				60	* ---> check inode and old parent dir during rename
				61	*
				62	* 2a is actually the more important variant. With the extra logging
				63	* a crash might unlink the old name without recreating the new one
				64	*
				65	* 3) after a crash, we must go through any directories with a link count
				66	* of zero and redo the rm -rf
				67	*
				68	* mkdir f1/foo
				69	* normal commit
				70	* rm -rf f1/foo
				71	* fsync(f1)
				72	*
				73	* The directory f1 was fully removed from the FS, but fsync was never
				74	* called on f1, only its parent dir. After a crash the rm -rf must
				75	* be replayed. This must be able to recurse down the entire
				76	* directory tree. The inode link count fixup code takes care of the
				77	* ugly details.
				78	*/
				79
				80	/*
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	81	* stages for the tree walking. The first
				82	* stage (0) is to only pin down the blocks we find
				83	* the second stage (1) is to make sure that all the inodes
				84	* we find in the log are created in the subvolume.
				85	*
				86	* The last stage is to deal with directories and links and extents
				87	* and all the other fun semantics
				88	*/
/* Stages of the log tree walk, in increasing order. */
enum {
	/* Only pin down the blocks we find. */
	LOG_WALK_PIN_ONLY		= 0,
	/* Make sure all inodes found in the log exist in the subvolume. */
	LOG_WALK_REPLAY_INODES		= 1,
	/*
	 * NOTE(review): presumably replays directory index items before the
	 * final pass -- confirm against the replay code.
	 */
	LOG_WALK_REPLAY_DIR_INDEX	= 2,
	/* Directories, links, extents and everything else. */
	LOG_WALK_REPLAY_ALL		= 3,
};
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	95
Chris Mason	12fcfd2	2009-03-24 10:24:20 -0400	[diff] [blame]	96	static int btrfs_log_inode(struct btrfs_trans_handle *trans,
Nikolay Borisov	a59108a	2017-01-18 00:31:48 +0200	[diff] [blame]	97	struct btrfs_root root, struct btrfs_inode inode,
Filipe Manana	49dae1b	2014-09-06 22:34:39 +0100	[diff] [blame]	98	int inode_only,
Filipe Manana	8407f55	2014-09-05 15:14:39 +0100	[diff] [blame]	99	struct btrfs_log_ctx *ctx);
Yan Zheng	ec051c0	2009-01-05 15:43:42 -0500	[diff] [blame]	100	static int link_to_fixup_dir(struct btrfs_trans_handle *trans,
				101	struct btrfs_root *root,
				102	struct btrfs_path *path, u64 objectid);
Chris Mason	12fcfd2	2009-03-24 10:24:20 -0400	[diff] [blame]	103	static noinline int replay_dir_deletes(struct btrfs_trans_handle *trans,
				104	struct btrfs_root *root,
				105	struct btrfs_root *log,
				106	struct btrfs_path *path,
				107	u64 dirid, int del_all);
Naohiro Aota	fa1a0f4	2021-02-04 19:22:19 +0900	[diff] [blame]	108	static void wait_log_commit(struct btrfs_root *root, int transid);
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	109
				110	/*
				111	* tree logging is a special write ahead log used to make sure that
				112	* fsyncs and O_SYNCs can happen without doing full tree commits.
				113	*
				114	* Full tree commits are expensive because they require commonly
				115	* modified blocks to be recowed, creating many dirty pages in the
				116	* extent tree and 4x-6x higher write load than ext3.
				117	*
				118	* Instead of doing a tree commit on every fsync, we use the
				119	* key ranges and transaction ids to find items for a given file or directory
				120	* that have changed in this transaction. Those items are copied into
				121	* a special tree (one per subvolume root), that tree is written to disk
				122	* and then the fsync is considered complete.
				123	*
				124	* After a crash, items are copied out of the log-tree back into the
				125	* subvolume tree. Any file data extents found are recorded in the extent
				126	* allocation tree, and the log-tree freed.
				127	*
				128	* The log tree is read three times: once to pin down all the extents it
				129	* is using in ram, once to create all the inodes logged in the tree,
				130	* and once to do all the other items.
				131	*/
				132
				133	/*
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	134	* start a sub transaction and setup the log tree
				135	* this increments the log tree writer count to make the people
				136	* syncing the tree wait for us to finish
				137	*/
				138	static int start_log_trans(struct btrfs_trans_handle *trans,
Miao Xie	8b050d3	2014-02-20 18:08:58 +0800	[diff] [blame]	139	struct btrfs_root *root,
				140	struct btrfs_log_ctx *ctx)
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	141	{
Jeff Mahoney	0b246af	2016-06-22 18:54:23 -0400	[diff] [blame]	142	struct btrfs_fs_info *fs_info = root->fs_info;
Filipe Manana	47876f7	2020-11-25 12:19:28 +0000	[diff] [blame]	143	struct btrfs_root *tree_root = fs_info->tree_root;
Naohiro Aota	fa1a0f4	2021-02-04 19:22:19 +0900	[diff] [blame]	144	const bool zoned = btrfs_is_zoned(fs_info);
Zhaolei	34eb2a5	2015-08-17 18:44:45 +0800	[diff] [blame]	145	int ret = 0;
Naohiro Aota	fa1a0f4	2021-02-04 19:22:19 +0900	[diff] [blame]	146	bool created = false;
Yan Zheng	7237f18	2009-01-21 12:54:03 -0500	[diff] [blame]	147
Filipe Manana	47876f7	2020-11-25 12:19:28 +0000	[diff] [blame]	148	/*
				149	* First check if the log root tree was already created. If not, create
				150	* it before locking the root's log_mutex, just to keep lockdep happy.
				151	*/
				152	if (!test_bit(BTRFS_ROOT_HAS_LOG_TREE, &tree_root->state)) {
				153	mutex_lock(&tree_root->log_mutex);
				154	if (!fs_info->log_root_tree) {
				155	ret = btrfs_init_log_root_tree(trans, fs_info);
Naohiro Aota	fa1a0f4	2021-02-04 19:22:19 +0900	[diff] [blame]	156	if (!ret) {
Filipe Manana	47876f7	2020-11-25 12:19:28 +0000	[diff] [blame]	157	set_bit(BTRFS_ROOT_HAS_LOG_TREE, &tree_root->state);
Naohiro Aota	fa1a0f4	2021-02-04 19:22:19 +0900	[diff] [blame]	158	created = true;
				159	}
Filipe Manana	47876f7	2020-11-25 12:19:28 +0000	[diff] [blame]	160	}
				161	mutex_unlock(&tree_root->log_mutex);
				162	if (ret)
				163	return ret;
				164	}
				165
Yan Zheng	7237f18	2009-01-21 12:54:03 -0500	[diff] [blame]	166	mutex_lock(&root->log_mutex);
Zhaolei	34eb2a5	2015-08-17 18:44:45 +0800	[diff] [blame]	167
Naohiro Aota	fa1a0f4	2021-02-04 19:22:19 +0900	[diff] [blame]	168	again:
Yan Zheng	7237f18	2009-01-21 12:54:03 -0500	[diff] [blame]	169	if (root->log_root) {
Naohiro Aota	fa1a0f4	2021-02-04 19:22:19 +0900	[diff] [blame]	170	int index = (root->log_transid + 1) % 2;
				171
David Sterba	4884b8e	2019-03-20 13:25:34 +0100	[diff] [blame]	172	if (btrfs_need_log_full_commit(trans)) {
Miao Xie	50471a3	2014-02-20 18:08:57 +0800	[diff] [blame]	173	ret = -EAGAIN;
				174	goto out;
				175	}
Zhaolei	34eb2a5	2015-08-17 18:44:45 +0800	[diff] [blame]	176
Naohiro Aota	fa1a0f4	2021-02-04 19:22:19 +0900	[diff] [blame]	177	if (zoned && atomic_read(&root->log_commit[index])) {
				178	wait_log_commit(root, root->log_transid - 1);
				179	goto again;
				180	}
				181
Josef Bacik	ff782e0	2009-10-08 15:30:04 -0400	[diff] [blame]	182	if (!root->log_start_pid) {
Miao Xie	27cdeb7	2014-04-02 19:51:05 +0800	[diff] [blame]	183	clear_bit(BTRFS_ROOT_MULTI_LOG_TASKS, &root->state);
Zhaolei	34eb2a5	2015-08-17 18:44:45 +0800	[diff] [blame]	184	root->log_start_pid = current->pid;
Josef Bacik	ff782e0	2009-10-08 15:30:04 -0400	[diff] [blame]	185	} else if (root->log_start_pid != current->pid) {
Miao Xie	27cdeb7	2014-04-02 19:51:05 +0800	[diff] [blame]	186	set_bit(BTRFS_ROOT_MULTI_LOG_TASKS, &root->state);
Josef Bacik	ff782e0	2009-10-08 15:30:04 -0400	[diff] [blame]	187	}
Zhaolei	34eb2a5	2015-08-17 18:44:45 +0800	[diff] [blame]	188	} else {
Naohiro Aota	fa1a0f4	2021-02-04 19:22:19 +0900	[diff] [blame]	189	/*
				190	* This means fs_info->log_root_tree was already created
				191	* for some other FS trees. Do the full commit not to mix
				192	* nodes from multiple log transactions to do sequential
				193	* writing.
				194	*/
				195	if (zoned && !created) {
				196	ret = -EAGAIN;
				197	goto out;
				198	}
				199
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	200	ret = btrfs_add_log_tree(trans, root);
Yan, Zheng	4a500fd	2010-05-16 10:49:59 -0400	[diff] [blame]	201	if (ret)
Miao Xie	e87ac13	2014-02-20 18:08:53 +0800	[diff] [blame]	202	goto out;
Zhaolei	34eb2a5	2015-08-17 18:44:45 +0800	[diff] [blame]	203
Filipe Manana	e7a7981	2020-06-15 10:38:44 +0100	[diff] [blame]	204	set_bit(BTRFS_ROOT_HAS_LOG_TREE, &root->state);
Zhaolei	34eb2a5	2015-08-17 18:44:45 +0800	[diff] [blame]	205	clear_bit(BTRFS_ROOT_MULTI_LOG_TASKS, &root->state);
				206	root->log_start_pid = current->pid;
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	207	}
Zhaolei	34eb2a5	2015-08-17 18:44:45 +0800	[diff] [blame]	208
Yan Zheng	7237f18	2009-01-21 12:54:03 -0500	[diff] [blame]	209	atomic_inc(&root->log_writers);
Filipe Manana	75b463d	2020-08-11 12:43:48 +0100	[diff] [blame]	210	if (ctx && !ctx->logging_new_name) {
Zhaolei	34eb2a5	2015-08-17 18:44:45 +0800	[diff] [blame]	211	int index = root->log_transid % 2;
Miao Xie	8b050d3	2014-02-20 18:08:58 +0800	[diff] [blame]	212	list_add_tail(&ctx->list, &root->log_ctxs[index]);
Miao Xie	d1433de	2014-02-20 18:08:59 +0800	[diff] [blame]	213	ctx->log_transid = root->log_transid;
Miao Xie	8b050d3	2014-02-20 18:08:58 +0800	[diff] [blame]	214	}
Zhaolei	34eb2a5	2015-08-17 18:44:45 +0800	[diff] [blame]	215
Miao Xie	e87ac13	2014-02-20 18:08:53 +0800	[diff] [blame]	216	out:
Yan Zheng	7237f18	2009-01-21 12:54:03 -0500	[diff] [blame]	217	mutex_unlock(&root->log_mutex);
Miao Xie	e87ac13	2014-02-20 18:08:53 +0800	[diff] [blame]	218	return ret;
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	219	}
				220
				221	/*
				222	* returns 0 if there was a log transaction running and we were able
				223	* to join, or returns -ENOENT if there were not transactions
				224	* in progress
				225	*/
				226	static int join_running_log_trans(struct btrfs_root *root)
				227	{
Naohiro Aota	fa1a0f4	2021-02-04 19:22:19 +0900	[diff] [blame]	228	const bool zoned = btrfs_is_zoned(root->fs_info);
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	229	int ret = -ENOENT;
				230
Filipe Manana	e7a7981	2020-06-15 10:38:44 +0100	[diff] [blame]	231	if (!test_bit(BTRFS_ROOT_HAS_LOG_TREE, &root->state))
				232	return ret;
				233
Yan Zheng	7237f18	2009-01-21 12:54:03 -0500	[diff] [blame]	234	mutex_lock(&root->log_mutex);
Naohiro Aota	fa1a0f4	2021-02-04 19:22:19 +0900	[diff] [blame]	235	again:
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	236	if (root->log_root) {
Naohiro Aota	fa1a0f4	2021-02-04 19:22:19 +0900	[diff] [blame]	237	int index = (root->log_transid + 1) % 2;
				238
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	239	ret = 0;
Naohiro Aota	fa1a0f4	2021-02-04 19:22:19 +0900	[diff] [blame]	240	if (zoned && atomic_read(&root->log_commit[index])) {
				241	wait_log_commit(root, root->log_transid - 1);
				242	goto again;
				243	}
Yan Zheng	7237f18	2009-01-21 12:54:03 -0500	[diff] [blame]	244	atomic_inc(&root->log_writers);
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	245	}
Yan Zheng	7237f18	2009-01-21 12:54:03 -0500	[diff] [blame]	246	mutex_unlock(&root->log_mutex);
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	247	return ret;
				248	}
				249
				250	/*
Chris Mason	12fcfd2	2009-03-24 10:24:20 -0400	[diff] [blame]	251	* This either makes the current running log transaction wait
				252	* until you call btrfs_end_log_trans() or it makes any future
				253	* log transactions wait until you call btrfs_end_log_trans()
				254	*/
zhong jiang	45128b0	2018-08-17 00:37:15 +0800	[diff] [blame]	255	void btrfs_pin_log_trans(struct btrfs_root *root)
Chris Mason	12fcfd2	2009-03-24 10:24:20 -0400	[diff] [blame]	256	{
Chris Mason	12fcfd2	2009-03-24 10:24:20 -0400	[diff] [blame]	257	atomic_inc(&root->log_writers);
Chris Mason	12fcfd2	2009-03-24 10:24:20 -0400	[diff] [blame]	258	}
				259
				260	/*
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	261	* indicate we're done making changes to the log tree
				262	* and wake up anyone waiting to do a sync
				263	*/
Jeff Mahoney	143bede	2012-03-01 14:56:26 +0100	[diff] [blame]	264	void btrfs_end_log_trans(struct btrfs_root *root)
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	265	{
Yan Zheng	7237f18	2009-01-21 12:54:03 -0500	[diff] [blame]	266	if (atomic_dec_and_test(&root->log_writers)) {
David Sterba	093258e	2018-02-26 16:15:17 +0100	[diff] [blame]	267	/* atomic_dec_and_test implies a barrier */
				268	cond_wake_up_nomb(&root->log_writer_wait);
Yan Zheng	7237f18	2009-01-21 12:54:03 -0500	[diff] [blame]	269	}
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	270	}
				271
David Sterba	247462a	2019-03-21 20:21:05 +0100	[diff] [blame]	272	static int btrfs_write_tree_block(struct extent_buffer *buf)
				273	{
				274	return filemap_fdatawrite_range(buf->pages[0]->mapping, buf->start,
				275	buf->start + buf->len - 1);
				276	}
				277
				278	static void btrfs_wait_tree_block_writeback(struct extent_buffer *buf)
				279	{
				280	filemap_fdatawait_range(buf->pages[0]->mapping,
				281	buf->start, buf->start + buf->len - 1);
				282	}
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	283
				284	/*
				285	* the walk control struct is used to pass state down the chain when
				286	* processing the log tree. The stage field tells us which part
				287	* of the log tree processing we are currently doing. The others
				288	* are state fields used for that specific part
				289	*/
				290	struct walk_control {
				291	/* should we free the extent on disk when done? This is used
				292	* at transaction commit time while freeing a log tree
				293	*/
				294	int free;
				295
				296	/* should we write out the extent buffer? This is used
				297	* while flushing the log tree to disk during a sync
				298	*/
				299	int write;
				300
				301	/* should we wait for the extent buffer io to finish? Also used
				302	* while flushing the log tree to disk for a sync
				303	*/
				304	int wait;
				305
				306	/* pin only walk, we record which extents on disk belong to the
				307	* log trees
				308	*/
				309	int pin;
				310
				311	/* what stage of the replay code we're currently in */
				312	int stage;
				313
Filipe Manana	f2d72f4	2018-10-08 11:12:55 +0100	[diff] [blame]	314	/*
				315	* Ignore any items from the inode currently being processed. Needs
				316	* to be set every time we find a BTRFS_INODE_ITEM_KEY and we are in
				317	* the LOG_WALK_REPLAY_INODES stage.
				318	*/
				319	bool ignore_cur_inode;
				320
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	321	/* the root we are currently replaying */
				322	struct btrfs_root *replay_dest;
				323
				324	/* the trans handle for the current replay */
				325	struct btrfs_trans_handle *trans;
				326
				327	/* the function that gets used to process blocks we find in the
				328	* tree. Note the extent_buffer might not be up to date when it is
				329	* passed in, and it must be checked or read if you need the data
				330	* inside it
				331	*/
				332	int (process_func)(struct btrfs_root log, struct extent_buffer *eb,
Qu Wenruo	581c176	2018-03-29 09:08:11 +0800	[diff] [blame]	333	struct walk_control *wc, u64 gen, int level);
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	334	};
				335
				336	/*
				337	* process_func used to pin down extents, write them or wait on them
				338	*/
				339	static int process_one_buffer(struct btrfs_root *log,
				340	struct extent_buffer *eb,
Qu Wenruo	581c176	2018-03-29 09:08:11 +0800	[diff] [blame]	341	struct walk_control *wc, u64 gen, int level)
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	342	{
Jeff Mahoney	0b246af	2016-06-22 18:54:23 -0400	[diff] [blame]	343	struct btrfs_fs_info *fs_info = log->fs_info;
Josef Bacik	b50c6e2	2013-04-25 15:55:30 -0400	[diff] [blame]	344	int ret = 0;
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	345
Josef Bacik	8c2a1a3	2013-06-06 13:19:32 -0400	[diff] [blame]	346	/*
				347	* If this fs is mixed then we need to be able to process the leaves to
				348	* pin down any logged extents, so we have to read the block.
				349	*/
Jeff Mahoney	0b246af	2016-06-22 18:54:23 -0400	[diff] [blame]	350	if (btrfs_fs_incompat(fs_info, MIXED_GROUPS)) {
Qu Wenruo	581c176	2018-03-29 09:08:11 +0800	[diff] [blame]	351	ret = btrfs_read_buffer(eb, gen, level, NULL);
Josef Bacik	8c2a1a3	2013-06-06 13:19:32 -0400	[diff] [blame]	352	if (ret)
				353	return ret;
				354	}
				355
Josef Bacik	b50c6e2	2013-04-25 15:55:30 -0400	[diff] [blame]	356	if (wc->pin)
Nikolay Borisov	9fce570	2020-01-20 16:09:13 +0200	[diff] [blame]	357	ret = btrfs_pin_extent_for_log_replay(wc->trans, eb->start,
Jeff Mahoney	2ff7e61	2016-06-22 18:54:24 -0400	[diff] [blame]	358	eb->len);
Josef Bacik	b50c6e2	2013-04-25 15:55:30 -0400	[diff] [blame]	359
				360	if (!ret && btrfs_buffer_uptodate(eb, gen, 0)) {
Josef Bacik	8c2a1a3	2013-06-06 13:19:32 -0400	[diff] [blame]	361	if (wc->pin && btrfs_header_level(eb) == 0)
David Sterba	bcdc428	2019-03-20 12:14:33 +0100	[diff] [blame]	362	ret = btrfs_exclude_logged_extents(eb);
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	363	if (wc->write)
				364	btrfs_write_tree_block(eb);
				365	if (wc->wait)
				366	btrfs_wait_tree_block_writeback(eb);
				367	}
Josef Bacik	b50c6e2	2013-04-25 15:55:30 -0400	[diff] [blame]	368	return ret;
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	369	}
				370
				371	/*
				372	* Item overwrite used by replay and tree logging. eb, slot and key all refer
				373	* to the src data we are copying out.
				374	*
				375	* root is the tree we are copying into, and path is a scratch
				376	* path for use in this function (it should be released on entry and
				377	* will be released on exit).
				378	*
				379	* If the key is already in the destination tree the existing item is
				380	* overwritten. If the existing item isn't big enough, it is extended.
				381	* If it is too large, it is truncated.
				382	*
				383	* If the key isn't in the destination yet, a new item is inserted.
				384	*/
				385	static noinline int overwrite_item(struct btrfs_trans_handle *trans,
				386	struct btrfs_root *root,
				387	struct btrfs_path *path,
				388	struct extent_buffer *eb, int slot,
				389	struct btrfs_key *key)
				390	{
				391	int ret;
				392	u32 item_size;
				393	u64 saved_i_size = 0;
				394	int save_old_i_size = 0;
				395	unsigned long src_ptr;
				396	unsigned long dst_ptr;
				397	int overwrite_root = 0;
Josef Bacik	4bc4bee	2013-04-05 20:50:09 +0000	[diff] [blame]	398	bool inode_item = key->type == BTRFS_INODE_ITEM_KEY;
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	399
				400	if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID)
				401	overwrite_root = 1;
				402
				403	item_size = btrfs_item_size_nr(eb, slot);
				404	src_ptr = btrfs_item_ptr_offset(eb, slot);
				405
				406	/* look for the key in the destination tree */
				407	ret = btrfs_search_slot(NULL, root, key, path, 0, 0);
Josef Bacik	4bc4bee	2013-04-05 20:50:09 +0000	[diff] [blame]	408	if (ret < 0)
				409	return ret;
				410
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	411	if (ret == 0) {
				412	char *src_copy;
				413	char *dst_copy;
				414	u32 dst_size = btrfs_item_size_nr(path->nodes[0],
				415	path->slots[0]);
				416	if (dst_size != item_size)
				417	goto insert;
				418
				419	if (item_size == 0) {
David Sterba	b3b4aa7	2011-04-21 01:20:15 +0200	[diff] [blame]	420	btrfs_release_path(path);
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	421	return 0;
				422	}
				423	dst_copy = kmalloc(item_size, GFP_NOFS);
				424	src_copy = kmalloc(item_size, GFP_NOFS);
liubo	2a29edc	2011-01-26 06:22:08 +0000	[diff] [blame]	425	if (!dst_copy \|\| !src_copy) {
David Sterba	b3b4aa7	2011-04-21 01:20:15 +0200	[diff] [blame]	426	btrfs_release_path(path);
liubo	2a29edc	2011-01-26 06:22:08 +0000	[diff] [blame]	427	kfree(dst_copy);
				428	kfree(src_copy);
				429	return -ENOMEM;
				430	}
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	431
				432	read_extent_buffer(eb, src_copy, src_ptr, item_size);
				433
				434	dst_ptr = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
				435	read_extent_buffer(path->nodes[0], dst_copy, dst_ptr,
				436	item_size);
				437	ret = memcmp(dst_copy, src_copy, item_size);
				438
				439	kfree(dst_copy);
				440	kfree(src_copy);
				441	/*
				442	* they have the same contents, just return, this saves
				443	* us from cowing blocks in the destination tree and doing
				444	* extra writes that may not have been done by a previous
				445	* sync
				446	*/
				447	if (ret == 0) {
David Sterba	b3b4aa7	2011-04-21 01:20:15 +0200	[diff] [blame]	448	btrfs_release_path(path);
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	449	return 0;
				450	}
				451
Josef Bacik	4bc4bee	2013-04-05 20:50:09 +0000	[diff] [blame]	452	/*
				453	* We need to load the old nbytes into the inode so when we
				454	* replay the extents we've logged we get the right nbytes.
				455	*/
				456	if (inode_item) {
				457	struct btrfs_inode_item *item;
				458	u64 nbytes;
Josef Bacik	d555438	2013-09-11 14:17:00 -0400	[diff] [blame]	459	u32 mode;
Josef Bacik	4bc4bee	2013-04-05 20:50:09 +0000	[diff] [blame]	460
				461	item = btrfs_item_ptr(path->nodes[0], path->slots[0],
				462	struct btrfs_inode_item);
				463	nbytes = btrfs_inode_nbytes(path->nodes[0], item);
				464	item = btrfs_item_ptr(eb, slot,
				465	struct btrfs_inode_item);
				466	btrfs_set_inode_nbytes(eb, item, nbytes);
Josef Bacik	d555438	2013-09-11 14:17:00 -0400	[diff] [blame]	467
				468	/*
				469	* If this is a directory we need to reset the i_size to
				470	* 0 so that we can set it up properly when replaying
				471	* the rest of the items in this log.
				472	*/
				473	mode = btrfs_inode_mode(eb, item);
				474	if (S_ISDIR(mode))
				475	btrfs_set_inode_size(eb, item, 0);
Josef Bacik	4bc4bee	2013-04-05 20:50:09 +0000	[diff] [blame]	476	}
				477	} else if (inode_item) {
				478	struct btrfs_inode_item *item;
Josef Bacik	d555438	2013-09-11 14:17:00 -0400	[diff] [blame]	479	u32 mode;
Josef Bacik	4bc4bee	2013-04-05 20:50:09 +0000	[diff] [blame]	480
				481	/*
				482	* New inode, set nbytes to 0 so that the nbytes comes out
				483	* properly when we replay the extents.
				484	*/
				485	item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
				486	btrfs_set_inode_nbytes(eb, item, 0);
Josef Bacik	d555438	2013-09-11 14:17:00 -0400	[diff] [blame]	487
				488	/*
				489	* If this is a directory we need to reset the i_size to 0 so
				490	* that we can set it up properly when replaying the rest of
				491	* the items in this log.
				492	*/
				493	mode = btrfs_inode_mode(eb, item);
				494	if (S_ISDIR(mode))
				495	btrfs_set_inode_size(eb, item, 0);
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	496	}
				497	insert:
David Sterba	b3b4aa7	2011-04-21 01:20:15 +0200	[diff] [blame]	498	btrfs_release_path(path);
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	499	/* try to insert the key into the destination tree */
Filipe Manana	df8d116	2015-01-14 01:52:25 +0000	[diff] [blame]	500	path->skip_release_on_error = 1;
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	501	ret = btrfs_insert_empty_item(trans, root, path,
				502	key, item_size);
Filipe Manana	df8d116	2015-01-14 01:52:25 +0000	[diff] [blame]	503	path->skip_release_on_error = 0;
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	504
				505	/* make sure any existing item is the correct size */
Filipe Manana	df8d116	2015-01-14 01:52:25 +0000	[diff] [blame]	506	if (ret == -EEXIST \|\| ret == -EOVERFLOW) {
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	507	u32 found_size;
				508	found_size = btrfs_item_size_nr(path->nodes[0],
				509	path->slots[0]);
Jeff Mahoney	143bede	2012-03-01 14:56:26 +0100	[diff] [blame]	510	if (found_size > item_size)
David Sterba	78ac4f9	2019-03-20 14:49:12 +0100	[diff] [blame]	511	btrfs_truncate_item(path, item_size, 1);
Jeff Mahoney	143bede	2012-03-01 14:56:26 +0100	[diff] [blame]	512	else if (found_size < item_size)
David Sterba	c71dd88	2019-03-20 14:51:10 +0100	[diff] [blame]	513	btrfs_extend_item(path, item_size - found_size);
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	514	} else if (ret) {
Yan, Zheng	4a500fd	2010-05-16 10:49:59 -0400	[diff] [blame]	515	return ret;
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	516	}
				517	dst_ptr = btrfs_item_ptr_offset(path->nodes[0],
				518	path->slots[0]);
				519
				520	/* don't overwrite an existing inode if the generation number
				521	* was logged as zero. This is done when the tree logging code
				522	* is just logging an inode to make sure it exists after recovery.
				523	*
				524	* Also, don't overwrite i_size on directories during replay.
				525	* log replay inserts and removes directory items based on the
				526	* state of the tree found in the subvolume, and i_size is modified
				527	* as it goes
				528	*/
				529	if (key->type == BTRFS_INODE_ITEM_KEY && ret == -EEXIST) {
				530	struct btrfs_inode_item *src_item;
				531	struct btrfs_inode_item *dst_item;
				532
				533	src_item = (struct btrfs_inode_item *)src_ptr;
				534	dst_item = (struct btrfs_inode_item *)dst_ptr;
				535
Filipe Manana	1a4bcf4	2015-02-13 12:30:56 +0000	[diff] [blame]	536	if (btrfs_inode_generation(eb, src_item) == 0) {
				537	struct extent_buffer *dst_eb = path->nodes[0];
Filipe Manana	2f2ff0e	2015-03-20 17:19:46 +0000	[diff] [blame]	538	const u64 ino_size = btrfs_inode_size(eb, src_item);
Filipe Manana	1a4bcf4	2015-02-13 12:30:56 +0000	[diff] [blame]	539
Filipe Manana	2f2ff0e	2015-03-20 17:19:46 +0000	[diff] [blame]	540	/*
				541	* For regular files an ino_size == 0 is used only when
				542	* logging that an inode exists, as part of a directory
				543	* fsync, and the inode wasn't fsynced before. In this
				544	* case don't set the size of the inode in the fs/subvol
				545	* tree, otherwise we would be throwing valid data away.
				546	*/
Filipe Manana	1a4bcf4	2015-02-13 12:30:56 +0000	[diff] [blame]	547	if (S_ISREG(btrfs_inode_mode(eb, src_item)) &&
Filipe Manana	2f2ff0e	2015-03-20 17:19:46 +0000	[diff] [blame]	548	S_ISREG(btrfs_inode_mode(dst_eb, dst_item)) &&
David Sterba	60d48e2	2020-04-29 15:29:53 +0200	[diff] [blame]	549	ino_size != 0)
				550	btrfs_set_inode_size(dst_eb, dst_item, ino_size);
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	551	goto no_copy;
Filipe Manana	1a4bcf4	2015-02-13 12:30:56 +0000	[diff] [blame]	552	}
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	553
				554	if (overwrite_root &&
				555	S_ISDIR(btrfs_inode_mode(eb, src_item)) &&
				556	S_ISDIR(btrfs_inode_mode(path->nodes[0], dst_item))) {
				557	save_old_i_size = 1;
				558	saved_i_size = btrfs_inode_size(path->nodes[0],
				559	dst_item);
				560	}
				561	}
				562
				563	copy_extent_buffer(path->nodes[0], eb, dst_ptr,
				564	src_ptr, item_size);
				565
				566	if (save_old_i_size) {
				567	struct btrfs_inode_item *dst_item;
				568	dst_item = (struct btrfs_inode_item *)dst_ptr;
				569	btrfs_set_inode_size(path->nodes[0], dst_item, saved_i_size);
				570	}
				571
				572	/* make sure the generation is filled in */
				573	if (key->type == BTRFS_INODE_ITEM_KEY) {
				574	struct btrfs_inode_item *dst_item;
				575	dst_item = (struct btrfs_inode_item *)dst_ptr;
				576	if (btrfs_inode_generation(path->nodes[0], dst_item) == 0) {
				577	btrfs_set_inode_generation(path->nodes[0], dst_item,
				578	trans->transid);
				579	}
				580	}
				581	no_copy:
				582	btrfs_mark_buffer_dirty(path->nodes[0]);
David Sterba	b3b4aa7	2011-04-21 01:20:15 +0200	[diff] [blame]	583	btrfs_release_path(path);
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	584	return 0;
				585	}
				586
				587	/*
				588	* simple helper to read an inode off the disk from a given root
				589	* This can only be called for subvolume roots and not for the log
				590	*/
				591	static noinline struct inode read_one_inode(struct btrfs_root root,
				592	u64 objectid)
				593	{
				594	struct inode *inode;
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	595
David Sterba	0202e83	2020-05-15 19:35:59 +0200	[diff] [blame]	596	inode = btrfs_iget(root->fs_info->sb, objectid, root);
Al Viro	2e19f1f	2018-07-29 23:04:45 +0100	[diff] [blame]	597	if (IS_ERR(inode))
Yan Zheng	5d4f98a	2009-06-10 10:45:14 -0400	[diff] [blame]	598	inode = NULL;
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	599	return inode;
				600	}
				601
				602	/* replays a single extent in 'eb' at 'slot' with 'key' into the
				603	* subvolume 'root'. path is released on entry and should be released
				604	* on exit.
				605	*
				606	* extents in the log tree have not been allocated out of the extent
				607	* tree yet. So, this completes the allocation, taking a reference
				608	* as required if the extent already exists or creating a new extent
				609	* if it isn't in the extent allocation tree yet.
				610	*
				611	* The extent is inserted into the file, dropping any existing extents
				612	* from the file that overlap the new one.
				613	*/
				614	static noinline int replay_one_extent(struct btrfs_trans_handle *trans,
				615	struct btrfs_root *root,
				616	struct btrfs_path *path,
				617	struct extent_buffer *eb, int slot,
				618	struct btrfs_key *key)
				619	{
Filipe Manana	5893dfb	2020-11-04 11:07:32 +0000	[diff] [blame]	620	struct btrfs_drop_extents_args drop_args = { 0 };
Jeff Mahoney	0b246af	2016-06-22 18:54:23 -0400	[diff] [blame]	621	struct btrfs_fs_info *fs_info = root->fs_info;
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	622	int found_type;
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	623	u64 extent_end;
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	624	u64 start = key->offset;
Josef Bacik	4bc4bee	2013-04-05 20:50:09 +0000	[diff] [blame]	625	u64 nbytes = 0;
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	626	struct btrfs_file_extent_item *item;
				627	struct inode *inode = NULL;
				628	unsigned long size;
				629	int ret = 0;
				630
				631	item = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
				632	found_type = btrfs_file_extent_type(eb, item);
				633
Yan Zheng	d899e05	2008-10-30 14:25:28 -0400	[diff] [blame]	634	if (found_type == BTRFS_FILE_EXTENT_REG \|\|
Josef Bacik	4bc4bee	2013-04-05 20:50:09 +0000	[diff] [blame]	635	found_type == BTRFS_FILE_EXTENT_PREALLOC) {
				636	nbytes = btrfs_file_extent_num_bytes(eb, item);
				637	extent_end = start + nbytes;
				638
				639	/*
				640	* We don't add to the inodes nbytes if we are prealloc or a
				641	* hole.
				642	*/
				643	if (btrfs_file_extent_disk_bytenr(eb, item) == 0)
				644	nbytes = 0;
				645	} else if (found_type == BTRFS_FILE_EXTENT_INLINE) {
Qu Wenruo	e41ca58	2018-06-06 15:41:49 +0800	[diff] [blame]	646	size = btrfs_file_extent_ram_bytes(eb, item);
Josef Bacik	4bc4bee	2013-04-05 20:50:09 +0000	[diff] [blame]	647	nbytes = btrfs_file_extent_ram_bytes(eb, item);
Jeff Mahoney	da17066	2016-06-15 09:22:56 -0400	[diff] [blame]	648	extent_end = ALIGN(start + size,
Jeff Mahoney	0b246af	2016-06-22 18:54:23 -0400	[diff] [blame]	649	fs_info->sectorsize);
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	650	} else {
				651	ret = 0;
				652	goto out;
				653	}
				654
				655	inode = read_one_inode(root, key->objectid);
				656	if (!inode) {
				657	ret = -EIO;
				658	goto out;
				659	}
				660
				661	/*
				662	* first check to see if we already have this extent in the
				663	* file. This must be done before the btrfs_drop_extents run
				664	* so we don't try to drop this extent.
				665	*/
David Sterba	f85b737	2017-01-20 14:54:07 +0100	[diff] [blame]	666	ret = btrfs_lookup_file_extent(trans, root, path,
				667	btrfs_ino(BTRFS_I(inode)), start, 0);
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	668
Yan Zheng	d899e05	2008-10-30 14:25:28 -0400	[diff] [blame]	669	if (ret == 0 &&
				670	(found_type == BTRFS_FILE_EXTENT_REG \|\|
				671	found_type == BTRFS_FILE_EXTENT_PREALLOC)) {
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	672	struct btrfs_file_extent_item cmp1;
				673	struct btrfs_file_extent_item cmp2;
				674	struct btrfs_file_extent_item *existing;
				675	struct extent_buffer *leaf;
				676
				677	leaf = path->nodes[0];
				678	existing = btrfs_item_ptr(leaf, path->slots[0],
				679	struct btrfs_file_extent_item);
				680
				681	read_extent_buffer(eb, &cmp1, (unsigned long)item,
				682	sizeof(cmp1));
				683	read_extent_buffer(leaf, &cmp2, (unsigned long)existing,
				684	sizeof(cmp2));
				685
				686	/*
				687	* we already have a pointer to this exact extent,
				688	* we don't have to do anything
				689	*/
				690	if (memcmp(&cmp1, &cmp2, sizeof(cmp1)) == 0) {
David Sterba	b3b4aa7	2011-04-21 01:20:15 +0200	[diff] [blame]	691	btrfs_release_path(path);
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	692	goto out;
				693	}
				694	}
David Sterba	b3b4aa7	2011-04-21 01:20:15 +0200	[diff] [blame]	695	btrfs_release_path(path);
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	696
				697	/* drop any overlapping extents */
Filipe Manana	5893dfb	2020-11-04 11:07:32 +0000	[diff] [blame]	698	drop_args.start = start;
				699	drop_args.end = extent_end;
				700	drop_args.drop_cache = true;
				701	ret = btrfs_drop_extents(trans, root, BTRFS_I(inode), &drop_args);
Josef Bacik	3650860	2013-04-25 16:23:32 -0400	[diff] [blame]	702	if (ret)
				703	goto out;
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	704
Yan Zheng	07d400a	2009-01-06 11:42:00 -0500	[diff] [blame]	705	if (found_type == BTRFS_FILE_EXTENT_REG \|\|
				706	found_type == BTRFS_FILE_EXTENT_PREALLOC) {
Yan Zheng	5d4f98a	2009-06-10 10:45:14 -0400	[diff] [blame]	707	u64 offset;
Yan Zheng	07d400a	2009-01-06 11:42:00 -0500	[diff] [blame]	708	unsigned long dest_offset;
				709	struct btrfs_key ins;
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	710
Filipe Manana	3168021c	2017-02-01 14:58:02 +0000	[diff] [blame]	711	if (btrfs_file_extent_disk_bytenr(eb, item) == 0 &&
				712	btrfs_fs_incompat(fs_info, NO_HOLES))
				713	goto update_inode;
				714
Yan Zheng	07d400a	2009-01-06 11:42:00 -0500	[diff] [blame]	715	ret = btrfs_insert_empty_item(trans, root, path, key,
				716	sizeof(*item));
Josef Bacik	3650860	2013-04-25 16:23:32 -0400	[diff] [blame]	717	if (ret)
				718	goto out;
Yan Zheng	07d400a	2009-01-06 11:42:00 -0500	[diff] [blame]	719	dest_offset = btrfs_item_ptr_offset(path->nodes[0],
				720	path->slots[0]);
				721	copy_extent_buffer(path->nodes[0], eb, dest_offset,
				722	(unsigned long)item, sizeof(*item));
				723
				724	ins.objectid = btrfs_file_extent_disk_bytenr(eb, item);
				725	ins.offset = btrfs_file_extent_disk_num_bytes(eb, item);
				726	ins.type = BTRFS_EXTENT_ITEM_KEY;
Yan Zheng	5d4f98a	2009-06-10 10:45:14 -0400	[diff] [blame]	727	offset = key->offset - btrfs_file_extent_offset(eb, item);
Yan Zheng	07d400a	2009-01-06 11:42:00 -0500	[diff] [blame]	728
Qu Wenruo	df2c95f	2016-08-15 10:36:52 +0800	[diff] [blame]	729	/*
				730	* Manually record dirty extent, as here we did a shallow
				731	* file extent item copy and skip normal backref update,
				732	* but modifying extent tree all by ourselves.
				733	* So need to manually record dirty extent for qgroup,
				734	* as the owner of the file extent changed from log tree
				735	* (doesn't affect qgroup) to fs/file tree(affects qgroup)
				736	*/
Lu Fengqi	a95f3aa	2018-07-18 16:28:03 +0800	[diff] [blame]	737	ret = btrfs_qgroup_trace_extent(trans,
Qu Wenruo	df2c95f	2016-08-15 10:36:52 +0800	[diff] [blame]	738	btrfs_file_extent_disk_bytenr(eb, item),
				739	btrfs_file_extent_disk_num_bytes(eb, item),
				740	GFP_NOFS);
				741	if (ret < 0)
				742	goto out;
				743
Yan Zheng	07d400a	2009-01-06 11:42:00 -0500	[diff] [blame]	744	if (ins.objectid > 0) {
Qu Wenruo	82fa113	2019-04-04 14:45:35 +0800	[diff] [blame]	745	struct btrfs_ref ref = { 0 };
Yan Zheng	07d400a	2009-01-06 11:42:00 -0500	[diff] [blame]	746	u64 csum_start;
				747	u64 csum_end;
				748	LIST_HEAD(ordered_sums);
Qu Wenruo	82fa113	2019-04-04 14:45:35 +0800	[diff] [blame]	749
Yan Zheng	07d400a	2009-01-06 11:42:00 -0500	[diff] [blame]	750	/*
				751	* is this extent already allocated in the extent
				752	* allocation tree? If so, just add a reference
				753	*/
Jeff Mahoney	2ff7e61	2016-06-22 18:54:24 -0400	[diff] [blame]	754	ret = btrfs_lookup_data_extent(fs_info, ins.objectid,
Yan Zheng	07d400a	2009-01-06 11:42:00 -0500	[diff] [blame]	755	ins.offset);
Marcos Paulo de Souza	3736127	2021-08-02 09:34:00 -0300	[diff] [blame^]	756	if (ret < 0) {
				757	goto out;
				758	} else if (ret == 0) {
Qu Wenruo	82fa113	2019-04-04 14:45:35 +0800	[diff] [blame]	759	btrfs_init_generic_ref(&ref,
				760	BTRFS_ADD_DELAYED_REF,
				761	ins.objectid, ins.offset, 0);
				762	btrfs_init_data_ref(&ref,
				763	root->root_key.objectid,
Filipe Manana	b06c4bf	2015-10-23 07:52:54 +0100	[diff] [blame]	764	key->objectid, offset);
Qu Wenruo	82fa113	2019-04-04 14:45:35 +0800	[diff] [blame]	765	ret = btrfs_inc_extent_ref(trans, &ref);
Josef Bacik	b50c6e2	2013-04-25 15:55:30 -0400	[diff] [blame]	766	if (ret)
				767	goto out;
Yan Zheng	07d400a	2009-01-06 11:42:00 -0500	[diff] [blame]	768	} else {
				769	/*
				770	* insert the extent pointer in the extent
				771	* allocation tree
				772	*/
Yan Zheng	5d4f98a	2009-06-10 10:45:14 -0400	[diff] [blame]	773	ret = btrfs_alloc_logged_file_extent(trans,
Jeff Mahoney	2ff7e61	2016-06-22 18:54:24 -0400	[diff] [blame]	774	root->root_key.objectid,
Yan Zheng	5d4f98a	2009-06-10 10:45:14 -0400	[diff] [blame]	775	key->objectid, offset, &ins);
Josef Bacik	b50c6e2	2013-04-25 15:55:30 -0400	[diff] [blame]	776	if (ret)
				777	goto out;
Yan Zheng	07d400a	2009-01-06 11:42:00 -0500	[diff] [blame]	778	}
David Sterba	b3b4aa7	2011-04-21 01:20:15 +0200	[diff] [blame]	779	btrfs_release_path(path);
Yan Zheng	07d400a	2009-01-06 11:42:00 -0500	[diff] [blame]	780
				781	if (btrfs_file_extent_compression(eb, item)) {
				782	csum_start = ins.objectid;
				783	csum_end = csum_start + ins.offset;
				784	} else {
				785	csum_start = ins.objectid +
				786	btrfs_file_extent_offset(eb, item);
				787	csum_end = csum_start +
				788	btrfs_file_extent_num_bytes(eb, item);
				789	}
				790
				791	ret = btrfs_lookup_csums_range(root->log_root,
				792	csum_start, csum_end - 1,
Arne Jansen	a2de733	2011-03-08 14:14:00 +0100	[diff] [blame]	793	&ordered_sums, 0);
Josef Bacik	3650860	2013-04-25 16:23:32 -0400	[diff] [blame]	794	if (ret)
				795	goto out;
Filipe Manana	b84b839	2015-08-19 11:09:40 +0100	[diff] [blame]	796	/*
				797	* Now delete all existing cums in the csum root that
				798	* cover our range. We do this because we can have an
				799	* extent that is completely referenced by one file
				800	* extent item and partially referenced by another
				801	* file extent item (like after using the clone or
				802	* extent_same ioctls). In this case if we end up doing
				803	* the replay of the one that partially references the
				804	* extent first, and we do not do the csum deletion
				805	* below, we can get 2 csum items in the csum tree that
				806	* overlap each other. For example, imagine our log has
				807	* the two following file extent items:
				808	*
				809	* key (257 EXTENT_DATA 409600)
				810	* extent data disk byte 12845056 nr 102400
				811	* extent data offset 20480 nr 20480 ram 102400
				812	*
				813	* key (257 EXTENT_DATA 819200)
				814	* extent data disk byte 12845056 nr 102400
				815	* extent data offset 0 nr 102400 ram 102400
				816	*
				817	* Where the second one fully references the 100K extent
				818	* that starts at disk byte 12845056, and the log tree
				819	* has a single csum item that covers the entire range
				820	* of the extent:
				821	*
				822	* key (EXTENT_CSUM EXTENT_CSUM 12845056) itemsize 100
				823	*
				824	* After the first file extent item is replayed, the
				825	* csum tree gets the following csum item:
				826	*
				827	* key (EXTENT_CSUM EXTENT_CSUM 12865536) itemsize 20
				828	*
				829	* Which covers the 20K sub-range starting at offset 20K
				830	* of our extent. Now when we replay the second file
				831	* extent item, if we do not delete existing csum items
				832	* that cover any of its blocks, we end up getting two
				833	* csum items in our csum tree that overlap each other:
				834	*
				835	* key (EXTENT_CSUM EXTENT_CSUM 12845056) itemsize 100
				836	* key (EXTENT_CSUM EXTENT_CSUM 12865536) itemsize 20
				837	*
				838	* Which is a problem, because after this anyone trying
				839	* to lookup up for the checksum of any block of our
				840	* extent starting at an offset of 40K or higher, will
				841	* end up looking at the second csum item only, which
				842	* does not contain the checksum for any block starting
				843	* at offset 40K or higher of our extent.
				844	*/
Yan Zheng	07d400a	2009-01-06 11:42:00 -0500	[diff] [blame]	845	while (!list_empty(&ordered_sums)) {
				846	struct btrfs_ordered_sum *sums;
				847	sums = list_entry(ordered_sums.next,
				848	struct btrfs_ordered_sum,
				849	list);
Josef Bacik	3650860	2013-04-25 16:23:32 -0400	[diff] [blame]	850	if (!ret)
Filipe Manana	40e046a	2019-12-05 16:58:30 +0000	[diff] [blame]	851	ret = btrfs_del_csums(trans,
				852	fs_info->csum_root,
Jeff Mahoney	5b4aace	2016-06-21 10:40:19 -0400	[diff] [blame]	853	sums->bytenr,
				854	sums->len);
Filipe Manana	b84b839	2015-08-19 11:09:40 +0100	[diff] [blame]	855	if (!ret)
Josef Bacik	3650860	2013-04-25 16:23:32 -0400	[diff] [blame]	856	ret = btrfs_csum_file_blocks(trans,
Jeff Mahoney	0b246af	2016-06-22 18:54:23 -0400	[diff] [blame]	857	fs_info->csum_root, sums);
Yan Zheng	07d400a	2009-01-06 11:42:00 -0500	[diff] [blame]	858	list_del(&sums->list);
				859	kfree(sums);
				860	}
Josef Bacik	3650860	2013-04-25 16:23:32 -0400	[diff] [blame]	861	if (ret)
				862	goto out;
Yan Zheng	07d400a	2009-01-06 11:42:00 -0500	[diff] [blame]	863	} else {
David Sterba	b3b4aa7	2011-04-21 01:20:15 +0200	[diff] [blame]	864	btrfs_release_path(path);
Yan Zheng	07d400a	2009-01-06 11:42:00 -0500	[diff] [blame]	865	}
				866	} else if (found_type == BTRFS_FILE_EXTENT_INLINE) {
				867	/* inline extents are easy, we just overwrite them */
				868	ret = overwrite_item(trans, root, path, eb, slot, key);
Josef Bacik	3650860	2013-04-25 16:23:32 -0400	[diff] [blame]	869	if (ret)
				870	goto out;
Yan Zheng	07d400a	2009-01-06 11:42:00 -0500	[diff] [blame]	871	}
				872
Josef Bacik	9ddc959	2020-01-17 09:02:22 -0500	[diff] [blame]	873	ret = btrfs_inode_set_file_extent_range(BTRFS_I(inode), start,
				874	extent_end - start);
				875	if (ret)
				876	goto out;
				877
Filipe Manana	3168021c	2017-02-01 14:58:02 +0000	[diff] [blame]	878	update_inode:
Filipe Manana	2766ff6	2020-11-04 11:07:34 +0000	[diff] [blame]	879	btrfs_update_inode_bytes(BTRFS_I(inode), nbytes, drop_args.bytes_found);
Nikolay Borisov	9a56fcd	2020-11-02 16:48:59 +0200	[diff] [blame]	880	ret = btrfs_update_inode(trans, root, BTRFS_I(inode));
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	881	out:
				882	if (inode)
				883	iput(inode);
				884	return ret;
				885	}
				886
				887	/*
				888	* when cleaning up conflicts between the directory names in the
				889	* subvolume, directory names in the log and directory names in the
				890	* inode back references, we may have to unlink inodes from directories.
				891	*
				892	* This is a helper function to do the unlink of a specific directory
				893	* item
				894	*/
				895	static noinline int drop_one_dir_item(struct btrfs_trans_handle *trans,
				896	struct btrfs_root *root,
				897	struct btrfs_path *path,
Nikolay Borisov	207e7d9	2017-01-18 00:31:45 +0200	[diff] [blame]	898	struct btrfs_inode *dir,
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	899	struct btrfs_dir_item *di)
				900	{
				901	struct inode *inode;
				902	char *name;
				903	int name_len;
				904	struct extent_buffer *leaf;
				905	struct btrfs_key location;
				906	int ret;
				907
				908	leaf = path->nodes[0];
				909
				910	btrfs_dir_item_key_to_cpu(leaf, di, &location);
				911	name_len = btrfs_dir_name_len(leaf, di);
				912	name = kmalloc(name_len, GFP_NOFS);
liubo	2a29edc	2011-01-26 06:22:08 +0000	[diff] [blame]	913	if (!name)
				914	return -ENOMEM;
				915
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	916	read_extent_buffer(leaf, name, (unsigned long)(di + 1), name_len);
David Sterba	b3b4aa7	2011-04-21 01:20:15 +0200	[diff] [blame]	917	btrfs_release_path(path);
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	918
				919	inode = read_one_inode(root, location.objectid);
Tsutomu Itoh	c00e949	2011-04-28 09:10:23 +0000	[diff] [blame]	920	if (!inode) {
Josef Bacik	3650860	2013-04-25 16:23:32 -0400	[diff] [blame]	921	ret = -EIO;
				922	goto out;
Tsutomu Itoh	c00e949	2011-04-28 09:10:23 +0000	[diff] [blame]	923	}
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	924
Yan Zheng	ec051c0	2009-01-05 15:43:42 -0500	[diff] [blame]	925	ret = link_to_fixup_dir(trans, root, path, location.objectid);
Josef Bacik	3650860	2013-04-25 16:23:32 -0400	[diff] [blame]	926	if (ret)
				927	goto out;
Chris Mason	12fcfd2	2009-03-24 10:24:20 -0400	[diff] [blame]	928
Nikolay Borisov	207e7d9	2017-01-18 00:31:45 +0200	[diff] [blame]	929	ret = btrfs_unlink_inode(trans, root, dir, BTRFS_I(inode), name,
				930	name_len);
Josef Bacik	3650860	2013-04-25 16:23:32 -0400	[diff] [blame]	931	if (ret)
				932	goto out;
Filipe David Borba Manana	ada9af2	2013-08-05 09:25:47 +0100	[diff] [blame]	933	else
Nikolay Borisov	e5c304e6	2018-02-07 17:55:43 +0200	[diff] [blame]	934	ret = btrfs_run_delayed_items(trans);
Josef Bacik	3650860	2013-04-25 16:23:32 -0400	[diff] [blame]	935	out:
				936	kfree(name);
				937	iput(inode);
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	938	return ret;
				939	}
				940
				941	/*
				942	* helper function to see if a given name and sequence number found
				943	* in an inode back reference are already in a directory and correctly
				944	* point to this inode
				945	*/
				946	static noinline int inode_in_dir(struct btrfs_root *root,
				947	struct btrfs_path *path,
				948	u64 dirid, u64 objectid, u64 index,
				949	const char *name, int name_len)
				950	{
				951	struct btrfs_dir_item *di;
				952	struct btrfs_key location;
				953	int match = 0;
				954
				955	di = btrfs_lookup_dir_index_item(NULL, root, path, dirid,
				956	index, name, name_len, 0);
				957	if (di && !IS_ERR(di)) {
				958	btrfs_dir_item_key_to_cpu(path->nodes[0], di, &location);
				959	if (location.objectid != objectid)
				960	goto out;
				961	} else
				962	goto out;
David Sterba	b3b4aa7	2011-04-21 01:20:15 +0200	[diff] [blame]	963	btrfs_release_path(path);
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	964
				965	di = btrfs_lookup_dir_item(NULL, root, path, dirid, name, name_len, 0);
				966	if (di && !IS_ERR(di)) {
				967	btrfs_dir_item_key_to_cpu(path->nodes[0], di, &location);
				968	if (location.objectid != objectid)
				969	goto out;
				970	} else
				971	goto out;
				972	match = 1;
				973	out:
David Sterba	b3b4aa7	2011-04-21 01:20:15 +0200	[diff] [blame]	974	btrfs_release_path(path);
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	975	return match;
				976	}
				977
				978	/*
				979	* helper function to check a log tree for a named back reference in
				980	* an inode. This is used to decide if a back reference that is
				981	* found in the subvolume conflicts with what we find in the log.
				982	*
				983	* inode backreferences may have multiple refs in a single item,
				984	* during replay we process one reference at a time, and we don't
				985	* want to delete valid links to a file from the subvolume if that
				986	* link is also in the log.
				987	*/
				988	static noinline int backref_in_log(struct btrfs_root *log,
				989	struct btrfs_key *key,
Mark Fasheh	f186373	2012-08-08 11:32:27 -0700	[diff] [blame]	990	u64 ref_objectid,
Filipe Manana	df8d116	2015-01-14 01:52:25 +0000	[diff] [blame]	991	const char *name, int namelen)
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	992	{
				993	struct btrfs_path *path;
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	994	int ret;
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	995
				996	path = btrfs_alloc_path();
liubo	2a29edc	2011-01-26 06:22:08 +0000	[diff] [blame]	997	if (!path)
				998	return -ENOMEM;
				999
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	1000	ret = btrfs_search_slot(NULL, log, key, path, 0, 0);
Nikolay Borisov	d3316c8	2019-09-25 14:03:03 +0300	[diff] [blame]	1001	if (ret < 0) {
				1002	goto out;
				1003	} else if (ret == 1) {
Nikolay Borisov	89cbf5f6b	2019-08-30 17:44:47 +0300	[diff] [blame]	1004	ret = 0;
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	1005	goto out;
Nikolay Borisov	89cbf5f6b	2019-08-30 17:44:47 +0300	[diff] [blame]	1006	}
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	1007
Nikolay Borisov	89cbf5f6b	2019-08-30 17:44:47 +0300	[diff] [blame]	1008	if (key->type == BTRFS_INODE_EXTREF_KEY)
				1009	ret = !!btrfs_find_name_in_ext_backref(path->nodes[0],
				1010	path->slots[0],
				1011	ref_objectid,
				1012	name, namelen);
				1013	else
				1014	ret = !!btrfs_find_name_in_backref(path->nodes[0],
Filipe Manana	1f250e9	2018-02-28 15:56:10 +0000	[diff] [blame]	1015	path->slots[0],
Nikolay Borisov	89cbf5f6b	2019-08-30 17:44:47 +0300	[diff] [blame]	1016	name, namelen);
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	1017	out:
				1018	btrfs_free_path(path);
Nikolay Borisov	89cbf5f6b	2019-08-30 17:44:47 +0300	[diff] [blame]	1019	return ret;
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	1020	}
				1021
Jan Schmidt	5a1d784	2012-08-17 14:04:41 -0700	[diff] [blame]	1022	static inline int __add_inode_ref(struct btrfs_trans_handle *trans,
				1023	struct btrfs_root *root,
				1024	struct btrfs_path *path,
				1025	struct btrfs_root *log_root,
Nikolay Borisov	94c91a1	2017-01-18 00:31:46 +0200	[diff] [blame]	1026	struct btrfs_inode *dir,
				1027	struct btrfs_inode *inode,
Mark Fasheh	f186373	2012-08-08 11:32:27 -0700	[diff] [blame]	1028	u64 inode_objectid, u64 parent_objectid,
				1029	u64 ref_index, char *name, int namelen,
				1030	int *search_done)
Jan Schmidt	5a1d784	2012-08-17 14:04:41 -0700	[diff] [blame]	1031	{
				1032	int ret;
Mark Fasheh	f186373	2012-08-08 11:32:27 -0700	[diff] [blame]	1033	char *victim_name;
				1034	int victim_name_len;
				1035	struct extent_buffer *leaf;
Jan Schmidt	5a1d784	2012-08-17 14:04:41 -0700	[diff] [blame]	1036	struct btrfs_dir_item *di;
Mark Fasheh	f186373	2012-08-08 11:32:27 -0700	[diff] [blame]	1037	struct btrfs_key search_key;
				1038	struct btrfs_inode_extref *extref;
Jan Schmidt	5a1d784	2012-08-17 14:04:41 -0700	[diff] [blame]	1039
Mark Fasheh	f186373	2012-08-08 11:32:27 -0700	[diff] [blame]	1040	again:
				1041	/* Search old style refs */
				1042	search_key.objectid = inode_objectid;
				1043	search_key.type = BTRFS_INODE_REF_KEY;
				1044	search_key.offset = parent_objectid;
				1045	ret = btrfs_search_slot(NULL, root, &search_key, path, 0, 0);
Jan Schmidt	5a1d784	2012-08-17 14:04:41 -0700	[diff] [blame]	1046	if (ret == 0) {
Jan Schmidt	5a1d784	2012-08-17 14:04:41 -0700	[diff] [blame]	1047	struct btrfs_inode_ref *victim_ref;
				1048	unsigned long ptr;
				1049	unsigned long ptr_end;
Mark Fasheh	f186373	2012-08-08 11:32:27 -0700	[diff] [blame]	1050
				1051	leaf = path->nodes[0];
Jan Schmidt	5a1d784	2012-08-17 14:04:41 -0700	[diff] [blame]	1052
				1053	/* are we trying to overwrite a back ref for the root directory
				1054	* if so, just jump out, we're done
				1055	*/
Mark Fasheh	f186373	2012-08-08 11:32:27 -0700	[diff] [blame]	1056	if (search_key.objectid == search_key.offset)
Jan Schmidt	5a1d784	2012-08-17 14:04:41 -0700	[diff] [blame]	1057	return 1;
				1058
				1059	/* check all the names in this back reference to see
				1060	* if they are in the log. if so, we allow them to stay
				1061	* otherwise they must be unlinked as a conflict
				1062	*/
				1063	ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
				1064	ptr_end = ptr + btrfs_item_size_nr(leaf, path->slots[0]);
				1065	while (ptr < ptr_end) {
				1066	victim_ref = (struct btrfs_inode_ref *)ptr;
				1067	victim_name_len = btrfs_inode_ref_name_len(leaf,
				1068	victim_ref);
				1069	victim_name = kmalloc(victim_name_len, GFP_NOFS);
Josef Bacik	3650860	2013-04-25 16:23:32 -0400	[diff] [blame]	1070	if (!victim_name)
				1071	return -ENOMEM;
Jan Schmidt	5a1d784	2012-08-17 14:04:41 -0700	[diff] [blame]	1072
				1073	read_extent_buffer(leaf, victim_name,
				1074	(unsigned long)(victim_ref + 1),
				1075	victim_name_len);
				1076
Nikolay Borisov	d3316c8	2019-09-25 14:03:03 +0300	[diff] [blame]	1077	ret = backref_in_log(log_root, &search_key,
				1078	parent_objectid, victim_name,
				1079	victim_name_len);
				1080	if (ret < 0) {
				1081	kfree(victim_name);
				1082	return ret;
				1083	} else if (!ret) {
Nikolay Borisov	94c91a1	2017-01-18 00:31:46 +0200	[diff] [blame]	1084	inc_nlink(&inode->vfs_inode);
Jan Schmidt	5a1d784	2012-08-17 14:04:41 -0700	[diff] [blame]	1085	btrfs_release_path(path);
				1086
Nikolay Borisov	94c91a1	2017-01-18 00:31:46 +0200	[diff] [blame]	1087	ret = btrfs_unlink_inode(trans, root, dir, inode,
Nikolay Borisov	4ec5934	2017-01-18 00:31:44 +0200	[diff] [blame]	1088	victim_name, victim_name_len);
Mark Fasheh	f186373	2012-08-08 11:32:27 -0700	[diff] [blame]	1089	kfree(victim_name);
Josef Bacik	3650860	2013-04-25 16:23:32 -0400	[diff] [blame]	1090	if (ret)
				1091	return ret;
Nikolay Borisov	e5c304e6	2018-02-07 17:55:43 +0200	[diff] [blame]	1092	ret = btrfs_run_delayed_items(trans);
Filipe David Borba Manana	ada9af2	2013-08-05 09:25:47 +0100	[diff] [blame]	1093	if (ret)
				1094	return ret;
Mark Fasheh	f186373	2012-08-08 11:32:27 -0700	[diff] [blame]	1095	*search_done = 1;
				1096	goto again;
Jan Schmidt	5a1d784	2012-08-17 14:04:41 -0700	[diff] [blame]	1097	}
				1098	kfree(victim_name);
Mark Fasheh	f186373	2012-08-08 11:32:27 -0700	[diff] [blame]	1099
Jan Schmidt	5a1d784	2012-08-17 14:04:41 -0700	[diff] [blame]	1100	ptr = (unsigned long)(victim_ref + 1) + victim_name_len;
				1101	}
Jan Schmidt	5a1d784	2012-08-17 14:04:41 -0700	[diff] [blame]	1102
				1103	/*
				1104	* NOTE: we have searched root tree and checked the
Adam Buchbinder	bb7ab3b	2016-03-04 11:23:12 -0800	[diff] [blame]	1105	* corresponding ref, it does not need to check again.
Jan Schmidt	5a1d784	2012-08-17 14:04:41 -0700	[diff] [blame]	1106	*/
				1107	*search_done = 1;
				1108	}
				1109	btrfs_release_path(path);
				1110
Mark Fasheh	f186373	2012-08-08 11:32:27 -0700	[diff] [blame]	1111	/* Same search but for extended refs */
				1112	extref = btrfs_lookup_inode_extref(NULL, root, path, name, namelen,
				1113	inode_objectid, parent_objectid, 0,
				1114	0);
				1115	if (!IS_ERR_OR_NULL(extref)) {
				1116	u32 item_size;
				1117	u32 cur_offset = 0;
				1118	unsigned long base;
				1119	struct inode *victim_parent;
				1120
				1121	leaf = path->nodes[0];
				1122
				1123	item_size = btrfs_item_size_nr(leaf, path->slots[0]);
				1124	base = btrfs_item_ptr_offset(leaf, path->slots[0]);
				1125
				1126	while (cur_offset < item_size) {
Quentin Casasnovas	dd9ef13	2015-03-03 16:31:38 +0100	[diff] [blame]	1127	extref = (struct btrfs_inode_extref *)(base + cur_offset);
Mark Fasheh	f186373	2012-08-08 11:32:27 -0700	[diff] [blame]	1128
				1129	victim_name_len = btrfs_inode_extref_name_len(leaf, extref);
				1130
				1131	if (btrfs_inode_extref_parent(leaf, extref) != parent_objectid)
				1132	goto next;
				1133
				1134	victim_name = kmalloc(victim_name_len, GFP_NOFS);
Josef Bacik	3650860	2013-04-25 16:23:32 -0400	[diff] [blame]	1135	if (!victim_name)
				1136	return -ENOMEM;
Mark Fasheh	f186373	2012-08-08 11:32:27 -0700	[diff] [blame]	1137	read_extent_buffer(leaf, victim_name, (unsigned long)&extref->name,
				1138	victim_name_len);
				1139
				1140	search_key.objectid = inode_objectid;
				1141	search_key.type = BTRFS_INODE_EXTREF_KEY;
				1142	search_key.offset = btrfs_extref_hash(parent_objectid,
				1143	victim_name,
				1144	victim_name_len);
Nikolay Borisov	d3316c8	2019-09-25 14:03:03 +0300	[diff] [blame]	1145	ret = backref_in_log(log_root, &search_key,
				1146	parent_objectid, victim_name,
				1147	victim_name_len);
				1148	if (ret < 0) {
				1149	return ret;
				1150	} else if (!ret) {
Mark Fasheh	f186373	2012-08-08 11:32:27 -0700	[diff] [blame]	1151	ret = -ENOENT;
				1152	victim_parent = read_one_inode(root,
Nikolay Borisov	94c91a1	2017-01-18 00:31:46 +0200	[diff] [blame]	1153	parent_objectid);
Mark Fasheh	f186373	2012-08-08 11:32:27 -0700	[diff] [blame]	1154	if (victim_parent) {
Nikolay Borisov	94c91a1	2017-01-18 00:31:46 +0200	[diff] [blame]	1155	inc_nlink(&inode->vfs_inode);
Mark Fasheh	f186373	2012-08-08 11:32:27 -0700	[diff] [blame]	1156	btrfs_release_path(path);
				1157
				1158	ret = btrfs_unlink_inode(trans, root,
Nikolay Borisov	4ec5934	2017-01-18 00:31:44 +0200	[diff] [blame]	1159	BTRFS_I(victim_parent),
Nikolay Borisov	94c91a1	2017-01-18 00:31:46 +0200	[diff] [blame]	1160	inode,
Nikolay Borisov	4ec5934	2017-01-18 00:31:44 +0200	[diff] [blame]	1161	victim_name,
				1162	victim_name_len);
Filipe David Borba Manana	ada9af2	2013-08-05 09:25:47 +0100	[diff] [blame]	1163	if (!ret)
				1164	ret = btrfs_run_delayed_items(
Nikolay Borisov	e5c304e6	2018-02-07 17:55:43 +0200	[diff] [blame]	1165	trans);
Mark Fasheh	f186373	2012-08-08 11:32:27 -0700	[diff] [blame]	1166	}
Mark Fasheh	f186373	2012-08-08 11:32:27 -0700	[diff] [blame]	1167	iput(victim_parent);
				1168	kfree(victim_name);
Josef Bacik	3650860	2013-04-25 16:23:32 -0400	[diff] [blame]	1169	if (ret)
				1170	return ret;
Mark Fasheh	f186373	2012-08-08 11:32:27 -0700	[diff] [blame]	1171	*search_done = 1;
				1172	goto again;
				1173	}
				1174	kfree(victim_name);
Mark Fasheh	f186373	2012-08-08 11:32:27 -0700	[diff] [blame]	1175	next:
				1176	cur_offset += victim_name_len + sizeof(*extref);
				1177	}
				1178	*search_done = 1;
				1179	}
				1180	btrfs_release_path(path);
				1181
Jan Schmidt	5a1d784	2012-08-17 14:04:41 -0700	[diff] [blame]	1182	/* look for a conflicting sequence number */
Nikolay Borisov	94c91a1	2017-01-18 00:31:46 +0200	[diff] [blame]	1183	di = btrfs_lookup_dir_index_item(trans, root, path, btrfs_ino(dir),
Mark Fasheh	f186373	2012-08-08 11:32:27 -0700	[diff] [blame]	1184	ref_index, name, namelen, 0);
Jan Schmidt	5a1d784	2012-08-17 14:04:41 -0700	[diff] [blame]	1185	if (di && !IS_ERR(di)) {
Nikolay Borisov	94c91a1	2017-01-18 00:31:46 +0200	[diff] [blame]	1186	ret = drop_one_dir_item(trans, root, path, dir, di);
Josef Bacik	3650860	2013-04-25 16:23:32 -0400	[diff] [blame]	1187	if (ret)
				1188	return ret;
Jan Schmidt	5a1d784	2012-08-17 14:04:41 -0700	[diff] [blame]	1189	}
				1190	btrfs_release_path(path);
				1191
Andrea Gelmini	52042d8	2018-11-28 12:05:13 +0100	[diff] [blame]	1192	/* look for a conflicting name */
Nikolay Borisov	94c91a1	2017-01-18 00:31:46 +0200	[diff] [blame]	1193	di = btrfs_lookup_dir_item(trans, root, path, btrfs_ino(dir),
Jan Schmidt	5a1d784	2012-08-17 14:04:41 -0700	[diff] [blame]	1194	name, namelen, 0);
				1195	if (di && !IS_ERR(di)) {
Nikolay Borisov	94c91a1	2017-01-18 00:31:46 +0200	[diff] [blame]	1196	ret = drop_one_dir_item(trans, root, path, dir, di);
Josef Bacik	3650860	2013-04-25 16:23:32 -0400	[diff] [blame]	1197	if (ret)
				1198	return ret;
Jan Schmidt	5a1d784	2012-08-17 14:04:41 -0700	[diff] [blame]	1199	}
				1200	btrfs_release_path(path);
				1201
				1202	return 0;
				1203	}
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	1204
Qu Wenruo	bae15d9	2017-11-08 08:54:26 +0800	[diff] [blame]	1205	static int extref_get_fields(struct extent_buffer *eb, unsigned long ref_ptr,
				1206	u32 namelen, char name, u64 index,
				1207	u64 *parent_objectid)
Mark Fasheh	f186373	2012-08-08 11:32:27 -0700	[diff] [blame]	1208	{
				1209	struct btrfs_inode_extref *extref;
				1210
				1211	extref = (struct btrfs_inode_extref *)ref_ptr;
				1212
				1213	*namelen = btrfs_inode_extref_name_len(eb, extref);
				1214	name = kmalloc(namelen, GFP_NOFS);
				1215	if (*name == NULL)
				1216	return -ENOMEM;
				1217
				1218	read_extent_buffer(eb, *name, (unsigned long)&extref->name,
				1219	*namelen);
				1220
Filipe Manana	1f250e9	2018-02-28 15:56:10 +0000	[diff] [blame]	1221	if (index)
				1222	*index = btrfs_inode_extref_index(eb, extref);
Mark Fasheh	f186373	2012-08-08 11:32:27 -0700	[diff] [blame]	1223	if (parent_objectid)
				1224	*parent_objectid = btrfs_inode_extref_parent(eb, extref);
				1225
				1226	return 0;
				1227	}
				1228
Qu Wenruo	bae15d9	2017-11-08 08:54:26 +0800	[diff] [blame]	1229	static int ref_get_fields(struct extent_buffer *eb, unsigned long ref_ptr,
				1230	u32 namelen, char name, u64 index)
Mark Fasheh	f186373	2012-08-08 11:32:27 -0700	[diff] [blame]	1231	{
				1232	struct btrfs_inode_ref *ref;
				1233
				1234	ref = (struct btrfs_inode_ref *)ref_ptr;
				1235
				1236	*namelen = btrfs_inode_ref_name_len(eb, ref);
				1237	name = kmalloc(namelen, GFP_NOFS);
				1238	if (*name == NULL)
				1239	return -ENOMEM;
				1240
				1241	read_extent_buffer(eb, name, (unsigned long)(ref + 1), namelen);
				1242
Filipe Manana	1f250e9	2018-02-28 15:56:10 +0000	[diff] [blame]	1243	if (index)
				1244	*index = btrfs_inode_ref_index(eb, ref);
Mark Fasheh	f186373	2012-08-08 11:32:27 -0700	[diff] [blame]	1245
				1246	return 0;
				1247	}
				1248
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	1249	/*
Filipe Manana	1f250e9	2018-02-28 15:56:10 +0000	[diff] [blame]	1250	* Take an inode reference item from the log tree and iterate all names from the
				1251	* inode reference item in the subvolume tree with the same key (if it exists).
				1252	* For any name that is not in the inode reference item from the log tree, do a
				1253	* proper unlink of that name (that is, remove its entry from the inode
				1254	* reference item and both dir index keys).
				1255	*/
				1256	static int unlink_old_inode_refs(struct btrfs_trans_handle *trans,
				1257	struct btrfs_root *root,
				1258	struct btrfs_path *path,
				1259	struct btrfs_inode *inode,
				1260	struct extent_buffer *log_eb,
				1261	int log_slot,
				1262	struct btrfs_key *key)
				1263	{
				1264	int ret;
				1265	unsigned long ref_ptr;
				1266	unsigned long ref_end;
				1267	struct extent_buffer *eb;
				1268
				1269	again:
				1270	btrfs_release_path(path);
				1271	ret = btrfs_search_slot(NULL, root, key, path, 0, 0);
				1272	if (ret > 0) {
				1273	ret = 0;
				1274	goto out;
				1275	}
				1276	if (ret < 0)
				1277	goto out;
				1278
				1279	eb = path->nodes[0];
				1280	ref_ptr = btrfs_item_ptr_offset(eb, path->slots[0]);
				1281	ref_end = ref_ptr + btrfs_item_size_nr(eb, path->slots[0]);
				1282	while (ref_ptr < ref_end) {
				1283	char *name = NULL;
				1284	int namelen;
				1285	u64 parent_id;
				1286
				1287	if (key->type == BTRFS_INODE_EXTREF_KEY) {
				1288	ret = extref_get_fields(eb, ref_ptr, &namelen, &name,
				1289	NULL, &parent_id);
				1290	} else {
				1291	parent_id = key->offset;
				1292	ret = ref_get_fields(eb, ref_ptr, &namelen, &name,
				1293	NULL);
				1294	}
				1295	if (ret)
				1296	goto out;
				1297
				1298	if (key->type == BTRFS_INODE_EXTREF_KEY)
Nikolay Borisov	6ff49c6	2019-08-27 14:46:29 +0300	[diff] [blame]	1299	ret = !!btrfs_find_name_in_ext_backref(log_eb, log_slot,
				1300	parent_id, name,
				1301	namelen);
Filipe Manana	1f250e9	2018-02-28 15:56:10 +0000	[diff] [blame]	1302	else
Nikolay Borisov	9bb8407	2019-08-27 14:46:28 +0300	[diff] [blame]	1303	ret = !!btrfs_find_name_in_backref(log_eb, log_slot,
				1304	name, namelen);
Filipe Manana	1f250e9	2018-02-28 15:56:10 +0000	[diff] [blame]	1305
				1306	if (!ret) {
				1307	struct inode *dir;
				1308
				1309	btrfs_release_path(path);
				1310	dir = read_one_inode(root, parent_id);
				1311	if (!dir) {
				1312	ret = -ENOENT;
				1313	kfree(name);
				1314	goto out;
				1315	}
				1316	ret = btrfs_unlink_inode(trans, root, BTRFS_I(dir),
				1317	inode, name, namelen);
				1318	kfree(name);
				1319	iput(dir);
				1320	if (ret)
				1321	goto out;
				1322	goto again;
				1323	}
				1324
				1325	kfree(name);
				1326	ref_ptr += namelen;
				1327	if (key->type == BTRFS_INODE_EXTREF_KEY)
				1328	ref_ptr += sizeof(struct btrfs_inode_extref);
				1329	else
				1330	ref_ptr += sizeof(struct btrfs_inode_ref);
				1331	}
				1332	ret = 0;
				1333	out:
				1334	btrfs_release_path(path);
				1335	return ret;
				1336	}
				1337
Filipe Manana	0d83639	2018-07-20 10:59:06 +0100	[diff] [blame]	1338	static int btrfs_inode_ref_exists(struct inode inode, struct inode dir,
				1339	const u8 ref_type, const char *name,
				1340	const int namelen)
				1341	{
				1342	struct btrfs_key key;
				1343	struct btrfs_path *path;
				1344	const u64 parent_id = btrfs_ino(BTRFS_I(dir));
				1345	int ret;
				1346
				1347	path = btrfs_alloc_path();
				1348	if (!path)
				1349	return -ENOMEM;
				1350
				1351	key.objectid = btrfs_ino(BTRFS_I(inode));
				1352	key.type = ref_type;
				1353	if (key.type == BTRFS_INODE_REF_KEY)
				1354	key.offset = parent_id;
				1355	else
				1356	key.offset = btrfs_extref_hash(parent_id, name, namelen);
				1357
				1358	ret = btrfs_search_slot(NULL, BTRFS_I(inode)->root, &key, path, 0, 0);
				1359	if (ret < 0)
				1360	goto out;
				1361	if (ret > 0) {
				1362	ret = 0;
				1363	goto out;
				1364	}
				1365	if (key.type == BTRFS_INODE_EXTREF_KEY)
Nikolay Borisov	6ff49c6	2019-08-27 14:46:29 +0300	[diff] [blame]	1366	ret = !!btrfs_find_name_in_ext_backref(path->nodes[0],
				1367	path->slots[0], parent_id, name, namelen);
Filipe Manana	0d83639	2018-07-20 10:59:06 +0100	[diff] [blame]	1368	else
Nikolay Borisov	9bb8407	2019-08-27 14:46:28 +0300	[diff] [blame]	1369	ret = !!btrfs_find_name_in_backref(path->nodes[0], path->slots[0],
				1370	name, namelen);
Filipe Manana	0d83639	2018-07-20 10:59:06 +0100	[diff] [blame]	1371
				1372	out:
				1373	btrfs_free_path(path);
				1374	return ret;
				1375	}
				1376
Filipe Manana	6b5fc43	2019-02-13 12:14:03 +0000	[diff] [blame]	1377	static int add_link(struct btrfs_trans_handle trans, struct btrfs_root root,
				1378	struct inode dir, struct inode inode, const char *name,
				1379	int namelen, u64 ref_index)
				1380	{
				1381	struct btrfs_dir_item *dir_item;
				1382	struct btrfs_key key;
				1383	struct btrfs_path *path;
				1384	struct inode *other_inode = NULL;
				1385	int ret;
				1386
				1387	path = btrfs_alloc_path();
				1388	if (!path)
				1389	return -ENOMEM;
				1390
				1391	dir_item = btrfs_lookup_dir_item(NULL, root, path,
				1392	btrfs_ino(BTRFS_I(dir)),
				1393	name, namelen, 0);
				1394	if (!dir_item) {
				1395	btrfs_release_path(path);
				1396	goto add_link;
				1397	} else if (IS_ERR(dir_item)) {
				1398	ret = PTR_ERR(dir_item);
				1399	goto out;
				1400	}
				1401
				1402	/*
				1403	* Our inode's dentry collides with the dentry of another inode which is
				1404	* in the log but not yet processed since it has a higher inode number.
				1405	* So delete that other dentry.
				1406	*/
				1407	btrfs_dir_item_key_to_cpu(path->nodes[0], dir_item, &key);
				1408	btrfs_release_path(path);
				1409	other_inode = read_one_inode(root, key.objectid);
				1410	if (!other_inode) {
				1411	ret = -ENOENT;
				1412	goto out;
				1413	}
				1414	ret = btrfs_unlink_inode(trans, root, BTRFS_I(dir), BTRFS_I(other_inode),
				1415	name, namelen);
				1416	if (ret)
				1417	goto out;
				1418	/*
				1419	* If we dropped the link count to 0, bump it so that later the iput()
				1420	* on the inode will not free it. We will fixup the link count later.
				1421	*/
				1422	if (other_inode->i_nlink == 0)
				1423	inc_nlink(other_inode);
				1424
				1425	ret = btrfs_run_delayed_items(trans);
				1426	if (ret)
				1427	goto out;
				1428	add_link:
				1429	ret = btrfs_add_link(trans, BTRFS_I(dir), BTRFS_I(inode),
				1430	name, namelen, 0, ref_index);
				1431	out:
				1432	iput(other_inode);
				1433	btrfs_free_path(path);
				1434
				1435	return ret;
				1436	}
				1437
Filipe Manana	1f250e9	2018-02-28 15:56:10 +0000	[diff] [blame]	1438	/*
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	1439	* replay one inode back reference item found in the log tree.
				1440	* eb, slot and key refer to the buffer and key found in the log tree.
				1441	* root is the destination we are replaying into, and path is for temp
				1442	* use by this function. (it should be released on return).
				1443	*/
				1444	static noinline int add_inode_ref(struct btrfs_trans_handle *trans,
				1445	struct btrfs_root *root,
				1446	struct btrfs_root *log,
				1447	struct btrfs_path *path,
				1448	struct extent_buffer *eb, int slot,
				1449	struct btrfs_key *key)
				1450	{
Geyslan G. Bem	03b2f08	2013-10-11 15:35:45 -0300	[diff] [blame]	1451	struct inode *dir = NULL;
				1452	struct inode *inode = NULL;
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	1453	unsigned long ref_ptr;
				1454	unsigned long ref_end;
Geyslan G. Bem	03b2f08	2013-10-11 15:35:45 -0300	[diff] [blame]	1455	char *name = NULL;
liubo	34f3e4f	2011-08-06 08:35:23 +0000	[diff] [blame]	1456	int namelen;
				1457	int ret;
liubo	c622ae6	2011-03-26 08:01:12 -0400	[diff] [blame]	1458	int search_done = 0;
Mark Fasheh	f186373	2012-08-08 11:32:27 -0700	[diff] [blame]	1459	int log_ref_ver = 0;
				1460	u64 parent_objectid;
				1461	u64 inode_objectid;
Chris Mason	f46dbe3	2012-10-09 11:17:20 -0400	[diff] [blame]	1462	u64 ref_index = 0;
Mark Fasheh	f186373	2012-08-08 11:32:27 -0700	[diff] [blame]	1463	int ref_struct_size;
				1464
				1465	ref_ptr = btrfs_item_ptr_offset(eb, slot);
				1466	ref_end = ref_ptr + btrfs_item_size_nr(eb, slot);
				1467
				1468	if (key->type == BTRFS_INODE_EXTREF_KEY) {
				1469	struct btrfs_inode_extref *r;
				1470
				1471	ref_struct_size = sizeof(struct btrfs_inode_extref);
				1472	log_ref_ver = 1;
				1473	r = (struct btrfs_inode_extref *)ref_ptr;
				1474	parent_objectid = btrfs_inode_extref_parent(eb, r);
				1475	} else {
				1476	ref_struct_size = sizeof(struct btrfs_inode_ref);
				1477	parent_objectid = key->offset;
				1478	}
				1479	inode_objectid = key->objectid;
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	1480
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	1481	/*
				1482	* it is possible that we didn't log all the parent directories
				1483	* for a given inode. If we don't find the dir, just don't
				1484	* copy the back ref in. The link count fixup code will take
				1485	* care of the rest
				1486	*/
Mark Fasheh	f186373	2012-08-08 11:32:27 -0700	[diff] [blame]	1487	dir = read_one_inode(root, parent_objectid);
Geyslan G. Bem	03b2f08	2013-10-11 15:35:45 -0300	[diff] [blame]	1488	if (!dir) {
				1489	ret = -ENOENT;
				1490	goto out;
				1491	}
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	1492
Mark Fasheh	f186373	2012-08-08 11:32:27 -0700	[diff] [blame]	1493	inode = read_one_inode(root, inode_objectid);
Tsutomu Itoh	c00e949	2011-04-28 09:10:23 +0000	[diff] [blame]	1494	if (!inode) {
Geyslan G. Bem	03b2f08	2013-10-11 15:35:45 -0300	[diff] [blame]	1495	ret = -EIO;
				1496	goto out;
Tsutomu Itoh	c00e949	2011-04-28 09:10:23 +0000	[diff] [blame]	1497	}
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	1498
Jan Schmidt	5a1d784	2012-08-17 14:04:41 -0700	[diff] [blame]	1499	while (ref_ptr < ref_end) {
Mark Fasheh	f186373	2012-08-08 11:32:27 -0700	[diff] [blame]	1500	if (log_ref_ver) {
Qu Wenruo	bae15d9	2017-11-08 08:54:26 +0800	[diff] [blame]	1501	ret = extref_get_fields(eb, ref_ptr, &namelen, &name,
				1502	&ref_index, &parent_objectid);
Mark Fasheh	f186373	2012-08-08 11:32:27 -0700	[diff] [blame]	1503	/*
				1504	* parent object can change from one array
				1505	* item to another.
				1506	*/
				1507	if (!dir)
				1508	dir = read_one_inode(root, parent_objectid);
Geyslan G. Bem	03b2f08	2013-10-11 15:35:45 -0300	[diff] [blame]	1509	if (!dir) {
				1510	ret = -ENOENT;
				1511	goto out;
				1512	}
Mark Fasheh	f186373	2012-08-08 11:32:27 -0700	[diff] [blame]	1513	} else {
Qu Wenruo	bae15d9	2017-11-08 08:54:26 +0800	[diff] [blame]	1514	ret = ref_get_fields(eb, ref_ptr, &namelen, &name,
				1515	&ref_index);
Mark Fasheh	f186373	2012-08-08 11:32:27 -0700	[diff] [blame]	1516	}
				1517	if (ret)
Geyslan G. Bem	03b2f08	2013-10-11 15:35:45 -0300	[diff] [blame]	1518	goto out;
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	1519
Jan Schmidt	5a1d784	2012-08-17 14:04:41 -0700	[diff] [blame]	1520	/* if we already have a perfect match, we're done */
David Sterba	f85b737	2017-01-20 14:54:07 +0100	[diff] [blame]	1521	if (!inode_in_dir(root, path, btrfs_ino(BTRFS_I(dir)),
				1522	btrfs_ino(BTRFS_I(inode)), ref_index,
				1523	name, namelen)) {
Jan Schmidt	5a1d784	2012-08-17 14:04:41 -0700	[diff] [blame]	1524	/*
				1525	* look for a conflicting back reference in the
				1526	* metadata. if we find one we have to unlink that name
				1527	* of the file before we add our new link. Later on, we
				1528	* overwrite any existing back reference, and we don't
				1529	* want to create dangling pointers in the directory.
				1530	*/
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	1531
Jan Schmidt	5a1d784	2012-08-17 14:04:41 -0700	[diff] [blame]	1532	if (!search_done) {
				1533	ret = __add_inode_ref(trans, root, path, log,
Nikolay Borisov	94c91a1	2017-01-18 00:31:46 +0200	[diff] [blame]	1534	BTRFS_I(dir),
David Sterba	d75eefd	2017-02-10 20:20:19 +0100	[diff] [blame]	1535	BTRFS_I(inode),
Mark Fasheh	f186373	2012-08-08 11:32:27 -0700	[diff] [blame]	1536	inode_objectid,
				1537	parent_objectid,
				1538	ref_index, name, namelen,
Jan Schmidt	5a1d784	2012-08-17 14:04:41 -0700	[diff] [blame]	1539	&search_done);
Geyslan G. Bem	03b2f08	2013-10-11 15:35:45 -0300	[diff] [blame]	1540	if (ret) {
				1541	if (ret == 1)
				1542	ret = 0;
Jan Schmidt	5a1d784	2012-08-17 14:04:41 -0700	[diff] [blame]	1543	goto out;
Josef Bacik	3650860	2013-04-25 16:23:32 -0400	[diff] [blame]	1544	}
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	1545	}
Jan Schmidt	5a1d784	2012-08-17 14:04:41 -0700	[diff] [blame]	1546
Filipe Manana	0d83639	2018-07-20 10:59:06 +0100	[diff] [blame]	1547	/*
				1548	* If a reference item already exists for this inode
				1549	* with the same parent and name, but different index,
				1550	* drop it and the corresponding directory index entries
				1551	* from the parent before adding the new reference item
				1552	* and dir index entries, otherwise we would fail with
				1553	* -EEXIST returned from btrfs_add_link() below.
				1554	*/
				1555	ret = btrfs_inode_ref_exists(inode, dir, key->type,
				1556	name, namelen);
				1557	if (ret > 0) {
				1558	ret = btrfs_unlink_inode(trans, root,
				1559	BTRFS_I(dir),
				1560	BTRFS_I(inode),
				1561	name, namelen);
				1562	/*
				1563	* If we dropped the link count to 0, bump it so
				1564	* that later the iput() on the inode will not
				1565	* free it. We will fixup the link count later.
				1566	*/
				1567	if (!ret && inode->i_nlink == 0)
				1568	inc_nlink(inode);
				1569	}
				1570	if (ret < 0)
				1571	goto out;
				1572
Jan Schmidt	5a1d784	2012-08-17 14:04:41 -0700	[diff] [blame]	1573	/* insert our name */
Filipe Manana	6b5fc43	2019-02-13 12:14:03 +0000	[diff] [blame]	1574	ret = add_link(trans, root, dir, inode, name, namelen,
				1575	ref_index);
Josef Bacik	3650860	2013-04-25 16:23:32 -0400	[diff] [blame]	1576	if (ret)
				1577	goto out;
Jan Schmidt	5a1d784	2012-08-17 14:04:41 -0700	[diff] [blame]	1578
Josef Bacik	f96d447	2021-05-19 11:26:25 -0400	[diff] [blame]	1579	ret = btrfs_update_inode(trans, root, BTRFS_I(inode));
				1580	if (ret)
				1581	goto out;
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	1582	}
liubo	c622ae6	2011-03-26 08:01:12 -0400	[diff] [blame]	1583
Mark Fasheh	f186373	2012-08-08 11:32:27 -0700	[diff] [blame]	1584	ref_ptr = (unsigned long)(ref_ptr + ref_struct_size) + namelen;
Jan Schmidt	5a1d784	2012-08-17 14:04:41 -0700	[diff] [blame]	1585	kfree(name);
Geyslan G. Bem	03b2f08	2013-10-11 15:35:45 -0300	[diff] [blame]	1586	name = NULL;
Mark Fasheh	f186373	2012-08-08 11:32:27 -0700	[diff] [blame]	1587	if (log_ref_ver) {
				1588	iput(dir);
				1589	dir = NULL;
				1590	}
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	1591	}
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	1592
Filipe Manana	1f250e9	2018-02-28 15:56:10 +0000	[diff] [blame]	1593	/*
				1594	* Before we overwrite the inode reference item in the subvolume tree
				1595	* with the item from the log tree, we must unlink all names from the
				1596	* parent directory that are in the subvolume's tree inode reference
				1597	* item, otherwise we end up with an inconsistent subvolume tree where
				1598	* dir index entries exist for a name but there is no inode reference
				1599	* item with the same name.
				1600	*/
				1601	ret = unlink_old_inode_refs(trans, root, path, BTRFS_I(inode), eb, slot,
				1602	key);
				1603	if (ret)
				1604	goto out;
				1605
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	1606	/* finally write the back reference in the inode */
				1607	ret = overwrite_item(trans, root, path, eb, slot, key);
Jan Schmidt	5a1d784	2012-08-17 14:04:41 -0700	[diff] [blame]	1608	out:
David Sterba	b3b4aa7	2011-04-21 01:20:15 +0200	[diff] [blame]	1609	btrfs_release_path(path);
Geyslan G. Bem	03b2f08	2013-10-11 15:35:45 -0300	[diff] [blame]	1610	kfree(name);
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	1611	iput(dir);
				1612	iput(inode);
Josef Bacik	3650860	2013-04-25 16:23:32 -0400	[diff] [blame]	1613	return ret;
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	1614	}
				1615
Mark Fasheh	f186373	2012-08-08 11:32:27 -0700	[diff] [blame]	1616	static int count_inode_extrefs(struct btrfs_root *root,
Nikolay Borisov	3628365	2017-01-18 00:31:49 +0200	[diff] [blame]	1617	struct btrfs_inode inode, struct btrfs_path path)
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	1618	{
Mark Fasheh	f186373	2012-08-08 11:32:27 -0700	[diff] [blame]	1619	int ret = 0;
				1620	int name_len;
				1621	unsigned int nlink = 0;
				1622	u32 item_size;
				1623	u32 cur_offset = 0;
Nikolay Borisov	3628365	2017-01-18 00:31:49 +0200	[diff] [blame]	1624	u64 inode_objectid = btrfs_ino(inode);
Mark Fasheh	f186373	2012-08-08 11:32:27 -0700	[diff] [blame]	1625	u64 offset = 0;
				1626	unsigned long ptr;
				1627	struct btrfs_inode_extref *extref;
				1628	struct extent_buffer *leaf;
				1629
				1630	while (1) {
				1631	ret = btrfs_find_one_extref(root, inode_objectid, offset, path,
				1632	&extref, &offset);
				1633	if (ret)
				1634	break;
				1635
				1636	leaf = path->nodes[0];
				1637	item_size = btrfs_item_size_nr(leaf, path->slots[0]);
				1638	ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
Filipe Manana	2c2c452	2015-01-13 16:40:04 +0000	[diff] [blame]	1639	cur_offset = 0;
Mark Fasheh	f186373	2012-08-08 11:32:27 -0700	[diff] [blame]	1640
				1641	while (cur_offset < item_size) {
				1642	extref = (struct btrfs_inode_extref *) (ptr + cur_offset);
				1643	name_len = btrfs_inode_extref_name_len(leaf, extref);
				1644
				1645	nlink++;
				1646
				1647	cur_offset += name_len + sizeof(*extref);
				1648	}
				1649
				1650	offset++;
				1651	btrfs_release_path(path);
				1652	}
				1653	btrfs_release_path(path);
				1654
Filipe Manana	2c2c452	2015-01-13 16:40:04 +0000	[diff] [blame]	1655	if (ret < 0 && ret != -ENOENT)
Mark Fasheh	f186373	2012-08-08 11:32:27 -0700	[diff] [blame]	1656	return ret;
				1657	return nlink;
				1658	}
				1659
				1660	static int count_inode_refs(struct btrfs_root *root,
Nikolay Borisov	f329e31	2017-01-18 00:31:50 +0200	[diff] [blame]	1661	struct btrfs_inode inode, struct btrfs_path path)
Mark Fasheh	f186373	2012-08-08 11:32:27 -0700	[diff] [blame]	1662	{
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	1663	int ret;
				1664	struct btrfs_key key;
Mark Fasheh	f186373	2012-08-08 11:32:27 -0700	[diff] [blame]	1665	unsigned int nlink = 0;
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	1666	unsigned long ptr;
				1667	unsigned long ptr_end;
				1668	int name_len;
Nikolay Borisov	f329e31	2017-01-18 00:31:50 +0200	[diff] [blame]	1669	u64 ino = btrfs_ino(inode);
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	1670
Li Zefan	33345d01	2011-04-20 10:31:50 +0800	[diff] [blame]	1671	key.objectid = ino;
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	1672	key.type = BTRFS_INODE_REF_KEY;
				1673	key.offset = (u64)-1;
				1674
Chris Mason	d397712	2009-01-05 21:25:51 -0500	[diff] [blame]	1675	while (1) {
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	1676	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
				1677	if (ret < 0)
				1678	break;
				1679	if (ret > 0) {
				1680	if (path->slots[0] == 0)
				1681	break;
				1682	path->slots[0]--;
				1683	}
Filipe David Borba Manana	e93ae26	2013-10-14 22:49:11 +0100	[diff] [blame]	1684	process_slot:
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	1685	btrfs_item_key_to_cpu(path->nodes[0], &key,
				1686	path->slots[0]);
Li Zefan	33345d01	2011-04-20 10:31:50 +0800	[diff] [blame]	1687	if (key.objectid != ino \|\|
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	1688	key.type != BTRFS_INODE_REF_KEY)
				1689	break;
				1690	ptr = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
				1691	ptr_end = ptr + btrfs_item_size_nr(path->nodes[0],
				1692	path->slots[0]);
Chris Mason	d397712	2009-01-05 21:25:51 -0500	[diff] [blame]	1693	while (ptr < ptr_end) {
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	1694	struct btrfs_inode_ref *ref;
				1695
				1696	ref = (struct btrfs_inode_ref *)ptr;
				1697	name_len = btrfs_inode_ref_name_len(path->nodes[0],
				1698	ref);
				1699	ptr = (unsigned long)(ref + 1) + name_len;
				1700	nlink++;
				1701	}
				1702
				1703	if (key.offset == 0)
				1704	break;
Filipe David Borba Manana	e93ae26	2013-10-14 22:49:11 +0100	[diff] [blame]	1705	if (path->slots[0] > 0) {
				1706	path->slots[0]--;
				1707	goto process_slot;
				1708	}
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	1709	key.offset--;
David Sterba	b3b4aa7	2011-04-21 01:20:15 +0200	[diff] [blame]	1710	btrfs_release_path(path);
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	1711	}
David Sterba	b3b4aa7	2011-04-21 01:20:15 +0200	[diff] [blame]	1712	btrfs_release_path(path);
Mark Fasheh	f186373	2012-08-08 11:32:27 -0700	[diff] [blame]	1713
				1714	return nlink;
				1715	}
				1716
				1717	/*
				1718	* There are a few corners where the link count of the file can't
				1719	* be properly maintained during replay. So, instead of adding
				1720	* lots of complexity to the log code, we just scan the backrefs
				1721	* for any file that has been through replay.
				1722	*
				1723	* The scan will update the link count on the inode to reflect the
				1724	* number of back refs found. If it goes down to zero, the iput
				1725	* will free the inode.
				1726	*/
				1727	static noinline int fixup_inode_link_count(struct btrfs_trans_handle *trans,
				1728	struct btrfs_root *root,
				1729	struct inode *inode)
				1730	{
				1731	struct btrfs_path *path;
				1732	int ret;
				1733	u64 nlink = 0;
Nikolay Borisov	4a0cc7c	2017-01-10 20:35:31 +0200	[diff] [blame]	1734	u64 ino = btrfs_ino(BTRFS_I(inode));
Mark Fasheh	f186373	2012-08-08 11:32:27 -0700	[diff] [blame]	1735
				1736	path = btrfs_alloc_path();
				1737	if (!path)
				1738	return -ENOMEM;
				1739
Nikolay Borisov	f329e31	2017-01-18 00:31:50 +0200	[diff] [blame]	1740	ret = count_inode_refs(root, BTRFS_I(inode), path);
Mark Fasheh	f186373	2012-08-08 11:32:27 -0700	[diff] [blame]	1741	if (ret < 0)
				1742	goto out;
				1743
				1744	nlink = ret;
				1745
Nikolay Borisov	3628365	2017-01-18 00:31:49 +0200	[diff] [blame]	1746	ret = count_inode_extrefs(root, BTRFS_I(inode), path);
Mark Fasheh	f186373	2012-08-08 11:32:27 -0700	[diff] [blame]	1747	if (ret < 0)
				1748	goto out;
				1749
				1750	nlink += ret;
				1751
				1752	ret = 0;
				1753
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	1754	if (nlink != inode->i_nlink) {
Miklos Szeredi	bfe8684	2011-10-28 14:13:29 +0200	[diff] [blame]	1755	set_nlink(inode, nlink);
Josef Bacik	f96d447	2021-05-19 11:26:25 -0400	[diff] [blame]	1756	ret = btrfs_update_inode(trans, root, BTRFS_I(inode));
				1757	if (ret)
				1758	goto out;
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	1759	}
Chris Mason	8d5bf1c	2008-09-11 15:51:21 -0400	[diff] [blame]	1760	BTRFS_I(inode)->index_cnt = (u64)-1;
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	1761
Yan, Zheng	c71bf09	2009-11-12 09:34:40 +0000	[diff] [blame]	1762	if (inode->i_nlink == 0) {
				1763	if (S_ISDIR(inode->i_mode)) {
				1764	ret = replay_dir_deletes(trans, root, NULL, path,
Li Zefan	33345d01	2011-04-20 10:31:50 +0800	[diff] [blame]	1765	ino, 1);
Josef Bacik	3650860	2013-04-25 16:23:32 -0400	[diff] [blame]	1766	if (ret)
				1767	goto out;
Yan, Zheng	c71bf09	2009-11-12 09:34:40 +0000	[diff] [blame]	1768	}
Nikolay Borisov	ecdcf3c	2020-10-22 18:40:46 +0300	[diff] [blame]	1769	ret = btrfs_insert_orphan_item(trans, root, ino);
				1770	if (ret == -EEXIST)
				1771	ret = 0;
Chris Mason	12fcfd2	2009-03-24 10:24:20 -0400	[diff] [blame]	1772	}
Chris Mason	12fcfd2	2009-03-24 10:24:20 -0400	[diff] [blame]	1773
Mark Fasheh	f186373	2012-08-08 11:32:27 -0700	[diff] [blame]	1774	out:
				1775	btrfs_free_path(path);
				1776	return ret;
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	1777	}
				1778
				1779	static noinline int fixup_inode_link_counts(struct btrfs_trans_handle *trans,
				1780	struct btrfs_root *root,
				1781	struct btrfs_path *path)
				1782	{
				1783	int ret;
				1784	struct btrfs_key key;
				1785	struct inode *inode;
				1786
				1787	key.objectid = BTRFS_TREE_LOG_FIXUP_OBJECTID;
				1788	key.type = BTRFS_ORPHAN_ITEM_KEY;
				1789	key.offset = (u64)-1;
Chris Mason	d397712	2009-01-05 21:25:51 -0500	[diff] [blame]	1790	while (1) {
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	1791	ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
				1792	if (ret < 0)
				1793	break;
				1794
				1795	if (ret == 1) {
Josef Bacik	011b28a	2021-05-19 13:13:15 -0400	[diff] [blame]	1796	ret = 0;
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	1797	if (path->slots[0] == 0)
				1798	break;
				1799	path->slots[0]--;
				1800	}
				1801
				1802	btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
				1803	if (key.objectid != BTRFS_TREE_LOG_FIXUP_OBJECTID \|\|
				1804	key.type != BTRFS_ORPHAN_ITEM_KEY)
				1805	break;
				1806
				1807	ret = btrfs_del_item(trans, root, path);
Tsutomu Itoh	65a246c	2011-05-19 04:37:44 +0000	[diff] [blame]	1808	if (ret)
Josef Bacik	011b28a	2021-05-19 13:13:15 -0400	[diff] [blame]	1809	break;
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	1810
David Sterba	b3b4aa7	2011-04-21 01:20:15 +0200	[diff] [blame]	1811	btrfs_release_path(path);
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	1812	inode = read_one_inode(root, key.offset);
Josef Bacik	011b28a	2021-05-19 13:13:15 -0400	[diff] [blame]	1813	if (!inode) {
				1814	ret = -EIO;
				1815	break;
				1816	}
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	1817
				1818	ret = fixup_inode_link_count(trans, root, inode);
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	1819	iput(inode);
Josef Bacik	3650860	2013-04-25 16:23:32 -0400	[diff] [blame]	1820	if (ret)
Josef Bacik	011b28a	2021-05-19 13:13:15 -0400	[diff] [blame]	1821	break;
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	1822
Chris Mason	12fcfd2	2009-03-24 10:24:20 -0400	[diff] [blame]	1823	/*
				1824	* fixup on a directory may create new entries,
				1825	* make sure we always look for the highset possible
				1826	* offset
				1827	*/
				1828	key.offset = (u64)-1;
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	1829	}
David Sterba	b3b4aa7	2011-04-21 01:20:15 +0200	[diff] [blame]	1830	btrfs_release_path(path);
Tsutomu Itoh	65a246c	2011-05-19 04:37:44 +0000	[diff] [blame]	1831	return ret;
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	1832	}
				1833
				1834
				1835	/*
				1836	* record a given inode in the fixup dir so we can check its link
				1837	* count when replay is done. The link count is incremented here
				1838	* so the inode won't go away until we check it
				1839	*/
				1840	static noinline int link_to_fixup_dir(struct btrfs_trans_handle *trans,
				1841	struct btrfs_root *root,
				1842	struct btrfs_path *path,
				1843	u64 objectid)
				1844	{
				1845	struct btrfs_key key;
				1846	int ret = 0;
				1847	struct inode *inode;
				1848
				1849	inode = read_one_inode(root, objectid);
Tsutomu Itoh	c00e949	2011-04-28 09:10:23 +0000	[diff] [blame]	1850	if (!inode)
				1851	return -EIO;
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	1852
				1853	key.objectid = BTRFS_TREE_LOG_FIXUP_OBJECTID;
David Sterba	962a298	2014-06-04 18:41:45 +0200	[diff] [blame]	1854	key.type = BTRFS_ORPHAN_ITEM_KEY;
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	1855	key.offset = objectid;
				1856
				1857	ret = btrfs_insert_empty_item(trans, root, path, &key, 0);
				1858
David Sterba	b3b4aa7	2011-04-21 01:20:15 +0200	[diff] [blame]	1859	btrfs_release_path(path);
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	1860	if (ret == 0) {
Josef Bacik	9bf7a48	2013-03-01 13:35:47 -0500	[diff] [blame]	1861	if (!inode->i_nlink)
				1862	set_nlink(inode, 1);
				1863	else
Zach Brown	8b558c5	2013-10-16 12:10:34 -0700	[diff] [blame]	1864	inc_nlink(inode);
Nikolay Borisov	9a56fcd	2020-11-02 16:48:59 +0200	[diff] [blame]	1865	ret = btrfs_update_inode(trans, root, BTRFS_I(inode));
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	1866	} else if (ret == -EEXIST) {
				1867	ret = 0;
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	1868	}
				1869	iput(inode);
				1870
				1871	return ret;
				1872	}
				1873
				1874	/*
				1875	* when replaying the log for a directory, we only insert names
				1876	* for inodes that actually exist. This means an fsync on a directory
				1877	* does not implicitly fsync all the new files in it
				1878	*/
				1879	static noinline int insert_one_name(struct btrfs_trans_handle *trans,
				1880	struct btrfs_root *root,
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	1881	u64 dirid, u64 index,
Zhaolei	60d53eb	2015-08-17 18:44:46 +0800	[diff] [blame]	1882	char *name, int name_len,
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	1883	struct btrfs_key *location)
				1884	{
				1885	struct inode *inode;
				1886	struct inode *dir;
				1887	int ret;
				1888
				1889	inode = read_one_inode(root, location->objectid);
				1890	if (!inode)
				1891	return -ENOENT;
				1892
				1893	dir = read_one_inode(root, dirid);
				1894	if (!dir) {
				1895	iput(inode);
				1896	return -EIO;
				1897	}
Josef Bacik	d555438	2013-09-11 14:17:00 -0400	[diff] [blame]	1898
Nikolay Borisov	db0a669	2017-02-20 13:51:08 +0200	[diff] [blame]	1899	ret = btrfs_add_link(trans, BTRFS_I(dir), BTRFS_I(inode), name,
				1900	name_len, 1, index);
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	1901
				1902	/* FIXME, put inode into FIXUP list */
				1903
				1904	iput(inode);
				1905	iput(dir);
				1906	return ret;
				1907	}
				1908
				1909	/*
				1910	* take a single entry in a log directory item and replay it into
				1911	* the subvolume.
				1912	*
				1913	* if a conflicting item exists in the subdirectory already,
				1914	* the inode it points to is unlinked and put into the link count
				1915	* fix up tree.
				1916	*
				1917	* If a name from the log points to a file or directory that does
				1918	* not exist in the FS, it is skipped. fsyncs on directories
				1919	* do not force down inodes inside that directory, just changes to the
				1920	* names or unlinks in a directory.
Filipe Manana	bb53eda	2015-07-15 23:26:43 +0100	[diff] [blame]	1921	*
				1922	* Returns < 0 on error, 0 if the name wasn't replayed (dentry points to a
				1923	* non-existing inode) and 1 if the name was replayed.
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	1924	*/
				1925	static noinline int replay_one_name(struct btrfs_trans_handle *trans,
				1926	struct btrfs_root *root,
				1927	struct btrfs_path *path,
				1928	struct extent_buffer *eb,
				1929	struct btrfs_dir_item *di,
				1930	struct btrfs_key *key)
				1931	{
				1932	char *name;
				1933	int name_len;
				1934	struct btrfs_dir_item *dst_di;
				1935	struct btrfs_key found_key;
				1936	struct btrfs_key log_key;
				1937	struct inode *dir;
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	1938	u8 log_type;
Chris Mason	4bef084	2008-09-08 11:18:08 -0400	[diff] [blame]	1939	int exists;
Josef Bacik	3650860	2013-04-25 16:23:32 -0400	[diff] [blame]	1940	int ret = 0;
Josef Bacik	d555438	2013-09-11 14:17:00 -0400	[diff] [blame]	1941	bool update_size = (key->type == BTRFS_DIR_INDEX_KEY);
Filipe Manana	bb53eda	2015-07-15 23:26:43 +0100	[diff] [blame]	1942	bool name_added = false;
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	1943
				1944	dir = read_one_inode(root, key->objectid);
Tsutomu Itoh	c00e949	2011-04-28 09:10:23 +0000	[diff] [blame]	1945	if (!dir)
				1946	return -EIO;
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	1947
				1948	name_len = btrfs_dir_name_len(eb, di);
				1949	name = kmalloc(name_len, GFP_NOFS);
Filipe David Borba Manana	2bac325	2013-08-04 19:58:57 +0100	[diff] [blame]	1950	if (!name) {
				1951	ret = -ENOMEM;
				1952	goto out;
				1953	}
liubo	2a29edc	2011-01-26 06:22:08 +0000	[diff] [blame]	1954
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	1955	log_type = btrfs_dir_type(eb, di);
				1956	read_extent_buffer(eb, name, (unsigned long)(di + 1),
				1957	name_len);
				1958
				1959	btrfs_dir_item_key_to_cpu(eb, di, &log_key);
Chris Mason	4bef084	2008-09-08 11:18:08 -0400	[diff] [blame]	1960	exists = btrfs_lookup_inode(trans, root, path, &log_key, 0);
				1961	if (exists == 0)
				1962	exists = 1;
				1963	else
				1964	exists = 0;
David Sterba	b3b4aa7	2011-04-21 01:20:15 +0200	[diff] [blame]	1965	btrfs_release_path(path);
Chris Mason	4bef084	2008-09-08 11:18:08 -0400	[diff] [blame]	1966
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	1967	if (key->type == BTRFS_DIR_ITEM_KEY) {
				1968	dst_di = btrfs_lookup_dir_item(trans, root, path, key->objectid,
				1969	name, name_len, 1);
Chris Mason	d397712	2009-01-05 21:25:51 -0500	[diff] [blame]	1970	} else if (key->type == BTRFS_DIR_INDEX_KEY) {
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	1971	dst_di = btrfs_lookup_dir_index_item(trans, root, path,
				1972	key->objectid,
				1973	key->offset, name,
				1974	name_len, 1);
				1975	} else {
Josef Bacik	3650860	2013-04-25 16:23:32 -0400	[diff] [blame]	1976	/* Corruption */
				1977	ret = -EINVAL;
				1978	goto out;
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	1979	}
David Sterba	c704005	2011-04-19 18:00:01 +0200	[diff] [blame]	1980	if (IS_ERR_OR_NULL(dst_di)) {
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	1981	/* we need a sequence number to insert, so we only
				1982	* do inserts for the BTRFS_DIR_INDEX_KEY types
				1983	*/
				1984	if (key->type != BTRFS_DIR_INDEX_KEY)
				1985	goto out;
				1986	goto insert;
				1987	}
				1988
				1989	btrfs_dir_item_key_to_cpu(path->nodes[0], dst_di, &found_key);
				1990	/* the existing item matches the logged item */
				1991	if (found_key.objectid == log_key.objectid &&
				1992	found_key.type == log_key.type &&
				1993	found_key.offset == log_key.offset &&
				1994	btrfs_dir_type(path->nodes[0], dst_di) == log_type) {
Filipe Manana	a2cc11d	2014-09-08 22:53:18 +0100	[diff] [blame]	1995	update_size = false;
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	1996	goto out;
				1997	}
				1998
				1999	/*
				2000	* don't drop the conflicting directory entry if the inode
				2001	* for the new entry doesn't exist
				2002	*/
Chris Mason	4bef084	2008-09-08 11:18:08 -0400	[diff] [blame]	2003	if (!exists)
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	2004	goto out;
				2005
Nikolay Borisov	207e7d9	2017-01-18 00:31:45 +0200	[diff] [blame]	2006	ret = drop_one_dir_item(trans, root, path, BTRFS_I(dir), dst_di);
Josef Bacik	3650860	2013-04-25 16:23:32 -0400	[diff] [blame]	2007	if (ret)
				2008	goto out;
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	2009
				2010	if (key->type == BTRFS_DIR_INDEX_KEY)
				2011	goto insert;
				2012	out:
David Sterba	b3b4aa7	2011-04-21 01:20:15 +0200	[diff] [blame]	2013	btrfs_release_path(path);
Josef Bacik	d555438	2013-09-11 14:17:00 -0400	[diff] [blame]	2014	if (!ret && update_size) {
Nikolay Borisov	6ef06d2	2017-02-20 13:50:34 +0200	[diff] [blame]	2015	btrfs_i_size_write(BTRFS_I(dir), dir->i_size + name_len * 2);
Nikolay Borisov	9a56fcd	2020-11-02 16:48:59 +0200	[diff] [blame]	2016	ret = btrfs_update_inode(trans, root, BTRFS_I(dir));
Josef Bacik	d555438	2013-09-11 14:17:00 -0400	[diff] [blame]	2017	}
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	2018	kfree(name);
				2019	iput(dir);
Filipe Manana	bb53eda	2015-07-15 23:26:43 +0100	[diff] [blame]	2020	if (!ret && name_added)
				2021	ret = 1;
Josef Bacik	3650860	2013-04-25 16:23:32 -0400	[diff] [blame]	2022	return ret;
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	2023
				2024	insert:
Nikolay Borisov	725af92	2019-08-30 17:44:49 +0300	[diff] [blame]	2025	/*
				2026	* Check if the inode reference exists in the log for the given name,
				2027	* inode and parent inode
				2028	*/
				2029	found_key.objectid = log_key.objectid;
				2030	found_key.type = BTRFS_INODE_REF_KEY;
				2031	found_key.offset = key->objectid;
				2032	ret = backref_in_log(root->log_root, &found_key, 0, name, name_len);
				2033	if (ret < 0) {
				2034	goto out;
				2035	} else if (ret) {
				2036	/* The dentry will be added later. */
				2037	ret = 0;
				2038	update_size = false;
				2039	goto out;
				2040	}
				2041
				2042	found_key.objectid = log_key.objectid;
				2043	found_key.type = BTRFS_INODE_EXTREF_KEY;
				2044	found_key.offset = key->objectid;
				2045	ret = backref_in_log(root->log_root, &found_key, key->objectid, name,
				2046	name_len);
				2047	if (ret < 0) {
				2048	goto out;
				2049	} else if (ret) {
Filipe Manana	df8d116	2015-01-14 01:52:25 +0000	[diff] [blame]	2050	/* The dentry will be added later. */
				2051	ret = 0;
				2052	update_size = false;
				2053	goto out;
				2054	}
David Sterba	b3b4aa7	2011-04-21 01:20:15 +0200	[diff] [blame]	2055	btrfs_release_path(path);
Zhaolei	60d53eb	2015-08-17 18:44:46 +0800	[diff] [blame]	2056	ret = insert_one_name(trans, root, key->objectid, key->offset,
				2057	name, name_len, &log_key);
Filipe Manana	df8d116	2015-01-14 01:52:25 +0000	[diff] [blame]	2058	if (ret && ret != -ENOENT && ret != -EEXIST)
Josef Bacik	3650860	2013-04-25 16:23:32 -0400	[diff] [blame]	2059	goto out;
Filipe Manana	bb53eda	2015-07-15 23:26:43 +0100	[diff] [blame]	2060	if (!ret)
				2061	name_added = true;
Josef Bacik	d555438	2013-09-11 14:17:00 -0400	[diff] [blame]	2062	update_size = false;
Josef Bacik	3650860	2013-04-25 16:23:32 -0400	[diff] [blame]	2063	ret = 0;
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	2064	goto out;
				2065	}
				2066
				2067	/*
				2068	* find all the names in a directory item and reconcile them into
				2069	* the subvolume. Only BTRFS_DIR_ITEM_KEY types will have more than
				2070	* one name in a directory item, but the same code gets used for
				2071	* both directory index types
				2072	*/
				2073	static noinline int replay_one_dir_item(struct btrfs_trans_handle *trans,
				2074	struct btrfs_root *root,
				2075	struct btrfs_path *path,
				2076	struct extent_buffer *eb, int slot,
				2077	struct btrfs_key *key)
				2078	{
Filipe Manana	bb53eda	2015-07-15 23:26:43 +0100	[diff] [blame]	2079	int ret = 0;
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	2080	u32 item_size = btrfs_item_size_nr(eb, slot);
				2081	struct btrfs_dir_item *di;
				2082	int name_len;
				2083	unsigned long ptr;
				2084	unsigned long ptr_end;
Filipe Manana	bb53eda	2015-07-15 23:26:43 +0100	[diff] [blame]	2085	struct btrfs_path *fixup_path = NULL;
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	2086
				2087	ptr = btrfs_item_ptr_offset(eb, slot);
				2088	ptr_end = ptr + item_size;
Chris Mason	d397712	2009-01-05 21:25:51 -0500	[diff] [blame]	2089	while (ptr < ptr_end) {
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	2090	di = (struct btrfs_dir_item *)ptr;
				2091	name_len = btrfs_dir_name_len(eb, di);
				2092	ret = replay_one_name(trans, root, path, eb, di, key);
Filipe Manana	bb53eda	2015-07-15 23:26:43 +0100	[diff] [blame]	2093	if (ret < 0)
				2094	break;
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	2095	ptr = (unsigned long)(di + 1);
				2096	ptr += name_len;
Filipe Manana	bb53eda	2015-07-15 23:26:43 +0100	[diff] [blame]	2097
				2098	/*
				2099	* If this entry refers to a non-directory (directories can not
				2100	* have a link count > 1) and it was added in the transaction
				2101	* that was not committed, make sure we fixup the link count of
				2102	* the inode it the entry points to. Otherwise something like
				2103	* the following would result in a directory pointing to an
				2104	* inode with a wrong link that does not account for this dir
				2105	* entry:
				2106	*
				2107	* mkdir testdir
				2108	* touch testdir/foo
				2109	* touch testdir/bar
				2110	* sync
				2111	*
				2112	* ln testdir/bar testdir/bar_link
				2113	* ln testdir/foo testdir/foo_link
				2114	* xfs_io -c "fsync" testdir/bar
				2115	*
				2116	* <power failure>
				2117	*
				2118	* mount fs, log replay happens
				2119	*
				2120	* File foo would remain with a link count of 1 when it has two
				2121	* entries pointing to it in the directory testdir. This would
				2122	* make it impossible to ever delete the parent directory has
				2123	* it would result in stale dentries that can never be deleted.
				2124	*/
				2125	if (ret == 1 && btrfs_dir_type(eb, di) != BTRFS_FT_DIR) {
				2126	struct btrfs_key di_key;
				2127
				2128	if (!fixup_path) {
				2129	fixup_path = btrfs_alloc_path();
				2130	if (!fixup_path) {
				2131	ret = -ENOMEM;
				2132	break;
				2133	}
				2134	}
				2135
				2136	btrfs_dir_item_key_to_cpu(eb, di, &di_key);
				2137	ret = link_to_fixup_dir(trans, root, fixup_path,
				2138	di_key.objectid);
				2139	if (ret)
				2140	break;
				2141	}
				2142	ret = 0;
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	2143	}
Filipe Manana	bb53eda	2015-07-15 23:26:43 +0100	[diff] [blame]	2144	btrfs_free_path(fixup_path);
				2145	return ret;
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	2146	}
				2147
				2148	/*
				2149	* directory replay has two parts. There are the standard directory
				2150	* items in the log copied from the subvolume, and range items
				2151	* created in the log while the subvolume was logged.
				2152	*
				2153	* The range items tell us which parts of the key space the log
				2154	* is authoritative for. During replay, if a key in the subvolume
				2155	* directory is in a logged range item, but not actually in the log
				2156	* that means it was deleted from the directory before the fsync
				2157	* and should be removed.
				2158	*/
				2159	static noinline int find_dir_range(struct btrfs_root *root,
				2160	struct btrfs_path *path,
				2161	u64 dirid, int key_type,
				2162	u64 start_ret, u64 end_ret)
				2163	{
				2164	struct btrfs_key key;
				2165	u64 found_end;
				2166	struct btrfs_dir_log_item *item;
				2167	int ret;
				2168	int nritems;
				2169
				2170	if (*start_ret == (u64)-1)
				2171	return 1;
				2172
				2173	key.objectid = dirid;
				2174	key.type = key_type;
				2175	key.offset = *start_ret;
				2176
				2177	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
				2178	if (ret < 0)
				2179	goto out;
				2180	if (ret > 0) {
				2181	if (path->slots[0] == 0)
				2182	goto out;
				2183	path->slots[0]--;
				2184	}
				2185	if (ret != 0)
				2186	btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
				2187
				2188	if (key.type != key_type \|\| key.objectid != dirid) {
				2189	ret = 1;
				2190	goto next;
				2191	}
				2192	item = btrfs_item_ptr(path->nodes[0], path->slots[0],
				2193	struct btrfs_dir_log_item);
				2194	found_end = btrfs_dir_log_end(path->nodes[0], item);
				2195
				2196	if (start_ret >= key.offset && start_ret <= found_end) {
				2197	ret = 0;
				2198	*start_ret = key.offset;
				2199	*end_ret = found_end;
				2200	goto out;
				2201	}
				2202	ret = 1;
				2203	next:
				2204	/* check the next slot in the tree to see if it is a valid item */
				2205	nritems = btrfs_header_nritems(path->nodes[0]);
Robbie Ko	2a7bf53	2016-10-07 17:30:47 +0800	[diff] [blame]	2206	path->slots[0]++;
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	2207	if (path->slots[0] >= nritems) {
				2208	ret = btrfs_next_leaf(root, path);
				2209	if (ret)
				2210	goto out;
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	2211	}
				2212
				2213	btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
				2214
				2215	if (key.type != key_type \|\| key.objectid != dirid) {
				2216	ret = 1;
				2217	goto out;
				2218	}
				2219	item = btrfs_item_ptr(path->nodes[0], path->slots[0],
				2220	struct btrfs_dir_log_item);
				2221	found_end = btrfs_dir_log_end(path->nodes[0], item);
				2222	*start_ret = key.offset;
				2223	*end_ret = found_end;
				2224	ret = 0;
				2225	out:
David Sterba	b3b4aa7	2011-04-21 01:20:15 +0200	[diff] [blame]	2226	btrfs_release_path(path);
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	2227	return ret;
				2228	}
				2229
				2230	/*
				2231	* this looks for a given directory item in the log. If the directory
				2232	* item is not in the log, the item is removed and the inode it points
				2233	* to is unlinked
				2234	*/
				2235	static noinline int check_item_in_log(struct btrfs_trans_handle *trans,
				2236	struct btrfs_root *root,
				2237	struct btrfs_root *log,
				2238	struct btrfs_path *path,
				2239	struct btrfs_path *log_path,
				2240	struct inode *dir,
				2241	struct btrfs_key *dir_key)
				2242	{
				2243	int ret;
				2244	struct extent_buffer *eb;
				2245	int slot;
				2246	u32 item_size;
				2247	struct btrfs_dir_item *di;
				2248	struct btrfs_dir_item *log_di;
				2249	int name_len;
				2250	unsigned long ptr;
				2251	unsigned long ptr_end;
				2252	char *name;
				2253	struct inode *inode;
				2254	struct btrfs_key location;
				2255
				2256	again:
				2257	eb = path->nodes[0];
				2258	slot = path->slots[0];
				2259	item_size = btrfs_item_size_nr(eb, slot);
				2260	ptr = btrfs_item_ptr_offset(eb, slot);
				2261	ptr_end = ptr + item_size;
Chris Mason	d397712	2009-01-05 21:25:51 -0500	[diff] [blame]	2262	while (ptr < ptr_end) {
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	2263	di = (struct btrfs_dir_item *)ptr;
				2264	name_len = btrfs_dir_name_len(eb, di);
				2265	name = kmalloc(name_len, GFP_NOFS);
				2266	if (!name) {
				2267	ret = -ENOMEM;
				2268	goto out;
				2269	}
				2270	read_extent_buffer(eb, name, (unsigned long)(di + 1),
				2271	name_len);
				2272	log_di = NULL;
Chris Mason	12fcfd2	2009-03-24 10:24:20 -0400	[diff] [blame]	2273	if (log && dir_key->type == BTRFS_DIR_ITEM_KEY) {
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	2274	log_di = btrfs_lookup_dir_item(trans, log, log_path,
				2275	dir_key->objectid,
				2276	name, name_len, 0);
Chris Mason	12fcfd2	2009-03-24 10:24:20 -0400	[diff] [blame]	2277	} else if (log && dir_key->type == BTRFS_DIR_INDEX_KEY) {
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	2278	log_di = btrfs_lookup_dir_index_item(trans, log,
				2279	log_path,
				2280	dir_key->objectid,
				2281	dir_key->offset,
				2282	name, name_len, 0);
				2283	}
Al Viro	8d9e220	2018-07-29 23:04:46 +0100	[diff] [blame]	2284	if (!log_di \|\| log_di == ERR_PTR(-ENOENT)) {
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	2285	btrfs_dir_item_key_to_cpu(eb, di, &location);
David Sterba	b3b4aa7	2011-04-21 01:20:15 +0200	[diff] [blame]	2286	btrfs_release_path(path);
				2287	btrfs_release_path(log_path);
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	2288	inode = read_one_inode(root, location.objectid);
Tsutomu Itoh	c00e949	2011-04-28 09:10:23 +0000	[diff] [blame]	2289	if (!inode) {
				2290	kfree(name);
				2291	return -EIO;
				2292	}
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	2293
				2294	ret = link_to_fixup_dir(trans, root,
				2295	path, location.objectid);
Josef Bacik	3650860	2013-04-25 16:23:32 -0400	[diff] [blame]	2296	if (ret) {
				2297	kfree(name);
				2298	iput(inode);
				2299	goto out;
				2300	}
				2301
Zach Brown	8b558c5	2013-10-16 12:10:34 -0700	[diff] [blame]	2302	inc_nlink(inode);
Nikolay Borisov	4ec5934	2017-01-18 00:31:44 +0200	[diff] [blame]	2303	ret = btrfs_unlink_inode(trans, root, BTRFS_I(dir),
				2304	BTRFS_I(inode), name, name_len);
Josef Bacik	3650860	2013-04-25 16:23:32 -0400	[diff] [blame]	2305	if (!ret)
Nikolay Borisov	e5c304e6	2018-02-07 17:55:43 +0200	[diff] [blame]	2306	ret = btrfs_run_delayed_items(trans);
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	2307	kfree(name);
				2308	iput(inode);
Josef Bacik	3650860	2013-04-25 16:23:32 -0400	[diff] [blame]	2309	if (ret)
				2310	goto out;
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	2311
				2312	/* there might still be more names under this key
				2313	* check and repeat if required
				2314	*/
				2315	ret = btrfs_search_slot(NULL, root, dir_key, path,
				2316	0, 0);
				2317	if (ret == 0)
				2318	goto again;
				2319	ret = 0;
				2320	goto out;
Filipe David Borba Manana	269d040	2013-10-28 17:39:21 +0000	[diff] [blame]	2321	} else if (IS_ERR(log_di)) {
				2322	kfree(name);
				2323	return PTR_ERR(log_di);
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	2324	}
David Sterba	b3b4aa7	2011-04-21 01:20:15 +0200	[diff] [blame]	2325	btrfs_release_path(log_path);
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	2326	kfree(name);
				2327
				2328	ptr = (unsigned long)(di + 1);
				2329	ptr += name_len;
				2330	}
				2331	ret = 0;
				2332	out:
David Sterba	b3b4aa7	2011-04-21 01:20:15 +0200	[diff] [blame]	2333	btrfs_release_path(path);
				2334	btrfs_release_path(log_path);
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	2335	return ret;
				2336	}
				2337
Filipe Manana	4f764e5	2015-02-23 19:53:35 +0000	[diff] [blame]	2338	static int replay_xattr_deletes(struct btrfs_trans_handle *trans,
				2339	struct btrfs_root *root,
				2340	struct btrfs_root *log,
				2341	struct btrfs_path *path,
				2342	const u64 ino)
				2343	{
				2344	struct btrfs_key search_key;
				2345	struct btrfs_path *log_path;
				2346	int i;
				2347	int nritems;
				2348	int ret;
				2349
				2350	log_path = btrfs_alloc_path();
				2351	if (!log_path)
				2352	return -ENOMEM;
				2353
				2354	search_key.objectid = ino;
				2355	search_key.type = BTRFS_XATTR_ITEM_KEY;
				2356	search_key.offset = 0;
				2357	again:
				2358	ret = btrfs_search_slot(NULL, root, &search_key, path, 0, 0);
				2359	if (ret < 0)
				2360	goto out;
				2361	process_leaf:
				2362	nritems = btrfs_header_nritems(path->nodes[0]);
				2363	for (i = path->slots[0]; i < nritems; i++) {
				2364	struct btrfs_key key;
				2365	struct btrfs_dir_item *di;
				2366	struct btrfs_dir_item *log_di;
				2367	u32 total_size;
				2368	u32 cur;
				2369
				2370	btrfs_item_key_to_cpu(path->nodes[0], &key, i);
				2371	if (key.objectid != ino \|\| key.type != BTRFS_XATTR_ITEM_KEY) {
				2372	ret = 0;
				2373	goto out;
				2374	}
				2375
				2376	di = btrfs_item_ptr(path->nodes[0], i, struct btrfs_dir_item);
				2377	total_size = btrfs_item_size_nr(path->nodes[0], i);
				2378	cur = 0;
				2379	while (cur < total_size) {
				2380	u16 name_len = btrfs_dir_name_len(path->nodes[0], di);
				2381	u16 data_len = btrfs_dir_data_len(path->nodes[0], di);
				2382	u32 this_len = sizeof(*di) + name_len + data_len;
				2383	char *name;
				2384
				2385	name = kmalloc(name_len, GFP_NOFS);
				2386	if (!name) {
				2387	ret = -ENOMEM;
				2388	goto out;
				2389	}
				2390	read_extent_buffer(path->nodes[0], name,
				2391	(unsigned long)(di + 1), name_len);
				2392
				2393	log_di = btrfs_lookup_xattr(NULL, log, log_path, ino,
				2394	name, name_len, 0);
				2395	btrfs_release_path(log_path);
				2396	if (!log_di) {
				2397	/* Doesn't exist in log tree, so delete it. */
				2398	btrfs_release_path(path);
				2399	di = btrfs_lookup_xattr(trans, root, path, ino,
				2400	name, name_len, -1);
				2401	kfree(name);
				2402	if (IS_ERR(di)) {
				2403	ret = PTR_ERR(di);
				2404	goto out;
				2405	}
				2406	ASSERT(di);
				2407	ret = btrfs_delete_one_dir_name(trans, root,
				2408	path, di);
				2409	if (ret)
				2410	goto out;
				2411	btrfs_release_path(path);
				2412	search_key = key;
				2413	goto again;
				2414	}
				2415	kfree(name);
				2416	if (IS_ERR(log_di)) {
				2417	ret = PTR_ERR(log_di);
				2418	goto out;
				2419	}
				2420	cur += this_len;
				2421	di = (struct btrfs_dir_item )((char )di + this_len);
				2422	}
				2423	}
				2424	ret = btrfs_next_leaf(root, path);
				2425	if (ret > 0)
				2426	ret = 0;
				2427	else if (ret == 0)
				2428	goto process_leaf;
				2429	out:
				2430	btrfs_free_path(log_path);
				2431	btrfs_release_path(path);
				2432	return ret;
				2433	}
				2434
				2435
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	2436	/*
				2437	* deletion replay happens before we copy any new directory items
				2438	* out of the log or out of backreferences from inodes. It
				2439	* scans the log to find ranges of keys that log is authoritative for,
				2440	* and then scans the directory to find items in those ranges that are
				2441	* not present in the log.
				2442	*
				2443	* Anything we don't find in the log is unlinked and removed from the
				2444	* directory.
				2445	*/
				2446	static noinline int replay_dir_deletes(struct btrfs_trans_handle *trans,
				2447	struct btrfs_root *root,
				2448	struct btrfs_root *log,
				2449	struct btrfs_path *path,
Chris Mason	12fcfd2	2009-03-24 10:24:20 -0400	[diff] [blame]	2450	u64 dirid, int del_all)
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	2451	{
				2452	u64 range_start;
				2453	u64 range_end;
				2454	int key_type = BTRFS_DIR_LOG_ITEM_KEY;
				2455	int ret = 0;
				2456	struct btrfs_key dir_key;
				2457	struct btrfs_key found_key;
				2458	struct btrfs_path *log_path;
				2459	struct inode *dir;
				2460
				2461	dir_key.objectid = dirid;
				2462	dir_key.type = BTRFS_DIR_ITEM_KEY;
				2463	log_path = btrfs_alloc_path();
				2464	if (!log_path)
				2465	return -ENOMEM;
				2466
				2467	dir = read_one_inode(root, dirid);
				2468	/* it isn't an error if the inode isn't there, that can happen
				2469	* because we replay the deletes before we copy in the inode item
				2470	* from the log
				2471	*/
				2472	if (!dir) {
				2473	btrfs_free_path(log_path);
				2474	return 0;
				2475	}
				2476	again:
				2477	range_start = 0;
				2478	range_end = 0;
Chris Mason	d397712	2009-01-05 21:25:51 -0500	[diff] [blame]	2479	while (1) {
Chris Mason	12fcfd2	2009-03-24 10:24:20 -0400	[diff] [blame]	2480	if (del_all)
				2481	range_end = (u64)-1;
				2482	else {
				2483	ret = find_dir_range(log, path, dirid, key_type,
				2484	&range_start, &range_end);
				2485	if (ret != 0)
				2486	break;
				2487	}
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	2488
				2489	dir_key.offset = range_start;
Chris Mason	d397712	2009-01-05 21:25:51 -0500	[diff] [blame]	2490	while (1) {
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	2491	int nritems;
				2492	ret = btrfs_search_slot(NULL, root, &dir_key, path,
				2493	0, 0);
				2494	if (ret < 0)
				2495	goto out;
				2496
				2497	nritems = btrfs_header_nritems(path->nodes[0]);
				2498	if (path->slots[0] >= nritems) {
				2499	ret = btrfs_next_leaf(root, path);
Liu Bo	b98def7	2018-04-03 01:59:48 +0800	[diff] [blame]	2500	if (ret == 1)
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	2501	break;
Liu Bo	b98def7	2018-04-03 01:59:48 +0800	[diff] [blame]	2502	else if (ret < 0)
				2503	goto out;
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	2504	}
				2505	btrfs_item_key_to_cpu(path->nodes[0], &found_key,
				2506	path->slots[0]);
				2507	if (found_key.objectid != dirid \|\|
				2508	found_key.type != dir_key.type)
				2509	goto next_type;
				2510
				2511	if (found_key.offset > range_end)
				2512	break;
				2513
				2514	ret = check_item_in_log(trans, root, log, path,
Chris Mason	12fcfd2	2009-03-24 10:24:20 -0400	[diff] [blame]	2515	log_path, dir,
				2516	&found_key);
Josef Bacik	3650860	2013-04-25 16:23:32 -0400	[diff] [blame]	2517	if (ret)
				2518	goto out;
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	2519	if (found_key.offset == (u64)-1)
				2520	break;
				2521	dir_key.offset = found_key.offset + 1;
				2522	}
David Sterba	b3b4aa7	2011-04-21 01:20:15 +0200	[diff] [blame]	2523	btrfs_release_path(path);
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	2524	if (range_end == (u64)-1)
				2525	break;
				2526	range_start = range_end + 1;
				2527	}
				2528
				2529	next_type:
				2530	ret = 0;
				2531	if (key_type == BTRFS_DIR_LOG_ITEM_KEY) {
				2532	key_type = BTRFS_DIR_LOG_INDEX_KEY;
				2533	dir_key.type = BTRFS_DIR_INDEX_KEY;
David Sterba	b3b4aa7	2011-04-21 01:20:15 +0200	[diff] [blame]	2534	btrfs_release_path(path);
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	2535	goto again;
				2536	}
				2537	out:
David Sterba	b3b4aa7	2011-04-21 01:20:15 +0200	[diff] [blame]	2538	btrfs_release_path(path);
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	2539	btrfs_free_path(log_path);
				2540	iput(dir);
				2541	return ret;
				2542	}
				2543
				2544	/*
				2545	* the process_func used to replay items from the log tree. This
				2546	* gets called in two different stages. The first stage just looks
				2547	* for inodes and makes sure they are all copied into the subvolume.
				2548	*
				2549	* The second stage copies all the other item types from the log into
				2550	* the subvolume. The two stage approach is slower, but gets rid of
				2551	* lots of complexity around inodes referencing other inodes that exist
				2552	* only in the log (references come from either directory items or inode
				2553	* back refs).
				2554	*/
				2555	static int replay_one_buffer(struct btrfs_root log, struct extent_buffer eb,
Qu Wenruo	581c176	2018-03-29 09:08:11 +0800	[diff] [blame]	2556	struct walk_control *wc, u64 gen, int level)
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	2557	{
				2558	int nritems;
				2559	struct btrfs_path *path;
				2560	struct btrfs_root *root = wc->replay_dest;
				2561	struct btrfs_key key;
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	2562	int i;
				2563	int ret;
				2564
Qu Wenruo	581c176	2018-03-29 09:08:11 +0800	[diff] [blame]	2565	ret = btrfs_read_buffer(eb, gen, level, NULL);
Tsutomu Itoh	018642a	2012-05-29 18:10:13 +0900	[diff] [blame]	2566	if (ret)
				2567	return ret;
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	2568
				2569	level = btrfs_header_level(eb);
				2570
				2571	if (level != 0)
				2572	return 0;
				2573
				2574	path = btrfs_alloc_path();
Mark Fasheh	1e5063d	2011-07-12 10:46:06 -0700	[diff] [blame]	2575	if (!path)
				2576	return -ENOMEM;
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	2577
				2578	nritems = btrfs_header_nritems(eb);
				2579	for (i = 0; i < nritems; i++) {
				2580	btrfs_item_key_to_cpu(eb, &key, i);
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	2581
				2582	/* inode keys are done during the first stage */
				2583	if (key.type == BTRFS_INODE_ITEM_KEY &&
				2584	wc->stage == LOG_WALK_REPLAY_INODES) {
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	2585	struct btrfs_inode_item *inode_item;
				2586	u32 mode;
				2587
				2588	inode_item = btrfs_item_ptr(eb, i,
				2589	struct btrfs_inode_item);
Filipe Manana	f2d72f4	2018-10-08 11:12:55 +0100	[diff] [blame]	2590	/*
				2591	* If we have a tmpfile (O_TMPFILE) that got fsync'ed
				2592	* and never got linked before the fsync, skip it, as
				2593	* replaying it is pointless since it would be deleted
				2594	* later. We skip logging tmpfiles, but it's always
				2595	* possible we are replaying a log created with a kernel
				2596	* that used to log tmpfiles.
				2597	*/
				2598	if (btrfs_inode_nlink(eb, inode_item) == 0) {
				2599	wc->ignore_cur_inode = true;
				2600	continue;
				2601	} else {
				2602	wc->ignore_cur_inode = false;
				2603	}
Filipe Manana	4f764e5	2015-02-23 19:53:35 +0000	[diff] [blame]	2604	ret = replay_xattr_deletes(wc->trans, root, log,
				2605	path, key.objectid);
				2606	if (ret)
				2607	break;
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	2608	mode = btrfs_inode_mode(eb, inode_item);
				2609	if (S_ISDIR(mode)) {
				2610	ret = replay_dir_deletes(wc->trans,
Chris Mason	12fcfd2	2009-03-24 10:24:20 -0400	[diff] [blame]	2611	root, log, path, key.objectid, 0);
Josef Bacik	b50c6e2	2013-04-25 15:55:30 -0400	[diff] [blame]	2612	if (ret)
				2613	break;
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	2614	}
				2615	ret = overwrite_item(wc->trans, root, path,
				2616	eb, i, &key);
Josef Bacik	b50c6e2	2013-04-25 15:55:30 -0400	[diff] [blame]	2617	if (ret)
				2618	break;
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	2619
Filipe Manana	471d557	2018-04-05 22:55:12 +0100	[diff] [blame]	2620	/*
				2621	* Before replaying extents, truncate the inode to its
				2622	* size. We need to do it now and not after log replay
				2623	* because before an fsync we can have prealloc extents
				2624	* added beyond the inode's i_size. If we did it after,
				2625	* through orphan cleanup for example, we would drop
				2626	* those prealloc extents just after replaying them.
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	2627	*/
				2628	if (S_ISREG(mode)) {
Filipe Manana	5893dfb	2020-11-04 11:07:32 +0000	[diff] [blame]	2629	struct btrfs_drop_extents_args drop_args = { 0 };
Filipe Manana	471d557	2018-04-05 22:55:12 +0100	[diff] [blame]	2630	struct inode *inode;
				2631	u64 from;
				2632
				2633	inode = read_one_inode(root, key.objectid);
				2634	if (!inode) {
				2635	ret = -EIO;
				2636	break;
				2637	}
				2638	from = ALIGN(i_size_read(inode),
				2639	root->fs_info->sectorsize);
Filipe Manana	5893dfb	2020-11-04 11:07:32 +0000	[diff] [blame]	2640	drop_args.start = from;
				2641	drop_args.end = (u64)-1;
				2642	drop_args.drop_cache = true;
				2643	ret = btrfs_drop_extents(wc->trans, root,
				2644	BTRFS_I(inode),
				2645	&drop_args);
Filipe Manana	471d557	2018-04-05 22:55:12 +0100	[diff] [blame]	2646	if (!ret) {
Filipe Manana	2766ff6	2020-11-04 11:07:34 +0000	[diff] [blame]	2647	inode_sub_bytes(inode,
				2648	drop_args.bytes_found);
Filipe Manana	f2d72f4	2018-10-08 11:12:55 +0100	[diff] [blame]	2649	/* Update the inode's nbytes. */
Filipe Manana	471d557	2018-04-05 22:55:12 +0100	[diff] [blame]	2650	ret = btrfs_update_inode(wc->trans,
Nikolay Borisov	9a56fcd	2020-11-02 16:48:59 +0200	[diff] [blame]	2651	root, BTRFS_I(inode));
Filipe Manana	471d557	2018-04-05 22:55:12 +0100	[diff] [blame]	2652	}
				2653	iput(inode);
Josef Bacik	b50c6e2	2013-04-25 15:55:30 -0400	[diff] [blame]	2654	if (ret)
				2655	break;
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	2656	}
Yan, Zheng	c71bf09	2009-11-12 09:34:40 +0000	[diff] [blame]	2657
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	2658	ret = link_to_fixup_dir(wc->trans, root,
				2659	path, key.objectid);
Josef Bacik	b50c6e2	2013-04-25 15:55:30 -0400	[diff] [blame]	2660	if (ret)
				2661	break;
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	2662	}
Josef Bacik	dd8e721	2013-09-11 11:57:23 -0400	[diff] [blame]	2663
Filipe Manana	f2d72f4	2018-10-08 11:12:55 +0100	[diff] [blame]	2664	if (wc->ignore_cur_inode)
				2665	continue;
				2666
Josef Bacik	dd8e721	2013-09-11 11:57:23 -0400	[diff] [blame]	2667	if (key.type == BTRFS_DIR_INDEX_KEY &&
				2668	wc->stage == LOG_WALK_REPLAY_DIR_INDEX) {
				2669	ret = replay_one_dir_item(wc->trans, root, path,
				2670	eb, i, &key);
				2671	if (ret)
				2672	break;
				2673	}
				2674
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	2675	if (wc->stage < LOG_WALK_REPLAY_ALL)
				2676	continue;
				2677
				2678	/* these keys are simply copied */
				2679	if (key.type == BTRFS_XATTR_ITEM_KEY) {
				2680	ret = overwrite_item(wc->trans, root, path,
				2681	eb, i, &key);
Josef Bacik	b50c6e2	2013-04-25 15:55:30 -0400	[diff] [blame]	2682	if (ret)
				2683	break;
Liu Bo	2da1c66	2013-05-26 13:50:29 +0000	[diff] [blame]	2684	} else if (key.type == BTRFS_INODE_REF_KEY \|\|
				2685	key.type == BTRFS_INODE_EXTREF_KEY) {
Mark Fasheh	f186373	2012-08-08 11:32:27 -0700	[diff] [blame]	2686	ret = add_inode_ref(wc->trans, root, log, path,
				2687	eb, i, &key);
Josef Bacik	b50c6e2	2013-04-25 15:55:30 -0400	[diff] [blame]	2688	if (ret && ret != -ENOENT)
				2689	break;
				2690	ret = 0;
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	2691	} else if (key.type == BTRFS_EXTENT_DATA_KEY) {
				2692	ret = replay_one_extent(wc->trans, root, path,
				2693	eb, i, &key);
Josef Bacik	b50c6e2	2013-04-25 15:55:30 -0400	[diff] [blame]	2694	if (ret)
				2695	break;
Josef Bacik	dd8e721	2013-09-11 11:57:23 -0400	[diff] [blame]	2696	} else if (key.type == BTRFS_DIR_ITEM_KEY) {
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	2697	ret = replay_one_dir_item(wc->trans, root, path,
				2698	eb, i, &key);
Josef Bacik	b50c6e2	2013-04-25 15:55:30 -0400	[diff] [blame]	2699	if (ret)
				2700	break;
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	2701	}
				2702	}
				2703	btrfs_free_path(path);
Josef Bacik	b50c6e2	2013-04-25 15:55:30 -0400	[diff] [blame]	2704	return ret;
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	2705	}
				2706
Nikolay Borisov	6787bb9	2020-01-20 16:09:10 +0200	[diff] [blame]	2707	/*
				2708	* Correctly adjust the reserved bytes occupied by a log tree extent buffer
				2709	*/
				2710	static void unaccount_log_buffer(struct btrfs_fs_info *fs_info, u64 start)
				2711	{
				2712	struct btrfs_block_group *cache;
				2713
				2714	cache = btrfs_lookup_block_group(fs_info, start);
				2715	if (!cache) {
				2716	btrfs_err(fs_info, "unable to find block group for %llu", start);
				2717	return;
				2718	}
				2719
				2720	spin_lock(&cache->space_info->lock);
				2721	spin_lock(&cache->lock);
				2722	cache->reserved -= fs_info->nodesize;
				2723	cache->space_info->bytes_reserved -= fs_info->nodesize;
				2724	spin_unlock(&cache->lock);
				2725	spin_unlock(&cache->space_info->lock);
				2726
				2727	btrfs_put_block_group(cache);
				2728	}
				2729
Chris Mason	d397712	2009-01-05 21:25:51 -0500	[diff] [blame]	2730	static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans,
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	2731	struct btrfs_root *root,
				2732	struct btrfs_path path, int level,
				2733	struct walk_control *wc)
				2734	{
Jeff Mahoney	0b246af	2016-06-22 18:54:23 -0400	[diff] [blame]	2735	struct btrfs_fs_info *fs_info = root->fs_info;
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	2736	u64 bytenr;
				2737	u64 ptr_gen;
				2738	struct extent_buffer *next;
				2739	struct extent_buffer *cur;
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	2740	u32 blocksize;
				2741	int ret = 0;
				2742
Chris Mason	d397712	2009-01-05 21:25:51 -0500	[diff] [blame]	2743	while (*level > 0) {
Qu Wenruo	581c176	2018-03-29 09:08:11 +0800	[diff] [blame]	2744	struct btrfs_key first_key;
				2745
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	2746	cur = path->nodes[*level];
				2747
Dulshani Gunawardhana	fae7f21	2013-10-31 10:30:08 +0530	[diff] [blame]	2748	WARN_ON(btrfs_header_level(cur) != *level);
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	2749
				2750	if (path->slots[*level] >=
				2751	btrfs_header_nritems(cur))
				2752	break;
				2753
				2754	bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
				2755	ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
Qu Wenruo	581c176	2018-03-29 09:08:11 +0800	[diff] [blame]	2756	btrfs_node_key_to_cpu(cur, &first_key, path->slots[*level]);
Jeff Mahoney	0b246af	2016-06-22 18:54:23 -0400	[diff] [blame]	2757	blocksize = fs_info->nodesize;
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	2758
Josef Bacik	3fbaf25	2020-11-05 10:45:20 -0500	[diff] [blame]	2759	next = btrfs_find_create_tree_block(fs_info, bytenr,
				2760	btrfs_header_owner(cur),
				2761	*level - 1);
Liu Bo	c871b0f	2016-06-06 12:01:23 -0700	[diff] [blame]	2762	if (IS_ERR(next))
				2763	return PTR_ERR(next);
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	2764
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	2765	if (*level == 1) {
Qu Wenruo	581c176	2018-03-29 09:08:11 +0800	[diff] [blame]	2766	ret = wc->process_func(root, next, wc, ptr_gen,
				2767	*level - 1);
Josef Bacik	b50c6e2	2013-04-25 15:55:30 -0400	[diff] [blame]	2768	if (ret) {
				2769	free_extent_buffer(next);
Mark Fasheh	1e5063d	2011-07-12 10:46:06 -0700	[diff] [blame]	2770	return ret;
Josef Bacik	b50c6e2	2013-04-25 15:55:30 -0400	[diff] [blame]	2771	}
Yan, Zheng	4a500fd	2010-05-16 10:49:59 -0400	[diff] [blame]	2772
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	2773	path->slots[*level]++;
				2774	if (wc->free) {
Qu Wenruo	581c176	2018-03-29 09:08:11 +0800	[diff] [blame]	2775	ret = btrfs_read_buffer(next, ptr_gen,
				2776	*level - 1, &first_key);
Tsutomu Itoh	018642a	2012-05-29 18:10:13 +0900	[diff] [blame]	2777	if (ret) {
				2778	free_extent_buffer(next);
				2779	return ret;
				2780	}
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	2781
Josef Bacik	681ae50	2013-10-07 15:11:00 -0400	[diff] [blame]	2782	if (trans) {
				2783	btrfs_tree_lock(next);
David Sterba	6a884d7d	2019-03-20 14:30:02 +0100	[diff] [blame]	2784	btrfs_clean_tree_block(next);
Josef Bacik	681ae50	2013-10-07 15:11:00 -0400	[diff] [blame]	2785	btrfs_wait_tree_block_writeback(next);
				2786	btrfs_tree_unlock(next);
Nikolay Borisov	7bfc100	2020-01-20 16:09:12 +0200	[diff] [blame]	2787	ret = btrfs_pin_reserved_extent(trans,
Nikolay Borisov	10e958d	2020-01-20 16:09:11 +0200	[diff] [blame]	2788	bytenr, blocksize);
				2789	if (ret) {
				2790	free_extent_buffer(next);
				2791	return ret;
				2792	}
Naohiro Aota	d3575156	2021-02-04 19:21:54 +0900	[diff] [blame]	2793	btrfs_redirty_list_add(
				2794	trans->transaction, next);
Liu Bo	1846430	2018-01-25 11:02:51 -0700	[diff] [blame]	2795	} else {
				2796	if (test_and_clear_bit(EXTENT_BUFFER_DIRTY, &next->bflags))
				2797	clear_extent_buffer_dirty(next);
Nikolay Borisov	10e958d	2020-01-20 16:09:11 +0200	[diff] [blame]	2798	unaccount_log_buffer(fs_info, bytenr);
Josef Bacik	3650860	2013-04-25 16:23:32 -0400	[diff] [blame]	2799	}
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	2800	}
				2801	free_extent_buffer(next);
				2802	continue;
				2803	}
Qu Wenruo	581c176	2018-03-29 09:08:11 +0800	[diff] [blame]	2804	ret = btrfs_read_buffer(next, ptr_gen, *level - 1, &first_key);
Tsutomu Itoh	018642a	2012-05-29 18:10:13 +0900	[diff] [blame]	2805	if (ret) {
				2806	free_extent_buffer(next);
				2807	return ret;
				2808	}
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	2809
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	2810	if (path->nodes[*level-1])
				2811	free_extent_buffer(path->nodes[*level-1]);
				2812	path->nodes[*level-1] = next;
				2813	*level = btrfs_header_level(next);
				2814	path->slots[*level] = 0;
				2815	cond_resched();
				2816	}
Yan, Zheng	4a500fd	2010-05-16 10:49:59 -0400	[diff] [blame]	2817	path->slots[level] = btrfs_header_nritems(path->nodes[level]);
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	2818
				2819	cond_resched();
				2820	return 0;
				2821	}
				2822
Chris Mason	d397712	2009-01-05 21:25:51 -0500	[diff] [blame]	2823	static noinline int walk_up_log_tree(struct btrfs_trans_handle *trans,
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	2824	struct btrfs_root *root,
				2825	struct btrfs_path path, int level,
				2826	struct walk_control *wc)
				2827	{
Jeff Mahoney	0b246af	2016-06-22 18:54:23 -0400	[diff] [blame]	2828	struct btrfs_fs_info *fs_info = root->fs_info;
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	2829	int i;
				2830	int slot;
				2831	int ret;
				2832
Chris Mason	d397712	2009-01-05 21:25:51 -0500	[diff] [blame]	2833	for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	2834	slot = path->slots[i];
Yan, Zheng	4a500fd	2010-05-16 10:49:59 -0400	[diff] [blame]	2835	if (slot + 1 < btrfs_header_nritems(path->nodes[i])) {
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	2836	path->slots[i]++;
				2837	*level = i;
				2838	WARN_ON(*level == 0);
				2839	return 0;
				2840	} else {
Mark Fasheh	1e5063d	2011-07-12 10:46:06 -0700	[diff] [blame]	2841	ret = wc->process_func(root, path->nodes[*level], wc,
Qu Wenruo	581c176	2018-03-29 09:08:11 +0800	[diff] [blame]	2842	btrfs_header_generation(path->nodes[*level]),
				2843	*level);
Mark Fasheh	1e5063d	2011-07-12 10:46:06 -0700	[diff] [blame]	2844	if (ret)
				2845	return ret;
				2846
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	2847	if (wc->free) {
				2848	struct extent_buffer *next;
				2849
				2850	next = path->nodes[*level];
				2851
Josef Bacik	681ae50	2013-10-07 15:11:00 -0400	[diff] [blame]	2852	if (trans) {
				2853	btrfs_tree_lock(next);
David Sterba	6a884d7d	2019-03-20 14:30:02 +0100	[diff] [blame]	2854	btrfs_clean_tree_block(next);
Josef Bacik	681ae50	2013-10-07 15:11:00 -0400	[diff] [blame]	2855	btrfs_wait_tree_block_writeback(next);
				2856	btrfs_tree_unlock(next);
Nikolay Borisov	7bfc100	2020-01-20 16:09:12 +0200	[diff] [blame]	2857	ret = btrfs_pin_reserved_extent(trans,
Nikolay Borisov	10e958d	2020-01-20 16:09:11 +0200	[diff] [blame]	2858	path->nodes[*level]->start,
				2859	path->nodes[*level]->len);
				2860	if (ret)
				2861	return ret;
Liu Bo	1846430	2018-01-25 11:02:51 -0700	[diff] [blame]	2862	} else {
				2863	if (test_and_clear_bit(EXTENT_BUFFER_DIRTY, &next->bflags))
				2864	clear_extent_buffer_dirty(next);
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	2865
Nikolay Borisov	10e958d	2020-01-20 16:09:11 +0200	[diff] [blame]	2866	unaccount_log_buffer(fs_info,
				2867	path->nodes[*level]->start);
				2868	}
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	2869	}
				2870	free_extent_buffer(path->nodes[*level]);
				2871	path->nodes[*level] = NULL;
				2872	*level = i + 1;
				2873	}
				2874	}
				2875	return 1;
				2876	}
				2877
				2878	/*
				2879	* drop the reference count on the tree rooted at 'snap'. This traverses
				2880	* the tree freeing any blocks that have a ref count of zero after being
				2881	* decremented.
				2882	*/
				2883	static int walk_log_tree(struct btrfs_trans_handle *trans,
				2884	struct btrfs_root log, struct walk_control wc)
				2885	{
Jeff Mahoney	2ff7e61	2016-06-22 18:54:24 -0400	[diff] [blame]	2886	struct btrfs_fs_info *fs_info = log->fs_info;
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	2887	int ret = 0;
				2888	int wret;
				2889	int level;
				2890	struct btrfs_path *path;
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	2891	int orig_level;
				2892
				2893	path = btrfs_alloc_path();
Tsutomu Itoh	db5b493	2011-03-23 08:14:16 +0000	[diff] [blame]	2894	if (!path)
				2895	return -ENOMEM;
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	2896
				2897	level = btrfs_header_level(log->node);
				2898	orig_level = level;
				2899	path->nodes[level] = log->node;
David Sterba	67439da	2019-10-08 13:28:47 +0200	[diff] [blame]	2900	atomic_inc(&log->node->refs);
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	2901	path->slots[level] = 0;
				2902
Chris Mason	d397712	2009-01-05 21:25:51 -0500	[diff] [blame]	2903	while (1) {
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	2904	wret = walk_down_log_tree(trans, log, path, &level, wc);
				2905	if (wret > 0)
				2906	break;
Jeff Mahoney	79787ea	2012-03-12 16:03:00 +0100	[diff] [blame]	2907	if (wret < 0) {
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	2908	ret = wret;
Jeff Mahoney	79787ea	2012-03-12 16:03:00 +0100	[diff] [blame]	2909	goto out;
				2910	}
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	2911
				2912	wret = walk_up_log_tree(trans, log, path, &level, wc);
				2913	if (wret > 0)
				2914	break;
Jeff Mahoney	79787ea	2012-03-12 16:03:00 +0100	[diff] [blame]	2915	if (wret < 0) {
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	2916	ret = wret;
Jeff Mahoney	79787ea	2012-03-12 16:03:00 +0100	[diff] [blame]	2917	goto out;
				2918	}
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	2919	}
				2920
				2921	/* was the root node processed? if not, catch it here */
				2922	if (path->nodes[orig_level]) {
Jeff Mahoney	79787ea	2012-03-12 16:03:00 +0100	[diff] [blame]	2923	ret = wc->process_func(log, path->nodes[orig_level], wc,
Qu Wenruo	581c176	2018-03-29 09:08:11 +0800	[diff] [blame]	2924	btrfs_header_generation(path->nodes[orig_level]),
				2925	orig_level);
Jeff Mahoney	79787ea	2012-03-12 16:03:00 +0100	[diff] [blame]	2926	if (ret)
				2927	goto out;
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	2928	if (wc->free) {
				2929	struct extent_buffer *next;
				2930
				2931	next = path->nodes[orig_level];
				2932
Josef Bacik	681ae50	2013-10-07 15:11:00 -0400	[diff] [blame]	2933	if (trans) {
				2934	btrfs_tree_lock(next);
David Sterba	6a884d7d	2019-03-20 14:30:02 +0100	[diff] [blame]	2935	btrfs_clean_tree_block(next);
Josef Bacik	681ae50	2013-10-07 15:11:00 -0400	[diff] [blame]	2936	btrfs_wait_tree_block_writeback(next);
				2937	btrfs_tree_unlock(next);
Nikolay Borisov	7bfc100	2020-01-20 16:09:12 +0200	[diff] [blame]	2938	ret = btrfs_pin_reserved_extent(trans,
Nikolay Borisov	10e958d	2020-01-20 16:09:11 +0200	[diff] [blame]	2939	next->start, next->len);
				2940	if (ret)
				2941	goto out;
Liu Bo	1846430	2018-01-25 11:02:51 -0700	[diff] [blame]	2942	} else {
				2943	if (test_and_clear_bit(EXTENT_BUFFER_DIRTY, &next->bflags))
				2944	clear_extent_buffer_dirty(next);
Nikolay Borisov	10e958d	2020-01-20 16:09:11 +0200	[diff] [blame]	2945	unaccount_log_buffer(fs_info, next->start);
Josef Bacik	681ae50	2013-10-07 15:11:00 -0400	[diff] [blame]	2946	}
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	2947	}
				2948	}
				2949
Jeff Mahoney	79787ea	2012-03-12 16:03:00 +0100	[diff] [blame]	2950	out:
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	2951	btrfs_free_path(path);
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	2952	return ret;
				2953	}
				2954
Yan Zheng	7237f18	2009-01-21 12:54:03 -0500	[diff] [blame]	2955	/*
				2956	* helper function to update the item for a given subvolumes log root
				2957	* in the tree of log roots
				2958	*/
				2959	static int update_log_root(struct btrfs_trans_handle *trans,
Josef Bacik	4203e96	2019-09-30 16:27:25 -0400	[diff] [blame]	2960	struct btrfs_root *log,
				2961	struct btrfs_root_item *root_item)
Yan Zheng	7237f18	2009-01-21 12:54:03 -0500	[diff] [blame]	2962	{
Jeff Mahoney	0b246af	2016-06-22 18:54:23 -0400	[diff] [blame]	2963	struct btrfs_fs_info *fs_info = log->fs_info;
Yan Zheng	7237f18	2009-01-21 12:54:03 -0500	[diff] [blame]	2964	int ret;
				2965
				2966	if (log->log_transid == 1) {
				2967	/* insert root item on the first sync */
Jeff Mahoney	0b246af	2016-06-22 18:54:23 -0400	[diff] [blame]	2968	ret = btrfs_insert_root(trans, fs_info->log_root_tree,
Josef Bacik	4203e96	2019-09-30 16:27:25 -0400	[diff] [blame]	2969	&log->root_key, root_item);
Yan Zheng	7237f18	2009-01-21 12:54:03 -0500	[diff] [blame]	2970	} else {
Jeff Mahoney	0b246af	2016-06-22 18:54:23 -0400	[diff] [blame]	2971	ret = btrfs_update_root(trans, fs_info->log_root_tree,
Josef Bacik	4203e96	2019-09-30 16:27:25 -0400	[diff] [blame]	2972	&log->root_key, root_item);
Yan Zheng	7237f18	2009-01-21 12:54:03 -0500	[diff] [blame]	2973	}
				2974	return ret;
				2975	}
				2976
Zhaolei	60d53eb	2015-08-17 18:44:46 +0800	[diff] [blame]	2977	static void wait_log_commit(struct btrfs_root *root, int transid)
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	2978	{
				2979	DEFINE_WAIT(wait);
Yan Zheng	7237f18	2009-01-21 12:54:03 -0500	[diff] [blame]	2980	int index = transid % 2;
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	2981
Yan Zheng	7237f18	2009-01-21 12:54:03 -0500	[diff] [blame]	2982	/*
				2983	* we only allow two pending log transactions at a time,
				2984	* so we know that if ours is more than 2 older than the
				2985	* current transaction, we're done
				2986	*/
Liu Bo	49e83f5	2017-09-01 16:14:30 -0600	[diff] [blame]	2987	for (;;) {
Yan Zheng	7237f18	2009-01-21 12:54:03 -0500	[diff] [blame]	2988	prepare_to_wait(&root->log_commit_wait[index],
				2989	&wait, TASK_UNINTERRUPTIBLE);
Liu Bo	49e83f5	2017-09-01 16:14:30 -0600	[diff] [blame]	2990
				2991	if (!(root->log_transid_committed < transid &&
				2992	atomic_read(&root->log_commit[index])))
				2993	break;
				2994
Yan Zheng	7237f18	2009-01-21 12:54:03 -0500	[diff] [blame]	2995	mutex_unlock(&root->log_mutex);
Liu Bo	49e83f5	2017-09-01 16:14:30 -0600	[diff] [blame]	2996	schedule();
Yan Zheng	7237f18	2009-01-21 12:54:03 -0500	[diff] [blame]	2997	mutex_lock(&root->log_mutex);
Liu Bo	49e83f5	2017-09-01 16:14:30 -0600	[diff] [blame]	2998	}
				2999	finish_wait(&root->log_commit_wait[index], &wait);
Yan Zheng	7237f18	2009-01-21 12:54:03 -0500	[diff] [blame]	3000	}
				3001
Zhaolei	60d53eb	2015-08-17 18:44:46 +0800	[diff] [blame]	3002	static void wait_for_writer(struct btrfs_root *root)
Yan Zheng	7237f18	2009-01-21 12:54:03 -0500	[diff] [blame]	3003	{
				3004	DEFINE_WAIT(wait);
Miao Xie	8b050d3	2014-02-20 18:08:58 +0800	[diff] [blame]	3005
Liu Bo	49e83f5	2017-09-01 16:14:30 -0600	[diff] [blame]	3006	for (;;) {
				3007	prepare_to_wait(&root->log_writer_wait, &wait,
				3008	TASK_UNINTERRUPTIBLE);
				3009	if (!atomic_read(&root->log_writers))
				3010	break;
				3011
Yan Zheng	7237f18	2009-01-21 12:54:03 -0500	[diff] [blame]	3012	mutex_unlock(&root->log_mutex);
Liu Bo	49e83f5	2017-09-01 16:14:30 -0600	[diff] [blame]	3013	schedule();
Filipe Manana	575849e	2015-02-11 11:12:39 +0000	[diff] [blame]	3014	mutex_lock(&root->log_mutex);
Yan Zheng	7237f18	2009-01-21 12:54:03 -0500	[diff] [blame]	3015	}
Liu Bo	49e83f5	2017-09-01 16:14:30 -0600	[diff] [blame]	3016	finish_wait(&root->log_writer_wait, &wait);
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	3017	}
				3018
Miao Xie	8b050d3	2014-02-20 18:08:58 +0800	[diff] [blame]	3019	static inline void btrfs_remove_log_ctx(struct btrfs_root *root,
				3020	struct btrfs_log_ctx *ctx)
				3021	{
				3022	if (!ctx)
				3023	return;
				3024
				3025	mutex_lock(&root->log_mutex);
				3026	list_del_init(&ctx->list);
				3027	mutex_unlock(&root->log_mutex);
				3028	}
				3029
				3030	/*
				3031	* Invoked in log mutex context, or be sure there is no other task which
				3032	* can access the list.
				3033	*/
				3034	static inline void btrfs_remove_all_log_ctxs(struct btrfs_root *root,
				3035	int index, int error)
				3036	{
				3037	struct btrfs_log_ctx *ctx;
Chris Mason	570dd45	2016-10-27 10:42:20 -0700	[diff] [blame]	3038	struct btrfs_log_ctx *safe;
Miao Xie	8b050d3	2014-02-20 18:08:58 +0800	[diff] [blame]	3039
Chris Mason	570dd45	2016-10-27 10:42:20 -0700	[diff] [blame]	3040	list_for_each_entry_safe(ctx, safe, &root->log_ctxs[index], list) {
				3041	list_del_init(&ctx->list);
Miao Xie	8b050d3	2014-02-20 18:08:58 +0800	[diff] [blame]	3042	ctx->log_ret = error;
Chris Mason	570dd45	2016-10-27 10:42:20 -0700	[diff] [blame]	3043	}
Miao Xie	8b050d3	2014-02-20 18:08:58 +0800	[diff] [blame]	3044	}
				3045
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	3046	/*
				3047	* btrfs_sync_log sends a given tree log down to the disk and
				3048	* updates the super blocks to record it. When this call is done,
Chris Mason	12fcfd2	2009-03-24 10:24:20 -0400	[diff] [blame]	3049	* you know that any inodes previously logged are safely on disk only
				3050	* if it returns 0.
				3051	*
				3052	* Any other return value means you need to call btrfs_commit_transaction.
				3053	* Some of the edge cases for fsyncing directories that have had unlinks
				3054	* or renames done in the past mean that sometimes the only safe
				3055	* fsync is to commit the whole FS. When btrfs_sync_log returns -EAGAIN,
				3056	* that has happened.
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	3057	*/
				3058	int btrfs_sync_log(struct btrfs_trans_handle *trans,
Miao Xie	8b050d3	2014-02-20 18:08:58 +0800	[diff] [blame]	3059	struct btrfs_root root, struct btrfs_log_ctx ctx)
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	3060	{
Yan Zheng	7237f18	2009-01-21 12:54:03 -0500	[diff] [blame]	3061	int index1;
				3062	int index2;
Yan, Zheng	8cef4e1	2009-11-12 09:33:26 +0000	[diff] [blame]	3063	int mark;
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	3064	int ret;
Jeff Mahoney	0b246af	2016-06-22 18:54:23 -0400	[diff] [blame]	3065	struct btrfs_fs_info *fs_info = root->fs_info;
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	3066	struct btrfs_root *log = root->log_root;
Jeff Mahoney	0b246af	2016-06-22 18:54:23 -0400	[diff] [blame]	3067	struct btrfs_root *log_root_tree = fs_info->log_root_tree;
Josef Bacik	4203e96	2019-09-30 16:27:25 -0400	[diff] [blame]	3068	struct btrfs_root_item new_root_item;
Miao Xie	bb14a59	2014-02-20 18:08:56 +0800	[diff] [blame]	3069	int log_transid = 0;
Miao Xie	8b050d3	2014-02-20 18:08:58 +0800	[diff] [blame]	3070	struct btrfs_log_ctx root_log_ctx;
Miao Xie	c6adc9c	2013-05-28 10:05:39 +0000	[diff] [blame]	3071	struct blk_plug plug;
Filipe Manana	47876f7	2020-11-25 12:19:28 +0000	[diff] [blame]	3072	u64 log_root_start;
				3073	u64 log_root_level;
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	3074
Yan Zheng	7237f18	2009-01-21 12:54:03 -0500	[diff] [blame]	3075	mutex_lock(&root->log_mutex);
Miao Xie	d1433de	2014-02-20 18:08:59 +0800	[diff] [blame]	3076	log_transid = ctx->log_transid;
				3077	if (root->log_transid_committed >= log_transid) {
Yan Zheng	7237f18	2009-01-21 12:54:03 -0500	[diff] [blame]	3078	mutex_unlock(&root->log_mutex);
Miao Xie	8b050d3	2014-02-20 18:08:58 +0800	[diff] [blame]	3079	return ctx->log_ret;
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	3080	}
Miao Xie	d1433de	2014-02-20 18:08:59 +0800	[diff] [blame]	3081
				3082	index1 = log_transid % 2;
				3083	if (atomic_read(&root->log_commit[index1])) {
Zhaolei	60d53eb	2015-08-17 18:44:46 +0800	[diff] [blame]	3084	wait_log_commit(root, log_transid);
Miao Xie	d1433de	2014-02-20 18:08:59 +0800	[diff] [blame]	3085	mutex_unlock(&root->log_mutex);
				3086	return ctx->log_ret;
				3087	}
				3088	ASSERT(log_transid == root->log_transid);
Yan Zheng	7237f18	2009-01-21 12:54:03 -0500	[diff] [blame]	3089	atomic_set(&root->log_commit[index1], 1);
				3090
				3091	/* wait for previous tree log sync to complete */
				3092	if (atomic_read(&root->log_commit[(index1 + 1) % 2]))
Zhaolei	60d53eb	2015-08-17 18:44:46 +0800	[diff] [blame]	3093	wait_log_commit(root, log_transid - 1);
Miao Xie	48cab2e	2014-02-20 18:08:52 +0800	[diff] [blame]	3094
Yan, Zheng	86df7eb	2009-10-14 09:24:59 -0400	[diff] [blame]	3095	while (1) {
Miao Xie	2ecb792	2012-09-06 04:04:27 -0600	[diff] [blame]	3096	int batch = atomic_read(&root->log_batch);
Chris Mason	cd354ad	2011-10-20 15:45:37 -0400	[diff] [blame]	3097	/* when we're on an ssd, just kick the log commit out */
Jeff Mahoney	0b246af	2016-06-22 18:54:23 -0400	[diff] [blame]	3098	if (!btrfs_test_opt(fs_info, SSD) &&
Miao Xie	27cdeb7	2014-04-02 19:51:05 +0800	[diff] [blame]	3099	test_bit(BTRFS_ROOT_MULTI_LOG_TASKS, &root->state)) {
Yan, Zheng	86df7eb	2009-10-14 09:24:59 -0400	[diff] [blame]	3100	mutex_unlock(&root->log_mutex);
				3101	schedule_timeout_uninterruptible(1);
				3102	mutex_lock(&root->log_mutex);
				3103	}
Zhaolei	60d53eb	2015-08-17 18:44:46 +0800	[diff] [blame]	3104	wait_for_writer(root);
Miao Xie	2ecb792	2012-09-06 04:04:27 -0600	[diff] [blame]	3105	if (batch == atomic_read(&root->log_batch))
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	3106	break;
				3107	}
Chris Mason	d0c803c	2008-09-11 16:17:57 -0400	[diff] [blame]	3108
Chris Mason	12fcfd2	2009-03-24 10:24:20 -0400	[diff] [blame]	3109	/* bail out if we need to do a full commit */
David Sterba	4884b8e	2019-03-20 13:25:34 +0100	[diff] [blame]	3110	if (btrfs_need_log_full_commit(trans)) {
Chris Mason	12fcfd2	2009-03-24 10:24:20 -0400	[diff] [blame]	3111	ret = -EAGAIN;
				3112	mutex_unlock(&root->log_mutex);
				3113	goto out;
				3114	}
				3115
Yan, Zheng	8cef4e1	2009-11-12 09:33:26 +0000	[diff] [blame]	3116	if (log_transid % 2 == 0)
				3117	mark = EXTENT_DIRTY;
				3118	else
				3119	mark = EXTENT_NEW;
				3120
Chris Mason	690587d	2009-10-13 13:29:19 -0400	[diff] [blame]	3121	/* we start IO on all the marked extents here, but we don't actually
				3122	* wait for them until later.
				3123	*/
Miao Xie	c6adc9c	2013-05-28 10:05:39 +0000	[diff] [blame]	3124	blk_start_plug(&plug);
Jeff Mahoney	2ff7e61	2016-06-22 18:54:24 -0400	[diff] [blame]	3125	ret = btrfs_write_marked_extents(fs_info, &log->dirty_log_pages, mark);
Naohiro Aota	b528f46	2021-02-05 23:58:36 +0900	[diff] [blame]	3126	/*
				3127	* -EAGAIN happens when someone, e.g., a concurrent transaction
				3128	* commit, writes a dirty extent in this tree-log commit. This
				3129	* concurrent write will create a hole writing out the extents,
				3130	* and we cannot proceed on a zoned filesystem, requiring
				3131	* sequential writing. While we can bail out to a full commit
				3132	* here, but we can continue hoping the concurrent writing fills
				3133	* the hole.
				3134	*/
				3135	if (ret == -EAGAIN && btrfs_is_zoned(fs_info))
				3136	ret = 0;
Jeff Mahoney	79787ea	2012-03-12 16:03:00 +0100	[diff] [blame]	3137	if (ret) {
Miao Xie	c6adc9c	2013-05-28 10:05:39 +0000	[diff] [blame]	3138	blk_finish_plug(&plug);
Jeff Mahoney	6664283	2016-06-10 18:19:25 -0400	[diff] [blame]	3139	btrfs_abort_transaction(trans, ret);
David Sterba	9078776	2019-03-20 13:28:05 +0100	[diff] [blame]	3140	btrfs_set_log_full_commit(trans);
Jeff Mahoney	79787ea	2012-03-12 16:03:00 +0100	[diff] [blame]	3141	mutex_unlock(&root->log_mutex);
				3142	goto out;
				3143	}
Yan Zheng	7237f18	2009-01-21 12:54:03 -0500	[diff] [blame]	3144
Josef Bacik	4203e96	2019-09-30 16:27:25 -0400	[diff] [blame]	3145	/*
				3146	* We _must_ update under the root->log_mutex in order to make sure we
				3147	* have a consistent view of the log root we are trying to commit at
				3148	* this moment.
				3149	*
				3150	* We _must_ copy this into a local copy, because we are not holding the
				3151	* log_root_tree->log_mutex yet. This is important because when we
				3152	* commit the log_root_tree we must have a consistent view of the
				3153	* log_root_tree when we update the super block to point at the
				3154	* log_root_tree bytenr. If we update the log_root_tree here we'll race
				3155	* with the commit and possibly point at the new block which we may not
				3156	* have written out.
				3157	*/
Yan Zheng	5d4f98a	2009-06-10 10:45:14 -0400	[diff] [blame]	3158	btrfs_set_root_node(&log->root_item, log->node);
Josef Bacik	4203e96	2019-09-30 16:27:25 -0400	[diff] [blame]	3159	memcpy(&new_root_item, &log->root_item, sizeof(new_root_item));
Yan Zheng	7237f18	2009-01-21 12:54:03 -0500	[diff] [blame]	3160
Yan Zheng	7237f18	2009-01-21 12:54:03 -0500	[diff] [blame]	3161	root->log_transid++;
				3162	log->log_transid = root->log_transid;
Josef Bacik	ff782e0	2009-10-08 15:30:04 -0400	[diff] [blame]	3163	root->log_start_pid = 0;
Yan Zheng	7237f18	2009-01-21 12:54:03 -0500	[diff] [blame]	3164	/*
Yan, Zheng	8cef4e1	2009-11-12 09:33:26 +0000	[diff] [blame]	3165	* IO has been started, blocks of the log tree have WRITTEN flag set
				3166	* in their headers. new modifications of the log will be written to
				3167	* new positions. so it's safe to allow log writers to go in.
Yan Zheng	7237f18	2009-01-21 12:54:03 -0500	[diff] [blame]	3168	*/
				3169	mutex_unlock(&root->log_mutex);
				3170
Naohiro Aota	3ddebf2	2021-02-04 19:22:20 +0900	[diff] [blame]	3171	if (btrfs_is_zoned(fs_info)) {
Naohiro Aota	e75f9fd	2021-03-24 23:23:11 +0900	[diff] [blame]	3172	mutex_lock(&fs_info->tree_root->log_mutex);
Naohiro Aota	3ddebf2	2021-02-04 19:22:20 +0900	[diff] [blame]	3173	if (!log_root_tree->node) {
				3174	ret = btrfs_alloc_log_tree_node(trans, log_root_tree);
				3175	if (ret) {
Filipe Manana	ea32af4	2021-07-07 12:23:45 +0100	[diff] [blame]	3176	mutex_unlock(&fs_info->tree_root->log_mutex);
Naohiro Aota	3ddebf2	2021-02-04 19:22:20 +0900	[diff] [blame]	3177	goto out;
				3178	}
				3179	}
Naohiro Aota	e75f9fd	2021-03-24 23:23:11 +0900	[diff] [blame]	3180	mutex_unlock(&fs_info->tree_root->log_mutex);
Naohiro Aota	3ddebf2	2021-02-04 19:22:20 +0900	[diff] [blame]	3181	}
				3182
Naohiro Aota	e75f9fd	2021-03-24 23:23:11 +0900	[diff] [blame]	3183	btrfs_init_log_ctx(&root_log_ctx, NULL);
				3184
				3185	mutex_lock(&log_root_tree->log_mutex);
				3186
Filipe Manana	e3d3b41	2021-03-11 15:13:30 +0000	[diff] [blame]	3187	index2 = log_root_tree->log_transid % 2;
				3188	list_add_tail(&root_log_ctx.list, &log_root_tree->log_ctxs[index2]);
				3189	root_log_ctx.log_transid = log_root_tree->log_transid;
				3190
Josef Bacik	4203e96	2019-09-30 16:27:25 -0400	[diff] [blame]	3191	/*
				3192	* Now we are safe to update the log_root_tree because we're under the
				3193	* log_mutex, and we're a current writer so we're holding the commit
				3194	* open until we drop the log_mutex.
				3195	*/
				3196	ret = update_log_root(trans, log, &new_root_item);
Yan, Zheng	4a500fd	2010-05-16 10:49:59 -0400	[diff] [blame]	3197	if (ret) {
Miao Xie	d1433de	2014-02-20 18:08:59 +0800	[diff] [blame]	3198	if (!list_empty(&root_log_ctx.list))
				3199	list_del_init(&root_log_ctx.list);
				3200
Miao Xie	c6adc9c	2013-05-28 10:05:39 +0000	[diff] [blame]	3201	blk_finish_plug(&plug);
David Sterba	9078776	2019-03-20 13:28:05 +0100	[diff] [blame]	3202	btrfs_set_log_full_commit(trans);
Miao Xie	995946d	2014-04-02 19:51:06 +0800	[diff] [blame]	3203
Jeff Mahoney	79787ea	2012-03-12 16:03:00 +0100	[diff] [blame]	3204	if (ret != -ENOSPC) {
Jeff Mahoney	6664283	2016-06-10 18:19:25 -0400	[diff] [blame]	3205	btrfs_abort_transaction(trans, ret);
Jeff Mahoney	79787ea	2012-03-12 16:03:00 +0100	[diff] [blame]	3206	mutex_unlock(&log_root_tree->log_mutex);
				3207	goto out;
				3208	}
Jeff Mahoney	bf89d38	2016-09-09 20:42:44 -0400	[diff] [blame]	3209	btrfs_wait_tree_log_extents(log, mark);
Yan, Zheng	4a500fd	2010-05-16 10:49:59 -0400	[diff] [blame]	3210	mutex_unlock(&log_root_tree->log_mutex);
				3211	ret = -EAGAIN;
				3212	goto out;
				3213	}
				3214
Miao Xie	d1433de	2014-02-20 18:08:59 +0800	[diff] [blame]	3215	if (log_root_tree->log_transid_committed >= root_log_ctx.log_transid) {
Forrest Liu	3da5ab5	2015-01-30 19:42:12 +0800	[diff] [blame]	3216	blk_finish_plug(&plug);
Chris Mason	cbd60aa	2016-09-06 05:37:40 -0700	[diff] [blame]	3217	list_del_init(&root_log_ctx.list);
Miao Xie	d1433de	2014-02-20 18:08:59 +0800	[diff] [blame]	3218	mutex_unlock(&log_root_tree->log_mutex);
				3219	ret = root_log_ctx.log_ret;
				3220	goto out;
				3221	}
Miao Xie	8b050d3	2014-02-20 18:08:58 +0800	[diff] [blame]	3222
Miao Xie	d1433de	2014-02-20 18:08:59 +0800	[diff] [blame]	3223	index2 = root_log_ctx.log_transid % 2;
Yan Zheng	7237f18	2009-01-21 12:54:03 -0500	[diff] [blame]	3224	if (atomic_read(&log_root_tree->log_commit[index2])) {
Miao Xie	c6adc9c	2013-05-28 10:05:39 +0000	[diff] [blame]	3225	blk_finish_plug(&plug);
Jeff Mahoney	bf89d38	2016-09-09 20:42:44 -0400	[diff] [blame]	3226	ret = btrfs_wait_tree_log_extents(log, mark);
Zhaolei	60d53eb	2015-08-17 18:44:46 +0800	[diff] [blame]	3227	wait_log_commit(log_root_tree,
Miao Xie	d1433de	2014-02-20 18:08:59 +0800	[diff] [blame]	3228	root_log_ctx.log_transid);
Yan Zheng	7237f18	2009-01-21 12:54:03 -0500	[diff] [blame]	3229	mutex_unlock(&log_root_tree->log_mutex);
Filipe Manana	5ab5e44	2014-11-13 16:59:53 +0000	[diff] [blame]	3230	if (!ret)
				3231	ret = root_log_ctx.log_ret;
Yan Zheng	7237f18	2009-01-21 12:54:03 -0500	[diff] [blame]	3232	goto out;
				3233	}
Miao Xie	d1433de	2014-02-20 18:08:59 +0800	[diff] [blame]	3234	ASSERT(root_log_ctx.log_transid == log_root_tree->log_transid);
Yan Zheng	7237f18	2009-01-21 12:54:03 -0500	[diff] [blame]	3235	atomic_set(&log_root_tree->log_commit[index2], 1);
				3236
Chris Mason	12fcfd2	2009-03-24 10:24:20 -0400	[diff] [blame]	3237	if (atomic_read(&log_root_tree->log_commit[(index2 + 1) % 2])) {
Zhaolei	60d53eb	2015-08-17 18:44:46 +0800	[diff] [blame]	3238	wait_log_commit(log_root_tree,
Miao Xie	d1433de	2014-02-20 18:08:59 +0800	[diff] [blame]	3239	root_log_ctx.log_transid - 1);
Chris Mason	12fcfd2	2009-03-24 10:24:20 -0400	[diff] [blame]	3240	}
Yan Zheng	7237f18	2009-01-21 12:54:03 -0500	[diff] [blame]	3241
Chris Mason	12fcfd2	2009-03-24 10:24:20 -0400	[diff] [blame]	3242	/*
				3243	* now that we've moved on to the tree of log tree roots,
				3244	* check the full commit flag again
				3245	*/
David Sterba	4884b8e	2019-03-20 13:25:34 +0100	[diff] [blame]	3246	if (btrfs_need_log_full_commit(trans)) {
Miao Xie	c6adc9c	2013-05-28 10:05:39 +0000	[diff] [blame]	3247	blk_finish_plug(&plug);
Jeff Mahoney	bf89d38	2016-09-09 20:42:44 -0400	[diff] [blame]	3248	btrfs_wait_tree_log_extents(log, mark);
Chris Mason	12fcfd2	2009-03-24 10:24:20 -0400	[diff] [blame]	3249	mutex_unlock(&log_root_tree->log_mutex);
				3250	ret = -EAGAIN;
				3251	goto out_wake_log_root;
				3252	}
Yan Zheng	7237f18	2009-01-21 12:54:03 -0500	[diff] [blame]	3253
Jeff Mahoney	2ff7e61	2016-06-22 18:54:24 -0400	[diff] [blame]	3254	ret = btrfs_write_marked_extents(fs_info,
Miao Xie	c6adc9c	2013-05-28 10:05:39 +0000	[diff] [blame]	3255	&log_root_tree->dirty_log_pages,
				3256	EXTENT_DIRTY \| EXTENT_NEW);
				3257	blk_finish_plug(&plug);
Naohiro Aota	b528f46	2021-02-05 23:58:36 +0900	[diff] [blame]	3258	/*
				3259	* As described above, -EAGAIN indicates a hole in the extents. We
				3260	* cannot wait for these write outs since the waiting cause a
				3261	* deadlock. Bail out to the full commit instead.
				3262	*/
				3263	if (ret == -EAGAIN && btrfs_is_zoned(fs_info)) {
				3264	btrfs_set_log_full_commit(trans);
				3265	btrfs_wait_tree_log_extents(log, mark);
				3266	mutex_unlock(&log_root_tree->log_mutex);
				3267	goto out_wake_log_root;
				3268	} else if (ret) {
David Sterba	9078776	2019-03-20 13:28:05 +0100	[diff] [blame]	3269	btrfs_set_log_full_commit(trans);
Jeff Mahoney	6664283	2016-06-10 18:19:25 -0400	[diff] [blame]	3270	btrfs_abort_transaction(trans, ret);
Jeff Mahoney	79787ea	2012-03-12 16:03:00 +0100	[diff] [blame]	3271	mutex_unlock(&log_root_tree->log_mutex);
				3272	goto out_wake_log_root;
				3273	}
Jeff Mahoney	bf89d38	2016-09-09 20:42:44 -0400	[diff] [blame]	3274	ret = btrfs_wait_tree_log_extents(log, mark);
Filipe Manana	5ab5e44	2014-11-13 16:59:53 +0000	[diff] [blame]	3275	if (!ret)
Jeff Mahoney	bf89d38	2016-09-09 20:42:44 -0400	[diff] [blame]	3276	ret = btrfs_wait_tree_log_extents(log_root_tree,
				3277	EXTENT_NEW \| EXTENT_DIRTY);
Filipe Manana	5ab5e44	2014-11-13 16:59:53 +0000	[diff] [blame]	3278	if (ret) {
David Sterba	9078776	2019-03-20 13:28:05 +0100	[diff] [blame]	3279	btrfs_set_log_full_commit(trans);
Filipe Manana	5ab5e44	2014-11-13 16:59:53 +0000	[diff] [blame]	3280	mutex_unlock(&log_root_tree->log_mutex);
				3281	goto out_wake_log_root;
				3282	}
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	3283
Filipe Manana	47876f7	2020-11-25 12:19:28 +0000	[diff] [blame]	3284	log_root_start = log_root_tree->node->start;
				3285	log_root_level = btrfs_header_level(log_root_tree->node);
Yan Zheng	7237f18	2009-01-21 12:54:03 -0500	[diff] [blame]	3286	log_root_tree->log_transid++;
Yan Zheng	7237f18	2009-01-21 12:54:03 -0500	[diff] [blame]	3287	mutex_unlock(&log_root_tree->log_mutex);
				3288
				3289	/*
Filipe Manana	47876f7	2020-11-25 12:19:28 +0000	[diff] [blame]	3290	* Here we are guaranteed that nobody is going to write the superblock
				3291	* for the current transaction before us and that neither we do write
				3292	* our superblock before the previous transaction finishes its commit
				3293	* and writes its superblock, because:
				3294	*
				3295	* 1) We are holding a handle on the current transaction, so no body
				3296	* can commit it until we release the handle;
				3297	*
				3298	* 2) Before writing our superblock we acquire the tree_log_mutex, so
				3299	* if the previous transaction is still committing, and hasn't yet
				3300	* written its superblock, we wait for it to do it, because a
				3301	* transaction commit acquires the tree_log_mutex when the commit
				3302	* begins and releases it only after writing its superblock.
Yan Zheng	7237f18	2009-01-21 12:54:03 -0500	[diff] [blame]	3303	*/
Filipe Manana	47876f7	2020-11-25 12:19:28 +0000	[diff] [blame]	3304	mutex_lock(&fs_info->tree_log_mutex);
Josef Bacik	165ea85	2021-05-19 17:15:53 -0400	[diff] [blame]	3305
				3306	/*
				3307	* The previous transaction writeout phase could have failed, and thus
				3308	* marked the fs in an error state. We must not commit here, as we
				3309	* could have updated our generation in the super_for_commit and
				3310	* writing the super here would result in transid mismatches. If there
				3311	* is an error here just bail.
				3312	*/
				3313	if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state)) {
				3314	ret = -EIO;
				3315	btrfs_set_log_full_commit(trans);
				3316	btrfs_abort_transaction(trans, ret);
				3317	mutex_unlock(&fs_info->tree_log_mutex);
				3318	goto out_wake_log_root;
				3319	}
				3320
Filipe Manana	47876f7	2020-11-25 12:19:28 +0000	[diff] [blame]	3321	btrfs_set_super_log_root(fs_info->super_for_commit, log_root_start);
				3322	btrfs_set_super_log_root_level(fs_info->super_for_commit, log_root_level);
David Sterba	eece6a9	2017-02-10 19:04:32 +0100	[diff] [blame]	3323	ret = write_all_supers(fs_info, 1);
Filipe Manana	47876f7	2020-11-25 12:19:28 +0000	[diff] [blame]	3324	mutex_unlock(&fs_info->tree_log_mutex);
Stefan Behrens	5af3e8c	2012-08-01 18:56:49 +0200	[diff] [blame]	3325	if (ret) {
David Sterba	9078776	2019-03-20 13:28:05 +0100	[diff] [blame]	3326	btrfs_set_log_full_commit(trans);
Jeff Mahoney	6664283	2016-06-10 18:19:25 -0400	[diff] [blame]	3327	btrfs_abort_transaction(trans, ret);
Stefan Behrens	5af3e8c	2012-08-01 18:56:49 +0200	[diff] [blame]	3328	goto out_wake_log_root;
				3329	}
Yan Zheng	7237f18	2009-01-21 12:54:03 -0500	[diff] [blame]	3330
Filipe Manana	e1a6d26	2021-07-20 16:03:41 +0100	[diff] [blame]	3331	/*
				3332	* We know there can only be one task here, since we have not yet set
				3333	* root->log_commit[index1] to 0 and any task attempting to sync the
				3334	* log must wait for the previous log transaction to commit if it's
				3335	* still in progress or wait for the current log transaction commit if
				3336	* someone else already started it. We use <= and not < because the
				3337	* first log transaction has an ID of 0.
				3338	*/
				3339	ASSERT(root->last_log_commit <= log_transid);
				3340	root->last_log_commit = log_transid;
Chris Mason	257c62e	2009-10-13 13:21:08 -0400	[diff] [blame]	3341
Chris Mason	12fcfd2	2009-03-24 10:24:20 -0400	[diff] [blame]	3342	out_wake_log_root:
Chris Mason	570dd45	2016-10-27 10:42:20 -0700	[diff] [blame]	3343	mutex_lock(&log_root_tree->log_mutex);
Miao Xie	8b050d3	2014-02-20 18:08:58 +0800	[diff] [blame]	3344	btrfs_remove_all_log_ctxs(log_root_tree, index2, ret);
				3345
Miao Xie	d1433de	2014-02-20 18:08:59 +0800	[diff] [blame]	3346	log_root_tree->log_transid_committed++;
Yan Zheng	7237f18	2009-01-21 12:54:03 -0500	[diff] [blame]	3347	atomic_set(&log_root_tree->log_commit[index2], 0);
Miao Xie	d1433de	2014-02-20 18:08:59 +0800	[diff] [blame]	3348	mutex_unlock(&log_root_tree->log_mutex);
				3349
David Sterba	33a9eca	2015-10-10 18:35:10 +0200	[diff] [blame]	3350	/*
David Sterba	093258e	2018-02-26 16:15:17 +0100	[diff] [blame]	3351	* The barrier before waitqueue_active (in cond_wake_up) is needed so
				3352	* all the updates above are seen by the woken threads. It might not be
				3353	* necessary, but proving that seems to be hard.
David Sterba	33a9eca	2015-10-10 18:35:10 +0200	[diff] [blame]	3354	*/
David Sterba	093258e	2018-02-26 16:15:17 +0100	[diff] [blame]	3355	cond_wake_up(&log_root_tree->log_commit_wait[index2]);
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	3356	out:
Miao Xie	d1433de	2014-02-20 18:08:59 +0800	[diff] [blame]	3357	mutex_lock(&root->log_mutex);
Chris Mason	570dd45	2016-10-27 10:42:20 -0700	[diff] [blame]	3358	btrfs_remove_all_log_ctxs(root, index1, ret);
Miao Xie	d1433de	2014-02-20 18:08:59 +0800	[diff] [blame]	3359	root->log_transid_committed++;
Yan Zheng	7237f18	2009-01-21 12:54:03 -0500	[diff] [blame]	3360	atomic_set(&root->log_commit[index1], 0);
Miao Xie	d1433de	2014-02-20 18:08:59 +0800	[diff] [blame]	3361	mutex_unlock(&root->log_mutex);
Miao Xie	8b050d3	2014-02-20 18:08:58 +0800	[diff] [blame]	3362
David Sterba	33a9eca	2015-10-10 18:35:10 +0200	[diff] [blame]	3363	/*
David Sterba	093258e	2018-02-26 16:15:17 +0100	[diff] [blame]	3364	* The barrier before waitqueue_active (in cond_wake_up) is needed so
				3365	* all the updates above are seen by the woken threads. It might not be
				3366	* necessary, but proving that seems to be hard.
David Sterba	33a9eca	2015-10-10 18:35:10 +0200	[diff] [blame]	3367	*/
David Sterba	093258e	2018-02-26 16:15:17 +0100	[diff] [blame]	3368	cond_wake_up(&root->log_commit_wait[index1]);
Chris Mason	b31eabd	2011-01-31 16:48:24 -0500	[diff] [blame]	3369	return ret;
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	3370	}
				3371
Yan, Zheng	4a500fd	2010-05-16 10:49:59 -0400	[diff] [blame]	3372	static void free_log_tree(struct btrfs_trans_handle *trans,
				3373	struct btrfs_root *log)
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	3374	{
				3375	int ret;
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	3376	struct walk_control wc = {
				3377	.free = 1,
				3378	.process_func = process_one_buffer
				3379	};
				3380
Naohiro Aota	3ddebf2	2021-02-04 19:22:20 +0900	[diff] [blame]	3381	if (log->node) {
				3382	ret = walk_log_tree(trans, log, &wc);
				3383	if (ret) {
				3384	if (trans)
				3385	btrfs_abort_transaction(trans, ret);
				3386	else
				3387	btrfs_handle_fs_error(log->fs_info, ret, NULL);
				3388	}
Jeff Mahoney	374b0e2	2018-09-06 16:59:33 -0400	[diff] [blame]	3389	}
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	3390
Filipe Manana	59b0713	2018-11-09 10:43:08 +0000	[diff] [blame]	3391	clear_extent_bits(&log->dirty_log_pages, 0, (u64)-1,
				3392	EXTENT_DIRTY \| EXTENT_NEW \| EXTENT_NEED_WAIT);
Filipe Manana	e289f03	2020-05-18 12:14:50 +0100	[diff] [blame]	3393	extent_io_tree_release(&log->log_csum_range);
Naohiro Aota	d3575156	2021-02-04 19:21:54 +0900	[diff] [blame]	3394
				3395	if (trans && log->node)
				3396	btrfs_redirty_list_add(trans->transaction, log->node);
Josef Bacik	0024652	2020-01-24 09:33:01 -0500	[diff] [blame]	3397	btrfs_put_root(log);
Yan, Zheng	4a500fd	2010-05-16 10:49:59 -0400	[diff] [blame]	3398	}
				3399
				3400	/*
				3401	* free all the extents used by the tree log. This should be called
				3402	* at commit time of the full transaction
				3403	*/
				3404	int btrfs_free_log(struct btrfs_trans_handle trans, struct btrfs_root root)
				3405	{
				3406	if (root->log_root) {
				3407	free_log_tree(trans, root->log_root);
				3408	root->log_root = NULL;
Filipe Manana	e7a7981	2020-06-15 10:38:44 +0100	[diff] [blame]	3409	clear_bit(BTRFS_ROOT_HAS_LOG_TREE, &root->state);
Yan, Zheng	4a500fd	2010-05-16 10:49:59 -0400	[diff] [blame]	3410	}
				3411	return 0;
				3412	}
				3413
				3414	int btrfs_free_log_root_tree(struct btrfs_trans_handle *trans,
				3415	struct btrfs_fs_info *fs_info)
				3416	{
				3417	if (fs_info->log_root_tree) {
				3418	free_log_tree(trans, fs_info->log_root_tree);
				3419	fs_info->log_root_tree = NULL;
Filipe Manana	47876f7	2020-11-25 12:19:28 +0000	[diff] [blame]	3420	clear_bit(BTRFS_ROOT_HAS_LOG_TREE, &fs_info->tree_root->state);
Yan, Zheng	4a500fd	2010-05-16 10:49:59 -0400	[diff] [blame]	3421	}
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	3422	return 0;
				3423	}
				3424
				3425	/*
Filipe Manana	6e8e777	2021-07-27 11:24:44 +0100	[diff] [blame]	3426	* Check if an inode was logged in the current transaction. This may often
				3427	* return some false positives, because logged_trans is an in memory only field,
				3428	* not persisted anywhere. This is meant to be used in contexts where a false
				3429	* positive has no functional consequences.
Filipe Manana	803f0f6	2019-06-19 13:05:39 +0100	[diff] [blame]	3430	*/
				3431	static bool inode_logged(struct btrfs_trans_handle *trans,
				3432	struct btrfs_inode *inode)
				3433	{
				3434	if (inode->logged_trans == trans->transid)
				3435	return true;
				3436
Filipe Manana	6e8e777	2021-07-27 11:24:44 +0100	[diff] [blame]	3437	/*
				3438	* The inode's logged_trans is always 0 when we load it (because it is
				3439	* not persisted in the inode item or elsewhere). So if it is 0, the
Filipe Manana	d135a53	2021-07-29 15:29:01 +0100	[diff] [blame]	3440	* inode was last modified in the current transaction then the inode may
				3441	* have been logged before in the current transaction, then evicted and
				3442	* loaded again in the current transaction - or may have never been logged
				3443	* in the current transaction, but since we can not be sure, we have to
				3444	* assume it was, otherwise our callers can leave an inconsistent log.
Filipe Manana	6e8e777	2021-07-27 11:24:44 +0100	[diff] [blame]	3445	*/
				3446	if (inode->logged_trans == 0 &&
				3447	inode->last_trans == trans->transid &&
Filipe Manana	803f0f6	2019-06-19 13:05:39 +0100	[diff] [blame]	3448	!test_bit(BTRFS_FS_LOG_RECOVERING, &trans->fs_info->flags))
				3449	return true;
				3450
				3451	return false;
				3452	}
				3453
				3454	/*
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	3455	* If both a file and directory are logged, and unlinks or renames are
				3456	* mixed in, we have a few interesting corners:
				3457	*
				3458	* create file X in dir Y
				3459	* link file X to X.link in dir Y
				3460	* fsync file X
				3461	* unlink file X but leave X.link
				3462	* fsync dir Y
				3463	*
				3464	* After a crash we would expect only X.link to exist. But file X
				3465	* didn't get fsync'd again so the log has back refs for X and X.link.
				3466	*
				3467	* We solve this by removing directory entries and inode backrefs from the
				3468	* log when a file that was logged in the current transaction is
				3469	* unlinked. Any later fsync will include the updated log entries, and
				3470	* we'll be able to reconstruct the proper directory items from backrefs.
				3471	*
				3472	* This optimizations allows us to avoid relogging the entire inode
				3473	* or the entire directory.
				3474	*/
				3475	int btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans,
				3476	struct btrfs_root *root,
				3477	const char *name, int name_len,
Nikolay Borisov	49f34d1	2017-01-18 00:31:32 +0200	[diff] [blame]	3478	struct btrfs_inode *dir, u64 index)
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	3479	{
				3480	struct btrfs_root *log;
				3481	struct btrfs_dir_item *di;
				3482	struct btrfs_path *path;
				3483	int ret;
Yan, Zheng	4a500fd	2010-05-16 10:49:59 -0400	[diff] [blame]	3484	int err = 0;
Nikolay Borisov	49f34d1	2017-01-18 00:31:32 +0200	[diff] [blame]	3485	u64 dir_ino = btrfs_ino(dir);
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	3486
Filipe Manana	803f0f6	2019-06-19 13:05:39 +0100	[diff] [blame]	3487	if (!inode_logged(trans, dir))
Chris Mason	3a5f1d4	2008-09-11 15:53:37 -0400	[diff] [blame]	3488	return 0;
				3489
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	3490	ret = join_running_log_trans(root);
				3491	if (ret)
				3492	return 0;
				3493
Nikolay Borisov	49f34d1	2017-01-18 00:31:32 +0200	[diff] [blame]	3494	mutex_lock(&dir->log_mutex);
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	3495
				3496	log = root->log_root;
				3497	path = btrfs_alloc_path();
Tsutomu Itoh	a62f44a	2011-04-25 19:43:51 -0400	[diff] [blame]	3498	if (!path) {
				3499	err = -ENOMEM;
				3500	goto out_unlock;
				3501	}
liubo	2a29edc	2011-01-26 06:22:08 +0000	[diff] [blame]	3502
Li Zefan	33345d01	2011-04-20 10:31:50 +0800	[diff] [blame]	3503	di = btrfs_lookup_dir_item(trans, log, path, dir_ino,
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	3504	name, name_len, -1);
Yan, Zheng	4a500fd	2010-05-16 10:49:59 -0400	[diff] [blame]	3505	if (IS_ERR(di)) {
				3506	err = PTR_ERR(di);
				3507	goto fail;
				3508	}
				3509	if (di) {
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	3510	ret = btrfs_delete_one_dir_name(trans, log, path, di);
Josef Bacik	3650860	2013-04-25 16:23:32 -0400	[diff] [blame]	3511	if (ret) {
				3512	err = ret;
				3513	goto fail;
				3514	}
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	3515	}
David Sterba	b3b4aa7	2011-04-21 01:20:15 +0200	[diff] [blame]	3516	btrfs_release_path(path);
Li Zefan	33345d01	2011-04-20 10:31:50 +0800	[diff] [blame]	3517	di = btrfs_lookup_dir_index_item(trans, log, path, dir_ino,
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	3518	index, name, name_len, -1);
Yan, Zheng	4a500fd	2010-05-16 10:49:59 -0400	[diff] [blame]	3519	if (IS_ERR(di)) {
				3520	err = PTR_ERR(di);
				3521	goto fail;
				3522	}
				3523	if (di) {
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	3524	ret = btrfs_delete_one_dir_name(trans, log, path, di);
Josef Bacik	3650860	2013-04-25 16:23:32 -0400	[diff] [blame]	3525	if (ret) {
				3526	err = ret;
				3527	goto fail;
				3528	}
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	3529	}
				3530
Filipe Manana	ddffcf6	2021-01-27 10:34:54 +0000	[diff] [blame]	3531	/*
				3532	* We do not need to update the size field of the directory's inode item
				3533	* because on log replay we update the field to reflect all existing
				3534	* entries in the directory (see overwrite_item()).
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	3535	*/
Yan, Zheng	4a500fd	2010-05-16 10:49:59 -0400	[diff] [blame]	3536	fail:
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	3537	btrfs_free_path(path);
Tsutomu Itoh	a62f44a	2011-04-25 19:43:51 -0400	[diff] [blame]	3538	out_unlock:
Nikolay Borisov	49f34d1	2017-01-18 00:31:32 +0200	[diff] [blame]	3539	mutex_unlock(&dir->log_mutex);
Josef Bacik	fb2fecb	2020-08-10 17:31:16 -0400	[diff] [blame]	3540	if (err == -ENOSPC) {
David Sterba	9078776	2019-03-20 13:28:05 +0100	[diff] [blame]	3541	btrfs_set_log_full_commit(trans);
Josef Bacik	fb2fecb	2020-08-10 17:31:16 -0400	[diff] [blame]	3542	err = 0;
				3543	} else if (err < 0 && err != -ENOENT) {
				3544	/* ENOENT can be returned if the entry hasn't been fsynced yet */
				3545	btrfs_abort_transaction(trans, err);
				3546	}
Jeff Mahoney	79787ea	2012-03-12 16:03:00 +0100	[diff] [blame]	3547
Chris Mason	12fcfd2	2009-03-24 10:24:20 -0400	[diff] [blame]	3548	btrfs_end_log_trans(root);
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	3549
Andi Kleen	411fc6b	2010-10-29 15:14:31 -0400	[diff] [blame]	3550	return err;
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	3551	}
				3552
				3553	/* see comments for btrfs_del_dir_entries_in_log */
				3554	int btrfs_del_inode_ref_in_log(struct btrfs_trans_handle *trans,
				3555	struct btrfs_root *root,
				3556	const char *name, int name_len,
Nikolay Borisov	a491abb	2017-01-18 00:31:33 +0200	[diff] [blame]	3557	struct btrfs_inode *inode, u64 dirid)
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	3558	{
				3559	struct btrfs_root *log;
				3560	u64 index;
				3561	int ret;
				3562
Filipe Manana	803f0f6	2019-06-19 13:05:39 +0100	[diff] [blame]	3563	if (!inode_logged(trans, inode))
Chris Mason	3a5f1d4	2008-09-11 15:53:37 -0400	[diff] [blame]	3564	return 0;
				3565
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	3566	ret = join_running_log_trans(root);
				3567	if (ret)
				3568	return 0;
				3569	log = root->log_root;
Nikolay Borisov	a491abb	2017-01-18 00:31:33 +0200	[diff] [blame]	3570	mutex_lock(&inode->log_mutex);
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	3571
Nikolay Borisov	a491abb	2017-01-18 00:31:33 +0200	[diff] [blame]	3572	ret = btrfs_del_inode_ref(trans, log, name, name_len, btrfs_ino(inode),
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	3573	dirid, &index);
Nikolay Borisov	a491abb	2017-01-18 00:31:33 +0200	[diff] [blame]	3574	mutex_unlock(&inode->log_mutex);
Yan, Zheng	4a500fd	2010-05-16 10:49:59 -0400	[diff] [blame]	3575	if (ret == -ENOSPC) {
David Sterba	9078776	2019-03-20 13:28:05 +0100	[diff] [blame]	3576	btrfs_set_log_full_commit(trans);
Yan, Zheng	4a500fd	2010-05-16 10:49:59 -0400	[diff] [blame]	3577	ret = 0;
Jeff Mahoney	79787ea	2012-03-12 16:03:00 +0100	[diff] [blame]	3578	} else if (ret < 0 && ret != -ENOENT)
Jeff Mahoney	6664283	2016-06-10 18:19:25 -0400	[diff] [blame]	3579	btrfs_abort_transaction(trans, ret);
Chris Mason	12fcfd2	2009-03-24 10:24:20 -0400	[diff] [blame]	3580	btrfs_end_log_trans(root);
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	3581
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	3582	return ret;
				3583	}
				3584
				3585	/*
				3586	* creates a range item in the log for 'dirid'. first_offset and
				3587	* last_offset tell us which parts of the key space the log should
				3588	* be considered authoritative for.
				3589	*/
				3590	static noinline int insert_dir_log_key(struct btrfs_trans_handle *trans,
				3591	struct btrfs_root *log,
				3592	struct btrfs_path *path,
				3593	int key_type, u64 dirid,
				3594	u64 first_offset, u64 last_offset)
				3595	{
				3596	int ret;
				3597	struct btrfs_key key;
				3598	struct btrfs_dir_log_item *item;
				3599
				3600	key.objectid = dirid;
				3601	key.offset = first_offset;
				3602	if (key_type == BTRFS_DIR_ITEM_KEY)
				3603	key.type = BTRFS_DIR_LOG_ITEM_KEY;
				3604	else
				3605	key.type = BTRFS_DIR_LOG_INDEX_KEY;
				3606	ret = btrfs_insert_empty_item(trans, log, path, &key, sizeof(*item));
Yan, Zheng	4a500fd	2010-05-16 10:49:59 -0400	[diff] [blame]	3607	if (ret)
				3608	return ret;
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	3609
				3610	item = btrfs_item_ptr(path->nodes[0], path->slots[0],
				3611	struct btrfs_dir_log_item);
				3612	btrfs_set_dir_log_end(path->nodes[0], item, last_offset);
				3613	btrfs_mark_buffer_dirty(path->nodes[0]);
David Sterba	b3b4aa7	2011-04-21 01:20:15 +0200	[diff] [blame]	3614	btrfs_release_path(path);
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	3615	return 0;
				3616	}
				3617
				3618	/*
				3619	* log all the items included in the current transaction for a given
				3620	* directory. This also creates the range items in the log tree required
				3621	* to replay anything deleted before the fsync
				3622	*/
				3623	static noinline int log_dir_items(struct btrfs_trans_handle *trans,
Nikolay Borisov	684a577	2017-01-18 00:31:41 +0200	[diff] [blame]	3624	struct btrfs_root root, struct btrfs_inode inode,
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	3625	struct btrfs_path *path,
				3626	struct btrfs_path *dst_path, int key_type,
Filipe Manana	2f2ff0e	2015-03-20 17:19:46 +0000	[diff] [blame]	3627	struct btrfs_log_ctx *ctx,
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	3628	u64 min_offset, u64 *last_offset_ret)
				3629	{
				3630	struct btrfs_key min_key;
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	3631	struct btrfs_root *log = root->log_root;
				3632	struct extent_buffer *src;
Yan, Zheng	4a500fd	2010-05-16 10:49:59 -0400	[diff] [blame]	3633	int err = 0;
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	3634	int ret;
				3635	int i;
				3636	int nritems;
				3637	u64 first_offset = min_offset;
				3638	u64 last_offset = (u64)-1;
Nikolay Borisov	684a577	2017-01-18 00:31:41 +0200	[diff] [blame]	3639	u64 ino = btrfs_ino(inode);
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	3640
				3641	log = root->log_root;
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	3642
Li Zefan	33345d01	2011-04-20 10:31:50 +0800	[diff] [blame]	3643	min_key.objectid = ino;
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	3644	min_key.type = key_type;
				3645	min_key.offset = min_offset;
				3646
Filipe David Borba Manana	6174d3c	2013-10-01 16:13:42 +0100	[diff] [blame]	3647	ret = btrfs_search_forward(root, &min_key, path, trans->transid);
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	3648
				3649	/*
				3650	* we didn't find anything from this transaction, see if there
				3651	* is anything at all
				3652	*/
Li Zefan	33345d01	2011-04-20 10:31:50 +0800	[diff] [blame]	3653	if (ret != 0 \|\| min_key.objectid != ino \|\| min_key.type != key_type) {
				3654	min_key.objectid = ino;
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	3655	min_key.type = key_type;
				3656	min_key.offset = (u64)-1;
David Sterba	b3b4aa7	2011-04-21 01:20:15 +0200	[diff] [blame]	3657	btrfs_release_path(path);
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	3658	ret = btrfs_search_slot(NULL, root, &min_key, path, 0, 0);
				3659	if (ret < 0) {
David Sterba	b3b4aa7	2011-04-21 01:20:15 +0200	[diff] [blame]	3660	btrfs_release_path(path);
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	3661	return ret;
				3662	}
Li Zefan	33345d01	2011-04-20 10:31:50 +0800	[diff] [blame]	3663	ret = btrfs_previous_item(root, path, ino, key_type);
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	3664
				3665	/* if ret == 0 there are items for this type,
				3666	* create a range to tell us the last key of this type.
				3667	* otherwise, there are no items in this directory after
				3668	* *min_offset, and we create a range to indicate that.
				3669	*/
				3670	if (ret == 0) {
				3671	struct btrfs_key tmp;
				3672	btrfs_item_key_to_cpu(path->nodes[0], &tmp,
				3673	path->slots[0]);
Chris Mason	d397712	2009-01-05 21:25:51 -0500	[diff] [blame]	3674	if (key_type == tmp.type)
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	3675	first_offset = max(min_offset, tmp.offset) + 1;
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	3676	}
				3677	goto done;
				3678	}
				3679
				3680	/* go backward to find any previous key */
Li Zefan	33345d01	2011-04-20 10:31:50 +0800	[diff] [blame]	3681	ret = btrfs_previous_item(root, path, ino, key_type);
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	3682	if (ret == 0) {
				3683	struct btrfs_key tmp;
				3684	btrfs_item_key_to_cpu(path->nodes[0], &tmp, path->slots[0]);
				3685	if (key_type == tmp.type) {
				3686	first_offset = tmp.offset;
				3687	ret = overwrite_item(trans, log, dst_path,
				3688	path->nodes[0], path->slots[0],
				3689	&tmp);
Yan, Zheng	4a500fd	2010-05-16 10:49:59 -0400	[diff] [blame]	3690	if (ret) {
				3691	err = ret;
				3692	goto done;
				3693	}
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	3694	}
				3695	}
David Sterba	b3b4aa7	2011-04-21 01:20:15 +0200	[diff] [blame]	3696	btrfs_release_path(path);
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	3697
Josef Bacik	2cc8334	2019-03-06 17:13:04 -0500	[diff] [blame]	3698	/*
				3699	* Find the first key from this transaction again. See the note for
				3700	* log_new_dir_dentries, if we're logging a directory recursively we
				3701	* won't be holding its i_mutex, which means we can modify the directory
				3702	* while we're logging it. If we remove an entry between our first
				3703	* search and this search we'll not find the key again and can just
				3704	* bail.
				3705	*/
Filipe Manana	bb56f02	2020-09-14 15:27:50 +0100	[diff] [blame]	3706	search:
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	3707	ret = btrfs_search_slot(NULL, root, &min_key, path, 0, 0);
Josef Bacik	2cc8334	2019-03-06 17:13:04 -0500	[diff] [blame]	3708	if (ret != 0)
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	3709	goto done;
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	3710
				3711	/*
				3712	* we have a block from this transaction, log every item in it
				3713	* from our directory
				3714	*/
Chris Mason	d397712	2009-01-05 21:25:51 -0500	[diff] [blame]	3715	while (1) {
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	3716	struct btrfs_key tmp;
				3717	src = path->nodes[0];
				3718	nritems = btrfs_header_nritems(src);
				3719	for (i = path->slots[0]; i < nritems; i++) {
Filipe Manana	2f2ff0e	2015-03-20 17:19:46 +0000	[diff] [blame]	3720	struct btrfs_dir_item *di;
				3721
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	3722	btrfs_item_key_to_cpu(src, &min_key, i);
				3723
Li Zefan	33345d01	2011-04-20 10:31:50 +0800	[diff] [blame]	3724	if (min_key.objectid != ino \|\| min_key.type != key_type)
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	3725	goto done;
Filipe Manana	bb56f02	2020-09-14 15:27:50 +0100	[diff] [blame]	3726
				3727	if (need_resched()) {
				3728	btrfs_release_path(path);
				3729	cond_resched();
				3730	goto search;
				3731	}
				3732
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	3733	ret = overwrite_item(trans, log, dst_path, src, i,
				3734	&min_key);
Yan, Zheng	4a500fd	2010-05-16 10:49:59 -0400	[diff] [blame]	3735	if (ret) {
				3736	err = ret;
				3737	goto done;
				3738	}
Filipe Manana	2f2ff0e	2015-03-20 17:19:46 +0000	[diff] [blame]	3739
				3740	/*
				3741	* We must make sure that when we log a directory entry,
				3742	* the corresponding inode, after log replay, has a
				3743	* matching link count. For example:
				3744	*
				3745	* touch foo
				3746	* mkdir mydir
				3747	* sync
				3748	* ln foo mydir/bar
				3749	* xfs_io -c "fsync" mydir
				3750	* <crash>
				3751	* <mount fs and log replay>
				3752	*
				3753	* Would result in a fsync log that when replayed, our
				3754	* file inode would have a link count of 1, but we get
				3755	* two directory entries pointing to the same inode.
				3756	* After removing one of the names, it would not be
				3757	* possible to remove the other name, which resulted
				3758	* always in stale file handle errors, and would not
				3759	* be possible to rmdir the parent directory, since
				3760	* its i_size could never decrement to the value
				3761	* BTRFS_EMPTY_DIR_SIZE, resulting in -ENOTEMPTY errors.
				3762	*/
				3763	di = btrfs_item_ptr(src, i, struct btrfs_dir_item);
				3764	btrfs_dir_item_key_to_cpu(src, di, &tmp);
				3765	if (ctx &&
				3766	(btrfs_dir_transid(src, di) == trans->transid \|\|
				3767	btrfs_dir_type(src, di) == BTRFS_FT_DIR) &&
				3768	tmp.type != BTRFS_ROOT_ITEM_KEY)
				3769	ctx->log_new_dentries = true;
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	3770	}
				3771	path->slots[0] = nritems;
				3772
				3773	/*
				3774	* look ahead to the next item and see if it is also
				3775	* from this directory and from this transaction
				3776	*/
				3777	ret = btrfs_next_leaf(root, path);
Liu Bo	80c0b42	2018-04-03 01:59:47 +0800	[diff] [blame]	3778	if (ret) {
				3779	if (ret == 1)
				3780	last_offset = (u64)-1;
				3781	else
				3782	err = ret;
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	3783	goto done;
				3784	}
				3785	btrfs_item_key_to_cpu(path->nodes[0], &tmp, path->slots[0]);
Li Zefan	33345d01	2011-04-20 10:31:50 +0800	[diff] [blame]	3786	if (tmp.objectid != ino \|\| tmp.type != key_type) {
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	3787	last_offset = (u64)-1;
				3788	goto done;
				3789	}
				3790	if (btrfs_header_generation(path->nodes[0]) != trans->transid) {
				3791	ret = overwrite_item(trans, log, dst_path,
				3792	path->nodes[0], path->slots[0],
				3793	&tmp);
Yan, Zheng	4a500fd	2010-05-16 10:49:59 -0400	[diff] [blame]	3794	if (ret)
				3795	err = ret;
				3796	else
				3797	last_offset = tmp.offset;
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	3798	goto done;
				3799	}
				3800	}
				3801	done:
David Sterba	b3b4aa7	2011-04-21 01:20:15 +0200	[diff] [blame]	3802	btrfs_release_path(path);
				3803	btrfs_release_path(dst_path);
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	3804
Yan, Zheng	4a500fd	2010-05-16 10:49:59 -0400	[diff] [blame]	3805	if (err == 0) {
				3806	*last_offset_ret = last_offset;
				3807	/*
				3808	* insert the log range keys to indicate where the log
				3809	* is valid
				3810	*/
				3811	ret = insert_dir_log_key(trans, log, path, key_type,
Li Zefan	33345d01	2011-04-20 10:31:50 +0800	[diff] [blame]	3812	ino, first_offset, last_offset);
Yan, Zheng	4a500fd	2010-05-16 10:49:59 -0400	[diff] [blame]	3813	if (ret)
				3814	err = ret;
				3815	}
				3816	return err;
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	3817	}
				3818
				3819	/*
				3820	* logging directories is very similar to logging inodes, We find all the items
				3821	* from the current transaction and write them to the log.
				3822	*
				3823	* The recovery code scans the directory in the subvolume, and if it finds a
				3824	* key in the range logged that is not present in the log tree, then it means
				3825	* that dir entry was unlinked during the transaction.
				3826	*
				3827	* In order for that scan to work, we must include one key smaller than
				3828	* the smallest logged by this transaction and one key larger than the largest
				3829	* key logged by this transaction.
				3830	*/
				3831	static noinline int log_directory_changes(struct btrfs_trans_handle *trans,
Nikolay Borisov	dbf39ea	2017-01-18 00:31:42 +0200	[diff] [blame]	3832	struct btrfs_root root, struct btrfs_inode inode,
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	3833	struct btrfs_path *path,
Filipe Manana	2f2ff0e	2015-03-20 17:19:46 +0000	[diff] [blame]	3834	struct btrfs_path *dst_path,
				3835	struct btrfs_log_ctx *ctx)
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	3836	{
				3837	u64 min_key;
				3838	u64 max_key;
				3839	int ret;
				3840	int key_type = BTRFS_DIR_ITEM_KEY;
				3841
				3842	again:
				3843	min_key = 0;
				3844	max_key = 0;
Chris Mason	d397712	2009-01-05 21:25:51 -0500	[diff] [blame]	3845	while (1) {
Nikolay Borisov	dbf39ea	2017-01-18 00:31:42 +0200	[diff] [blame]	3846	ret = log_dir_items(trans, root, inode, path, dst_path, key_type,
				3847	ctx, min_key, &max_key);
Yan, Zheng	4a500fd	2010-05-16 10:49:59 -0400	[diff] [blame]	3848	if (ret)
				3849	return ret;
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	3850	if (max_key == (u64)-1)
				3851	break;
				3852	min_key = max_key + 1;
				3853	}
				3854
				3855	if (key_type == BTRFS_DIR_ITEM_KEY) {
				3856	key_type = BTRFS_DIR_INDEX_KEY;
				3857	goto again;
				3858	}
				3859	return 0;
				3860	}
				3861
				3862	/*
				3863	* a helper function to drop items from the log before we relog an
				3864	* inode. max_key_type indicates the highest item type to remove.
				3865	* This cannot be run for file data extents because it does not
				3866	* free the extents they point to.
				3867	*/
				3868	static int drop_objectid_items(struct btrfs_trans_handle *trans,
				3869	struct btrfs_root *log,
				3870	struct btrfs_path *path,
				3871	u64 objectid, int max_key_type)
				3872	{
				3873	int ret;
				3874	struct btrfs_key key;
				3875	struct btrfs_key found_key;
Josef Bacik	18ec90d	2012-09-28 11:56:28 -0400	[diff] [blame]	3876	int start_slot;
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	3877
				3878	key.objectid = objectid;
				3879	key.type = max_key_type;
				3880	key.offset = (u64)-1;
				3881
Chris Mason	d397712	2009-01-05 21:25:51 -0500	[diff] [blame]	3882	while (1) {
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	3883	ret = btrfs_search_slot(trans, log, &key, path, -1, 1);
Josef Bacik	3650860	2013-04-25 16:23:32 -0400	[diff] [blame]	3884	BUG_ON(ret == 0); /* Logic error */
Yan, Zheng	4a500fd	2010-05-16 10:49:59 -0400	[diff] [blame]	3885	if (ret < 0)
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	3886	break;
				3887
				3888	if (path->slots[0] == 0)
				3889	break;
				3890
				3891	path->slots[0]--;
				3892	btrfs_item_key_to_cpu(path->nodes[0], &found_key,
				3893	path->slots[0]);
				3894
				3895	if (found_key.objectid != objectid)
				3896	break;
				3897
Josef Bacik	18ec90d	2012-09-28 11:56:28 -0400	[diff] [blame]	3898	found_key.offset = 0;
				3899	found_key.type = 0;
Qu Wenruo	e3b8336	2020-04-17 15:08:21 +0800	[diff] [blame]	3900	ret = btrfs_bin_search(path->nodes[0], &found_key, &start_slot);
Filipe Manana	cbca7d5	2019-02-18 16:57:26 +0000	[diff] [blame]	3901	if (ret < 0)
				3902	break;
Josef Bacik	18ec90d	2012-09-28 11:56:28 -0400	[diff] [blame]	3903
				3904	ret = btrfs_del_items(trans, log, path, start_slot,
				3905	path->slots[0] - start_slot + 1);
				3906	/*
				3907	* If start slot isn't 0 then we don't need to re-search, we've
				3908	* found the last guy with the objectid in this tree.
				3909	*/
				3910	if (ret \|\| start_slot != 0)
Tsutomu Itoh	65a246c	2011-05-19 04:37:44 +0000	[diff] [blame]	3911	break;
David Sterba	b3b4aa7	2011-04-21 01:20:15 +0200	[diff] [blame]	3912	btrfs_release_path(path);
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	3913	}
David Sterba	b3b4aa7	2011-04-21 01:20:15 +0200	[diff] [blame]	3914	btrfs_release_path(path);
Josef Bacik	5bdbeb2	2012-05-29 16:59:49 -0400	[diff] [blame]	3915	if (ret > 0)
				3916	ret = 0;
Yan, Zheng	4a500fd	2010-05-16 10:49:59 -0400	[diff] [blame]	3917	return ret;
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	3918	}
				3919
Josef Bacik	94edf4a	2012-09-25 14:56:25 -0400	[diff] [blame]	3920	static void fill_inode_item(struct btrfs_trans_handle *trans,
				3921	struct extent_buffer *leaf,
				3922	struct btrfs_inode_item *item,
Filipe Manana	1a4bcf4	2015-02-13 12:30:56 +0000	[diff] [blame]	3923	struct inode *inode, int log_inode_only,
				3924	u64 logged_isize)
Josef Bacik	94edf4a	2012-09-25 14:56:25 -0400	[diff] [blame]	3925	{
Josef Bacik	0b1c6cc	2012-10-23 16:03:44 -0400	[diff] [blame]	3926	struct btrfs_map_token token;
Boris Burkov	77eea05	2021-06-30 13:01:48 -0700	[diff] [blame]	3927	u64 flags;
Josef Bacik	94edf4a	2012-09-25 14:56:25 -0400	[diff] [blame]	3928
David Sterba	c82f823	2019-08-09 17:48:21 +0200	[diff] [blame]	3929	btrfs_init_map_token(&token, leaf);
Josef Bacik	94edf4a	2012-09-25 14:56:25 -0400	[diff] [blame]	3930
				3931	if (log_inode_only) {
				3932	/* set the generation to zero so the recover code
				3933	* can tell the difference between an logging
				3934	* just to say 'this inode exists' and a logging
				3935	* to say 'update this inode with these values'
				3936	*/
David Sterba	cc4c13d	2020-04-29 02:15:56 +0200	[diff] [blame]	3937	btrfs_set_token_inode_generation(&token, item, 0);
				3938	btrfs_set_token_inode_size(&token, item, logged_isize);
Josef Bacik	94edf4a	2012-09-25 14:56:25 -0400	[diff] [blame]	3939	} else {
David Sterba	cc4c13d	2020-04-29 02:15:56 +0200	[diff] [blame]	3940	btrfs_set_token_inode_generation(&token, item,
				3941	BTRFS_I(inode)->generation);
				3942	btrfs_set_token_inode_size(&token, item, inode->i_size);
Josef Bacik	94edf4a	2012-09-25 14:56:25 -0400	[diff] [blame]	3943	}
				3944
David Sterba	cc4c13d	2020-04-29 02:15:56 +0200	[diff] [blame]	3945	btrfs_set_token_inode_uid(&token, item, i_uid_read(inode));
				3946	btrfs_set_token_inode_gid(&token, item, i_gid_read(inode));
				3947	btrfs_set_token_inode_mode(&token, item, inode->i_mode);
				3948	btrfs_set_token_inode_nlink(&token, item, inode->i_nlink);
Josef Bacik	0b1c6cc	2012-10-23 16:03:44 -0400	[diff] [blame]	3949
David Sterba	cc4c13d	2020-04-29 02:15:56 +0200	[diff] [blame]	3950	btrfs_set_token_timespec_sec(&token, &item->atime,
				3951	inode->i_atime.tv_sec);
				3952	btrfs_set_token_timespec_nsec(&token, &item->atime,
				3953	inode->i_atime.tv_nsec);
Josef Bacik	0b1c6cc	2012-10-23 16:03:44 -0400	[diff] [blame]	3954
David Sterba	cc4c13d	2020-04-29 02:15:56 +0200	[diff] [blame]	3955	btrfs_set_token_timespec_sec(&token, &item->mtime,
				3956	inode->i_mtime.tv_sec);
				3957	btrfs_set_token_timespec_nsec(&token, &item->mtime,
				3958	inode->i_mtime.tv_nsec);
Josef Bacik	0b1c6cc	2012-10-23 16:03:44 -0400	[diff] [blame]	3959
David Sterba	cc4c13d	2020-04-29 02:15:56 +0200	[diff] [blame]	3960	btrfs_set_token_timespec_sec(&token, &item->ctime,
				3961	inode->i_ctime.tv_sec);
				3962	btrfs_set_token_timespec_nsec(&token, &item->ctime,
				3963	inode->i_ctime.tv_nsec);
Josef Bacik	0b1c6cc	2012-10-23 16:03:44 -0400	[diff] [blame]	3964
Filipe Manana	e593e54	2021-01-27 10:34:55 +0000	[diff] [blame]	3965	/*
				3966	* We do not need to set the nbytes field, in fact during a fast fsync
				3967	* its value may not even be correct, since a fast fsync does not wait
				3968	* for ordered extent completion, which is where we update nbytes, it
				3969	* only waits for writeback to complete. During log replay as we find
				3970	* file extent items and replay them, we adjust the nbytes field of the
				3971	* inode item in subvolume tree as needed (see overwrite_item()).
				3972	*/
Josef Bacik	0b1c6cc	2012-10-23 16:03:44 -0400	[diff] [blame]	3973
David Sterba	cc4c13d	2020-04-29 02:15:56 +0200	[diff] [blame]	3974	btrfs_set_token_inode_sequence(&token, item, inode_peek_iversion(inode));
				3975	btrfs_set_token_inode_transid(&token, item, trans->transid);
				3976	btrfs_set_token_inode_rdev(&token, item, inode->i_rdev);
Boris Burkov	77eea05	2021-06-30 13:01:48 -0700	[diff] [blame]	3977	flags = btrfs_inode_combine_flags(BTRFS_I(inode)->flags,
				3978	BTRFS_I(inode)->ro_flags);
				3979	btrfs_set_token_inode_flags(&token, item, flags);
David Sterba	cc4c13d	2020-04-29 02:15:56 +0200	[diff] [blame]	3980	btrfs_set_token_inode_block_group(&token, item, 0);
Josef Bacik	94edf4a	2012-09-25 14:56:25 -0400	[diff] [blame]	3981	}
				3982
Josef Bacik	a95249b	2012-10-11 16:17:34 -0400	[diff] [blame]	3983	static int log_inode_item(struct btrfs_trans_handle *trans,
				3984	struct btrfs_root log, struct btrfs_path path,
Filipe Manana	2ac691d	2021-07-20 16:03:43 +0100	[diff] [blame]	3985	struct btrfs_inode *inode, bool inode_item_dropped)
Josef Bacik	a95249b	2012-10-11 16:17:34 -0400	[diff] [blame]	3986	{
				3987	struct btrfs_inode_item *inode_item;
Josef Bacik	a95249b	2012-10-11 16:17:34 -0400	[diff] [blame]	3988	int ret;
				3989
Filipe Manana	2ac691d	2021-07-20 16:03:43 +0100	[diff] [blame]	3990	/*
				3991	* If we are doing a fast fsync and the inode was logged before in the
				3992	* current transaction, then we know the inode was previously logged and
				3993	* it exists in the log tree. For performance reasons, in this case use
				3994	* btrfs_search_slot() directly with ins_len set to 0 so that we never
				3995	* attempt a write lock on the leaf's parent, which adds unnecessary lock
				3996	* contention in case there are concurrent fsyncs for other inodes of the
				3997	* same subvolume. Using btrfs_insert_empty_item() when the inode item
				3998	* already exists can also result in unnecessarily splitting a leaf.
				3999	*/
				4000	if (!inode_item_dropped && inode->logged_trans == trans->transid) {
				4001	ret = btrfs_search_slot(trans, log, &inode->location, path, 0, 1);
				4002	ASSERT(ret <= 0);
				4003	if (ret > 0)
				4004	ret = -ENOENT;
				4005	} else {
				4006	/*
				4007	* This means it is the first fsync in the current transaction,
				4008	* so the inode item is not in the log and we need to insert it.
				4009	* We can never get -EEXIST because we are only called for a fast
				4010	* fsync and in case an inode eviction happens after the inode was
				4011	* logged before in the current transaction, when we load again
				4012	* the inode, we set BTRFS_INODE_NEEDS_FULL_SYNC on its runtime
				4013	* flags and set ->logged_trans to 0.
				4014	*/
				4015	ret = btrfs_insert_empty_item(trans, log, path, &inode->location,
				4016	sizeof(*inode_item));
				4017	ASSERT(ret != -EEXIST);
				4018	}
				4019	if (ret)
Josef Bacik	a95249b	2012-10-11 16:17:34 -0400	[diff] [blame]	4020	return ret;
				4021	inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
				4022	struct btrfs_inode_item);
Nikolay Borisov	6d889a3	2017-01-18 00:31:47 +0200	[diff] [blame]	4023	fill_inode_item(trans, path->nodes[0], inode_item, &inode->vfs_inode,
				4024	0, 0);
Josef Bacik	a95249b	2012-10-11 16:17:34 -0400	[diff] [blame]	4025	btrfs_release_path(path);
				4026	return 0;
				4027	}
				4028
Filipe Manana	40e046a	2019-12-05 16:58:30 +0000	[diff] [blame]	4029	static int log_csums(struct btrfs_trans_handle *trans,
Filipe Manana	3ebac17	2020-07-15 12:30:43 +0100	[diff] [blame]	4030	struct btrfs_inode *inode,
Filipe Manana	40e046a	2019-12-05 16:58:30 +0000	[diff] [blame]	4031	struct btrfs_root *log_root,
				4032	struct btrfs_ordered_sum *sums)
				4033	{
Filipe Manana	e289f03	2020-05-18 12:14:50 +0100	[diff] [blame]	4034	const u64 lock_end = sums->bytenr + sums->len - 1;
				4035	struct extent_state *cached_state = NULL;
Filipe Manana	40e046a	2019-12-05 16:58:30 +0000	[diff] [blame]	4036	int ret;
				4037
				4038	/*
Filipe Manana	3ebac17	2020-07-15 12:30:43 +0100	[diff] [blame]	4039	* If this inode was not used for reflink operations in the current
				4040	* transaction with new extents, then do the fast path, no need to
				4041	* worry about logging checksum items with overlapping ranges.
				4042	*/
				4043	if (inode->last_reflink_trans < trans->transid)
				4044	return btrfs_csum_file_blocks(trans, log_root, sums);
				4045
				4046	/*
Filipe Manana	e289f03	2020-05-18 12:14:50 +0100	[diff] [blame]	4047	* Serialize logging for checksums. This is to avoid racing with the
				4048	* same checksum being logged by another task that is logging another
				4049	* file which happens to refer to the same extent as well. Such races
				4050	* can leave checksum items in the log with overlapping ranges.
				4051	*/
				4052	ret = lock_extent_bits(&log_root->log_csum_range, sums->bytenr,
				4053	lock_end, &cached_state);
				4054	if (ret)
				4055	return ret;
				4056	/*
Filipe Manana	40e046a	2019-12-05 16:58:30 +0000	[diff] [blame]	4057	* Due to extent cloning, we might have logged a csum item that covers a
				4058	* subrange of a cloned extent, and later we can end up logging a csum
				4059	* item for a larger subrange of the same extent or the entire range.
				4060	* This would leave csum items in the log tree that cover the same range
				4061	* and break the searches for checksums in the log tree, resulting in
				4062	* some checksums missing in the fs/subvolume tree. So just delete (or
				4063	* trim and adjust) any existing csum items in the log for this range.
				4064	*/
				4065	ret = btrfs_del_csums(trans, log_root, sums->bytenr, sums->len);
Filipe Manana	e289f03	2020-05-18 12:14:50 +0100	[diff] [blame]	4066	if (!ret)
				4067	ret = btrfs_csum_file_blocks(trans, log_root, sums);
Filipe Manana	40e046a	2019-12-05 16:58:30 +0000	[diff] [blame]	4068
Filipe Manana	e289f03	2020-05-18 12:14:50 +0100	[diff] [blame]	4069	unlock_extent_cached(&log_root->log_csum_range, sums->bytenr, lock_end,
				4070	&cached_state);
				4071
				4072	return ret;
Filipe Manana	40e046a	2019-12-05 16:58:30 +0000	[diff] [blame]	4073	}
				4074
Chris Mason	31ff1cd	2008-09-11 16:17:57 -0400	[diff] [blame]	4075	static noinline int copy_items(struct btrfs_trans_handle *trans,
Nikolay Borisov	44d70e1	2017-01-18 00:31:36 +0200	[diff] [blame]	4076	struct btrfs_inode *inode,
Chris Mason	31ff1cd	2008-09-11 16:17:57 -0400	[diff] [blame]	4077	struct btrfs_path *dst_path,
Filipe Manana	0e56315	2019-11-19 12:07:33 +0000	[diff] [blame]	4078	struct btrfs_path *src_path,
Filipe Manana	1a4bcf4	2015-02-13 12:30:56 +0000	[diff] [blame]	4079	int start_slot, int nr, int inode_only,
				4080	u64 logged_isize)
Chris Mason	31ff1cd	2008-09-11 16:17:57 -0400	[diff] [blame]	4081	{
David Sterba	3ffbd68	2018-06-29 10:56:42 +0200	[diff] [blame]	4082	struct btrfs_fs_info *fs_info = trans->fs_info;
Chris Mason	31ff1cd	2008-09-11 16:17:57 -0400	[diff] [blame]	4083	unsigned long src_offset;
				4084	unsigned long dst_offset;
Nikolay Borisov	44d70e1	2017-01-18 00:31:36 +0200	[diff] [blame]	4085	struct btrfs_root *log = inode->root->log_root;
Chris Mason	31ff1cd	2008-09-11 16:17:57 -0400	[diff] [blame]	4086	struct btrfs_file_extent_item *extent;
				4087	struct btrfs_inode_item *inode_item;
Josef Bacik	16e7549	2013-10-22 12:18:51 -0400	[diff] [blame]	4088	struct extent_buffer *src = src_path->nodes[0];
Chris Mason	31ff1cd	2008-09-11 16:17:57 -0400	[diff] [blame]	4089	int ret;
				4090	struct btrfs_key *ins_keys;
				4091	u32 *ins_sizes;
				4092	char *ins_data;
				4093	int i;
Chris Mason	d20f704	2008-12-08 16:58:54 -0500	[diff] [blame]	4094	struct list_head ordered_sums;
Nikolay Borisov	44d70e1	2017-01-18 00:31:36 +0200	[diff] [blame]	4095	int skip_csum = inode->flags & BTRFS_INODE_NODATASUM;
Chris Mason	d20f704	2008-12-08 16:58:54 -0500	[diff] [blame]	4096
				4097	INIT_LIST_HEAD(&ordered_sums);
Chris Mason	31ff1cd	2008-09-11 16:17:57 -0400	[diff] [blame]	4098
				4099	ins_data = kmalloc(nr * sizeof(struct btrfs_key) +
				4100	nr * sizeof(u32), GFP_NOFS);
liubo	2a29edc	2011-01-26 06:22:08 +0000	[diff] [blame]	4101	if (!ins_data)
				4102	return -ENOMEM;
				4103
Chris Mason	31ff1cd	2008-09-11 16:17:57 -0400	[diff] [blame]	4104	ins_sizes = (u32 *)ins_data;
				4105	ins_keys = (struct btrfs_key )(ins_data + nr sizeof(u32));
				4106
				4107	for (i = 0; i < nr; i++) {
				4108	ins_sizes[i] = btrfs_item_size_nr(src, i + start_slot);
				4109	btrfs_item_key_to_cpu(src, ins_keys + i, i + start_slot);
				4110	}
				4111	ret = btrfs_insert_empty_items(trans, log, dst_path,
				4112	ins_keys, ins_sizes, nr);
Yan, Zheng	4a500fd	2010-05-16 10:49:59 -0400	[diff] [blame]	4113	if (ret) {
				4114	kfree(ins_data);
				4115	return ret;
				4116	}
Chris Mason	31ff1cd	2008-09-11 16:17:57 -0400	[diff] [blame]	4117
Yan Zheng	5d4f98a	2009-06-10 10:45:14 -0400	[diff] [blame]	4118	for (i = 0; i < nr; i++, dst_path->slots[0]++) {
Chris Mason	31ff1cd	2008-09-11 16:17:57 -0400	[diff] [blame]	4119	dst_offset = btrfs_item_ptr_offset(dst_path->nodes[0],
				4120	dst_path->slots[0]);
				4121
				4122	src_offset = btrfs_item_ptr_offset(src, start_slot + i);
				4123
Josef Bacik	94edf4a	2012-09-25 14:56:25 -0400	[diff] [blame]	4124	if (ins_keys[i].type == BTRFS_INODE_ITEM_KEY) {
Chris Mason	31ff1cd	2008-09-11 16:17:57 -0400	[diff] [blame]	4125	inode_item = btrfs_item_ptr(dst_path->nodes[0],
				4126	dst_path->slots[0],
				4127	struct btrfs_inode_item);
Josef Bacik	94edf4a	2012-09-25 14:56:25 -0400	[diff] [blame]	4128	fill_inode_item(trans, dst_path->nodes[0], inode_item,
David Sterba	f85b737	2017-01-20 14:54:07 +0100	[diff] [blame]	4129	&inode->vfs_inode,
				4130	inode_only == LOG_INODE_EXISTS,
Filipe Manana	1a4bcf4	2015-02-13 12:30:56 +0000	[diff] [blame]	4131	logged_isize);
Josef Bacik	94edf4a	2012-09-25 14:56:25 -0400	[diff] [blame]	4132	} else {
				4133	copy_extent_buffer(dst_path->nodes[0], src, dst_offset,
				4134	src_offset, ins_sizes[i]);
Chris Mason	31ff1cd	2008-09-11 16:17:57 -0400	[diff] [blame]	4135	}
Josef Bacik	94edf4a	2012-09-25 14:56:25 -0400	[diff] [blame]	4136
Chris Mason	31ff1cd	2008-09-11 16:17:57 -0400	[diff] [blame]	4137	/* take a reference on file data extents so that truncates
				4138	* or deletes of this inode don't have to relog the inode
				4139	* again
				4140	*/
David Sterba	962a298	2014-06-04 18:41:45 +0200	[diff] [blame]	4141	if (ins_keys[i].type == BTRFS_EXTENT_DATA_KEY &&
Liu Bo	d279440	2012-08-29 01:07:56 -0600	[diff] [blame]	4142	!skip_csum) {
Chris Mason	31ff1cd	2008-09-11 16:17:57 -0400	[diff] [blame]	4143	int found_type;
				4144	extent = btrfs_item_ptr(src, start_slot + i,
				4145	struct btrfs_file_extent_item);
				4146
liubo	8e531cd	2011-05-06 10:36:09 +0800	[diff] [blame]	4147	if (btrfs_file_extent_generation(src, extent) < trans->transid)
				4148	continue;
				4149
Chris Mason	31ff1cd	2008-09-11 16:17:57 -0400	[diff] [blame]	4150	found_type = btrfs_file_extent_type(src, extent);
Josef Bacik	6f1fed7	2012-09-26 11:07:06 -0400	[diff] [blame]	4151	if (found_type == BTRFS_FILE_EXTENT_REG) {
Yan Zheng	5d4f98a	2009-06-10 10:45:14 -0400	[diff] [blame]	4152	u64 ds, dl, cs, cl;
				4153	ds = btrfs_file_extent_disk_bytenr(src,
				4154	extent);
				4155	/* ds == 0 is a hole */
				4156	if (ds == 0)
				4157	continue;
				4158
				4159	dl = btrfs_file_extent_disk_num_bytes(src,
				4160	extent);
				4161	cs = btrfs_file_extent_offset(src, extent);
				4162	cl = btrfs_file_extent_num_bytes(src,
Joe Perches	a419aef	2009-08-18 11:18:35 -0700	[diff] [blame]	4163	extent);
Chris Mason	580afd7	2008-12-08 19:15:39 -0500	[diff] [blame]	4164	if (btrfs_file_extent_compression(src,
				4165	extent)) {
				4166	cs = 0;
				4167	cl = dl;
				4168	}
Yan Zheng	5d4f98a	2009-06-10 10:45:14 -0400	[diff] [blame]	4169
				4170	ret = btrfs_lookup_csums_range(
Jeff Mahoney	0b246af	2016-06-22 18:54:23 -0400	[diff] [blame]	4171	fs_info->csum_root,
Yan Zheng	5d4f98a	2009-06-10 10:45:14 -0400	[diff] [blame]	4172	ds + cs, ds + cs + cl - 1,
Arne Jansen	a2de733	2011-03-08 14:14:00 +0100	[diff] [blame]	4173	&ordered_sums, 0);
Filipe Manana	4f26433	2020-07-29 10:17:50 +0100	[diff] [blame]	4174	if (ret)
				4175	break;
Chris Mason	31ff1cd	2008-09-11 16:17:57 -0400	[diff] [blame]	4176	}
				4177	}
Chris Mason	31ff1cd	2008-09-11 16:17:57 -0400	[diff] [blame]	4178	}
				4179
				4180	btrfs_mark_buffer_dirty(dst_path->nodes[0]);
David Sterba	b3b4aa7	2011-04-21 01:20:15 +0200	[diff] [blame]	4181	btrfs_release_path(dst_path);
Chris Mason	31ff1cd	2008-09-11 16:17:57 -0400	[diff] [blame]	4182	kfree(ins_data);
Chris Mason	d20f704	2008-12-08 16:58:54 -0500	[diff] [blame]	4183
				4184	/*
				4185	* we have to do this after the loop above to avoid changing the
				4186	* log tree while trying to change the log tree.
				4187	*/
Chris Mason	d397712	2009-01-05 21:25:51 -0500	[diff] [blame]	4188	while (!list_empty(&ordered_sums)) {
Chris Mason	d20f704	2008-12-08 16:58:54 -0500	[diff] [blame]	4189	struct btrfs_ordered_sum *sums = list_entry(ordered_sums.next,
				4190	struct btrfs_ordered_sum,
				4191	list);
Yan, Zheng	4a500fd	2010-05-16 10:49:59 -0400	[diff] [blame]	4192	if (!ret)
Filipe Manana	3ebac17	2020-07-15 12:30:43 +0100	[diff] [blame]	4193	ret = log_csums(trans, inode, log, sums);
Chris Mason	d20f704	2008-12-08 16:58:54 -0500	[diff] [blame]	4194	list_del(&sums->list);
				4195	kfree(sums);
				4196	}
Josef Bacik	16e7549	2013-10-22 12:18:51 -0400	[diff] [blame]	4197
Yan, Zheng	4a500fd	2010-05-16 10:49:59 -0400	[diff] [blame]	4198	return ret;
Chris Mason	31ff1cd	2008-09-11 16:17:57 -0400	[diff] [blame]	4199	}
				4200
Sami Tolvanen	4f0f586	2021-04-08 11:28:34 -0700	[diff] [blame]	4201	static int extent_cmp(void priv, const struct list_head a,
				4202	const struct list_head *b)
Josef Bacik	5dc562c	2012-08-17 13:14:17 -0400	[diff] [blame]	4203	{
David Sterba	214cc18	2021-07-26 14:15:26 +0200	[diff] [blame]	4204	const struct extent_map em1, em2;
Josef Bacik	5dc562c	2012-08-17 13:14:17 -0400	[diff] [blame]	4205
				4206	em1 = list_entry(a, struct extent_map, list);
				4207	em2 = list_entry(b, struct extent_map, list);
				4208
				4209	if (em1->start < em2->start)
				4210	return -1;
				4211	else if (em1->start > em2->start)
				4212	return 1;
				4213	return 0;
				4214	}
				4215
Josef Bacik	e7175a6	2018-05-23 11:58:34 -0400	[diff] [blame]	4216	static int log_extent_csums(struct btrfs_trans_handle *trans,
				4217	struct btrfs_inode *inode,
Nikolay Borisov	a9ecb65	2018-06-20 17:26:42 +0300	[diff] [blame]	4218	struct btrfs_root *log_root,
Filipe Manana	4877817	2020-08-11 12:43:58 +0100	[diff] [blame]	4219	const struct extent_map *em,
				4220	struct btrfs_log_ctx *ctx)
Josef Bacik	5dc562c	2012-08-17 13:14:17 -0400	[diff] [blame]	4221	{
Filipe Manana	4877817	2020-08-11 12:43:58 +0100	[diff] [blame]	4222	struct btrfs_ordered_extent *ordered;
Josef Bacik	2ab28f3	2012-10-12 15:27:49 -0400	[diff] [blame]	4223	u64 csum_offset;
				4224	u64 csum_len;
Filipe Manana	4877817	2020-08-11 12:43:58 +0100	[diff] [blame]	4225	u64 mod_start = em->mod_start;
				4226	u64 mod_len = em->mod_len;
Filipe Manana	8407f55	2014-09-05 15:14:39 +0100	[diff] [blame]	4227	LIST_HEAD(ordered_sums);
				4228	int ret = 0;
Josef Bacik	09a2a8f9	2013-04-05 16:51:15 -0400	[diff] [blame]	4229
Josef Bacik	e7175a6	2018-05-23 11:58:34 -0400	[diff] [blame]	4230	if (inode->flags & BTRFS_INODE_NODATASUM \|\|
				4231	test_bit(EXTENT_FLAG_PREALLOC, &em->flags) \|\|
Filipe Manana	8407f55	2014-09-05 15:14:39 +0100	[diff] [blame]	4232	em->block_start == EXTENT_MAP_HOLE)
Josef Bacik	70c8a91	2012-10-11 16:54:30 -0400	[diff] [blame]	4233	return 0;
				4234
Filipe Manana	4877817	2020-08-11 12:43:58 +0100	[diff] [blame]	4235	list_for_each_entry(ordered, &ctx->ordered_extents, log_list) {
				4236	const u64 ordered_end = ordered->file_offset + ordered->num_bytes;
				4237	const u64 mod_end = mod_start + mod_len;
				4238	struct btrfs_ordered_sum *sums;
				4239
				4240	if (mod_len == 0)
				4241	break;
				4242
				4243	if (ordered_end <= mod_start)
				4244	continue;
				4245	if (mod_end <= ordered->file_offset)
				4246	break;
				4247
				4248	/*
				4249	* We are going to copy all the csums on this ordered extent, so
				4250	* go ahead and adjust mod_start and mod_len in case this ordered
				4251	* extent has already been logged.
				4252	*/
				4253	if (ordered->file_offset > mod_start) {
				4254	if (ordered_end >= mod_end)
				4255	mod_len = ordered->file_offset - mod_start;
				4256	/*
				4257	* If we have this case
				4258	*
				4259	* \|--------- logged extent ---------\|
				4260	* \|----- ordered extent ----\|
				4261	*
				4262	* Just don't mess with mod_start and mod_len, we'll
				4263	* just end up logging more csums than we need and it
				4264	* will be ok.
				4265	*/
				4266	} else {
				4267	if (ordered_end < mod_end) {
				4268	mod_len = mod_end - ordered_end;
				4269	mod_start = ordered_end;
				4270	} else {
				4271	mod_len = 0;
				4272	}
				4273	}
				4274
				4275	/*
				4276	* To keep us from looping for the above case of an ordered
				4277	* extent that falls inside of the logged extent.
				4278	*/
				4279	if (test_and_set_bit(BTRFS_ORDERED_LOGGED_CSUM, &ordered->flags))
				4280	continue;
				4281
				4282	list_for_each_entry(sums, &ordered->list, list) {
				4283	ret = log_csums(trans, inode, log_root, sums);
				4284	if (ret)
				4285	return ret;
				4286	}
				4287	}
				4288
				4289	/* We're done, found all csums in the ordered extents. */
				4290	if (mod_len == 0)
				4291	return 0;
				4292
Josef Bacik	e7175a6	2018-05-23 11:58:34 -0400	[diff] [blame]	4293	/* If we're compressed we have to save the entire range of csums. */
Filipe David Borba Manana	488111a	2013-10-28 16:30:29 +0000	[diff] [blame]	4294	if (em->compress_type) {
				4295	csum_offset = 0;
Filipe Manana	8407f55	2014-09-05 15:14:39 +0100	[diff] [blame]	4296	csum_len = max(em->block_len, em->orig_block_len);
Filipe David Borba Manana	488111a	2013-10-28 16:30:29 +0000	[diff] [blame]	4297	} else {
Filipe Manana	4877817	2020-08-11 12:43:58 +0100	[diff] [blame]	4298	csum_offset = mod_start - em->start;
				4299	csum_len = mod_len;
Filipe David Borba Manana	488111a	2013-10-28 16:30:29 +0000	[diff] [blame]	4300	}
Josef Bacik	2ab28f3	2012-10-12 15:27:49 -0400	[diff] [blame]	4301
Josef Bacik	70c8a91	2012-10-11 16:54:30 -0400	[diff] [blame]	4302	/* block start is already adjusted for the file extent offset. */
Nikolay Borisov	a9ecb65	2018-06-20 17:26:42 +0300	[diff] [blame]	4303	ret = btrfs_lookup_csums_range(trans->fs_info->csum_root,
Josef Bacik	70c8a91	2012-10-11 16:54:30 -0400	[diff] [blame]	4304	em->block_start + csum_offset,
				4305	em->block_start + csum_offset +
				4306	csum_len - 1, &ordered_sums, 0);
				4307	if (ret)
				4308	return ret;
				4309
				4310	while (!list_empty(&ordered_sums)) {
				4311	struct btrfs_ordered_sum *sums = list_entry(ordered_sums.next,
				4312	struct btrfs_ordered_sum,
				4313	list);
				4314	if (!ret)
Filipe Manana	3ebac17	2020-07-15 12:30:43 +0100	[diff] [blame]	4315	ret = log_csums(trans, inode, log_root, sums);
Josef Bacik	70c8a91	2012-10-11 16:54:30 -0400	[diff] [blame]	4316	list_del(&sums->list);
				4317	kfree(sums);
				4318	}
				4319
				4320	return ret;
Josef Bacik	5dc562c	2012-08-17 13:14:17 -0400	[diff] [blame]	4321	}
				4322
Filipe Manana	8407f55	2014-09-05 15:14:39 +0100	[diff] [blame]	4323	static int log_one_extent(struct btrfs_trans_handle *trans,
Nikolay Borisov	9d12262	2017-01-18 00:31:40 +0200	[diff] [blame]	4324	struct btrfs_inode inode, struct btrfs_root root,
Filipe Manana	8407f55	2014-09-05 15:14:39 +0100	[diff] [blame]	4325	const struct extent_map *em,
				4326	struct btrfs_path *path,
Filipe Manana	8407f55	2014-09-05 15:14:39 +0100	[diff] [blame]	4327	struct btrfs_log_ctx *ctx)
				4328	{
Filipe Manana	5893dfb	2020-11-04 11:07:32 +0000	[diff] [blame]	4329	struct btrfs_drop_extents_args drop_args = { 0 };
Filipe Manana	8407f55	2014-09-05 15:14:39 +0100	[diff] [blame]	4330	struct btrfs_root *log = root->log_root;
				4331	struct btrfs_file_extent_item *fi;
				4332	struct extent_buffer *leaf;
				4333	struct btrfs_map_token token;
				4334	struct btrfs_key key;
				4335	u64 extent_offset = em->start - em->orig_start;
				4336	u64 block_len;
				4337	int ret;
Filipe Manana	8407f55	2014-09-05 15:14:39 +0100	[diff] [blame]	4338
Filipe Manana	4877817	2020-08-11 12:43:58 +0100	[diff] [blame]	4339	ret = log_extent_csums(trans, inode, log, em, ctx);
Filipe Manana	8407f55	2014-09-05 15:14:39 +0100	[diff] [blame]	4340	if (ret)
				4341	return ret;
				4342
Filipe Manana	5893dfb	2020-11-04 11:07:32 +0000	[diff] [blame]	4343	drop_args.path = path;
				4344	drop_args.start = em->start;
				4345	drop_args.end = em->start + em->len;
				4346	drop_args.replace_extent = true;
				4347	drop_args.extent_item_size = sizeof(*fi);
				4348	ret = btrfs_drop_extents(trans, log, inode, &drop_args);
Filipe Manana	8407f55	2014-09-05 15:14:39 +0100	[diff] [blame]	4349	if (ret)
				4350	return ret;
				4351
Filipe Manana	5893dfb	2020-11-04 11:07:32 +0000	[diff] [blame]	4352	if (!drop_args.extent_inserted) {
Nikolay Borisov	9d12262	2017-01-18 00:31:40 +0200	[diff] [blame]	4353	key.objectid = btrfs_ino(inode);
Filipe Manana	8407f55	2014-09-05 15:14:39 +0100	[diff] [blame]	4354	key.type = BTRFS_EXTENT_DATA_KEY;
				4355	key.offset = em->start;
				4356
				4357	ret = btrfs_insert_empty_item(trans, log, path, &key,
				4358	sizeof(*fi));
				4359	if (ret)
				4360	return ret;
				4361	}
				4362	leaf = path->nodes[0];
David Sterba	c82f823	2019-08-09 17:48:21 +0200	[diff] [blame]	4363	btrfs_init_map_token(&token, leaf);
Filipe Manana	8407f55	2014-09-05 15:14:39 +0100	[diff] [blame]	4364	fi = btrfs_item_ptr(leaf, path->slots[0],
				4365	struct btrfs_file_extent_item);
				4366
David Sterba	cc4c13d	2020-04-29 02:15:56 +0200	[diff] [blame]	4367	btrfs_set_token_file_extent_generation(&token, fi, trans->transid);
Filipe Manana	8407f55	2014-09-05 15:14:39 +0100	[diff] [blame]	4368	if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags))
David Sterba	cc4c13d	2020-04-29 02:15:56 +0200	[diff] [blame]	4369	btrfs_set_token_file_extent_type(&token, fi,
				4370	BTRFS_FILE_EXTENT_PREALLOC);
Filipe Manana	8407f55	2014-09-05 15:14:39 +0100	[diff] [blame]	4371	else
David Sterba	cc4c13d	2020-04-29 02:15:56 +0200	[diff] [blame]	4372	btrfs_set_token_file_extent_type(&token, fi,
				4373	BTRFS_FILE_EXTENT_REG);
Filipe Manana	8407f55	2014-09-05 15:14:39 +0100	[diff] [blame]	4374
				4375	block_len = max(em->block_len, em->orig_block_len);
				4376	if (em->compress_type != BTRFS_COMPRESS_NONE) {
David Sterba	cc4c13d	2020-04-29 02:15:56 +0200	[diff] [blame]	4377	btrfs_set_token_file_extent_disk_bytenr(&token, fi,
				4378	em->block_start);
				4379	btrfs_set_token_file_extent_disk_num_bytes(&token, fi, block_len);
Filipe Manana	8407f55	2014-09-05 15:14:39 +0100	[diff] [blame]	4380	} else if (em->block_start < EXTENT_MAP_LAST_BYTE) {
David Sterba	cc4c13d	2020-04-29 02:15:56 +0200	[diff] [blame]	4381	btrfs_set_token_file_extent_disk_bytenr(&token, fi,
Filipe Manana	8407f55	2014-09-05 15:14:39 +0100	[diff] [blame]	4382	em->block_start -
David Sterba	cc4c13d	2020-04-29 02:15:56 +0200	[diff] [blame]	4383	extent_offset);
				4384	btrfs_set_token_file_extent_disk_num_bytes(&token, fi, block_len);
Filipe Manana	8407f55	2014-09-05 15:14:39 +0100	[diff] [blame]	4385	} else {
David Sterba	cc4c13d	2020-04-29 02:15:56 +0200	[diff] [blame]	4386	btrfs_set_token_file_extent_disk_bytenr(&token, fi, 0);
				4387	btrfs_set_token_file_extent_disk_num_bytes(&token, fi, 0);
Filipe Manana	8407f55	2014-09-05 15:14:39 +0100	[diff] [blame]	4388	}
				4389
David Sterba	cc4c13d	2020-04-29 02:15:56 +0200	[diff] [blame]	4390	btrfs_set_token_file_extent_offset(&token, fi, extent_offset);
				4391	btrfs_set_token_file_extent_num_bytes(&token, fi, em->len);
				4392	btrfs_set_token_file_extent_ram_bytes(&token, fi, em->ram_bytes);
				4393	btrfs_set_token_file_extent_compression(&token, fi, em->compress_type);
				4394	btrfs_set_token_file_extent_encryption(&token, fi, 0);
				4395	btrfs_set_token_file_extent_other_encoding(&token, fi, 0);
Filipe Manana	8407f55	2014-09-05 15:14:39 +0100	[diff] [blame]	4396	btrfs_mark_buffer_dirty(leaf);
				4397
				4398	btrfs_release_path(path);
				4399
				4400	return ret;
				4401	}
				4402
Filipe Manana	31d11b8	2018-05-09 16:01:46 +0100	[diff] [blame]	4403	/*
				4404	* Log all prealloc extents beyond the inode's i_size to make sure we do not
				4405	* lose them after doing a fast fsync and replaying the log. We scan the
				4406	* subvolume's root instead of iterating the inode's extent map tree because
				4407	* otherwise we can log incorrect extent items based on extent map conversion.
				4408	* That can happen due to the fact that extent maps are merged when they
				4409	* are not in the extent map tree's list of modified extents.
				4410	*/
				4411	static int btrfs_log_prealloc_extents(struct btrfs_trans_handle *trans,
				4412	struct btrfs_inode *inode,
				4413	struct btrfs_path *path)
				4414	{
				4415	struct btrfs_root *root = inode->root;
				4416	struct btrfs_key key;
				4417	const u64 i_size = i_size_read(&inode->vfs_inode);
				4418	const u64 ino = btrfs_ino(inode);
				4419	struct btrfs_path *dst_path = NULL;
Filipe Manana	0e56315	2019-11-19 12:07:33 +0000	[diff] [blame]	4420	bool dropped_extents = false;
Filipe Manana	f135cea	2020-04-23 16:30:53 +0100	[diff] [blame]	4421	u64 truncate_offset = i_size;
				4422	struct extent_buffer *leaf;
				4423	int slot;
Filipe Manana	31d11b8	2018-05-09 16:01:46 +0100	[diff] [blame]	4424	int ins_nr = 0;
				4425	int start_slot;
				4426	int ret;
				4427
				4428	if (!(inode->flags & BTRFS_INODE_PREALLOC))
				4429	return 0;
				4430
				4431	key.objectid = ino;
				4432	key.type = BTRFS_EXTENT_DATA_KEY;
				4433	key.offset = i_size;
				4434	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
				4435	if (ret < 0)
				4436	goto out;
				4437
Filipe Manana	f135cea	2020-04-23 16:30:53 +0100	[diff] [blame]	4438	/*
				4439	* We must check if there is a prealloc extent that starts before the
				4440	* i_size and crosses the i_size boundary. This is to ensure later we
				4441	* truncate down to the end of that extent and not to the i_size, as
				4442	* otherwise we end up losing part of the prealloc extent after a log
				4443	* replay and with an implicit hole if there is another prealloc extent
				4444	* that starts at an offset beyond i_size.
				4445	*/
				4446	ret = btrfs_previous_item(root, path, ino, BTRFS_EXTENT_DATA_KEY);
				4447	if (ret < 0)
				4448	goto out;
				4449
				4450	if (ret == 0) {
				4451	struct btrfs_file_extent_item *ei;
				4452
				4453	leaf = path->nodes[0];
				4454	slot = path->slots[0];
				4455	ei = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
				4456
				4457	if (btrfs_file_extent_type(leaf, ei) ==
				4458	BTRFS_FILE_EXTENT_PREALLOC) {
				4459	u64 extent_end;
				4460
				4461	btrfs_item_key_to_cpu(leaf, &key, slot);
				4462	extent_end = key.offset +
				4463	btrfs_file_extent_num_bytes(leaf, ei);
				4464
				4465	if (extent_end > i_size)
				4466	truncate_offset = extent_end;
				4467	}
				4468	} else {
				4469	ret = 0;
				4470	}
				4471
Filipe Manana	31d11b8	2018-05-09 16:01:46 +0100	[diff] [blame]	4472	while (true) {
Filipe Manana	f135cea	2020-04-23 16:30:53 +0100	[diff] [blame]	4473	leaf = path->nodes[0];
				4474	slot = path->slots[0];
Filipe Manana	31d11b8	2018-05-09 16:01:46 +0100	[diff] [blame]	4475
				4476	if (slot >= btrfs_header_nritems(leaf)) {
				4477	if (ins_nr > 0) {
				4478	ret = copy_items(trans, inode, dst_path, path,
Filipe Manana	0e56315	2019-11-19 12:07:33 +0000	[diff] [blame]	4479	start_slot, ins_nr, 1, 0);
Filipe Manana	31d11b8	2018-05-09 16:01:46 +0100	[diff] [blame]	4480	if (ret < 0)
				4481	goto out;
				4482	ins_nr = 0;
				4483	}
				4484	ret = btrfs_next_leaf(root, path);
				4485	if (ret < 0)
				4486	goto out;
				4487	if (ret > 0) {
				4488	ret = 0;
				4489	break;
				4490	}
				4491	continue;
				4492	}
				4493
				4494	btrfs_item_key_to_cpu(leaf, &key, slot);
				4495	if (key.objectid > ino)
				4496	break;
				4497	if (WARN_ON_ONCE(key.objectid < ino) \|\|
				4498	key.type < BTRFS_EXTENT_DATA_KEY \|\|
				4499	key.offset < i_size) {
				4500	path->slots[0]++;
				4501	continue;
				4502	}
Filipe Manana	0e56315	2019-11-19 12:07:33 +0000	[diff] [blame]	4503	if (!dropped_extents) {
Filipe Manana	31d11b8	2018-05-09 16:01:46 +0100	[diff] [blame]	4504	/*
				4505	* Avoid logging extent items logged in past fsync calls
				4506	* and leading to duplicate keys in the log tree.
				4507	*/
				4508	do {
				4509	ret = btrfs_truncate_inode_items(trans,
				4510	root->log_root,
Nikolay Borisov	5074339	2020-11-02 16:48:55 +0200	[diff] [blame]	4511	inode, truncate_offset,
Filipe Manana	0d7d316	2021-05-24 11:35:55 +0100	[diff] [blame]	4512	BTRFS_EXTENT_DATA_KEY,
				4513	NULL);
Filipe Manana	31d11b8	2018-05-09 16:01:46 +0100	[diff] [blame]	4514	} while (ret == -EAGAIN);
				4515	if (ret)
				4516	goto out;
Filipe Manana	0e56315	2019-11-19 12:07:33 +0000	[diff] [blame]	4517	dropped_extents = true;
Filipe Manana	31d11b8	2018-05-09 16:01:46 +0100	[diff] [blame]	4518	}
				4519	if (ins_nr == 0)
				4520	start_slot = slot;
				4521	ins_nr++;
				4522	path->slots[0]++;
				4523	if (!dst_path) {
				4524	dst_path = btrfs_alloc_path();
				4525	if (!dst_path) {
				4526	ret = -ENOMEM;
				4527	goto out;
				4528	}
				4529	}
				4530	}
Filipe Manana	0bc2d3c	2020-04-21 11:25:31 +0100	[diff] [blame]	4531	if (ins_nr > 0)
Filipe Manana	0e56315	2019-11-19 12:07:33 +0000	[diff] [blame]	4532	ret = copy_items(trans, inode, dst_path, path,
Filipe Manana	31d11b8	2018-05-09 16:01:46 +0100	[diff] [blame]	4533	start_slot, ins_nr, 1, 0);
Filipe Manana	31d11b8	2018-05-09 16:01:46 +0100	[diff] [blame]	4534	out:
				4535	btrfs_release_path(path);
				4536	btrfs_free_path(dst_path);
				4537	return ret;
				4538	}
				4539
Josef Bacik	5dc562c	2012-08-17 13:14:17 -0400	[diff] [blame]	4540	static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans,
				4541	struct btrfs_root *root,
Nikolay Borisov	9d12262	2017-01-18 00:31:40 +0200	[diff] [blame]	4542	struct btrfs_inode *inode,
Miao Xie	827463c	2014-01-14 20:31:51 +0800	[diff] [blame]	4543	struct btrfs_path *path,
Filipe Manana	4877817	2020-08-11 12:43:58 +0100	[diff] [blame]	4544	struct btrfs_log_ctx *ctx)
Josef Bacik	5dc562c	2012-08-17 13:14:17 -0400	[diff] [blame]	4545	{
Filipe Manana	4877817	2020-08-11 12:43:58 +0100	[diff] [blame]	4546	struct btrfs_ordered_extent *ordered;
				4547	struct btrfs_ordered_extent *tmp;
Josef Bacik	5dc562c	2012-08-17 13:14:17 -0400	[diff] [blame]	4548	struct extent_map em, n;
				4549	struct list_head extents;
Nikolay Borisov	9d12262	2017-01-18 00:31:40 +0200	[diff] [blame]	4550	struct extent_map_tree *tree = &inode->extent_tree;
Josef Bacik	5dc562c	2012-08-17 13:14:17 -0400	[diff] [blame]	4551	int ret = 0;
Josef Bacik	2ab28f3	2012-10-12 15:27:49 -0400	[diff] [blame]	4552	int num = 0;
Josef Bacik	5dc562c	2012-08-17 13:14:17 -0400	[diff] [blame]	4553
				4554	INIT_LIST_HEAD(&extents);
				4555
Josef Bacik	5dc562c	2012-08-17 13:14:17 -0400	[diff] [blame]	4556	write_lock(&tree->lock);
Josef Bacik	5dc562c	2012-08-17 13:14:17 -0400	[diff] [blame]	4557
				4558	list_for_each_entry_safe(em, n, &tree->modified_extents, list) {
				4559	list_del_init(&em->list);
Josef Bacik	2ab28f3	2012-10-12 15:27:49 -0400	[diff] [blame]	4560	/*
				4561	* Just an arbitrary number, this can be really CPU intensive
				4562	* once we start getting a lot of extents, and really once we
				4563	* have a bunch of extents we just want to commit since it will
				4564	* be faster.
				4565	*/
				4566	if (++num > 32768) {
				4567	list_del_init(&tree->modified_extents);
				4568	ret = -EFBIG;
				4569	goto process;
				4570	}
				4571
Filipe Manana	5f96bfb	2020-11-25 12:19:24 +0000	[diff] [blame]	4572	if (em->generation < trans->transid)
Josef Bacik	5dc562c	2012-08-17 13:14:17 -0400	[diff] [blame]	4573	continue;
Josef Bacik	8c6c592	2017-08-29 10:11:39 -0400	[diff] [blame]	4574
Filipe Manana	31d11b8	2018-05-09 16:01:46 +0100	[diff] [blame]	4575	/* We log prealloc extents beyond eof later. */
				4576	if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags) &&
				4577	em->start >= i_size_read(&inode->vfs_inode))
				4578	continue;
				4579
Josef Bacik	ff44c6e	2012-09-14 12:59:20 -0400	[diff] [blame]	4580	/* Need a ref to keep it from getting evicted from cache */
Elena Reshetova	490b54d	2017-03-03 10:55:12 +0200	[diff] [blame]	4581	refcount_inc(&em->refs);
Josef Bacik	ff44c6e	2012-09-14 12:59:20 -0400	[diff] [blame]	4582	set_bit(EXTENT_FLAG_LOGGING, &em->flags);
Josef Bacik	5dc562c	2012-08-17 13:14:17 -0400	[diff] [blame]	4583	list_add_tail(&em->list, &extents);
Josef Bacik	2ab28f3	2012-10-12 15:27:49 -0400	[diff] [blame]	4584	num++;
Josef Bacik	5dc562c	2012-08-17 13:14:17 -0400	[diff] [blame]	4585	}
				4586
				4587	list_sort(NULL, &extents, extent_cmp);
Josef Bacik	2ab28f3	2012-10-12 15:27:49 -0400	[diff] [blame]	4588	process:
Josef Bacik	5dc562c	2012-08-17 13:14:17 -0400	[diff] [blame]	4589	while (!list_empty(&extents)) {
				4590	em = list_entry(extents.next, struct extent_map, list);
				4591
				4592	list_del_init(&em->list);
				4593
				4594	/*
				4595	* If we had an error we just need to delete everybody from our
				4596	* private list.
				4597	*/
Josef Bacik	ff44c6e	2012-09-14 12:59:20 -0400	[diff] [blame]	4598	if (ret) {
Josef Bacik	201a903	2013-01-24 12:02:07 -0500	[diff] [blame]	4599	clear_em_logging(tree, em);
Josef Bacik	ff44c6e	2012-09-14 12:59:20 -0400	[diff] [blame]	4600	free_extent_map(em);
Josef Bacik	5dc562c	2012-08-17 13:14:17 -0400	[diff] [blame]	4601	continue;
Josef Bacik	ff44c6e	2012-09-14 12:59:20 -0400	[diff] [blame]	4602	}
				4603
				4604	write_unlock(&tree->lock);
Josef Bacik	5dc562c	2012-08-17 13:14:17 -0400	[diff] [blame]	4605
Josef Bacik	a2120a4	2018-05-23 11:58:35 -0400	[diff] [blame]	4606	ret = log_one_extent(trans, inode, root, em, path, ctx);
Josef Bacik	ff44c6e	2012-09-14 12:59:20 -0400	[diff] [blame]	4607	write_lock(&tree->lock);
Josef Bacik	201a903	2013-01-24 12:02:07 -0500	[diff] [blame]	4608	clear_em_logging(tree, em);
				4609	free_extent_map(em);
Josef Bacik	5dc562c	2012-08-17 13:14:17 -0400	[diff] [blame]	4610	}
Josef Bacik	ff44c6e	2012-09-14 12:59:20 -0400	[diff] [blame]	4611	WARN_ON(!list_empty(&extents));
				4612	write_unlock(&tree->lock);
Josef Bacik	5dc562c	2012-08-17 13:14:17 -0400	[diff] [blame]	4613
Josef Bacik	5dc562c	2012-08-17 13:14:17 -0400	[diff] [blame]	4614	btrfs_release_path(path);
Filipe Manana	31d11b8	2018-05-09 16:01:46 +0100	[diff] [blame]	4615	if (!ret)
				4616	ret = btrfs_log_prealloc_extents(trans, inode, path);
Filipe Manana	4877817	2020-08-11 12:43:58 +0100	[diff] [blame]	4617	if (ret)
				4618	return ret;
Filipe Manana	31d11b8	2018-05-09 16:01:46 +0100	[diff] [blame]	4619
Filipe Manana	4877817	2020-08-11 12:43:58 +0100	[diff] [blame]	4620	/*
				4621	* We have logged all extents successfully, now make sure the commit of
				4622	* the current transaction waits for the ordered extents to complete
				4623	* before it commits and wipes out the log trees, otherwise we would
				4624	* lose data if an ordered extents completes after the transaction
				4625	* commits and a power failure happens after the transaction commit.
				4626	*/
				4627	list_for_each_entry_safe(ordered, tmp, &ctx->ordered_extents, log_list) {
				4628	list_del_init(&ordered->log_list);
				4629	set_bit(BTRFS_ORDERED_LOGGED, &ordered->flags);
				4630
				4631	if (!test_bit(BTRFS_ORDERED_COMPLETE, &ordered->flags)) {
				4632	spin_lock_irq(&inode->ordered_tree.lock);
				4633	if (!test_bit(BTRFS_ORDERED_COMPLETE, &ordered->flags)) {
				4634	set_bit(BTRFS_ORDERED_PENDING, &ordered->flags);
				4635	atomic_inc(&trans->transaction->pending_ordered);
				4636	}
				4637	spin_unlock_irq(&inode->ordered_tree.lock);
				4638	}
				4639	btrfs_put_ordered_extent(ordered);
				4640	}
				4641
				4642	return 0;
Josef Bacik	5dc562c	2012-08-17 13:14:17 -0400	[diff] [blame]	4643	}
				4644
Nikolay Borisov	481b01c	2017-01-18 00:31:34 +0200	[diff] [blame]	4645	static int logged_inode_size(struct btrfs_root log, struct btrfs_inode inode,
Filipe Manana	1a4bcf4	2015-02-13 12:30:56 +0000	[diff] [blame]	4646	struct btrfs_path path, u64 size_ret)
				4647	{
				4648	struct btrfs_key key;
				4649	int ret;
				4650
Nikolay Borisov	481b01c	2017-01-18 00:31:34 +0200	[diff] [blame]	4651	key.objectid = btrfs_ino(inode);
Filipe Manana	1a4bcf4	2015-02-13 12:30:56 +0000	[diff] [blame]	4652	key.type = BTRFS_INODE_ITEM_KEY;
				4653	key.offset = 0;
				4654
				4655	ret = btrfs_search_slot(NULL, log, &key, path, 0, 0);
				4656	if (ret < 0) {
				4657	return ret;
				4658	} else if (ret > 0) {
Filipe Manana	2f2ff0e	2015-03-20 17:19:46 +0000	[diff] [blame]	4659	*size_ret = 0;
Filipe Manana	1a4bcf4	2015-02-13 12:30:56 +0000	[diff] [blame]	4660	} else {
				4661	struct btrfs_inode_item *item;
				4662
				4663	item = btrfs_item_ptr(path->nodes[0], path->slots[0],
				4664	struct btrfs_inode_item);
				4665	*size_ret = btrfs_inode_size(path->nodes[0], item);
Filipe Manana	bf50411	2019-03-04 14:06:12 +0000	[diff] [blame]	4666	/*
				4667	* If the in-memory inode's i_size is smaller then the inode
				4668	* size stored in the btree, return the inode's i_size, so
				4669	* that we get a correct inode size after replaying the log
				4670	* when before a power failure we had a shrinking truncate
				4671	* followed by addition of a new name (rename / new hard link).
				4672	* Otherwise return the inode size from the btree, to avoid
				4673	* data loss when replaying a log due to previously doing a
				4674	* write that expands the inode's size and logging a new name
				4675	* immediately after.
				4676	*/
				4677	if (*size_ret > inode->vfs_inode.i_size)
				4678	*size_ret = inode->vfs_inode.i_size;
Filipe Manana	1a4bcf4	2015-02-13 12:30:56 +0000	[diff] [blame]	4679	}
				4680
				4681	btrfs_release_path(path);
				4682	return 0;
				4683	}
				4684
Filipe Manana	36283bf	2015-06-20 00:44:51 +0100	[diff] [blame]	4685	/*
				4686	* At the moment we always log all xattrs. This is to figure out at log replay
				4687	* time which xattrs must have their deletion replayed. If a xattr is missing
				4688	* in the log tree and exists in the fs/subvol tree, we delete it. This is
				4689	* because if a xattr is deleted, the inode is fsynced and a power failure
				4690	* happens, causing the log to be replayed the next time the fs is mounted,
				4691	* we want the xattr to not exist anymore (same behaviour as other filesystems
				4692	* with a journal, ext3/4, xfs, f2fs, etc).
				4693	*/
				4694	static int btrfs_log_all_xattrs(struct btrfs_trans_handle *trans,
				4695	struct btrfs_root *root,
Nikolay Borisov	1a93c36	2017-01-18 00:31:37 +0200	[diff] [blame]	4696	struct btrfs_inode *inode,
Filipe Manana	36283bf	2015-06-20 00:44:51 +0100	[diff] [blame]	4697	struct btrfs_path *path,
				4698	struct btrfs_path *dst_path)
				4699	{
				4700	int ret;
				4701	struct btrfs_key key;
Nikolay Borisov	1a93c36	2017-01-18 00:31:37 +0200	[diff] [blame]	4702	const u64 ino = btrfs_ino(inode);
Filipe Manana	36283bf	2015-06-20 00:44:51 +0100	[diff] [blame]	4703	int ins_nr = 0;
				4704	int start_slot = 0;
Filipe Manana	f2f121a	2020-11-13 11:21:49 +0000	[diff] [blame]	4705	bool found_xattrs = false;
				4706
				4707	if (test_bit(BTRFS_INODE_NO_XATTRS, &inode->runtime_flags))
				4708	return 0;
Filipe Manana	36283bf	2015-06-20 00:44:51 +0100	[diff] [blame]	4709
				4710	key.objectid = ino;
				4711	key.type = BTRFS_XATTR_ITEM_KEY;
				4712	key.offset = 0;
				4713
				4714	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
				4715	if (ret < 0)
				4716	return ret;
				4717
				4718	while (true) {
				4719	int slot = path->slots[0];
				4720	struct extent_buffer *leaf = path->nodes[0];
				4721	int nritems = btrfs_header_nritems(leaf);
				4722
				4723	if (slot >= nritems) {
				4724	if (ins_nr > 0) {
Nikolay Borisov	1a93c36	2017-01-18 00:31:37 +0200	[diff] [blame]	4725	ret = copy_items(trans, inode, dst_path, path,
Filipe Manana	0e56315	2019-11-19 12:07:33 +0000	[diff] [blame]	4726	start_slot, ins_nr, 1, 0);
Filipe Manana	36283bf	2015-06-20 00:44:51 +0100	[diff] [blame]	4727	if (ret < 0)
				4728	return ret;
				4729	ins_nr = 0;
				4730	}
				4731	ret = btrfs_next_leaf(root, path);
				4732	if (ret < 0)
				4733	return ret;
				4734	else if (ret > 0)
				4735	break;
				4736	continue;
				4737	}
				4738
				4739	btrfs_item_key_to_cpu(leaf, &key, slot);
				4740	if (key.objectid != ino \|\| key.type != BTRFS_XATTR_ITEM_KEY)
				4741	break;
				4742
				4743	if (ins_nr == 0)
				4744	start_slot = slot;
				4745	ins_nr++;
				4746	path->slots[0]++;
Filipe Manana	f2f121a	2020-11-13 11:21:49 +0000	[diff] [blame]	4747	found_xattrs = true;
Filipe Manana	36283bf	2015-06-20 00:44:51 +0100	[diff] [blame]	4748	cond_resched();
				4749	}
				4750	if (ins_nr > 0) {
Nikolay Borisov	1a93c36	2017-01-18 00:31:37 +0200	[diff] [blame]	4751	ret = copy_items(trans, inode, dst_path, path,
Filipe Manana	0e56315	2019-11-19 12:07:33 +0000	[diff] [blame]	4752	start_slot, ins_nr, 1, 0);
Filipe Manana	36283bf	2015-06-20 00:44:51 +0100	[diff] [blame]	4753	if (ret < 0)
				4754	return ret;
				4755	}
				4756
Filipe Manana	f2f121a	2020-11-13 11:21:49 +0000	[diff] [blame]	4757	if (!found_xattrs)
				4758	set_bit(BTRFS_INODE_NO_XATTRS, &inode->runtime_flags);
				4759
Filipe Manana	36283bf	2015-06-20 00:44:51 +0100	[diff] [blame]	4760	return 0;
				4761	}
				4762
Filipe Manana	a89ca6f	2015-06-25 04:17:46 +0100	[diff] [blame]	4763	/*
Filipe Manana	0e56315	2019-11-19 12:07:33 +0000	[diff] [blame]	4764	* When using the NO_HOLES feature if we punched a hole that causes the
				4765	* deletion of entire leafs or all the extent items of the first leaf (the one
				4766	* that contains the inode item and references) we may end up not processing
				4767	* any extents, because there are no leafs with a generation matching the
				4768	* current transaction that have extent items for our inode. So we need to find
				4769	* if any holes exist and then log them. We also need to log holes after any
				4770	* truncate operation that changes the inode's size.
Filipe Manana	a89ca6f	2015-06-25 04:17:46 +0100	[diff] [blame]	4771	*/
Filipe Manana	0e56315	2019-11-19 12:07:33 +0000	[diff] [blame]	4772	static int btrfs_log_holes(struct btrfs_trans_handle *trans,
				4773	struct btrfs_root *root,
				4774	struct btrfs_inode *inode,
Filipe Manana	7af5974	2020-04-07 11:37:44 +0100	[diff] [blame]	4775	struct btrfs_path *path)
Filipe Manana	a89ca6f	2015-06-25 04:17:46 +0100	[diff] [blame]	4776	{
Jeff Mahoney	0b246af	2016-06-22 18:54:23 -0400	[diff] [blame]	4777	struct btrfs_fs_info *fs_info = root->fs_info;
Filipe Manana	a89ca6f	2015-06-25 04:17:46 +0100	[diff] [blame]	4778	struct btrfs_key key;
Nikolay Borisov	a0308dd	2017-01-18 00:31:38 +0200	[diff] [blame]	4779	const u64 ino = btrfs_ino(inode);
				4780	const u64 i_size = i_size_read(&inode->vfs_inode);
Filipe Manana	7af5974	2020-04-07 11:37:44 +0100	[diff] [blame]	4781	u64 prev_extent_end = 0;
Filipe Manana	0e56315	2019-11-19 12:07:33 +0000	[diff] [blame]	4782	int ret;
Filipe Manana	a89ca6f	2015-06-25 04:17:46 +0100	[diff] [blame]	4783
Filipe Manana	0e56315	2019-11-19 12:07:33 +0000	[diff] [blame]	4784	if (!btrfs_fs_incompat(fs_info, NO_HOLES) \|\| i_size == 0)
Filipe Manana	a89ca6f	2015-06-25 04:17:46 +0100	[diff] [blame]	4785	return 0;
				4786
				4787	key.objectid = ino;
				4788	key.type = BTRFS_EXTENT_DATA_KEY;
Filipe Manana	7af5974	2020-04-07 11:37:44 +0100	[diff] [blame]	4789	key.offset = 0;
Filipe Manana	a89ca6f	2015-06-25 04:17:46 +0100	[diff] [blame]	4790
				4791	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
Filipe Manana	a89ca6f	2015-06-25 04:17:46 +0100	[diff] [blame]	4792	if (ret < 0)
				4793	return ret;
				4794
Filipe Manana	0e56315	2019-11-19 12:07:33 +0000	[diff] [blame]	4795	while (true) {
Filipe Manana	0e56315	2019-11-19 12:07:33 +0000	[diff] [blame]	4796	struct extent_buffer *leaf = path->nodes[0];
Filipe Manana	a89ca6f	2015-06-25 04:17:46 +0100	[diff] [blame]	4797
Filipe Manana	0e56315	2019-11-19 12:07:33 +0000	[diff] [blame]	4798	if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
				4799	ret = btrfs_next_leaf(root, path);
				4800	if (ret < 0)
				4801	return ret;
				4802	if (ret > 0) {
				4803	ret = 0;
				4804	break;
				4805	}
				4806	leaf = path->nodes[0];
				4807	}
				4808
				4809	btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
				4810	if (key.objectid != ino \|\| key.type != BTRFS_EXTENT_DATA_KEY)
				4811	break;
				4812
				4813	/* We have a hole, log it. */
				4814	if (prev_extent_end < key.offset) {
Filipe Manana	7af5974	2020-04-07 11:37:44 +0100	[diff] [blame]	4815	const u64 hole_len = key.offset - prev_extent_end;
Filipe Manana	0e56315	2019-11-19 12:07:33 +0000	[diff] [blame]	4816
				4817	/*
				4818	* Release the path to avoid deadlocks with other code
				4819	* paths that search the root while holding locks on
				4820	* leafs from the log root.
				4821	*/
				4822	btrfs_release_path(path);
				4823	ret = btrfs_insert_file_extent(trans, root->log_root,
				4824	ino, prev_extent_end, 0,
				4825	0, hole_len, 0, hole_len,
				4826	0, 0, 0);
				4827	if (ret < 0)
				4828	return ret;
				4829
				4830	/*
				4831	* Search for the same key again in the root. Since it's
				4832	* an extent item and we are holding the inode lock, the
				4833	* key must still exist. If it doesn't just emit warning
				4834	* and return an error to fall back to a transaction
				4835	* commit.
				4836	*/
				4837	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
				4838	if (ret < 0)
				4839	return ret;
				4840	if (WARN_ON(ret > 0))
				4841	return -ENOENT;
				4842	leaf = path->nodes[0];
				4843	}
Filipe Manana	a89ca6f	2015-06-25 04:17:46 +0100	[diff] [blame]	4844
Filipe Manana	7af5974	2020-04-07 11:37:44 +0100	[diff] [blame]	4845	prev_extent_end = btrfs_file_extent_end(path);
Filipe Manana	0e56315	2019-11-19 12:07:33 +0000	[diff] [blame]	4846	path->slots[0]++;
				4847	cond_resched();
Filipe Manana	a89ca6f	2015-06-25 04:17:46 +0100	[diff] [blame]	4848	}
Filipe Manana	a89ca6f	2015-06-25 04:17:46 +0100	[diff] [blame]	4849
Filipe Manana	7af5974	2020-04-07 11:37:44 +0100	[diff] [blame]	4850	if (prev_extent_end < i_size) {
Filipe Manana	0e56315	2019-11-19 12:07:33 +0000	[diff] [blame]	4851	u64 hole_len;
Filipe Manana	a89ca6f	2015-06-25 04:17:46 +0100	[diff] [blame]	4852
Filipe Manana	0e56315	2019-11-19 12:07:33 +0000	[diff] [blame]	4853	btrfs_release_path(path);
Filipe Manana	7af5974	2020-04-07 11:37:44 +0100	[diff] [blame]	4854	hole_len = ALIGN(i_size - prev_extent_end, fs_info->sectorsize);
Filipe Manana	0e56315	2019-11-19 12:07:33 +0000	[diff] [blame]	4855	ret = btrfs_insert_file_extent(trans, root->log_root,
				4856	ino, prev_extent_end, 0, 0,
				4857	hole_len, 0, hole_len,
				4858	0, 0, 0);
				4859	if (ret < 0)
				4860	return ret;
				4861	}
				4862
				4863	return 0;
Filipe Manana	a89ca6f	2015-06-25 04:17:46 +0100	[diff] [blame]	4864	}
				4865
Filipe Manana	56f23fd	2016-03-30 23:37:21 +0100	[diff] [blame]	4866	/*
				4867	* When we are logging a new inode X, check if it doesn't have a reference that
				4868	* matches the reference from some other inode Y created in a past transaction
				4869	* and that was renamed in the current transaction. If we don't do this, then at
				4870	* log replay time we can lose inode Y (and all its files if it's a directory):
				4871	*
				4872	* mkdir /mnt/x
				4873	* echo "hello world" > /mnt/x/foobar
				4874	* sync
				4875	* mv /mnt/x /mnt/y
				4876	* mkdir /mnt/x # or touch /mnt/x
				4877	* xfs_io -c fsync /mnt/x
				4878	* <power fail>
				4879	* mount fs, trigger log replay
				4880	*
				4881	* After the log replay procedure, we would lose the first directory and all its
				4882	* files (file foobar).
				4883	* For the case where inode Y is not a directory we simply end up losing it:
				4884	*
				4885	* echo "123" > /mnt/foo
				4886	* sync
				4887	* mv /mnt/foo /mnt/bar
				4888	* echo "abc" > /mnt/foo
				4889	* xfs_io -c fsync /mnt/foo
				4890	* <power fail>
				4891	*
				4892	* We also need this for cases where a snapshot entry is replaced by some other
				4893	* entry (file or directory) otherwise we end up with an unreplayable log due to
				4894	* attempts to delete the snapshot entry (entry of type BTRFS_ROOT_ITEM_KEY) as
				4895	* if it were a regular entry:
				4896	*
				4897	* mkdir /mnt/x
				4898	* btrfs subvolume snapshot /mnt /mnt/x/snap
				4899	* btrfs subvolume delete /mnt/x/snap
				4900	* rmdir /mnt/x
				4901	* mkdir /mnt/x
				4902	* fsync /mnt/x or fsync some new file inside it
				4903	* <power fail>
				4904	*
				4905	* The snapshot delete, rmdir of x, mkdir of a new x and the fsync all happen in
				4906	* the same transaction.
				4907	*/
				4908	static int btrfs_check_ref_name_override(struct extent_buffer *eb,
				4909	const int slot,
				4910	const struct btrfs_key *key,
Nikolay Borisov	4791c8f	2017-01-18 00:31:35 +0200	[diff] [blame]	4911	struct btrfs_inode *inode,
Filipe Manana	a3baaf0	2019-02-13 12:14:09 +0000	[diff] [blame]	4912	u64 other_ino, u64 other_parent)
Filipe Manana	56f23fd	2016-03-30 23:37:21 +0100	[diff] [blame]	4913	{
				4914	int ret;
				4915	struct btrfs_path *search_path;
				4916	char *name = NULL;
				4917	u32 name_len = 0;
				4918	u32 item_size = btrfs_item_size_nr(eb, slot);
				4919	u32 cur_offset = 0;
				4920	unsigned long ptr = btrfs_item_ptr_offset(eb, slot);
				4921
				4922	search_path = btrfs_alloc_path();
				4923	if (!search_path)
				4924	return -ENOMEM;
				4925	search_path->search_commit_root = 1;
				4926	search_path->skip_locking = 1;
				4927
				4928	while (cur_offset < item_size) {
				4929	u64 parent;
				4930	u32 this_name_len;
				4931	u32 this_len;
				4932	unsigned long name_ptr;
				4933	struct btrfs_dir_item *di;
				4934
				4935	if (key->type == BTRFS_INODE_REF_KEY) {
				4936	struct btrfs_inode_ref *iref;
				4937
				4938	iref = (struct btrfs_inode_ref *)(ptr + cur_offset);
				4939	parent = key->offset;
				4940	this_name_len = btrfs_inode_ref_name_len(eb, iref);
				4941	name_ptr = (unsigned long)(iref + 1);
				4942	this_len = sizeof(*iref) + this_name_len;
				4943	} else {
				4944	struct btrfs_inode_extref *extref;
				4945
				4946	extref = (struct btrfs_inode_extref *)(ptr +
				4947	cur_offset);
				4948	parent = btrfs_inode_extref_parent(eb, extref);
				4949	this_name_len = btrfs_inode_extref_name_len(eb, extref);
				4950	name_ptr = (unsigned long)&extref->name;
				4951	this_len = sizeof(*extref) + this_name_len;
				4952	}
				4953
				4954	if (this_name_len > name_len) {
				4955	char *new_name;
				4956
				4957	new_name = krealloc(name, this_name_len, GFP_NOFS);
				4958	if (!new_name) {
				4959	ret = -ENOMEM;
				4960	goto out;
				4961	}
				4962	name_len = this_name_len;
				4963	name = new_name;
				4964	}
				4965
				4966	read_extent_buffer(eb, name, name_ptr, this_name_len);
Nikolay Borisov	4791c8f	2017-01-18 00:31:35 +0200	[diff] [blame]	4967	di = btrfs_lookup_dir_item(NULL, inode->root, search_path,
				4968	parent, name, this_name_len, 0);
Filipe Manana	56f23fd	2016-03-30 23:37:21 +0100	[diff] [blame]	4969	if (di && !IS_ERR(di)) {
Filipe Manana	44f714d	2016-06-06 16:11:13 +0100	[diff] [blame]	4970	struct btrfs_key di_key;
				4971
				4972	btrfs_dir_item_key_to_cpu(search_path->nodes[0],
				4973	di, &di_key);
				4974	if (di_key.type == BTRFS_INODE_ITEM_KEY) {
Filipe Manana	6b5fc43	2019-02-13 12:14:03 +0000	[diff] [blame]	4975	if (di_key.objectid != key->objectid) {
				4976	ret = 1;
				4977	*other_ino = di_key.objectid;
Filipe Manana	a3baaf0	2019-02-13 12:14:09 +0000	[diff] [blame]	4978	*other_parent = parent;
Filipe Manana	6b5fc43	2019-02-13 12:14:03 +0000	[diff] [blame]	4979	} else {
				4980	ret = 0;
				4981	}
Filipe Manana	44f714d	2016-06-06 16:11:13 +0100	[diff] [blame]	4982	} else {
				4983	ret = -EAGAIN;
				4984	}
Filipe Manana	56f23fd	2016-03-30 23:37:21 +0100	[diff] [blame]	4985	goto out;
				4986	} else if (IS_ERR(di)) {
				4987	ret = PTR_ERR(di);
				4988	goto out;
				4989	}
				4990	btrfs_release_path(search_path);
				4991
				4992	cur_offset += this_len;
				4993	}
				4994	ret = 0;
				4995	out:
				4996	btrfs_free_path(search_path);
				4997	kfree(name);
				4998	return ret;
				4999	}
				5000
Filipe Manana	6b5fc43	2019-02-13 12:14:03 +0000	[diff] [blame]	5001	struct btrfs_ino_list {
				5002	u64 ino;
Filipe Manana	a3baaf0	2019-02-13 12:14:09 +0000	[diff] [blame]	5003	u64 parent;
Filipe Manana	6b5fc43	2019-02-13 12:14:03 +0000	[diff] [blame]	5004	struct list_head list;
				5005	};
				5006
				5007	static int log_conflicting_inodes(struct btrfs_trans_handle *trans,
				5008	struct btrfs_root *root,
				5009	struct btrfs_path *path,
				5010	struct btrfs_log_ctx *ctx,
Filipe Manana	a3baaf0	2019-02-13 12:14:09 +0000	[diff] [blame]	5011	u64 ino, u64 parent)
Filipe Manana	6b5fc43	2019-02-13 12:14:03 +0000	[diff] [blame]	5012	{
				5013	struct btrfs_ino_list *ino_elem;
				5014	LIST_HEAD(inode_list);
				5015	int ret = 0;
				5016
				5017	ino_elem = kmalloc(sizeof(*ino_elem), GFP_NOFS);
				5018	if (!ino_elem)
				5019	return -ENOMEM;
				5020	ino_elem->ino = ino;
Filipe Manana	a3baaf0	2019-02-13 12:14:09 +0000	[diff] [blame]	5021	ino_elem->parent = parent;
Filipe Manana	6b5fc43	2019-02-13 12:14:03 +0000	[diff] [blame]	5022	list_add_tail(&ino_elem->list, &inode_list);
				5023
				5024	while (!list_empty(&inode_list)) {
				5025	struct btrfs_fs_info *fs_info = root->fs_info;
				5026	struct btrfs_key key;
				5027	struct inode *inode;
				5028
				5029	ino_elem = list_first_entry(&inode_list, struct btrfs_ino_list,
				5030	list);
				5031	ino = ino_elem->ino;
Filipe Manana	a3baaf0	2019-02-13 12:14:09 +0000	[diff] [blame]	5032	parent = ino_elem->parent;
Filipe Manana	6b5fc43	2019-02-13 12:14:03 +0000	[diff] [blame]	5033	list_del(&ino_elem->list);
				5034	kfree(ino_elem);
				5035	if (ret)
				5036	continue;
				5037
				5038	btrfs_release_path(path);
				5039
David Sterba	0202e83	2020-05-15 19:35:59 +0200	[diff] [blame]	5040	inode = btrfs_iget(fs_info->sb, ino, root);
Filipe Manana	6b5fc43	2019-02-13 12:14:03 +0000	[diff] [blame]	5041	/*
				5042	* If the other inode that had a conflicting dir entry was
Filipe Manana	a3baaf0	2019-02-13 12:14:09 +0000	[diff] [blame]	5043	* deleted in the current transaction, we need to log its parent
				5044	* directory.
Filipe Manana	6b5fc43	2019-02-13 12:14:03 +0000	[diff] [blame]	5045	*/
				5046	if (IS_ERR(inode)) {
				5047	ret = PTR_ERR(inode);
Filipe Manana	a3baaf0	2019-02-13 12:14:09 +0000	[diff] [blame]	5048	if (ret == -ENOENT) {
David Sterba	0202e83	2020-05-15 19:35:59 +0200	[diff] [blame]	5049	inode = btrfs_iget(fs_info->sb, parent, root);
Filipe Manana	a3baaf0	2019-02-13 12:14:09 +0000	[diff] [blame]	5050	if (IS_ERR(inode)) {
				5051	ret = PTR_ERR(inode);
				5052	} else {
				5053	ret = btrfs_log_inode(trans, root,
				5054	BTRFS_I(inode),
				5055	LOG_OTHER_INODE_ALL,
Filipe Manana	4877817	2020-08-11 12:43:58 +0100	[diff] [blame]	5056	ctx);
Filipe Manana	410f954	2019-09-10 15:26:49 +0100	[diff] [blame]	5057	btrfs_add_delayed_iput(inode);
Filipe Manana	a3baaf0	2019-02-13 12:14:09 +0000	[diff] [blame]	5058	}
				5059	}
Filipe Manana	6b5fc43	2019-02-13 12:14:03 +0000	[diff] [blame]	5060	continue;
				5061	}
				5062	/*
Filipe Manana	b5e4ff9	2020-01-15 13:21:35 +0000	[diff] [blame]	5063	* If the inode was already logged skip it - otherwise we can
				5064	* hit an infinite loop. Example:
				5065	*
				5066	* From the commit root (previous transaction) we have the
				5067	* following inodes:
				5068	*
				5069	* inode 257 a directory
				5070	* inode 258 with references "zz" and "zz_link" on inode 257
				5071	* inode 259 with reference "a" on inode 257
				5072	*
				5073	* And in the current (uncommitted) transaction we have:
				5074	*
				5075	* inode 257 a directory, unchanged
				5076	* inode 258 with references "a" and "a2" on inode 257
				5077	* inode 259 with reference "zz_link" on inode 257
				5078	* inode 261 with reference "zz" on inode 257
				5079	*
				5080	* When logging inode 261 the following infinite loop could
				5081	* happen if we don't skip already logged inodes:
				5082	*
				5083	* - we detect inode 258 as a conflicting inode, with inode 261
				5084	* on reference "zz", and log it;
				5085	*
				5086	* - we detect inode 259 as a conflicting inode, with inode 258
				5087	* on reference "a", and log it;
				5088	*
				5089	* - we detect inode 258 as a conflicting inode, with inode 259
				5090	* on reference "zz_link", and log it - again! After this we
				5091	* repeat the above steps forever.
				5092	*/
				5093	spin_lock(&BTRFS_I(inode)->lock);
				5094	/*
				5095	* Check the inode's logged_trans only instead of
				5096	* btrfs_inode_in_log(). This is because the last_log_commit of
Filipe Manana	1f29537	2021-07-29 15:30:21 +0100	[diff] [blame]	5097	* the inode is not updated when we only log that it exists (see
				5098	* btrfs_log_inode()).
Filipe Manana	b5e4ff9	2020-01-15 13:21:35 +0000	[diff] [blame]	5099	*/
				5100	if (BTRFS_I(inode)->logged_trans == trans->transid) {
				5101	spin_unlock(&BTRFS_I(inode)->lock);
				5102	btrfs_add_delayed_iput(inode);
				5103	continue;
				5104	}
				5105	spin_unlock(&BTRFS_I(inode)->lock);
				5106	/*
Filipe Manana	6b5fc43	2019-02-13 12:14:03 +0000	[diff] [blame]	5107	* We are safe logging the other inode without acquiring its
				5108	* lock as long as we log with the LOG_INODE_EXISTS mode. We
				5109	* are safe against concurrent renames of the other inode as
				5110	* well because during a rename we pin the log and update the
				5111	* log with the new name before we unpin it.
				5112	*/
				5113	ret = btrfs_log_inode(trans, root, BTRFS_I(inode),
Filipe Manana	4877817	2020-08-11 12:43:58 +0100	[diff] [blame]	5114	LOG_OTHER_INODE, ctx);
Filipe Manana	6b5fc43	2019-02-13 12:14:03 +0000	[diff] [blame]	5115	if (ret) {
Filipe Manana	410f954	2019-09-10 15:26:49 +0100	[diff] [blame]	5116	btrfs_add_delayed_iput(inode);
Filipe Manana	6b5fc43	2019-02-13 12:14:03 +0000	[diff] [blame]	5117	continue;
				5118	}
				5119
				5120	key.objectid = ino;
				5121	key.type = BTRFS_INODE_REF_KEY;
				5122	key.offset = 0;
				5123	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
				5124	if (ret < 0) {
Filipe Manana	410f954	2019-09-10 15:26:49 +0100	[diff] [blame]	5125	btrfs_add_delayed_iput(inode);
Filipe Manana	6b5fc43	2019-02-13 12:14:03 +0000	[diff] [blame]	5126	continue;
				5127	}
				5128
				5129	while (true) {
				5130	struct extent_buffer *leaf = path->nodes[0];
				5131	int slot = path->slots[0];
				5132	u64 other_ino = 0;
Filipe Manana	a3baaf0	2019-02-13 12:14:09 +0000	[diff] [blame]	5133	u64 other_parent = 0;
Filipe Manana	6b5fc43	2019-02-13 12:14:03 +0000	[diff] [blame]	5134
				5135	if (slot >= btrfs_header_nritems(leaf)) {
				5136	ret = btrfs_next_leaf(root, path);
				5137	if (ret < 0) {
				5138	break;
				5139	} else if (ret > 0) {
				5140	ret = 0;
				5141	break;
				5142	}
				5143	continue;
				5144	}
				5145
				5146	btrfs_item_key_to_cpu(leaf, &key, slot);
				5147	if (key.objectid != ino \|\|
				5148	(key.type != BTRFS_INODE_REF_KEY &&
				5149	key.type != BTRFS_INODE_EXTREF_KEY)) {
				5150	ret = 0;
				5151	break;
				5152	}
				5153
				5154	ret = btrfs_check_ref_name_override(leaf, slot, &key,
Filipe Manana	a3baaf0	2019-02-13 12:14:09 +0000	[diff] [blame]	5155	BTRFS_I(inode), &other_ino,
				5156	&other_parent);
Filipe Manana	6b5fc43	2019-02-13 12:14:03 +0000	[diff] [blame]	5157	if (ret < 0)
				5158	break;
				5159	if (ret > 0) {
				5160	ino_elem = kmalloc(sizeof(*ino_elem), GFP_NOFS);
				5161	if (!ino_elem) {
				5162	ret = -ENOMEM;
				5163	break;
				5164	}
				5165	ino_elem->ino = other_ino;
Filipe Manana	a3baaf0	2019-02-13 12:14:09 +0000	[diff] [blame]	5166	ino_elem->parent = other_parent;
Filipe Manana	6b5fc43	2019-02-13 12:14:03 +0000	[diff] [blame]	5167	list_add_tail(&ino_elem->list, &inode_list);
				5168	ret = 0;
				5169	}
				5170	path->slots[0]++;
				5171	}
Filipe Manana	410f954	2019-09-10 15:26:49 +0100	[diff] [blame]	5172	btrfs_add_delayed_iput(inode);
Filipe Manana	6b5fc43	2019-02-13 12:14:03 +0000	[diff] [blame]	5173	}
				5174
				5175	return ret;
				5176	}
				5177
Filipe Manana	da44700	2020-03-09 12:41:07 +0000	[diff] [blame]	5178	static int copy_inode_items_to_log(struct btrfs_trans_handle *trans,
				5179	struct btrfs_inode *inode,
				5180	struct btrfs_key *min_key,
				5181	const struct btrfs_key *max_key,
				5182	struct btrfs_path *path,
				5183	struct btrfs_path *dst_path,
				5184	const u64 logged_isize,
				5185	const bool recursive_logging,
				5186	const int inode_only,
				5187	struct btrfs_log_ctx *ctx,
				5188	bool *need_log_inode_item)
				5189	{
				5190	struct btrfs_root *root = inode->root;
				5191	int ins_start_slot = 0;
				5192	int ins_nr = 0;
				5193	int ret;
				5194
				5195	while (1) {
				5196	ret = btrfs_search_forward(root, min_key, path, trans->transid);
				5197	if (ret < 0)
				5198	return ret;
				5199	if (ret > 0) {
				5200	ret = 0;
				5201	break;
				5202	}
				5203	again:
				5204	/* Note, ins_nr might be > 0 here, cleanup outside the loop */
				5205	if (min_key->objectid != max_key->objectid)
				5206	break;
				5207	if (min_key->type > max_key->type)
				5208	break;
				5209
				5210	if (min_key->type == BTRFS_INODE_ITEM_KEY)
				5211	*need_log_inode_item = false;
				5212
				5213	if ((min_key->type == BTRFS_INODE_REF_KEY \|\|
				5214	min_key->type == BTRFS_INODE_EXTREF_KEY) &&
				5215	inode->generation == trans->transid &&
				5216	!recursive_logging) {
				5217	u64 other_ino = 0;
				5218	u64 other_parent = 0;
				5219
				5220	ret = btrfs_check_ref_name_override(path->nodes[0],
				5221	path->slots[0], min_key, inode,
				5222	&other_ino, &other_parent);
				5223	if (ret < 0) {
				5224	return ret;
				5225	} else if (ret > 0 && ctx &&
				5226	other_ino != btrfs_ino(BTRFS_I(ctx->inode))) {
				5227	if (ins_nr > 0) {
				5228	ins_nr++;
				5229	} else {
				5230	ins_nr = 1;
				5231	ins_start_slot = path->slots[0];
				5232	}
				5233	ret = copy_items(trans, inode, dst_path, path,
				5234	ins_start_slot, ins_nr,
				5235	inode_only, logged_isize);
				5236	if (ret < 0)
				5237	return ret;
				5238	ins_nr = 0;
				5239
				5240	ret = log_conflicting_inodes(trans, root, path,
				5241	ctx, other_ino, other_parent);
				5242	if (ret)
				5243	return ret;
				5244	btrfs_release_path(path);
				5245	goto next_key;
				5246	}
				5247	}
				5248
				5249	/* Skip xattrs, we log them later with btrfs_log_all_xattrs() */
				5250	if (min_key->type == BTRFS_XATTR_ITEM_KEY) {
				5251	if (ins_nr == 0)
				5252	goto next_slot;
				5253	ret = copy_items(trans, inode, dst_path, path,
				5254	ins_start_slot,
				5255	ins_nr, inode_only, logged_isize);
				5256	if (ret < 0)
				5257	return ret;
				5258	ins_nr = 0;
				5259	goto next_slot;
				5260	}
				5261
				5262	if (ins_nr && ins_start_slot + ins_nr == path->slots[0]) {
				5263	ins_nr++;
				5264	goto next_slot;
				5265	} else if (!ins_nr) {
				5266	ins_start_slot = path->slots[0];
				5267	ins_nr = 1;
				5268	goto next_slot;
				5269	}
				5270
				5271	ret = copy_items(trans, inode, dst_path, path, ins_start_slot,
				5272	ins_nr, inode_only, logged_isize);
				5273	if (ret < 0)
				5274	return ret;
				5275	ins_nr = 1;
				5276	ins_start_slot = path->slots[0];
				5277	next_slot:
				5278	path->slots[0]++;
				5279	if (path->slots[0] < btrfs_header_nritems(path->nodes[0])) {
				5280	btrfs_item_key_to_cpu(path->nodes[0], min_key,
				5281	path->slots[0]);
				5282	goto again;
				5283	}
				5284	if (ins_nr) {
				5285	ret = copy_items(trans, inode, dst_path, path,
				5286	ins_start_slot, ins_nr, inode_only,
				5287	logged_isize);
				5288	if (ret < 0)
				5289	return ret;
				5290	ins_nr = 0;
				5291	}
				5292	btrfs_release_path(path);
				5293	next_key:
				5294	if (min_key->offset < (u64)-1) {
				5295	min_key->offset++;
				5296	} else if (min_key->type < max_key->type) {
				5297	min_key->type++;
				5298	min_key->offset = 0;
				5299	} else {
				5300	break;
				5301	}
				5302	}
				5303	if (ins_nr)
				5304	ret = copy_items(trans, inode, dst_path, path, ins_start_slot,
				5305	ins_nr, inode_only, logged_isize);
				5306
				5307	return ret;
				5308	}
				5309
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	5310	/* log a single inode in the tree log.
				5311	* At least one parent directory for this inode must exist in the tree
				5312	* or be logged already.
				5313	*
				5314	* Any items from this inode changed by the current transaction are copied
				5315	* to the log tree. An extra reference is taken on any extents in this
				5316	* file, allowing us to avoid a whole pile of corner cases around logging
				5317	* blocks that have been removed from the tree.
				5318	*
				5319	* See LOG_INODE_ALL and related defines for a description of what inode_only
				5320	* does.
				5321	*
				5322	* This handles both files and directories.
				5323	*/
Chris Mason	12fcfd2	2009-03-24 10:24:20 -0400	[diff] [blame]	5324	static int btrfs_log_inode(struct btrfs_trans_handle *trans,
Nikolay Borisov	a59108a	2017-01-18 00:31:48 +0200	[diff] [blame]	5325	struct btrfs_root root, struct btrfs_inode inode,
Filipe Manana	49dae1b	2014-09-06 22:34:39 +0100	[diff] [blame]	5326	int inode_only,
Filipe Manana	8407f55	2014-09-05 15:14:39 +0100	[diff] [blame]	5327	struct btrfs_log_ctx *ctx)
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	5328	{
				5329	struct btrfs_path *path;
				5330	struct btrfs_path *dst_path;
				5331	struct btrfs_key min_key;
				5332	struct btrfs_key max_key;
				5333	struct btrfs_root *log = root->log_root;
Yan, Zheng	4a500fd	2010-05-16 10:49:59 -0400	[diff] [blame]	5334	int err = 0;
Filipe Manana	8c8648d	2020-07-02 12:31:59 +0100	[diff] [blame]	5335	int ret = 0;
Josef Bacik	5dc562c	2012-08-17 13:14:17 -0400	[diff] [blame]	5336	bool fast_search = false;
Nikolay Borisov	a59108a	2017-01-18 00:31:48 +0200	[diff] [blame]	5337	u64 ino = btrfs_ino(inode);
				5338	struct extent_map_tree *em_tree = &inode->extent_tree;
Filipe Manana	1a4bcf4	2015-02-13 12:30:56 +0000	[diff] [blame]	5339	u64 logged_isize = 0;
Filipe Manana	e4545de	2015-06-17 12:49:23 +0100	[diff] [blame]	5340	bool need_log_inode_item = true;
Filipe Manana	9a8fca6	2018-05-11 16:42:42 +0100	[diff] [blame]	5341	bool xattrs_logged = false;
Filipe Manana	a3baaf0	2019-02-13 12:14:09 +0000	[diff] [blame]	5342	bool recursive_logging = false;
Filipe Manana	2ac691d	2021-07-20 16:03:43 +0100	[diff] [blame]	5343	bool inode_item_dropped = true;
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	5344
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	5345	path = btrfs_alloc_path();
Tsutomu Itoh	5df6708	2011-02-01 09:17:35 +0000	[diff] [blame]	5346	if (!path)
				5347	return -ENOMEM;
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	5348	dst_path = btrfs_alloc_path();
Tsutomu Itoh	5df6708	2011-02-01 09:17:35 +0000	[diff] [blame]	5349	if (!dst_path) {
				5350	btrfs_free_path(path);
				5351	return -ENOMEM;
				5352	}
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	5353
Li Zefan	33345d01	2011-04-20 10:31:50 +0800	[diff] [blame]	5354	min_key.objectid = ino;
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	5355	min_key.type = BTRFS_INODE_ITEM_KEY;
				5356	min_key.offset = 0;
				5357
Li Zefan	33345d01	2011-04-20 10:31:50 +0800	[diff] [blame]	5358	max_key.objectid = ino;
Chris Mason	12fcfd2	2009-03-24 10:24:20 -0400	[diff] [blame]	5359
Chris Mason	12fcfd2	2009-03-24 10:24:20 -0400	[diff] [blame]	5360
Josef Bacik	5dc562c	2012-08-17 13:14:17 -0400	[diff] [blame]	5361	/* today the code can only do partial logging of directories */
Nikolay Borisov	a59108a	2017-01-18 00:31:48 +0200	[diff] [blame]	5362	if (S_ISDIR(inode->vfs_inode.i_mode) \|\|
Miao Xie	5269b67	2012-11-01 07:35:23 +0000	[diff] [blame]	5363	(!test_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
Nikolay Borisov	a59108a	2017-01-18 00:31:48 +0200	[diff] [blame]	5364	&inode->runtime_flags) &&
Liu Bo	781feef	2016-11-30 16:20:25 -0800	[diff] [blame]	5365	inode_only >= LOG_INODE_EXISTS))
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	5366	max_key.type = BTRFS_XATTR_ITEM_KEY;
				5367	else
				5368	max_key.type = (u8)-1;
				5369	max_key.offset = (u64)-1;
				5370
Filipe Manana	2c2c452	2015-01-13 16:40:04 +0000	[diff] [blame]	5371	/*
Filipe Manana	5aa7d1a	2020-07-02 12:32:20 +0100	[diff] [blame]	5372	* Only run delayed items if we are a directory. We want to make sure
				5373	* all directory indexes hit the fs/subvolume tree so we can find them
				5374	* and figure out which index ranges have to be logged.
				5375	*
Filipe Manana	8c8648d	2020-07-02 12:31:59 +0100	[diff] [blame]	5376	* Otherwise commit the delayed inode only if the full sync flag is set,
				5377	* as we want to make sure an up to date version is in the subvolume
				5378	* tree so copy_inode_items_to_log() / copy_items() can find it and copy
				5379	* it to the log tree. For a non full sync, we always log the inode item
				5380	* based on the in-memory struct btrfs_inode which is always up to date.
Filipe Manana	2c2c452	2015-01-13 16:40:04 +0000	[diff] [blame]	5381	*/
Filipe Manana	5aa7d1a	2020-07-02 12:32:20 +0100	[diff] [blame]	5382	if (S_ISDIR(inode->vfs_inode.i_mode))
Nikolay Borisov	a59108a	2017-01-18 00:31:48 +0200	[diff] [blame]	5383	ret = btrfs_commit_inode_delayed_items(trans, inode);
Filipe Manana	8c8648d	2020-07-02 12:31:59 +0100	[diff] [blame]	5384	else if (test_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &inode->runtime_flags))
Nikolay Borisov	a59108a	2017-01-18 00:31:48 +0200	[diff] [blame]	5385	ret = btrfs_commit_inode_delayed_inode(inode);
Filipe Manana	2c2c452	2015-01-13 16:40:04 +0000	[diff] [blame]	5386
				5387	if (ret) {
				5388	btrfs_free_path(path);
				5389	btrfs_free_path(dst_path);
				5390	return ret;
Miao Xie	16cdcec	2011-04-22 18:12:22 +0800	[diff] [blame]	5391	}
				5392
Filipe Manana	a3baaf0	2019-02-13 12:14:09 +0000	[diff] [blame]	5393	if (inode_only == LOG_OTHER_INODE \|\| inode_only == LOG_OTHER_INODE_ALL) {
				5394	recursive_logging = true;
				5395	if (inode_only == LOG_OTHER_INODE)
				5396	inode_only = LOG_INODE_EXISTS;
				5397	else
				5398	inode_only = LOG_INODE_ALL;
Nikolay Borisov	a59108a	2017-01-18 00:31:48 +0200	[diff] [blame]	5399	mutex_lock_nested(&inode->log_mutex, SINGLE_DEPTH_NESTING);
Liu Bo	781feef	2016-11-30 16:20:25 -0800	[diff] [blame]	5400	} else {
Nikolay Borisov	a59108a	2017-01-18 00:31:48 +0200	[diff] [blame]	5401	mutex_lock(&inode->log_mutex);
Liu Bo	781feef	2016-11-30 16:20:25 -0800	[diff] [blame]	5402	}
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	5403
Filipe Manana	5e33a2b	2016-02-25 23:19:38 +0000	[diff] [blame]	5404	/*
Filipe Manana	64d6b28	2021-01-27 10:34:59 +0000	[diff] [blame]	5405	* This is for cases where logging a directory could result in losing a
				5406	* a file after replaying the log. For example, if we move a file from a
				5407	* directory A to a directory B, then fsync directory A, we have no way
				5408	* to known the file was moved from A to B, so logging just A would
				5409	* result in losing the file after a log replay.
				5410	*/
				5411	if (S_ISDIR(inode->vfs_inode.i_mode) &&
				5412	inode_only == LOG_INODE_ALL &&
				5413	inode->last_unlink_trans >= trans->transid) {
				5414	btrfs_set_log_full_commit(trans);
				5415	err = 1;
				5416	goto out_unlock;
				5417	}
				5418
				5419	/*
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	5420	* a brute force approach to making sure we get the most uptodate
				5421	* copies of everything.
				5422	*/
Nikolay Borisov	a59108a	2017-01-18 00:31:48 +0200	[diff] [blame]	5423	if (S_ISDIR(inode->vfs_inode.i_mode)) {
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	5424	int max_key_type = BTRFS_DIR_LOG_INDEX_KEY;
				5425
Filipe Manana	ab12313	2021-01-27 10:34:56 +0000	[diff] [blame]	5426	clear_bit(BTRFS_INODE_COPY_EVERYTHING, &inode->runtime_flags);
Filipe Manana	4f764e5	2015-02-23 19:53:35 +0000	[diff] [blame]	5427	if (inode_only == LOG_INODE_EXISTS)
				5428	max_key_type = BTRFS_XATTR_ITEM_KEY;
Li Zefan	33345d01	2011-04-20 10:31:50 +0800	[diff] [blame]	5429	ret = drop_objectid_items(trans, log, path, ino, max_key_type);
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	5430	} else {
Filipe Manana	1a4bcf4	2015-02-13 12:30:56 +0000	[diff] [blame]	5431	if (inode_only == LOG_INODE_EXISTS) {
				5432	/*
				5433	* Make sure the new inode item we write to the log has
				5434	* the same isize as the current one (if it exists).
				5435	* This is necessary to prevent data loss after log
				5436	* replay, and also to prevent doing a wrong expanding
				5437	* truncate - for e.g. create file, write 4K into offset
				5438	* 0, fsync, write 4K into offset 4096, add hard link,
				5439	* fsync some other file (to sync log), power fail - if
				5440	* we use the inode's current i_size, after log replay
				5441	* we get a 8Kb file, with the last 4Kb extent as a hole
				5442	* (zeroes), as if an expanding truncate happened,
				5443	* instead of getting a file of 4Kb only.
				5444	*/
Nikolay Borisov	a59108a	2017-01-18 00:31:48 +0200	[diff] [blame]	5445	err = logged_inode_size(log, inode, path, &logged_isize);
Filipe Manana	1a4bcf4	2015-02-13 12:30:56 +0000	[diff] [blame]	5446	if (err)
				5447	goto out_unlock;
				5448	}
Filipe Manana	a742994	2015-02-13 16:56:14 +0000	[diff] [blame]	5449	if (test_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
Nikolay Borisov	a59108a	2017-01-18 00:31:48 +0200	[diff] [blame]	5450	&inode->runtime_flags)) {
Filipe Manana	a742994	2015-02-13 16:56:14 +0000	[diff] [blame]	5451	if (inode_only == LOG_INODE_EXISTS) {
Filipe Manana	4f764e5	2015-02-23 19:53:35 +0000	[diff] [blame]	5452	max_key.type = BTRFS_XATTR_ITEM_KEY;
Filipe Manana	a742994	2015-02-13 16:56:14 +0000	[diff] [blame]	5453	ret = drop_objectid_items(trans, log, path, ino,
				5454	max_key.type);
				5455	} else {
				5456	clear_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
Nikolay Borisov	a59108a	2017-01-18 00:31:48 +0200	[diff] [blame]	5457	&inode->runtime_flags);
Filipe Manana	a742994	2015-02-13 16:56:14 +0000	[diff] [blame]	5458	clear_bit(BTRFS_INODE_COPY_EVERYTHING,
Nikolay Borisov	a59108a	2017-01-18 00:31:48 +0200	[diff] [blame]	5459	&inode->runtime_flags);
Chris Mason	28ed134	2014-12-17 09:41:04 -0800	[diff] [blame]	5460	while(1) {
				5461	ret = btrfs_truncate_inode_items(trans,
Filipe Manana	0d7d316	2021-05-24 11:35:55 +0100	[diff] [blame]	5462	log, inode, 0, 0, NULL);
Chris Mason	28ed134	2014-12-17 09:41:04 -0800	[diff] [blame]	5463	if (ret != -EAGAIN)
				5464	break;
				5465	}
Filipe Manana	a742994	2015-02-13 16:56:14 +0000	[diff] [blame]	5466	}
Filipe Manana	4f764e5	2015-02-23 19:53:35 +0000	[diff] [blame]	5467	} else if (test_and_clear_bit(BTRFS_INODE_COPY_EVERYTHING,
Nikolay Borisov	a59108a	2017-01-18 00:31:48 +0200	[diff] [blame]	5468	&inode->runtime_flags) \|\|
Josef Bacik	6cfab85	2013-11-12 16:25:58 -0500	[diff] [blame]	5469	inode_only == LOG_INODE_EXISTS) {
Filipe Manana	4f764e5	2015-02-23 19:53:35 +0000	[diff] [blame]	5470	if (inode_only == LOG_INODE_ALL)
Josef Bacik	a95249b	2012-10-11 16:17:34 -0400	[diff] [blame]	5471	fast_search = true;
Filipe Manana	4f764e5	2015-02-23 19:53:35 +0000	[diff] [blame]	5472	max_key.type = BTRFS_XATTR_ITEM_KEY;
Josef Bacik	a95249b	2012-10-11 16:17:34 -0400	[diff] [blame]	5473	ret = drop_objectid_items(trans, log, path, ino,
				5474	max_key.type);
Josef Bacik	5dc562c	2012-08-17 13:14:17 -0400	[diff] [blame]	5475	} else {
Liu Bo	183f37f	2012-11-01 06:38:47 +0000	[diff] [blame]	5476	if (inode_only == LOG_INODE_ALL)
				5477	fast_search = true;
Filipe Manana	2ac691d	2021-07-20 16:03:43 +0100	[diff] [blame]	5478	inode_item_dropped = false;
Josef Bacik	a95249b	2012-10-11 16:17:34 -0400	[diff] [blame]	5479	goto log_extents;
Josef Bacik	5dc562c	2012-08-17 13:14:17 -0400	[diff] [blame]	5480	}
Josef Bacik	a95249b	2012-10-11 16:17:34 -0400	[diff] [blame]	5481
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	5482	}
Yan, Zheng	4a500fd	2010-05-16 10:49:59 -0400	[diff] [blame]	5483	if (ret) {
				5484	err = ret;
				5485	goto out_unlock;
				5486	}
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	5487
Filipe Manana	da44700	2020-03-09 12:41:07 +0000	[diff] [blame]	5488	err = copy_inode_items_to_log(trans, inode, &min_key, &max_key,
				5489	path, dst_path, logged_isize,
Filipe Manana	7af5974	2020-04-07 11:37:44 +0100	[diff] [blame]	5490	recursive_logging, inode_only, ctx,
				5491	&need_log_inode_item);
Filipe Manana	da44700	2020-03-09 12:41:07 +0000	[diff] [blame]	5492	if (err)
				5493	goto out_unlock;
Josef Bacik	5dc562c	2012-08-17 13:14:17 -0400	[diff] [blame]	5494
Filipe Manana	36283bf	2015-06-20 00:44:51 +0100	[diff] [blame]	5495	btrfs_release_path(path);
				5496	btrfs_release_path(dst_path);
Nikolay Borisov	a59108a	2017-01-18 00:31:48 +0200	[diff] [blame]	5497	err = btrfs_log_all_xattrs(trans, root, inode, path, dst_path);
Filipe Manana	36283bf	2015-06-20 00:44:51 +0100	[diff] [blame]	5498	if (err)
				5499	goto out_unlock;
Filipe Manana	9a8fca6	2018-05-11 16:42:42 +0100	[diff] [blame]	5500	xattrs_logged = true;
Filipe Manana	a89ca6f	2015-06-25 04:17:46 +0100	[diff] [blame]	5501	if (max_key.type >= BTRFS_EXTENT_DATA_KEY && !fast_search) {
				5502	btrfs_release_path(path);
				5503	btrfs_release_path(dst_path);
Filipe Manana	7af5974	2020-04-07 11:37:44 +0100	[diff] [blame]	5504	err = btrfs_log_holes(trans, root, inode, path);
Filipe Manana	a89ca6f	2015-06-25 04:17:46 +0100	[diff] [blame]	5505	if (err)
				5506	goto out_unlock;
				5507	}
Josef Bacik	a95249b	2012-10-11 16:17:34 -0400	[diff] [blame]	5508	log_extents:
Josef Bacik	f3b15cc	2013-07-22 12:54:30 -0400	[diff] [blame]	5509	btrfs_release_path(path);
				5510	btrfs_release_path(dst_path);
Filipe Manana	e4545de	2015-06-17 12:49:23 +0100	[diff] [blame]	5511	if (need_log_inode_item) {
Filipe Manana	2ac691d	2021-07-20 16:03:43 +0100	[diff] [blame]	5512	err = log_inode_item(trans, log, dst_path, inode, inode_item_dropped);
Filipe Manana	e4545de	2015-06-17 12:49:23 +0100	[diff] [blame]	5513	if (err)
				5514	goto out_unlock;
Filipe Manana	b590b83	2021-05-28 11:37:32 +0100	[diff] [blame]	5515	/*
				5516	* If we are doing a fast fsync and the inode was logged before
				5517	* in this transaction, we don't need to log the xattrs because
				5518	* they were logged before. If xattrs were added, changed or
				5519	* deleted since the last time we logged the inode, then we have
				5520	* already logged them because the inode had the runtime flag
				5521	* BTRFS_INODE_COPY_EVERYTHING set.
				5522	*/
				5523	if (!xattrs_logged && inode->logged_trans < trans->transid) {
				5524	err = btrfs_log_all_xattrs(trans, root, inode, path,
				5525	dst_path);
				5526	if (err)
				5527	goto out_unlock;
				5528	btrfs_release_path(path);
				5529	}
Filipe Manana	e4545de	2015-06-17 12:49:23 +0100	[diff] [blame]	5530	}
Josef Bacik	5dc562c	2012-08-17 13:14:17 -0400	[diff] [blame]	5531	if (fast_search) {
Nikolay Borisov	a59108a	2017-01-18 00:31:48 +0200	[diff] [blame]	5532	ret = btrfs_log_changed_extents(trans, root, inode, dst_path,
Filipe Manana	4877817	2020-08-11 12:43:58 +0100	[diff] [blame]	5533	ctx);
Josef Bacik	5dc562c	2012-08-17 13:14:17 -0400	[diff] [blame]	5534	if (ret) {
				5535	err = ret;
				5536	goto out_unlock;
				5537	}
Josef Bacik	d006a04	2013-11-12 20:54:09 -0500	[diff] [blame]	5538	} else if (inode_only == LOG_INODE_ALL) {
Liu Bo	06d3d22	2012-08-27 10:52:19 -0600	[diff] [blame]	5539	struct extent_map em, n;
				5540
Filipe Manana	49dae1b	2014-09-06 22:34:39 +0100	[diff] [blame]	5541	write_lock(&em_tree->lock);
Filipe Manana	4877817	2020-08-11 12:43:58 +0100	[diff] [blame]	5542	list_for_each_entry_safe(em, n, &em_tree->modified_extents, list)
				5543	list_del_init(&em->list);
Filipe Manana	49dae1b	2014-09-06 22:34:39 +0100	[diff] [blame]	5544	write_unlock(&em_tree->lock);
Josef Bacik	5dc562c	2012-08-17 13:14:17 -0400	[diff] [blame]	5545	}
				5546
Nikolay Borisov	a59108a	2017-01-18 00:31:48 +0200	[diff] [blame]	5547	if (inode_only == LOG_INODE_ALL && S_ISDIR(inode->vfs_inode.i_mode)) {
				5548	ret = log_directory_changes(trans, root, inode, path, dst_path,
				5549	ctx);
Yan, Zheng	4a500fd	2010-05-16 10:49:59 -0400	[diff] [blame]	5550	if (ret) {
				5551	err = ret;
				5552	goto out_unlock;
				5553	}
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	5554	}
Filipe Manana	49dae1b	2014-09-06 22:34:39 +0100	[diff] [blame]	5555
Filipe Manana	d1d832a	2019-06-07 11:25:24 +0100	[diff] [blame]	5556	/*
Filipe Manana	75b463d	2020-08-11 12:43:48 +0100	[diff] [blame]	5557	* If we are logging that an ancestor inode exists as part of logging a
				5558	* new name from a link or rename operation, don't mark the inode as
				5559	* logged - otherwise if an explicit fsync is made against an ancestor,
				5560	* the fsync considers the inode in the log and doesn't sync the log,
				5561	* resulting in the ancestor missing after a power failure unless the
				5562	* log was synced as part of an fsync against any other unrelated inode.
				5563	* So keep it simple for this case and just don't flag the ancestors as
				5564	* logged.
Filipe Manana	d1d832a	2019-06-07 11:25:24 +0100	[diff] [blame]	5565	*/
Filipe Manana	75b463d	2020-08-11 12:43:48 +0100	[diff] [blame]	5566	if (!ctx \|\|
				5567	!(S_ISDIR(inode->vfs_inode.i_mode) && ctx->logging_new_name &&
				5568	&inode->vfs_inode != ctx->inode)) {
				5569	spin_lock(&inode->lock);
				5570	inode->logged_trans = trans->transid;
				5571	/*
Filipe Manana	9acc8103	2021-07-06 15:41:15 +0100	[diff] [blame]	5572	* Don't update last_log_commit if we logged that an inode exists.
				5573	* We do this for two reasons:
				5574	*
				5575	* 1) We might have had buffered writes to this inode that were
				5576	* flushed and had their ordered extents completed in this
				5577	* transaction, but we did not previously log the inode with
				5578	* LOG_INODE_ALL. Later the inode was evicted and after that
				5579	* it was loaded again and this LOG_INODE_EXISTS log operation
				5580	* happened. We must make sure that if an explicit fsync against
				5581	* the inode is performed later, it logs the new extents, an
				5582	* updated inode item, etc, and syncs the log. The same logic
				5583	* applies to direct IO writes instead of buffered writes.
				5584	*
				5585	* 2) When we log the inode with LOG_INODE_EXISTS, its inode item
				5586	* is logged with an i_size of 0 or whatever value was logged
				5587	* before. If later the i_size of the inode is increased by a
				5588	* truncate operation, the log is synced through an fsync of
				5589	* some other inode and then finally an explicit fsync against
				5590	* this inode is made, we must make sure this fsync logs the
				5591	* inode with the new i_size, the hole between old i_size and
				5592	* the new i_size, and syncs the log.
Filipe Manana	75b463d	2020-08-11 12:43:48 +0100	[diff] [blame]	5593	*/
Filipe Manana	9acc8103	2021-07-06 15:41:15 +0100	[diff] [blame]	5594	if (inode_only != LOG_INODE_EXISTS)
Filipe Manana	75b463d	2020-08-11 12:43:48 +0100	[diff] [blame]	5595	inode->last_log_commit = inode->last_sub_trans;
				5596	spin_unlock(&inode->lock);
				5597	}
Yan, Zheng	4a500fd	2010-05-16 10:49:59 -0400	[diff] [blame]	5598	out_unlock:
Nikolay Borisov	a59108a	2017-01-18 00:31:48 +0200	[diff] [blame]	5599	mutex_unlock(&inode->log_mutex);
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	5600
				5601	btrfs_free_path(path);
				5602	btrfs_free_path(dst_path);
Yan, Zheng	4a500fd	2010-05-16 10:49:59 -0400	[diff] [blame]	5603	return err;
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	5604	}
				5605
Chris Mason	12fcfd2	2009-03-24 10:24:20 -0400	[diff] [blame]	5606	/*
Filipe Manana	ab12313	2021-01-27 10:34:56 +0000	[diff] [blame]	5607	* Check if we need to log an inode. This is used in contexts where while
				5608	* logging an inode we need to log another inode (either that it exists or in
				5609	* full mode). This is used instead of btrfs_inode_in_log() because the later
				5610	* requires the inode to be in the log and have the log transaction committed,
				5611	* while here we do not care if the log transaction was already committed - our
				5612	* caller will commit the log later - and we want to avoid logging an inode
				5613	* multiple times when multiple tasks have joined the same log transaction.
				5614	*/
				5615	static bool need_log_inode(struct btrfs_trans_handle *trans,
				5616	struct btrfs_inode *inode)
				5617	{
				5618	/*
Filipe Manana	8be2ba2	2021-07-29 18:52:46 +0100	[diff] [blame]	5619	* If a directory was not modified, no dentries added or removed, we can
				5620	* and should avoid logging it.
				5621	*/
				5622	if (S_ISDIR(inode->vfs_inode.i_mode) && inode->last_trans < trans->transid)
				5623	return false;
				5624
				5625	/*
Filipe Manana	ab12313	2021-01-27 10:34:56 +0000	[diff] [blame]	5626	* If this inode does not have new/updated/deleted xattrs since the last
				5627	* time it was logged and is flagged as logged in the current transaction,
				5628	* we can skip logging it. As for new/deleted names, those are updated in
				5629	* the log by link/unlink/rename operations.
				5630	* In case the inode was logged and then evicted and reloaded, its
				5631	* logged_trans will be 0, in which case we have to fully log it since
				5632	* logged_trans is a transient field, not persisted.
				5633	*/
				5634	if (inode->logged_trans == trans->transid &&
				5635	!test_bit(BTRFS_INODE_COPY_EVERYTHING, &inode->runtime_flags))
				5636	return false;
				5637
				5638	return true;
				5639	}
				5640
Filipe Manana	2f2ff0e	2015-03-20 17:19:46 +0000	[diff] [blame]	5641	struct btrfs_dir_list {
				5642	u64 ino;
				5643	struct list_head list;
				5644	};
				5645
				5646	/*
				5647	* Log the inodes of the new dentries of a directory. See log_dir_items() for
				5648	* details about the why it is needed.
				5649	* This is a recursive operation - if an existing dentry corresponds to a
				5650	* directory, that directory's new entries are logged too (same behaviour as
				5651	* ext3/4, xfs, f2fs, reiserfs, nilfs2). Note that when logging the inodes
				5652	* the dentries point to we do not lock their i_mutex, otherwise lockdep
				5653	* complains about the following circular lock dependency / possible deadlock:
				5654	*
				5655	* CPU0 CPU1
				5656	* ---- ----
				5657	* lock(&type->i_mutex_dir_key#3/2);
				5658	* lock(sb_internal#2);
				5659	* lock(&type->i_mutex_dir_key#3/2);
				5660	* lock(&sb->s_type->i_mutex_key#14);
				5661	*
				5662	* Where sb_internal is the lock (a counter that works as a lock) acquired by
				5663	* sb_start_intwrite() in btrfs_start_transaction().
				5664	* Not locking i_mutex of the inodes is still safe because:
				5665	*
				5666	* 1) For regular files we log with a mode of LOG_INODE_EXISTS. It's possible
				5667	* that while logging the inode new references (names) are added or removed
				5668	* from the inode, leaving the logged inode item with a link count that does
				5669	* not match the number of logged inode reference items. This is fine because
				5670	* at log replay time we compute the real number of links and correct the
				5671	* link count in the inode item (see replay_one_buffer() and
				5672	* link_to_fixup_dir());
				5673	*
				5674	* 2) For directories we log with a mode of LOG_INODE_ALL. It's possible that
				5675	* while logging the inode's items new items with keys BTRFS_DIR_ITEM_KEY and
				5676	* BTRFS_DIR_INDEX_KEY are added to fs/subvol tree and the logged inode item
				5677	* has a size that doesn't match the sum of the lengths of all the logged
				5678	* names. This does not result in a problem because if a dir_item key is
				5679	* logged but its matching dir_index key is not logged, at log replay time we
				5680	* don't use it to replay the respective name (see replay_one_name()). On the
				5681	* other hand if only the dir_index key ends up being logged, the respective
				5682	* name is added to the fs/subvol tree with both the dir_item and dir_index
				5683	* keys created (see replay_one_name()).
				5684	* The directory's inode item with a wrong i_size is not a problem as well,
				5685	* since we don't use it at log replay time to set the i_size in the inode
				5686	* item of the fs/subvol tree (see overwrite_item()).
				5687	*/
				5688	static int log_new_dir_dentries(struct btrfs_trans_handle *trans,
				5689	struct btrfs_root *root,
Nikolay Borisov	51cc0d3	2017-01-18 00:31:43 +0200	[diff] [blame]	5690	struct btrfs_inode *start_inode,
Filipe Manana	2f2ff0e	2015-03-20 17:19:46 +0000	[diff] [blame]	5691	struct btrfs_log_ctx *ctx)
				5692	{
Jeff Mahoney	0b246af	2016-06-22 18:54:23 -0400	[diff] [blame]	5693	struct btrfs_fs_info *fs_info = root->fs_info;
Filipe Manana	2f2ff0e	2015-03-20 17:19:46 +0000	[diff] [blame]	5694	struct btrfs_root *log = root->log_root;
				5695	struct btrfs_path *path;
				5696	LIST_HEAD(dir_list);
				5697	struct btrfs_dir_list *dir_elem;
				5698	int ret = 0;
				5699
				5700	path = btrfs_alloc_path();
				5701	if (!path)
				5702	return -ENOMEM;
				5703
				5704	dir_elem = kmalloc(sizeof(*dir_elem), GFP_NOFS);
				5705	if (!dir_elem) {
				5706	btrfs_free_path(path);
				5707	return -ENOMEM;
				5708	}
Nikolay Borisov	51cc0d3	2017-01-18 00:31:43 +0200	[diff] [blame]	5709	dir_elem->ino = btrfs_ino(start_inode);
Filipe Manana	2f2ff0e	2015-03-20 17:19:46 +0000	[diff] [blame]	5710	list_add_tail(&dir_elem->list, &dir_list);
				5711
				5712	while (!list_empty(&dir_list)) {
				5713	struct extent_buffer *leaf;
				5714	struct btrfs_key min_key;
				5715	int nritems;
				5716	int i;
				5717
				5718	dir_elem = list_first_entry(&dir_list, struct btrfs_dir_list,
				5719	list);
				5720	if (ret)
				5721	goto next_dir_inode;
				5722
				5723	min_key.objectid = dir_elem->ino;
				5724	min_key.type = BTRFS_DIR_ITEM_KEY;
				5725	min_key.offset = 0;
				5726	again:
				5727	btrfs_release_path(path);
				5728	ret = btrfs_search_forward(log, &min_key, path, trans->transid);
				5729	if (ret < 0) {
				5730	goto next_dir_inode;
				5731	} else if (ret > 0) {
				5732	ret = 0;
				5733	goto next_dir_inode;
				5734	}
				5735
				5736	process_leaf:
				5737	leaf = path->nodes[0];
				5738	nritems = btrfs_header_nritems(leaf);
				5739	for (i = path->slots[0]; i < nritems; i++) {
				5740	struct btrfs_dir_item *di;
				5741	struct btrfs_key di_key;
				5742	struct inode *di_inode;
				5743	struct btrfs_dir_list *new_dir_elem;
				5744	int log_mode = LOG_INODE_EXISTS;
				5745	int type;
				5746
				5747	btrfs_item_key_to_cpu(leaf, &min_key, i);
				5748	if (min_key.objectid != dir_elem->ino \|\|
				5749	min_key.type != BTRFS_DIR_ITEM_KEY)
				5750	goto next_dir_inode;
				5751
				5752	di = btrfs_item_ptr(leaf, i, struct btrfs_dir_item);
				5753	type = btrfs_dir_type(leaf, di);
				5754	if (btrfs_dir_transid(leaf, di) < trans->transid &&
				5755	type != BTRFS_FT_DIR)
				5756	continue;
				5757	btrfs_dir_item_key_to_cpu(leaf, di, &di_key);
				5758	if (di_key.type == BTRFS_ROOT_ITEM_KEY)
				5759	continue;
				5760
Robbie Ko	ec125cf	2016-10-28 10:48:26 +0800	[diff] [blame]	5761	btrfs_release_path(path);
David Sterba	0202e83	2020-05-15 19:35:59 +0200	[diff] [blame]	5762	di_inode = btrfs_iget(fs_info->sb, di_key.objectid, root);
Filipe Manana	2f2ff0e	2015-03-20 17:19:46 +0000	[diff] [blame]	5763	if (IS_ERR(di_inode)) {
				5764	ret = PTR_ERR(di_inode);
				5765	goto next_dir_inode;
				5766	}
				5767
Filipe Manana	0e44cb3	2021-01-27 10:34:58 +0000	[diff] [blame]	5768	if (!need_log_inode(trans, BTRFS_I(di_inode))) {
Filipe Manana	410f954	2019-09-10 15:26:49 +0100	[diff] [blame]	5769	btrfs_add_delayed_iput(di_inode);
Robbie Ko	ec125cf	2016-10-28 10:48:26 +0800	[diff] [blame]	5770	break;
Filipe Manana	2f2ff0e	2015-03-20 17:19:46 +0000	[diff] [blame]	5771	}
				5772
				5773	ctx->log_new_dentries = false;
Filipe Manana	3f9749f	2016-04-25 04:45:02 +0100	[diff] [blame]	5774	if (type == BTRFS_FT_DIR \|\| type == BTRFS_FT_SYMLINK)
Filipe Manana	2f2ff0e	2015-03-20 17:19:46 +0000	[diff] [blame]	5775	log_mode = LOG_INODE_ALL;
Nikolay Borisov	a59108a	2017-01-18 00:31:48 +0200	[diff] [blame]	5776	ret = btrfs_log_inode(trans, root, BTRFS_I(di_inode),
Filipe Manana	4877817	2020-08-11 12:43:58 +0100	[diff] [blame]	5777	log_mode, ctx);
Filipe Manana	410f954	2019-09-10 15:26:49 +0100	[diff] [blame]	5778	btrfs_add_delayed_iput(di_inode);
Filipe Manana	2f2ff0e	2015-03-20 17:19:46 +0000	[diff] [blame]	5779	if (ret)
				5780	goto next_dir_inode;
				5781	if (ctx->log_new_dentries) {
				5782	new_dir_elem = kmalloc(sizeof(*new_dir_elem),
				5783	GFP_NOFS);
				5784	if (!new_dir_elem) {
				5785	ret = -ENOMEM;
				5786	goto next_dir_inode;
				5787	}
				5788	new_dir_elem->ino = di_key.objectid;
				5789	list_add_tail(&new_dir_elem->list, &dir_list);
				5790	}
				5791	break;
				5792	}
				5793	if (i == nritems) {
				5794	ret = btrfs_next_leaf(log, path);
				5795	if (ret < 0) {
				5796	goto next_dir_inode;
				5797	} else if (ret > 0) {
				5798	ret = 0;
				5799	goto next_dir_inode;
				5800	}
				5801	goto process_leaf;
				5802	}
				5803	if (min_key.offset < (u64)-1) {
				5804	min_key.offset++;
				5805	goto again;
				5806	}
				5807	next_dir_inode:
				5808	list_del(&dir_elem->list);
				5809	kfree(dir_elem);
				5810	}
				5811
				5812	btrfs_free_path(path);
				5813	return ret;
				5814	}
				5815
Filipe Manana	18aa092	2015-08-05 16:49:08 +0100	[diff] [blame]	5816	static int btrfs_log_all_parents(struct btrfs_trans_handle *trans,
Nikolay Borisov	d0a0b78	2017-02-20 13:50:30 +0200	[diff] [blame]	5817	struct btrfs_inode *inode,
Filipe Manana	18aa092	2015-08-05 16:49:08 +0100	[diff] [blame]	5818	struct btrfs_log_ctx *ctx)
				5819	{
David Sterba	3ffbd68	2018-06-29 10:56:42 +0200	[diff] [blame]	5820	struct btrfs_fs_info *fs_info = trans->fs_info;
Filipe Manana	18aa092	2015-08-05 16:49:08 +0100	[diff] [blame]	5821	int ret;
				5822	struct btrfs_path *path;
				5823	struct btrfs_key key;
Nikolay Borisov	d0a0b78	2017-02-20 13:50:30 +0200	[diff] [blame]	5824	struct btrfs_root *root = inode->root;
				5825	const u64 ino = btrfs_ino(inode);
Filipe Manana	18aa092	2015-08-05 16:49:08 +0100	[diff] [blame]	5826
				5827	path = btrfs_alloc_path();
				5828	if (!path)
				5829	return -ENOMEM;
				5830	path->skip_locking = 1;
				5831	path->search_commit_root = 1;
				5832
				5833	key.objectid = ino;
				5834	key.type = BTRFS_INODE_REF_KEY;
				5835	key.offset = 0;
				5836	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
				5837	if (ret < 0)
				5838	goto out;
				5839
				5840	while (true) {
				5841	struct extent_buffer *leaf = path->nodes[0];
				5842	int slot = path->slots[0];
				5843	u32 cur_offset = 0;
				5844	u32 item_size;
				5845	unsigned long ptr;
				5846
				5847	if (slot >= btrfs_header_nritems(leaf)) {
				5848	ret = btrfs_next_leaf(root, path);
				5849	if (ret < 0)
				5850	goto out;
				5851	else if (ret > 0)
				5852	break;
				5853	continue;
				5854	}
				5855
				5856	btrfs_item_key_to_cpu(leaf, &key, slot);
				5857	/* BTRFS_INODE_EXTREF_KEY is BTRFS_INODE_REF_KEY + 1 */
				5858	if (key.objectid != ino \|\| key.type > BTRFS_INODE_EXTREF_KEY)
				5859	break;
				5860
				5861	item_size = btrfs_item_size_nr(leaf, slot);
				5862	ptr = btrfs_item_ptr_offset(leaf, slot);
				5863	while (cur_offset < item_size) {
				5864	struct btrfs_key inode_key;
				5865	struct inode *dir_inode;
				5866
				5867	inode_key.type = BTRFS_INODE_ITEM_KEY;
				5868	inode_key.offset = 0;
				5869
				5870	if (key.type == BTRFS_INODE_EXTREF_KEY) {
				5871	struct btrfs_inode_extref *extref;
				5872
				5873	extref = (struct btrfs_inode_extref *)
				5874	(ptr + cur_offset);
				5875	inode_key.objectid = btrfs_inode_extref_parent(
				5876	leaf, extref);
				5877	cur_offset += sizeof(*extref);
				5878	cur_offset += btrfs_inode_extref_name_len(leaf,
				5879	extref);
				5880	} else {
				5881	inode_key.objectid = key.offset;
				5882	cur_offset = item_size;
				5883	}
				5884
David Sterba	0202e83	2020-05-15 19:35:59 +0200	[diff] [blame]	5885	dir_inode = btrfs_iget(fs_info->sb, inode_key.objectid,
				5886	root);
Filipe Manana	0f375ee	2018-10-09 15:05:29 +0100	[diff] [blame]	5887	/*
				5888	* If the parent inode was deleted, return an error to
				5889	* fallback to a transaction commit. This is to prevent
				5890	* getting an inode that was moved from one parent A to
				5891	* a parent B, got its former parent A deleted and then
				5892	* it got fsync'ed, from existing at both parents after
				5893	* a log replay (and the old parent still existing).
				5894	* Example:
				5895	*
				5896	* mkdir /mnt/A
				5897	* mkdir /mnt/B
				5898	* touch /mnt/B/bar
				5899	* sync
				5900	* mv /mnt/B/bar /mnt/A/bar
				5901	* mv -T /mnt/A /mnt/B
				5902	* fsync /mnt/B/bar
				5903	* <power fail>
				5904	*
				5905	* If we ignore the old parent B which got deleted,
				5906	* after a log replay we would have file bar linked
				5907	* at both parents and the old parent B would still
				5908	* exist.
				5909	*/
				5910	if (IS_ERR(dir_inode)) {
				5911	ret = PTR_ERR(dir_inode);
				5912	goto out;
				5913	}
Filipe Manana	18aa092	2015-08-05 16:49:08 +0100	[diff] [blame]	5914
Filipe Manana	3e6a86a	2021-01-27 10:34:57 +0000	[diff] [blame]	5915	if (!need_log_inode(trans, BTRFS_I(dir_inode))) {
				5916	btrfs_add_delayed_iput(dir_inode);
				5917	continue;
				5918	}
				5919
Filipe Manana	657ed1a	2016-04-06 17:11:56 +0100	[diff] [blame]	5920	if (ctx)
				5921	ctx->log_new_dentries = false;
Nikolay Borisov	a59108a	2017-01-18 00:31:48 +0200	[diff] [blame]	5922	ret = btrfs_log_inode(trans, root, BTRFS_I(dir_inode),
Filipe Manana	4877817	2020-08-11 12:43:58 +0100	[diff] [blame]	5923	LOG_INODE_ALL, ctx);
Filipe Manana	657ed1a	2016-04-06 17:11:56 +0100	[diff] [blame]	5924	if (!ret && ctx && ctx->log_new_dentries)
				5925	ret = log_new_dir_dentries(trans, root,
David Sterba	f85b737	2017-01-20 14:54:07 +0100	[diff] [blame]	5926	BTRFS_I(dir_inode), ctx);
Filipe Manana	410f954	2019-09-10 15:26:49 +0100	[diff] [blame]	5927	btrfs_add_delayed_iput(dir_inode);
Filipe Manana	18aa092	2015-08-05 16:49:08 +0100	[diff] [blame]	5928	if (ret)
				5929	goto out;
				5930	}
				5931	path->slots[0]++;
				5932	}
				5933	ret = 0;
				5934	out:
				5935	btrfs_free_path(path);
				5936	return ret;
				5937	}
				5938
Filipe Manana	b8aa330	2019-04-17 11:31:06 +0100	[diff] [blame]	5939	static int log_new_ancestors(struct btrfs_trans_handle *trans,
				5940	struct btrfs_root *root,
				5941	struct btrfs_path *path,
				5942	struct btrfs_log_ctx *ctx)
				5943	{
				5944	struct btrfs_key found_key;
				5945
				5946	btrfs_item_key_to_cpu(path->nodes[0], &found_key, path->slots[0]);
				5947
				5948	while (true) {
				5949	struct btrfs_fs_info *fs_info = root->fs_info;
Filipe Manana	b8aa330	2019-04-17 11:31:06 +0100	[diff] [blame]	5950	struct extent_buffer *leaf = path->nodes[0];
				5951	int slot = path->slots[0];
				5952	struct btrfs_key search_key;
				5953	struct inode *inode;
David Sterba	0202e83	2020-05-15 19:35:59 +0200	[diff] [blame]	5954	u64 ino;
Filipe Manana	b8aa330	2019-04-17 11:31:06 +0100	[diff] [blame]	5955	int ret = 0;
				5956
				5957	btrfs_release_path(path);
				5958
David Sterba	0202e83	2020-05-15 19:35:59 +0200	[diff] [blame]	5959	ino = found_key.offset;
				5960
Filipe Manana	b8aa330	2019-04-17 11:31:06 +0100	[diff] [blame]	5961	search_key.objectid = found_key.offset;
				5962	search_key.type = BTRFS_INODE_ITEM_KEY;
				5963	search_key.offset = 0;
David Sterba	0202e83	2020-05-15 19:35:59 +0200	[diff] [blame]	5964	inode = btrfs_iget(fs_info->sb, ino, root);
Filipe Manana	b8aa330	2019-04-17 11:31:06 +0100	[diff] [blame]	5965	if (IS_ERR(inode))
				5966	return PTR_ERR(inode);
				5967
Filipe Manana	ab12313	2021-01-27 10:34:56 +0000	[diff] [blame]	5968	if (BTRFS_I(inode)->generation >= trans->transid &&
				5969	need_log_inode(trans, BTRFS_I(inode)))
Filipe Manana	b8aa330	2019-04-17 11:31:06 +0100	[diff] [blame]	5970	ret = btrfs_log_inode(trans, root, BTRFS_I(inode),
Filipe Manana	4877817	2020-08-11 12:43:58 +0100	[diff] [blame]	5971	LOG_INODE_EXISTS, ctx);
Filipe Manana	410f954	2019-09-10 15:26:49 +0100	[diff] [blame]	5972	btrfs_add_delayed_iput(inode);
Filipe Manana	b8aa330	2019-04-17 11:31:06 +0100	[diff] [blame]	5973	if (ret)
				5974	return ret;
				5975
				5976	if (search_key.objectid == BTRFS_FIRST_FREE_OBJECTID)
				5977	break;
				5978
				5979	search_key.type = BTRFS_INODE_REF_KEY;
				5980	ret = btrfs_search_slot(NULL, root, &search_key, path, 0, 0);
				5981	if (ret < 0)
				5982	return ret;
				5983
				5984	leaf = path->nodes[0];
				5985	slot = path->slots[0];
				5986	if (slot >= btrfs_header_nritems(leaf)) {
				5987	ret = btrfs_next_leaf(root, path);
				5988	if (ret < 0)
				5989	return ret;
				5990	else if (ret > 0)
				5991	return -ENOENT;
				5992	leaf = path->nodes[0];
				5993	slot = path->slots[0];
				5994	}
				5995
				5996	btrfs_item_key_to_cpu(leaf, &found_key, slot);
				5997	if (found_key.objectid != search_key.objectid \|\|
				5998	found_key.type != BTRFS_INODE_REF_KEY)
				5999	return -ENOENT;
				6000	}
				6001	return 0;
				6002	}
				6003
				6004	static int log_new_ancestors_fast(struct btrfs_trans_handle *trans,
				6005	struct btrfs_inode *inode,
				6006	struct dentry *parent,
				6007	struct btrfs_log_ctx *ctx)
				6008	{
				6009	struct btrfs_root *root = inode->root;
Filipe Manana	b8aa330	2019-04-17 11:31:06 +0100	[diff] [blame]	6010	struct dentry *old_parent = NULL;
				6011	struct super_block *sb = inode->vfs_inode.i_sb;
				6012	int ret = 0;
				6013
				6014	while (true) {
				6015	if (!parent \|\| d_really_is_negative(parent) \|\|
				6016	sb != parent->d_sb)
				6017	break;
				6018
				6019	inode = BTRFS_I(d_inode(parent));
				6020	if (root != inode->root)
				6021	break;
				6022
Filipe Manana	ab12313	2021-01-27 10:34:56 +0000	[diff] [blame]	6023	if (inode->generation >= trans->transid &&
				6024	need_log_inode(trans, inode)) {
Filipe Manana	b8aa330	2019-04-17 11:31:06 +0100	[diff] [blame]	6025	ret = btrfs_log_inode(trans, root, inode,
Filipe Manana	4877817	2020-08-11 12:43:58 +0100	[diff] [blame]	6026	LOG_INODE_EXISTS, ctx);
Filipe Manana	b8aa330	2019-04-17 11:31:06 +0100	[diff] [blame]	6027	if (ret)
				6028	break;
				6029	}
				6030	if (IS_ROOT(parent))
				6031	break;
				6032
				6033	parent = dget_parent(parent);
				6034	dput(old_parent);
				6035	old_parent = parent;
				6036	}
				6037	dput(old_parent);
				6038
				6039	return ret;
				6040	}
				6041
				6042	static int log_all_new_ancestors(struct btrfs_trans_handle *trans,
				6043	struct btrfs_inode *inode,
				6044	struct dentry *parent,
				6045	struct btrfs_log_ctx *ctx)
				6046	{
				6047	struct btrfs_root *root = inode->root;
				6048	const u64 ino = btrfs_ino(inode);
				6049	struct btrfs_path *path;
				6050	struct btrfs_key search_key;
				6051	int ret;
				6052
				6053	/*
				6054	* For a single hard link case, go through a fast path that does not
				6055	* need to iterate the fs/subvolume tree.
				6056	*/
				6057	if (inode->vfs_inode.i_nlink < 2)
				6058	return log_new_ancestors_fast(trans, inode, parent, ctx);
				6059
				6060	path = btrfs_alloc_path();
				6061	if (!path)
				6062	return -ENOMEM;
				6063
				6064	search_key.objectid = ino;
				6065	search_key.type = BTRFS_INODE_REF_KEY;
				6066	search_key.offset = 0;
				6067	again:
				6068	ret = btrfs_search_slot(NULL, root, &search_key, path, 0, 0);
				6069	if (ret < 0)
				6070	goto out;
				6071	if (ret == 0)
				6072	path->slots[0]++;
				6073
				6074	while (true) {
				6075	struct extent_buffer *leaf = path->nodes[0];
				6076	int slot = path->slots[0];
				6077	struct btrfs_key found_key;
				6078
				6079	if (slot >= btrfs_header_nritems(leaf)) {
				6080	ret = btrfs_next_leaf(root, path);
				6081	if (ret < 0)
				6082	goto out;
				6083	else if (ret > 0)
				6084	break;
				6085	continue;
				6086	}
				6087
				6088	btrfs_item_key_to_cpu(leaf, &found_key, slot);
				6089	if (found_key.objectid != ino \|\|
				6090	found_key.type > BTRFS_INODE_EXTREF_KEY)
				6091	break;
				6092
				6093	/*
				6094	* Don't deal with extended references because they are rare
				6095	* cases and too complex to deal with (we would need to keep
				6096	* track of which subitem we are processing for each item in
				6097	* this loop, etc). So just return some error to fallback to
				6098	* a transaction commit.
				6099	*/
				6100	if (found_key.type == BTRFS_INODE_EXTREF_KEY) {
				6101	ret = -EMLINK;
				6102	goto out;
				6103	}
				6104
				6105	/*
				6106	* Logging ancestors needs to do more searches on the fs/subvol
				6107	* tree, so it releases the path as needed to avoid deadlocks.
				6108	* Keep track of the last inode ref key and resume from that key
				6109	* after logging all new ancestors for the current hard link.
				6110	*/
				6111	memcpy(&search_key, &found_key, sizeof(search_key));
				6112
				6113	ret = log_new_ancestors(trans, root, path, ctx);
				6114	if (ret)
				6115	goto out;
				6116	btrfs_release_path(path);
				6117	goto again;
				6118	}
				6119	ret = 0;
				6120	out:
				6121	btrfs_free_path(path);
				6122	return ret;
				6123	}
				6124
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	6125	/*
				6126	* helper function around btrfs_log_inode to make sure newly created
				6127	* parent directories also end up in the log. A minimal inode and backref
				6128	* only logging is done of any parent directories that are older than
				6129	* the last committed transaction
				6130	*/
Eric Sandeen	48a3b63	2013-04-25 20:41:01 +0000	[diff] [blame]	6131	static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
Nikolay Borisov	19df27a	2017-02-20 13:51:01 +0200	[diff] [blame]	6132	struct btrfs_inode *inode,
Filipe Manana	49dae1b	2014-09-06 22:34:39 +0100	[diff] [blame]	6133	struct dentry *parent,
Edmund Nadolski	41a1ead	2017-11-20 13:24:47 -0700	[diff] [blame]	6134	int inode_only,
Miao Xie	8b050d3	2014-02-20 18:08:58 +0800	[diff] [blame]	6135	struct btrfs_log_ctx *ctx)
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	6136	{
Nikolay Borisov	f882274	2018-02-27 17:37:17 +0200	[diff] [blame]	6137	struct btrfs_root *root = inode->root;
Jeff Mahoney	0b246af	2016-06-22 18:54:23 -0400	[diff] [blame]	6138	struct btrfs_fs_info *fs_info = root->fs_info;
Chris Mason	12fcfd2	2009-03-24 10:24:20 -0400	[diff] [blame]	6139	int ret = 0;
Filipe Manana	2f2ff0e	2015-03-20 17:19:46 +0000	[diff] [blame]	6140	bool log_dentries = false;
Chris Mason	12fcfd2	2009-03-24 10:24:20 -0400	[diff] [blame]	6141
Jeff Mahoney	0b246af	2016-06-22 18:54:23 -0400	[diff] [blame]	6142	if (btrfs_test_opt(fs_info, NOTREELOG)) {
Sage Weil	3a5e140	2009-04-02 16:49:40 -0400	[diff] [blame]	6143	ret = 1;
				6144	goto end_no_trans;
				6145	}
				6146
Nikolay Borisov	f882274	2018-02-27 17:37:17 +0200	[diff] [blame]	6147	if (btrfs_root_refs(&root->root_item) == 0) {
Yan, Zheng	76dda93	2009-09-21 16:00:26 -0400	[diff] [blame]	6148	ret = 1;
				6149	goto end_no_trans;
				6150	}
				6151
Filipe Manana	f2d72f4	2018-10-08 11:12:55 +0100	[diff] [blame]	6152	/*
				6153	* Skip already logged inodes or inodes corresponding to tmpfiles
				6154	* (since logging them is pointless, a link count of 0 means they
				6155	* will never be accessible).
				6156	*/
Filipe Manana	626e9f4	2021-04-27 11:27:20 +0100	[diff] [blame]	6157	if ((btrfs_inode_in_log(inode, trans->transid) &&
				6158	list_empty(&ctx->ordered_extents)) \|\|
Filipe Manana	f2d72f4	2018-10-08 11:12:55 +0100	[diff] [blame]	6159	inode->vfs_inode.i_nlink == 0) {
Chris Mason	257c62e	2009-10-13 13:21:08 -0400	[diff] [blame]	6160	ret = BTRFS_NO_LOG_SYNC;
				6161	goto end_no_trans;
				6162	}
				6163
Miao Xie	8b050d3	2014-02-20 18:08:58 +0800	[diff] [blame]	6164	ret = start_log_trans(trans, root, ctx);
Yan, Zheng	4a500fd	2010-05-16 10:49:59 -0400	[diff] [blame]	6165	if (ret)
Miao Xie	e87ac13	2014-02-20 18:08:53 +0800	[diff] [blame]	6166	goto end_no_trans;
Chris Mason	12fcfd2	2009-03-24 10:24:20 -0400	[diff] [blame]	6167
Filipe Manana	4877817	2020-08-11 12:43:58 +0100	[diff] [blame]	6168	ret = btrfs_log_inode(trans, root, inode, inode_only, ctx);
Yan, Zheng	4a500fd	2010-05-16 10:49:59 -0400	[diff] [blame]	6169	if (ret)
				6170	goto end_trans;
Chris Mason	12fcfd2	2009-03-24 10:24:20 -0400	[diff] [blame]	6171
Chris Mason	af4176b	2009-03-24 10:24:31 -0400	[diff] [blame]	6172	/*
				6173	* for regular files, if its inode is already on disk, we don't
				6174	* have to worry about the parents at all. This is because
				6175	* we can use the last_unlink_trans field to record renames
				6176	* and other fun in this file.
				6177	*/
Nikolay Borisov	19df27a	2017-02-20 13:51:01 +0200	[diff] [blame]	6178	if (S_ISREG(inode->vfs_inode.i_mode) &&
Filipe Manana	47d3db4	2020-11-25 12:19:26 +0000	[diff] [blame]	6179	inode->generation < trans->transid &&
				6180	inode->last_unlink_trans < trans->transid) {
Yan, Zheng	4a500fd	2010-05-16 10:49:59 -0400	[diff] [blame]	6181	ret = 0;
				6182	goto end_trans;
				6183	}
Chris Mason	af4176b	2009-03-24 10:24:31 -0400	[diff] [blame]	6184
Nikolay Borisov	19df27a	2017-02-20 13:51:01 +0200	[diff] [blame]	6185	if (S_ISDIR(inode->vfs_inode.i_mode) && ctx && ctx->log_new_dentries)
Filipe Manana	2f2ff0e	2015-03-20 17:19:46 +0000	[diff] [blame]	6186	log_dentries = true;
				6187
Filipe Manana	18aa092	2015-08-05 16:49:08 +0100	[diff] [blame]	6188	/*
Nicholas D Steeves	0132761	2016-05-19 21:18:45 -0400	[diff] [blame]	6189	* On unlink we must make sure all our current and old parent directory
Filipe Manana	18aa092	2015-08-05 16:49:08 +0100	[diff] [blame]	6190	* inodes are fully logged. This is to prevent leaving dangling
				6191	* directory index entries in directories that were our parents but are
				6192	* not anymore. Not doing this results in old parent directory being
				6193	* impossible to delete after log replay (rmdir will always fail with
				6194	* error -ENOTEMPTY).
				6195	*
				6196	* Example 1:
				6197	*
				6198	* mkdir testdir
				6199	* touch testdir/foo
				6200	* ln testdir/foo testdir/bar
				6201	* sync
				6202	* unlink testdir/bar
				6203	* xfs_io -c fsync testdir/foo
				6204	* <power failure>
				6205	* mount fs, triggers log replay
				6206	*
				6207	* If we don't log the parent directory (testdir), after log replay the
				6208	* directory still has an entry pointing to the file inode using the bar
				6209	* name, but a matching BTRFS_INODE_[REF\|EXTREF]_KEY does not exist and
				6210	* the file inode has a link count of 1.
				6211	*
				6212	* Example 2:
				6213	*
				6214	* mkdir testdir
				6215	* touch foo
				6216	* ln foo testdir/foo2
				6217	* ln foo testdir/foo3
				6218	* sync
				6219	* unlink testdir/foo3
				6220	* xfs_io -c fsync foo
				6221	* <power failure>
				6222	* mount fs, triggers log replay
				6223	*
				6224	* Similar as the first example, after log replay the parent directory
				6225	* testdir still has an entry pointing to the inode file with name foo3
				6226	* but the file inode does not have a matching BTRFS_INODE_REF_KEY item
				6227	* and has a link count of 2.
				6228	*/
Filipe Manana	47d3db4	2020-11-25 12:19:26 +0000	[diff] [blame]	6229	if (inode->last_unlink_trans >= trans->transid) {
Filipe Manana	b8aa330	2019-04-17 11:31:06 +0100	[diff] [blame]	6230	ret = btrfs_log_all_parents(trans, inode, ctx);
Filipe Manana	18aa092	2015-08-05 16:49:08 +0100	[diff] [blame]	6231	if (ret)
				6232	goto end_trans;
				6233	}
				6234
Filipe Manana	b8aa330	2019-04-17 11:31:06 +0100	[diff] [blame]	6235	ret = log_all_new_ancestors(trans, inode, parent, ctx);
				6236	if (ret)
Filipe Manana	41bd606	2018-11-28 14:54:28 +0000	[diff] [blame]	6237	goto end_trans;
Filipe Manana	41bd606	2018-11-28 14:54:28 +0000	[diff] [blame]	6238
Filipe Manana	2f2ff0e	2015-03-20 17:19:46 +0000	[diff] [blame]	6239	if (log_dentries)
Filipe Manana	b8aa330	2019-04-17 11:31:06 +0100	[diff] [blame]	6240	ret = log_new_dir_dentries(trans, root, inode, ctx);
Filipe Manana	2f2ff0e	2015-03-20 17:19:46 +0000	[diff] [blame]	6241	else
				6242	ret = 0;
Yan, Zheng	4a500fd	2010-05-16 10:49:59 -0400	[diff] [blame]	6243	end_trans:
				6244	if (ret < 0) {
David Sterba	9078776	2019-03-20 13:28:05 +0100	[diff] [blame]	6245	btrfs_set_log_full_commit(trans);
Yan, Zheng	4a500fd	2010-05-16 10:49:59 -0400	[diff] [blame]	6246	ret = 1;
				6247	}
Miao Xie	8b050d3	2014-02-20 18:08:58 +0800	[diff] [blame]	6248
				6249	if (ret)
				6250	btrfs_remove_log_ctx(root, ctx);
Chris Mason	12fcfd2	2009-03-24 10:24:20 -0400	[diff] [blame]	6251	btrfs_end_log_trans(root);
				6252	end_no_trans:
				6253	return ret;
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	6254	}
				6255
				6256	/*
				6257	* it is not safe to log dentry if the chunk root has added new
				6258	* chunks. This returns 0 if the dentry was logged, and 1 otherwise.
				6259	* If this returns 1, you must commit the transaction to safely get your
				6260	* data on disk.
				6261	*/
				6262	int btrfs_log_dentry_safe(struct btrfs_trans_handle *trans,
Nikolay Borisov	e5b84f7a	2018-02-27 17:37:18 +0200	[diff] [blame]	6263	struct dentry *dentry,
Miao Xie	8b050d3	2014-02-20 18:08:58 +0800	[diff] [blame]	6264	struct btrfs_log_ctx *ctx)
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	6265	{
Josef Bacik	6a91221	2010-11-20 09:48:00 +0000	[diff] [blame]	6266	struct dentry *parent = dget_parent(dentry);
				6267	int ret;
				6268
Nikolay Borisov	f882274	2018-02-27 17:37:17 +0200	[diff] [blame]	6269	ret = btrfs_log_inode_parent(trans, BTRFS_I(d_inode(dentry)), parent,
Filipe Manana	4877817	2020-08-11 12:43:58 +0100	[diff] [blame]	6270	LOG_INODE_ALL, ctx);
Josef Bacik	6a91221	2010-11-20 09:48:00 +0000	[diff] [blame]	6271	dput(parent);
				6272
				6273	return ret;
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	6274	}
				6275
				6276	/*
				6277	* should be called during mount to recover and replay any log trees
				6278	* from the FS
				6279	*/
				6280	int btrfs_recover_log_trees(struct btrfs_root *log_root_tree)
				6281	{
				6282	int ret;
				6283	struct btrfs_path *path;
				6284	struct btrfs_trans_handle *trans;
				6285	struct btrfs_key key;
				6286	struct btrfs_key found_key;
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	6287	struct btrfs_root *log;
				6288	struct btrfs_fs_info *fs_info = log_root_tree->fs_info;
				6289	struct walk_control wc = {
				6290	.process_func = process_one_buffer,
David Sterba	430a662	2019-08-01 14:50:35 +0200	[diff] [blame]	6291	.stage = LOG_WALK_PIN_ONLY,
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	6292	};
				6293
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	6294	path = btrfs_alloc_path();
Tsutomu Itoh	db5b493	2011-03-23 08:14:16 +0000	[diff] [blame]	6295	if (!path)
				6296	return -ENOMEM;
				6297
Josef Bacik	afcdd12	2016-09-02 15:40:02 -0400	[diff] [blame]	6298	set_bit(BTRFS_FS_LOG_RECOVERING, &fs_info->flags);
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	6299
Yan, Zheng	4a500fd	2010-05-16 10:49:59 -0400	[diff] [blame]	6300	trans = btrfs_start_transaction(fs_info->tree_root, 0);
Jeff Mahoney	79787ea	2012-03-12 16:03:00 +0100	[diff] [blame]	6301	if (IS_ERR(trans)) {
				6302	ret = PTR_ERR(trans);
				6303	goto error;
				6304	}
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	6305
				6306	wc.trans = trans;
				6307	wc.pin = 1;
				6308
Tsutomu Itoh	db5b493	2011-03-23 08:14:16 +0000	[diff] [blame]	6309	ret = walk_log_tree(trans, log_root_tree, &wc);
Jeff Mahoney	79787ea	2012-03-12 16:03:00 +0100	[diff] [blame]	6310	if (ret) {
Jeff Mahoney	5d163e0	2016-09-20 10:05:00 -0400	[diff] [blame]	6311	btrfs_handle_fs_error(fs_info, ret,
				6312	"Failed to pin buffers while recovering log root tree.");
Jeff Mahoney	79787ea	2012-03-12 16:03:00 +0100	[diff] [blame]	6313	goto error;
				6314	}
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	6315
				6316	again:
				6317	key.objectid = BTRFS_TREE_LOG_OBJECTID;
				6318	key.offset = (u64)-1;
David Sterba	962a298	2014-06-04 18:41:45 +0200	[diff] [blame]	6319	key.type = BTRFS_ROOT_ITEM_KEY;
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	6320
Chris Mason	d397712	2009-01-05 21:25:51 -0500	[diff] [blame]	6321	while (1) {
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	6322	ret = btrfs_search_slot(NULL, log_root_tree, &key, path, 0, 0);
Jeff Mahoney	79787ea	2012-03-12 16:03:00 +0100	[diff] [blame]	6323
				6324	if (ret < 0) {
Anand Jain	34d9700	2016-03-16 16:43:06 +0800	[diff] [blame]	6325	btrfs_handle_fs_error(fs_info, ret,
Jeff Mahoney	79787ea	2012-03-12 16:03:00 +0100	[diff] [blame]	6326	"Couldn't find tree log root.");
				6327	goto error;
				6328	}
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	6329	if (ret > 0) {
				6330	if (path->slots[0] == 0)
				6331	break;
				6332	path->slots[0]--;
				6333	}
				6334	btrfs_item_key_to_cpu(path->nodes[0], &found_key,
				6335	path->slots[0]);
David Sterba	b3b4aa7	2011-04-21 01:20:15 +0200	[diff] [blame]	6336	btrfs_release_path(path);
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	6337	if (found_key.objectid != BTRFS_TREE_LOG_OBJECTID)
				6338	break;
				6339
Josef Bacik	62a2c73	2020-01-24 09:32:21 -0500	[diff] [blame]	6340	log = btrfs_read_tree_root(log_root_tree, &found_key);
Jeff Mahoney	79787ea	2012-03-12 16:03:00 +0100	[diff] [blame]	6341	if (IS_ERR(log)) {
				6342	ret = PTR_ERR(log);
Anand Jain	34d9700	2016-03-16 16:43:06 +0800	[diff] [blame]	6343	btrfs_handle_fs_error(fs_info, ret,
Jeff Mahoney	79787ea	2012-03-12 16:03:00 +0100	[diff] [blame]	6344	"Couldn't read tree log root.");
				6345	goto error;
				6346	}
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	6347
David Sterba	56e9357	2020-05-15 19:35:55 +0200	[diff] [blame]	6348	wc.replay_dest = btrfs_get_fs_root(fs_info, found_key.offset,
				6349	true);
Jeff Mahoney	79787ea	2012-03-12 16:03:00 +0100	[diff] [blame]	6350	if (IS_ERR(wc.replay_dest)) {
				6351	ret = PTR_ERR(wc.replay_dest);
Josef Bacik	9bc574d	2019-12-06 09:37:17 -0500	[diff] [blame]	6352
				6353	/*
				6354	* We didn't find the subvol, likely because it was
				6355	* deleted. This is ok, simply skip this log and go to
				6356	* the next one.
				6357	*
				6358	* We need to exclude the root because we can't have
				6359	* other log replays overwriting this log as we'll read
				6360	* it back in a few more times. This will keep our
				6361	* block from being modified, and we'll just bail for
				6362	* each subsequent pass.
				6363	*/
				6364	if (ret == -ENOENT)
Nikolay Borisov	9fce570	2020-01-20 16:09:13 +0200	[diff] [blame]	6365	ret = btrfs_pin_extent_for_log_replay(trans,
Josef Bacik	9bc574d	2019-12-06 09:37:17 -0500	[diff] [blame]	6366	log->node->start,
				6367	log->node->len);
Josef Bacik	0024652	2020-01-24 09:33:01 -0500	[diff] [blame]	6368	btrfs_put_root(log);
Josef Bacik	9bc574d	2019-12-06 09:37:17 -0500	[diff] [blame]	6369
				6370	if (!ret)
				6371	goto next;
Jeff Mahoney	5d163e0	2016-09-20 10:05:00 -0400	[diff] [blame]	6372	btrfs_handle_fs_error(fs_info, ret,
				6373	"Couldn't read target root for tree log recovery.");
Jeff Mahoney	79787ea	2012-03-12 16:03:00 +0100	[diff] [blame]	6374	goto error;
				6375	}
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	6376
Yan Zheng	07d400a	2009-01-06 11:42:00 -0500	[diff] [blame]	6377	wc.replay_dest->log_root = log;
Josef Bacik	2002ae1	2021-03-12 15:25:05 -0500	[diff] [blame]	6378	ret = btrfs_record_root_in_trans(trans, wc.replay_dest);
				6379	if (ret)
				6380	/* The loop needs to continue due to the root refs */
				6381	btrfs_handle_fs_error(fs_info, ret,
				6382	"failed to record the log root in transaction");
				6383	else
				6384	ret = walk_log_tree(trans, log, &wc);
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	6385
Josef Bacik	b50c6e2	2013-04-25 15:55:30 -0400	[diff] [blame]	6386	if (!ret && wc.stage == LOG_WALK_REPLAY_ALL) {
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	6387	ret = fixup_inode_link_counts(trans, wc.replay_dest,
				6388	path);
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	6389	}
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	6390
Liu Bo	900c998	2018-01-25 11:02:56 -0700	[diff] [blame]	6391	if (!ret && wc.stage == LOG_WALK_REPLAY_ALL) {
				6392	struct btrfs_root *root = wc.replay_dest;
				6393
				6394	btrfs_release_path(path);
				6395
				6396	/*
				6397	* We have just replayed everything, and the highest
				6398	* objectid of fs roots probably has changed in case
				6399	* some inode_item's got replayed.
				6400	*
				6401	* root->objectid_mutex is not acquired as log replay
				6402	* could only happen during mount.
				6403	*/
Nikolay Borisov	453e487	2020-12-07 17:32:32 +0200	[diff] [blame]	6404	ret = btrfs_init_root_free_objectid(root);
Liu Bo	900c998	2018-01-25 11:02:56 -0700	[diff] [blame]	6405	}
				6406
Yan Zheng	07d400a	2009-01-06 11:42:00 -0500	[diff] [blame]	6407	wc.replay_dest->log_root = NULL;
Josef Bacik	0024652	2020-01-24 09:33:01 -0500	[diff] [blame]	6408	btrfs_put_root(wc.replay_dest);
Josef Bacik	0024652	2020-01-24 09:33:01 -0500	[diff] [blame]	6409	btrfs_put_root(log);
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	6410
Josef Bacik	b50c6e2	2013-04-25 15:55:30 -0400	[diff] [blame]	6411	if (ret)
				6412	goto error;
Josef Bacik	9bc574d	2019-12-06 09:37:17 -0500	[diff] [blame]	6413	next:
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	6414	if (found_key.offset == 0)
				6415	break;
Josef Bacik	9bc574d	2019-12-06 09:37:17 -0500	[diff] [blame]	6416	key.offset = found_key.offset - 1;
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	6417	}
David Sterba	b3b4aa7	2011-04-21 01:20:15 +0200	[diff] [blame]	6418	btrfs_release_path(path);
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	6419
				6420	/* step one is to pin it all, step two is to replay just inodes */
				6421	if (wc.pin) {
				6422	wc.pin = 0;
				6423	wc.process_func = replay_one_buffer;
				6424	wc.stage = LOG_WALK_REPLAY_INODES;
				6425	goto again;
				6426	}
				6427	/* step three is to replay everything */
				6428	if (wc.stage < LOG_WALK_REPLAY_ALL) {
				6429	wc.stage++;
				6430	goto again;
				6431	}
				6432
				6433	btrfs_free_path(path);
				6434
Josef Bacik	abefa55	2013-04-24 16:40:05 -0400	[diff] [blame]	6435	/* step 4: commit the transaction, which also unpins the blocks */
Jeff Mahoney	3a45bb2	2016-09-09 21:39:03 -0400	[diff] [blame]	6436	ret = btrfs_commit_transaction(trans);
Josef Bacik	abefa55	2013-04-24 16:40:05 -0400	[diff] [blame]	6437	if (ret)
				6438	return ret;
				6439
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	6440	log_root_tree->log_root = NULL;
Josef Bacik	afcdd12	2016-09-02 15:40:02 -0400	[diff] [blame]	6441	clear_bit(BTRFS_FS_LOG_RECOVERING, &fs_info->flags);
Josef Bacik	0024652	2020-01-24 09:33:01 -0500	[diff] [blame]	6442	btrfs_put_root(log_root_tree);
Jeff Mahoney	79787ea	2012-03-12 16:03:00 +0100	[diff] [blame]	6443
Josef Bacik	abefa55	2013-04-24 16:40:05 -0400	[diff] [blame]	6444	return 0;
Jeff Mahoney	79787ea	2012-03-12 16:03:00 +0100	[diff] [blame]	6445	error:
Josef Bacik	b50c6e2	2013-04-25 15:55:30 -0400	[diff] [blame]	6446	if (wc.trans)
Jeff Mahoney	3a45bb2	2016-09-09 21:39:03 -0400	[diff] [blame]	6447	btrfs_end_transaction(wc.trans);
David Sterba	1aeb6b5	2020-07-07 18:38:05 +0200	[diff] [blame]	6448	clear_bit(BTRFS_FS_LOG_RECOVERING, &fs_info->flags);
Jeff Mahoney	79787ea	2012-03-12 16:03:00 +0100	[diff] [blame]	6449	btrfs_free_path(path);
				6450	return ret;
Chris Mason	e02119d	2008-09-05 16:13:11 -0400	[diff] [blame]	6451	}
Chris Mason	12fcfd2	2009-03-24 10:24:20 -0400	[diff] [blame]	6452
				6453	/*
				6454	* there are some corner cases where we want to force a full
				6455	* commit instead of allowing a directory to be logged.
				6456	*
				6457	* They revolve around files that were unlinked from the directory, and
				6458	* this function updates the parent directory so that a full commit is
				6459	* properly done if it is fsync'd later after the unlinks are done.
Filipe Manana	2be63d5	2016-02-12 11:34:23 +0000	[diff] [blame]	6460	*
				6461	* Must be called before the unlink operations (updates to the subvolume tree,
				6462	* inodes, etc) are done.
Chris Mason	12fcfd2	2009-03-24 10:24:20 -0400	[diff] [blame]	6463	*/
				6464	void btrfs_record_unlink_dir(struct btrfs_trans_handle *trans,
Nikolay Borisov	4176bdb	2017-01-18 00:31:28 +0200	[diff] [blame]	6465	struct btrfs_inode *dir, struct btrfs_inode *inode,
Chris Mason	12fcfd2	2009-03-24 10:24:20 -0400	[diff] [blame]	6466	int for_rename)
				6467	{
				6468	/*
Chris Mason	af4176b	2009-03-24 10:24:31 -0400	[diff] [blame]	6469	* when we're logging a file, if it hasn't been renamed
				6470	* or unlinked, and its inode is fully committed on disk,
				6471	* we don't have to worry about walking up the directory chain
				6472	* to log its parents.
				6473	*
				6474	* So, we use the last_unlink_trans field to put this transid
				6475	* into the file. When the file is logged we check it and
				6476	* don't log the parents if the file is fully on disk.
				6477	*/
Nikolay Borisov	4176bdb	2017-01-18 00:31:28 +0200	[diff] [blame]	6478	mutex_lock(&inode->log_mutex);
				6479	inode->last_unlink_trans = trans->transid;
				6480	mutex_unlock(&inode->log_mutex);
Chris Mason	af4176b	2009-03-24 10:24:31 -0400	[diff] [blame]	6481
				6482	/*
Chris Mason	12fcfd2	2009-03-24 10:24:20 -0400	[diff] [blame]	6483	* if this directory was already logged any new
				6484	* names for this file/dir will get recorded
				6485	*/
Nikolay Borisov	4176bdb	2017-01-18 00:31:28 +0200	[diff] [blame]	6486	if (dir->logged_trans == trans->transid)
Chris Mason	12fcfd2	2009-03-24 10:24:20 -0400	[diff] [blame]	6487	return;
				6488
				6489	/*
				6490	* if the inode we're about to unlink was logged,
				6491	* the log will be properly updated for any new names
				6492	*/
Nikolay Borisov	4176bdb	2017-01-18 00:31:28 +0200	[diff] [blame]	6493	if (inode->logged_trans == trans->transid)
Chris Mason	12fcfd2	2009-03-24 10:24:20 -0400	[diff] [blame]	6494	return;
				6495
				6496	/*
				6497	* when renaming files across directories, if the directory
				6498	* that we're unlinking from gets fsync'd later on, there's
				6499	* no way to find the destination directory later and fsync it
				6500	* properly. So, we have to be conservative and force commits
				6501	* so the new name gets discovered.
				6502	*/
				6503	if (for_rename)
				6504	goto record;
				6505
				6506	/* we can safely do the unlink without any special recording */
				6507	return;
				6508
				6509	record:
Nikolay Borisov	4176bdb	2017-01-18 00:31:28 +0200	[diff] [blame]	6510	mutex_lock(&dir->log_mutex);
				6511	dir->last_unlink_trans = trans->transid;
				6512	mutex_unlock(&dir->log_mutex);
Chris Mason	12fcfd2	2009-03-24 10:24:20 -0400	[diff] [blame]	6513	}
				6514
				6515	/*
Filipe Manana	1ec9a1a	2016-02-10 10:42:25 +0000	[diff] [blame]	6516	* Make sure that if someone attempts to fsync the parent directory of a deleted
				6517	* snapshot, it ends up triggering a transaction commit. This is to guarantee
				6518	* that after replaying the log tree of the parent directory's root we will not
				6519	* see the snapshot anymore and at log replay time we will not see any log tree
				6520	* corresponding to the deleted snapshot's root, which could lead to replaying
				6521	* it after replaying the log tree of the parent directory (which would replay
				6522	* the snapshot delete operation).
Filipe Manana	2be63d5	2016-02-12 11:34:23 +0000	[diff] [blame]	6523	*
				6524	* Must be called before the actual snapshot destroy operation (updates to the
				6525	* parent root and tree of tree roots trees, etc) are done.
Filipe Manana	1ec9a1a	2016-02-10 10:42:25 +0000	[diff] [blame]	6526	*/
				6527	void btrfs_record_snapshot_destroy(struct btrfs_trans_handle *trans,
Nikolay Borisov	4366355	2017-01-18 00:31:29 +0200	[diff] [blame]	6528	struct btrfs_inode *dir)
Filipe Manana	1ec9a1a	2016-02-10 10:42:25 +0000	[diff] [blame]	6529	{
Nikolay Borisov	4366355	2017-01-18 00:31:29 +0200	[diff] [blame]	6530	mutex_lock(&dir->log_mutex);
				6531	dir->last_unlink_trans = trans->transid;
				6532	mutex_unlock(&dir->log_mutex);
Filipe Manana	1ec9a1a	2016-02-10 10:42:25 +0000	[diff] [blame]	6533	}
				6534
				6535	/*
Chris Mason	12fcfd2	2009-03-24 10:24:20 -0400	[diff] [blame]	6536	* Call this after adding a new name for a file and it will properly
				6537	* update the log to reflect the new name.
Chris Mason	12fcfd2	2009-03-24 10:24:20 -0400	[diff] [blame]	6538	*/
Filipe Manana	75b463d	2020-08-11 12:43:48 +0100	[diff] [blame]	6539	void btrfs_log_new_name(struct btrfs_trans_handle *trans,
Nikolay Borisov	9ca5fbfb	2017-01-18 00:31:31 +0200	[diff] [blame]	6540	struct btrfs_inode *inode, struct btrfs_inode *old_dir,
Filipe Manana	75b463d	2020-08-11 12:43:48 +0100	[diff] [blame]	6541	struct dentry *parent)
Chris Mason	12fcfd2	2009-03-24 10:24:20 -0400	[diff] [blame]	6542	{
Filipe Manana	75b463d	2020-08-11 12:43:48 +0100	[diff] [blame]	6543	struct btrfs_log_ctx ctx;
Chris Mason	12fcfd2	2009-03-24 10:24:20 -0400	[diff] [blame]	6544
				6545	/*
Chris Mason	af4176b	2009-03-24 10:24:31 -0400	[diff] [blame]	6546	* this will force the logging code to walk the dentry chain
				6547	* up for the file
				6548	*/
Filipe Manana	9a6509c	2018-02-28 15:55:40 +0000	[diff] [blame]	6549	if (!S_ISDIR(inode->vfs_inode.i_mode))
Nikolay Borisov	9ca5fbfb	2017-01-18 00:31:31 +0200	[diff] [blame]	6550	inode->last_unlink_trans = trans->transid;
Chris Mason	af4176b	2009-03-24 10:24:31 -0400	[diff] [blame]	6551
				6552	/*
Chris Mason	12fcfd2	2009-03-24 10:24:20 -0400	[diff] [blame]	6553	* if this inode hasn't been logged and directory we're renaming it
				6554	* from hasn't been logged, we don't need to log it
				6555	*/
Filipe Manana	ecc64fa	2021-07-27 11:24:43 +0100	[diff] [blame]	6556	if (!inode_logged(trans, inode) &&
				6557	(!old_dir || !inode_logged(trans, old_dir)))
Filipe Manana	75b463d	2020-08-11 12:43:48 +0100	[diff] [blame]	6558	return;
Chris Mason	12fcfd2	2009-03-24 10:24:20 -0400	[diff] [blame]	6559
Filipe Manana	54a40fc	2021-05-12 16:27:16 +0100	[diff] [blame]	6560	/*
				6561	* If we are doing a rename (old_dir is not NULL) from a directory that
				6562	* was previously logged, make sure the next log attempt on the directory
				6563	* is not skipped and logs the inode again. This is because the log may
				6564	* not currently be authoritative for a range including the old
				6565	* BTRFS_DIR_ITEM_KEY and BTRFS_DIR_INDEX_KEY keys, so we want to make
				6566	* sure after a log replay we do not end up with both the new and old
				6567	* dentries around (in case the inode is a directory we would have a
				6568	* directory with two hard links and 2 inode references for different
				6569	* parents). The next log attempt of old_dir will happen at
				6570	* btrfs_log_all_parents(), called through btrfs_log_inode_parent()
				6571	* below, because we have previously set inode->last_unlink_trans to the
				6572	* current transaction ID, either here or at btrfs_record_unlink_dir() in
				6573	* case inode is a directory.
				6574	*/
				6575	if (old_dir)
				6576	old_dir->logged_trans = 0;
				6577
Filipe Manana	75b463d	2020-08-11 12:43:48 +0100	[diff] [blame]	6578	btrfs_init_log_ctx(&ctx, &inode->vfs_inode);
				6579	ctx.logging_new_name = true;
				6580	/*
				6581	* We don't care about the return value. If we fail to log the new name
				6582	* then we know the next attempt to sync the log will fallback to a full
				6583	* transaction commit (due to a call to btrfs_set_log_full_commit()), so
				6584	* we don't need to worry about getting a log committed that has an
				6585	* inconsistent state after a rename operation.
				6586	*/
Filipe Manana	4877817	2020-08-11 12:43:58 +0100	[diff] [blame]	6587	btrfs_log_inode_parent(trans, inode, parent, LOG_INODE_EXISTS, &ctx);
Chris Mason	12fcfd2	2009-03-24 10:24:20 -0400	[diff] [blame]	6588	}
				6589