Blame - fs/ext4/fast_commit.c - SHIFTPHONES/mainline/linux

blob: 48b7bc12939202e6221a4f6394e7b16ad1b99323 [file] [log] [blame]

Harshad Shirwadkar	6866d7b	2020-10-15 13:37:55 -0700	[diff] [blame]	1	// SPDX-License-Identifier: GPL-2.0
				2
				3	/*
				4	* fs/ext4/fast_commit.c
				5	*
				6	* Written by Harshad Shirwadkar <harshadshirwadkar@gmail.com>
				7	*
				8	* Ext4 fast commits routines.
				9	*/
Harshad Shirwadkar	aa75f4d	2020-10-15 13:37:57 -0700	[diff] [blame]	10	#include "ext4.h"
Harshad Shirwadkar	6866d7b	2020-10-15 13:37:55 -0700	[diff] [blame]	11	#include "ext4_jbd2.h"
Harshad Shirwadkar	aa75f4d	2020-10-15 13:37:57 -0700	[diff] [blame]	12	#include "ext4_extents.h"
				13	#include "mballoc.h"
				14
				15	/*
				16	* Ext4 Fast Commits
				17	* -----------------
				18	*
				19	* Ext4 fast commits implement fine grained journalling for Ext4.
				20	*
				21	* Fast commits are organized as a log of tag-length-value (TLV) structs. (See
				22	* struct ext4_fc_tl). Each TLV contains some delta that is replayed TLV by
				23	* TLV during the recovery phase. For the scenarios for which we currently
				24	* don't have replay code, fast commit falls back to full commits.
				25	* Fast commits record delta in one of the following three categories.
				26	*
				27	* (A) Directory entry updates:
				28	*
				29	* - EXT4_FC_TAG_UNLINK - records directory entry unlink
				30	* - EXT4_FC_TAG_LINK - records directory entry link
				31	* - EXT4_FC_TAG_CREAT - records inode and directory entry creation
				32	*
				33	* (B) File specific data range updates:
				34	*
				35	* - EXT4_FC_TAG_ADD_RANGE - records addition of new blocks to an inode
				36	* - EXT4_FC_TAG_DEL_RANGE - records deletion of blocks from an inode
				37	*
				38	* (C) Inode metadata (mtime / ctime etc):
				39	*
				40	* - EXT4_FC_TAG_INODE - record the inode that should be replayed
				41	* during recovery. Note that iblocks field is
				42	* not replayed and instead derived during
				43	* replay.
				44	* Commit Operation
				45	* ----------------
				46	* With fast commits, we maintain all the directory entry operations in the
				47	* order in which they are issued in an in-memory queue. This queue is flushed
				48	* to disk during the commit operation. We also maintain a list of inodes
				49	* that need to be committed during a fast commit in another in memory queue of
				50	* inodes. During the commit operation, we commit in the following order:
				51	*
				52	* [1] Lock inodes for any further data updates by setting COMMITTING state
				53	* [2] Submit data buffers of all the inodes
				54	* [3] Wait for [2] to complete
				55	* [4] Commit all the directory entry updates in the fast commit space
				56	* [5] Commit all the changed inode structures
				57	* [6] Write tail tag (this tag ensures the atomicity, please read the following
				58	* section for more details).
				59	* [7] Wait for [4], [5] and [6] to complete.
				60	*
				61	* All the inode updates must call ext4_fc_start_update() before starting an
				62	* update. If such an ongoing update is present, fast commit waits for it to
				63	* complete. The completion of such an update is marked by
				64	* ext4_fc_stop_update().
				65	*
				66	* Fast Commit Ineligibility
				67	* -------------------------
				68	* Not all operations are supported by fast commits today (e.g. extended
				69	* attributes). Fast commit ineligibility is marked by calling one of the
				70	* two following functions:
				71	*
				72	* - ext4_fc_mark_ineligible(): This makes next fast commit operation to fall
				73	* back to full commit. This is useful in case of transient errors.
				74	*
				75	* - ext4_fc_start_ineligible() and ext4_fc_stop_ineligible() - This makes all
				76	* the fast commits happening between ext4_fc_start_ineligible() and
				77	* ext4_fc_stop_ineligible() and one fast commit after the call to
				78	* ext4_fc_stop_ineligible() to fall back to full commits. It is important to
				79	* make one more fast commit to fall back to full commit after stop call so
				80	* that it is guaranteed that the fast commit ineligible operation contained
				81	* within ext4_fc_start_ineligible() and ext4_fc_stop_ineligible() is
				82	* followed by at least 1 full commit.
				83	*
				84	* Atomicity of commits
				85	* --------------------
				86	* In order to guarantee atomicity during the commit operation, fast commit
				87	* uses "EXT4_FC_TAG_TAIL" tag that marks a fast commit as complete. Tail
				88	* tag contains CRC of the contents and TID of the transaction after which
				89	* this fast commit should be applied. Recovery code replays fast commit
				90	* logs only if there's at least 1 valid tail present. For every fast commit
				91	* operation, there is 1 tail. This means, we may end up with multiple tails
				92	* in the fast commit space. Here's an example:
				93	*
				94	* - Create a new file A and remove existing file B
				95	* - fsync()
				96	* - Append contents to file A
				97	* - Truncate file A
				98	* - fsync()
				99	*
				100	* The fast commit space at the end of above operations would look like this:
				101	* [HEAD] [CREAT A] [UNLINK B] [TAIL] [ADD_RANGE A] [DEL_RANGE A] [TAIL]
				102	* |<--- Fast Commit 1 --->|<--- Fast Commit 2 ---->|
				103	*
				104	* Replay code should thus check for all the valid tails in the FC area.
				105	*
				106	* TODOs
				107	* -----
				108	* 1) Make fast commit atomic updates more fine grained. Today, a fast commit
				109	* eligible update must be protected within ext4_fc_start_update() and
				110	* ext4_fc_stop_update(). These routines are called at much higher
				111	* routines. This can be made more fine grained by combining with
				112	* ext4_journal_start().
				113	*
				114	* 2) Same above for ext4_fc_start_ineligible() and ext4_fc_stop_ineligible()
				115	*
				116	* 3) Handle more ineligible cases.
				117	*/
				118
				119	#include <trace/events/ext4.h>
				120	static struct kmem_cache *ext4_fc_dentry_cachep;	/* cache that __track_dentry_update() allocates its dentry update nodes from */
				121
				122	static void ext4_end_buffer_io_sync(struct buffer_head *bh, int uptodate)
				123	{
				124	BUFFER_TRACE(bh, "");
				125	if (uptodate) {
				126	ext4_debug("%s: Block %lld up-to-date",
				127	__func__, bh->b_blocknr);
				128	set_buffer_uptodate(bh);
				129	} else {
				130	ext4_debug("%s: Block %lld not up-to-date",
				131	__func__, bh->b_blocknr);
				132	clear_buffer_uptodate(bh);
				133	}
				134
				135	unlock_buffer(bh);
				136	}
				137
				138	static inline void ext4_fc_reset_inode(struct inode *inode)
				139	{
				140	struct ext4_inode_info *ei = EXT4_I(inode);
				141
				142	ei->i_fc_lblk_start = 0;
				143	ei->i_fc_lblk_len = 0;
				144	}
				145
				146	void ext4_fc_init_inode(struct inode *inode)
				147	{
				148	struct ext4_inode_info *ei = EXT4_I(inode);
				149
				150	ext4_fc_reset_inode(inode);
				151	ext4_clear_inode_state(inode, EXT4_STATE_FC_COMMITTING);
				152	INIT_LIST_HEAD(&ei->i_fc_list);
				153	init_waitqueue_head(&ei->i_fc_wait);
				154	atomic_set(&ei->i_fc_updates, 0);
				155	ei->i_fc_committed_subtid = 0;
				156	}
				157
				158	/*
				159	* Inform Ext4's fast about start of an inode update
				160	*
				161	* This function is called by the high level call VFS callbacks before
				162	* performing any inode update. This function blocks if there's an ongoing
				163	* fast commit on the inode in question.
				164	*/
				165	void ext4_fc_start_update(struct inode *inode)
				166	{
				167	struct ext4_inode_info *ei = EXT4_I(inode);
				168
Harshad Shirwadkar	8016e29	2020-10-15 13:37:59 -0700	[diff] [blame]	169	if (!test_opt2(inode->i_sb, JOURNAL_FAST_COMMIT) \|\|
				170	(EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY))
Harshad Shirwadkar	aa75f4d	2020-10-15 13:37:57 -0700	[diff] [blame]	171	return;
				172
				173	restart:
				174	spin_lock(&EXT4_SB(inode->i_sb)->s_fc_lock);
				175	if (list_empty(&ei->i_fc_list))
				176	goto out;
				177
				178	if (ext4_test_inode_state(inode, EXT4_STATE_FC_COMMITTING)) {
				179	wait_queue_head_t *wq;
				180	#if (BITS_PER_LONG < 64)
				181	DEFINE_WAIT_BIT(wait, &ei->i_state_flags,
				182	EXT4_STATE_FC_COMMITTING);
				183	wq = bit_waitqueue(&ei->i_state_flags,
				184	EXT4_STATE_FC_COMMITTING);
				185	#else
				186	DEFINE_WAIT_BIT(wait, &ei->i_flags,
				187	EXT4_STATE_FC_COMMITTING);
				188	wq = bit_waitqueue(&ei->i_flags,
				189	EXT4_STATE_FC_COMMITTING);
				190	#endif
				191	prepare_to_wait(wq, &wait.wq_entry, TASK_UNINTERRUPTIBLE);
				192	spin_unlock(&EXT4_SB(inode->i_sb)->s_fc_lock);
				193	schedule();
				194	finish_wait(wq, &wait.wq_entry);
				195	goto restart;
				196	}
				197	out:
				198	atomic_inc(&ei->i_fc_updates);
				199	spin_unlock(&EXT4_SB(inode->i_sb)->s_fc_lock);
				200	}
				201
				202	/*
				203	* Stop inode update and wake up waiting fast commits if any.
				204	*/
				205	void ext4_fc_stop_update(struct inode *inode)
				206	{
				207	struct ext4_inode_info *ei = EXT4_I(inode);
				208
Harshad Shirwadkar	8016e29	2020-10-15 13:37:59 -0700	[diff] [blame]	209	if (!test_opt2(inode->i_sb, JOURNAL_FAST_COMMIT) \|\|
				210	(EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY))
Harshad Shirwadkar	aa75f4d	2020-10-15 13:37:57 -0700	[diff] [blame]	211	return;
				212
				213	if (atomic_dec_and_test(&ei->i_fc_updates))
				214	wake_up_all(&ei->i_fc_wait);
				215	}
				216
				217	/*
				218	* Remove inode from fast commit list. If the inode is being committed
				219	* we wait until inode commit is done.
				220	*/
				221	void ext4_fc_del(struct inode *inode)
				222	{
				223	struct ext4_inode_info *ei = EXT4_I(inode);
				224
Harshad Shirwadkar	8016e29	2020-10-15 13:37:59 -0700	[diff] [blame]	225	if (!test_opt2(inode->i_sb, JOURNAL_FAST_COMMIT) \|\|
				226	(EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY))
Harshad Shirwadkar	aa75f4d	2020-10-15 13:37:57 -0700	[diff] [blame]	227	return;
				228
				229	restart:
				230	spin_lock(&EXT4_SB(inode->i_sb)->s_fc_lock);
				231	if (list_empty(&ei->i_fc_list)) {
				232	spin_unlock(&EXT4_SB(inode->i_sb)->s_fc_lock);
				233	return;
				234	}
				235
				236	if (ext4_test_inode_state(inode, EXT4_STATE_FC_COMMITTING)) {
				237	wait_queue_head_t *wq;
				238	#if (BITS_PER_LONG < 64)
				239	DEFINE_WAIT_BIT(wait, &ei->i_state_flags,
				240	EXT4_STATE_FC_COMMITTING);
				241	wq = bit_waitqueue(&ei->i_state_flags,
				242	EXT4_STATE_FC_COMMITTING);
				243	#else
				244	DEFINE_WAIT_BIT(wait, &ei->i_flags,
				245	EXT4_STATE_FC_COMMITTING);
				246	wq = bit_waitqueue(&ei->i_flags,
				247	EXT4_STATE_FC_COMMITTING);
				248	#endif
				249	prepare_to_wait(wq, &wait.wq_entry, TASK_UNINTERRUPTIBLE);
				250	spin_unlock(&EXT4_SB(inode->i_sb)->s_fc_lock);
				251	schedule();
				252	finish_wait(wq, &wait.wq_entry);
				253	goto restart;
				254	}
				255	if (!list_empty(&ei->i_fc_list))
				256	list_del_init(&ei->i_fc_list);
				257	spin_unlock(&EXT4_SB(inode->i_sb)->s_fc_lock);
				258	}
				259
				260	/*
				261	* Mark file system as fast commit ineligible. This means that next commit
				262	* operation would result in a full jbd2 commit.
				263	*/
				264	void ext4_fc_mark_ineligible(struct super_block *sb, int reason)
				265	{
				266	struct ext4_sb_info *sbi = EXT4_SB(sb);
				267
Harshad Shirwadkar	8016e29	2020-10-15 13:37:59 -0700	[diff] [blame]	268	if (!test_opt2(sb, JOURNAL_FAST_COMMIT) \|\|
				269	(EXT4_SB(sb)->s_mount_state & EXT4_FC_REPLAY))
				270	return;
				271
Harshad Shirwadkar	ababea7	2020-10-26 21:49:15 -0700	[diff] [blame]	272	sbi->s_mount_flags \|= EXT4_MF_FC_INELIGIBLE;
Harshad Shirwadkar	aa75f4d	2020-10-15 13:37:57 -0700	[diff] [blame]	273	WARN_ON(reason >= EXT4_FC_REASON_MAX);
				274	sbi->s_fc_stats.fc_ineligible_reason_count[reason]++;
				275	}
				276
				277	/*
				278	* Start a fast commit ineligible update. Any commits that happen while
				279	* such an operation is in progress fall back to full commits.
				280	*/
				281	void ext4_fc_start_ineligible(struct super_block *sb, int reason)
				282	{
				283	struct ext4_sb_info *sbi = EXT4_SB(sb);
				284
Harshad Shirwadkar	8016e29	2020-10-15 13:37:59 -0700	[diff] [blame]	285	if (!test_opt2(sb, JOURNAL_FAST_COMMIT) \|\|
				286	(EXT4_SB(sb)->s_mount_state & EXT4_FC_REPLAY))
				287	return;
				288
Harshad Shirwadkar	aa75f4d	2020-10-15 13:37:57 -0700	[diff] [blame]	289	WARN_ON(reason >= EXT4_FC_REASON_MAX);
				290	sbi->s_fc_stats.fc_ineligible_reason_count[reason]++;
				291	atomic_inc(&sbi->s_fc_ineligible_updates);
				292	}
				293
				294	/*
Harshad Shirwadkar	ababea7	2020-10-26 21:49:15 -0700	[diff] [blame]	295	* Stop a fast commit ineligible update. We set EXT4_MF_FC_INELIGIBLE flag here
Harshad Shirwadkar	aa75f4d	2020-10-15 13:37:57 -0700	[diff] [blame]	296	* to ensure that after stopping the ineligible update, at least one full
				297	* commit takes place.
				298	*/
				299	void ext4_fc_stop_ineligible(struct super_block *sb)
				300	{
Harshad Shirwadkar	8016e29	2020-10-15 13:37:59 -0700	[diff] [blame]	301	if (!test_opt2(sb, JOURNAL_FAST_COMMIT) \|\|
				302	(EXT4_SB(sb)->s_mount_state & EXT4_FC_REPLAY))
				303	return;
				304
Harshad Shirwadkar	ababea7	2020-10-26 21:49:15 -0700	[diff] [blame]	305	EXT4_SB(sb)->s_mount_flags \|= EXT4_MF_FC_INELIGIBLE;
Harshad Shirwadkar	aa75f4d	2020-10-15 13:37:57 -0700	[diff] [blame]	306	atomic_dec(&EXT4_SB(sb)->s_fc_ineligible_updates);
				307	}
				308
				309	static inline int ext4_fc_is_ineligible(struct super_block *sb)
				310	{
Harshad Shirwadkar	ababea7	2020-10-26 21:49:15 -0700	[diff] [blame]	311	return (EXT4_SB(sb)->s_mount_flags & EXT4_MF_FC_INELIGIBLE) \|\|
Harshad Shirwadkar	aa75f4d	2020-10-15 13:37:57 -0700	[diff] [blame]	312	atomic_read(&EXT4_SB(sb)->s_fc_ineligible_updates);
				313	}
				314
				315	/*
				316	* Generic fast commit tracking function. If this is the first time this we are
				317	* called after a full commit, we initialize fast commit fields and then call
				318	* __fc_track_fn() with update = 0. If we have already been called after a full
				319	* commit, we pass update = 1. Based on that, the track function can determine
				320	* if it needs to track a field for the first time or if it needs to just
				321	* update the previously tracked value.
				322	*
				323	* If enqueue is set, this function enqueues the inode in fast commit list.
				324	*/
				325	static int ext4_fc_track_template(
				326	struct inode inode, int (__fc_track_fn)(struct inode , void , bool),
				327	void *args, int enqueue)
				328	{
				329	tid_t running_txn_tid;
				330	bool update = false;
				331	struct ext4_inode_info *ei = EXT4_I(inode);
				332	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
				333	int ret;
				334
Harshad Shirwadkar	8016e29	2020-10-15 13:37:59 -0700	[diff] [blame]	335	if (!test_opt2(inode->i_sb, JOURNAL_FAST_COMMIT) \|\|
				336	(sbi->s_mount_state & EXT4_FC_REPLAY))
Harshad Shirwadkar	aa75f4d	2020-10-15 13:37:57 -0700	[diff] [blame]	337	return -EOPNOTSUPP;
				338
				339	if (ext4_fc_is_ineligible(inode->i_sb))
				340	return -EINVAL;
				341
				342	running_txn_tid = sbi->s_journal ?
				343	sbi->s_journal->j_commit_sequence + 1 : 0;
				344
				345	mutex_lock(&ei->i_fc_lock);
				346	if (running_txn_tid == ei->i_sync_tid) {
				347	update = true;
				348	} else {
				349	ext4_fc_reset_inode(inode);
				350	ei->i_sync_tid = running_txn_tid;
				351	}
				352	ret = __fc_track_fn(inode, args, update);
				353	mutex_unlock(&ei->i_fc_lock);
				354
				355	if (!enqueue)
				356	return ret;
				357
				358	spin_lock(&sbi->s_fc_lock);
				359	if (list_empty(&EXT4_I(inode)->i_fc_list))
				360	list_add_tail(&EXT4_I(inode)->i_fc_list,
Harshad Shirwadkar	ababea7	2020-10-26 21:49:15 -0700	[diff] [blame]	361	(sbi->s_mount_flags & EXT4_MF_FC_COMMITTING) ?
Harshad Shirwadkar	aa75f4d	2020-10-15 13:37:57 -0700	[diff] [blame]	362	&sbi->s_fc_q[FC_Q_STAGING] :
				363	&sbi->s_fc_q[FC_Q_MAIN]);
				364	spin_unlock(&sbi->s_fc_lock);
				365
				366	return ret;
				367	}
				368
				/* Argument bundle handed to __track_dentry_update() through the void *arg. */
				struct __track_dentry_update_args {
					struct dentry *dentry;	/* directory entry the operation refers to */
					int op;			/* EXT4_FC_TAG_* value, copied into fcd_op */
				};
				373
				374	/* __track_fn for directory entry updates. Called with ei->i_fc_lock. */
				375	static int __track_dentry_update(struct inode inode, void arg, bool update)
				376	{
				377	struct ext4_fc_dentry_update *node;
				378	struct ext4_inode_info *ei = EXT4_I(inode);
				379	struct __track_dentry_update_args *dentry_update =
				380	(struct __track_dentry_update_args *)arg;
				381	struct dentry *dentry = dentry_update->dentry;
				382	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
				383
				384	mutex_unlock(&ei->i_fc_lock);
				385	node = kmem_cache_alloc(ext4_fc_dentry_cachep, GFP_NOFS);
				386	if (!node) {
Harshad Shirwadkar	b21ebf1	2020-11-05 19:58:51 -0800	[diff] [blame^]	387	ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_NOMEM);
Harshad Shirwadkar	aa75f4d	2020-10-15 13:37:57 -0700	[diff] [blame]	388	mutex_lock(&ei->i_fc_lock);
				389	return -ENOMEM;
				390	}
				391
				392	node->fcd_op = dentry_update->op;
				393	node->fcd_parent = dentry->d_parent->d_inode->i_ino;
				394	node->fcd_ino = inode->i_ino;
				395	if (dentry->d_name.len > DNAME_INLINE_LEN) {
				396	node->fcd_name.name = kmalloc(dentry->d_name.len, GFP_NOFS);
				397	if (!node->fcd_name.name) {
				398	kmem_cache_free(ext4_fc_dentry_cachep, node);
				399	ext4_fc_mark_ineligible(inode->i_sb,
Harshad Shirwadkar	b21ebf1	2020-11-05 19:58:51 -0800	[diff] [blame^]	400	EXT4_FC_REASON_NOMEM);
Harshad Shirwadkar	aa75f4d	2020-10-15 13:37:57 -0700	[diff] [blame]	401	mutex_lock(&ei->i_fc_lock);
				402	return -ENOMEM;
				403	}
				404	memcpy((u8 *)node->fcd_name.name, dentry->d_name.name,
				405	dentry->d_name.len);
				406	} else {
				407	memcpy(node->fcd_iname, dentry->d_name.name,
				408	dentry->d_name.len);
				409	node->fcd_name.name = node->fcd_iname;
				410	}
				411	node->fcd_name.len = dentry->d_name.len;
				412
				413	spin_lock(&sbi->s_fc_lock);
Harshad Shirwadkar	ababea7	2020-10-26 21:49:15 -0700	[diff] [blame]	414	if (sbi->s_mount_flags & EXT4_MF_FC_COMMITTING)
Harshad Shirwadkar	aa75f4d	2020-10-15 13:37:57 -0700	[diff] [blame]	415	list_add_tail(&node->fcd_list,
				416	&sbi->s_fc_dentry_q[FC_Q_STAGING]);
				417	else
				418	list_add_tail(&node->fcd_list, &sbi->s_fc_dentry_q[FC_Q_MAIN]);
				419	spin_unlock(&sbi->s_fc_lock);
				420	mutex_lock(&ei->i_fc_lock);
				421
				422	return 0;
				423	}
				424
				425	void ext4_fc_track_unlink(struct inode inode, struct dentry dentry)
				426	{
				427	struct __track_dentry_update_args args;
				428	int ret;
				429
				430	args.dentry = dentry;
				431	args.op = EXT4_FC_TAG_UNLINK;
				432
				433	ret = ext4_fc_track_template(inode, __track_dentry_update,
				434	(void *)&args, 0);
				435	trace_ext4_fc_track_unlink(inode, dentry, ret);
				436	}
				437
				438	void ext4_fc_track_link(struct inode inode, struct dentry dentry)
				439	{
				440	struct __track_dentry_update_args args;
				441	int ret;
				442
				443	args.dentry = dentry;
				444	args.op = EXT4_FC_TAG_LINK;
				445
				446	ret = ext4_fc_track_template(inode, __track_dentry_update,
				447	(void *)&args, 0);
				448	trace_ext4_fc_track_link(inode, dentry, ret);
				449	}
				450
				451	void ext4_fc_track_create(struct inode inode, struct dentry dentry)
				452	{
				453	struct __track_dentry_update_args args;
				454	int ret;
				455
				456	args.dentry = dentry;
				457	args.op = EXT4_FC_TAG_CREAT;
				458
				459	ret = ext4_fc_track_template(inode, __track_dentry_update,
				460	(void *)&args, 0);
				461	trace_ext4_fc_track_create(inode, dentry, ret);
				462	}
				463
				464	/* __track_fn for inode tracking */
				465	static int __track_inode(struct inode inode, void arg, bool update)
				466	{
				467	if (update)
				468	return -EEXIST;
				469
				470	EXT4_I(inode)->i_fc_lblk_len = 0;
				471
				472	return 0;
				473	}
				474
				475	void ext4_fc_track_inode(struct inode *inode)
				476	{
				477	int ret;
				478
				479	if (S_ISDIR(inode->i_mode))
				480	return;
				481
				482	ret = ext4_fc_track_template(inode, __track_inode, NULL, 1);
				483	trace_ext4_fc_track_inode(inode, ret);
				484	}
				485
				486	struct __track_range_args {
				487	ext4_lblk_t start, end;
				488	};
				489
				490	/* __track_fn for tracking data updates */
				491	static int __track_range(struct inode inode, void arg, bool update)
				492	{
				493	struct ext4_inode_info *ei = EXT4_I(inode);
				494	ext4_lblk_t oldstart;
				495	struct __track_range_args *__arg =
				496	(struct __track_range_args *)arg;
				497
				498	if (inode->i_ino < EXT4_FIRST_INO(inode->i_sb)) {
				499	ext4_debug("Special inode %ld being modified\n", inode->i_ino);
				500	return -ECANCELED;
				501	}
				502
				503	oldstart = ei->i_fc_lblk_start;
				504
				505	if (update && ei->i_fc_lblk_len > 0) {
				506	ei->i_fc_lblk_start = min(ei->i_fc_lblk_start, __arg->start);
				507	ei->i_fc_lblk_len =
				508	max(oldstart + ei->i_fc_lblk_len - 1, __arg->end) -
				509	ei->i_fc_lblk_start + 1;
				510	} else {
				511	ei->i_fc_lblk_start = __arg->start;
				512	ei->i_fc_lblk_len = __arg->end - __arg->start + 1;
				513	}
				514
				515	return 0;
				516	}
				517
				518	void ext4_fc_track_range(struct inode *inode, ext4_lblk_t start,
				519	ext4_lblk_t end)
				520	{
				521	struct __track_range_args args;
				522	int ret;
				523
				524	if (S_ISDIR(inode->i_mode))
				525	return;
				526
				527	args.start = start;
				528	args.end = end;
				529
				530	ret = ext4_fc_track_template(inode, __track_range, &args, 1);
				531
				532	trace_ext4_fc_track_range(inode, start, end, ret);
				533	}
				534
				535	static void ext4_fc_submit_bh(struct super_block *sb)
				536	{
				537	int write_flags = REQ_SYNC;
				538	struct buffer_head *bh = EXT4_SB(sb)->s_fc_bh;
				539
				540	if (test_opt(sb, BARRIER))
				541	write_flags \|= REQ_FUA \| REQ_PREFLUSH;
				542	lock_buffer(bh);
				543	clear_buffer_dirty(bh);
				544	set_buffer_uptodate(bh);
				545	bh->b_end_io = ext4_end_buffer_io_sync;
				546	submit_bh(REQ_OP_WRITE, write_flags, bh);
				547	EXT4_SB(sb)->s_fc_bh = NULL;
				548	}
				549
				550	/* Ext4 commit path routines */
				551
				552	/* memzero and update CRC */
				553	static void ext4_fc_memzero(struct super_block sb, void *dst, int len,
				554	u32 *crc)
				555	{
				556	void *ret;
				557
				558	ret = memset(dst, 0, len);
				559	if (crc)
				560	crc = ext4_chksum(EXT4_SB(sb), crc, dst, len);
				561	return ret;
				562	}
				563
				564	/*
				565	* Allocate len bytes on a fast commit buffer.
				566	*
				567	* During the commit time this function is used to manage fast commit
				568	* block space. We don't split a fast commit log onto different
				569	* blocks. So this function makes sure that if there's not enough space
				570	* on the current block, the remaining space in the current block is
				571	* marked as unused by adding EXT4_FC_TAG_PAD tag. In that case,
				572	* new block is from jbd2 and CRC is updated to reflect the padding
				573	* we added.
				574	*/
				575	static u8 ext4_fc_reserve_space(struct super_block sb, int len, u32 *crc)
				576	{
				577	struct ext4_fc_tl *tl;
				578	struct ext4_sb_info *sbi = EXT4_SB(sb);
				579	struct buffer_head *bh;
				580	int bsize = sbi->s_journal->j_blocksize;
				581	int ret, off = sbi->s_fc_bytes % bsize;
				582	int pad_len;
				583
				584	/*
				585	* After allocating len, we should have space at least for a 0 byte
				586	* padding.
				587	*/
				588	if (len + sizeof(struct ext4_fc_tl) > bsize)
				589	return NULL;
				590
				591	if (bsize - off - 1 > len + sizeof(struct ext4_fc_tl)) {
				592	/*
				593	* Only allocate from current buffer if we have enough space for
				594	* this request AND we have space to add a zero byte padding.
				595	*/
				596	if (!sbi->s_fc_bh) {
				597	ret = jbd2_fc_get_buf(EXT4_SB(sb)->s_journal, &bh);
				598	if (ret)
				599	return NULL;
				600	sbi->s_fc_bh = bh;
				601	}
				602	sbi->s_fc_bytes += len;
				603	return sbi->s_fc_bh->b_data + off;
				604	}
				605	/* Need to add PAD tag */
				606	tl = (struct ext4_fc_tl *)(sbi->s_fc_bh->b_data + off);
				607	tl->fc_tag = cpu_to_le16(EXT4_FC_TAG_PAD);
				608	pad_len = bsize - off - 1 - sizeof(struct ext4_fc_tl);
				609	tl->fc_len = cpu_to_le16(pad_len);
				610	if (crc)
				611	crc = ext4_chksum(sbi, crc, tl, sizeof(*tl));
				612	if (pad_len > 0)
				613	ext4_fc_memzero(sb, tl + 1, pad_len, crc);
				614	ext4_fc_submit_bh(sb);
				615
				616	ret = jbd2_fc_get_buf(EXT4_SB(sb)->s_journal, &bh);
				617	if (ret)
				618	return NULL;
				619	sbi->s_fc_bh = bh;
				620	sbi->s_fc_bytes = (sbi->s_fc_bytes / bsize + 1) * bsize + len;
				621	return sbi->s_fc_bh->b_data;
				622	}
				623
				624	/* memcpy to fc reserved space and update CRC */
				625	static void ext4_fc_memcpy(struct super_block sb, void dst, const void src,
				626	int len, u32 *crc)
				627	{
				628	if (crc)
				629	crc = ext4_chksum(EXT4_SB(sb), crc, src, len);
				630	return memcpy(dst, src, len);
				631	}
				632
				633	/*
				634	* Complete a fast commit by writing tail tag.
				635	*
				636	* Writing tail tag marks the end of a fast commit. In order to guarantee
				637	* atomicity, after writing tail tag, even if there's space remaining
				638	* in the block, next commit shouldn't use it. That's why tail tag
				639	* has the length as that of the remaining space on the block.
				640	*/
				641	static int ext4_fc_write_tail(struct super_block *sb, u32 crc)
				642	{
				643	struct ext4_sb_info *sbi = EXT4_SB(sb);
				644	struct ext4_fc_tl tl;
				645	struct ext4_fc_tail tail;
				646	int off, bsize = sbi->s_journal->j_blocksize;
				647	u8 *dst;
				648
				649	/*
				650	* ext4_fc_reserve_space takes care of allocating an extra block if
				651	* there's no enough space on this block for accommodating this tail.
				652	*/
				653	dst = ext4_fc_reserve_space(sb, sizeof(tl) + sizeof(tail), &crc);
				654	if (!dst)
				655	return -ENOSPC;
				656
				657	off = sbi->s_fc_bytes % bsize;
				658
				659	tl.fc_tag = cpu_to_le16(EXT4_FC_TAG_TAIL);
				660	tl.fc_len = cpu_to_le16(bsize - off - 1 + sizeof(struct ext4_fc_tail));
				661	sbi->s_fc_bytes = round_up(sbi->s_fc_bytes, bsize);
				662
				663	ext4_fc_memcpy(sb, dst, &tl, sizeof(tl), &crc);
				664	dst += sizeof(tl);
				665	tail.fc_tid = cpu_to_le32(sbi->s_journal->j_running_transaction->t_tid);
				666	ext4_fc_memcpy(sb, dst, &tail.fc_tid, sizeof(tail.fc_tid), &crc);
				667	dst += sizeof(tail.fc_tid);
				668	tail.fc_crc = cpu_to_le32(crc);
				669	ext4_fc_memcpy(sb, dst, &tail.fc_crc, sizeof(tail.fc_crc), NULL);
				670
				671	ext4_fc_submit_bh(sb);
				672
				673	return 0;
				674	}
				675
				676	/*
				677	* Adds tag, length, value and updates CRC. Returns true if tlv was added.
				678	* Returns false if there's not enough space.
				679	*/
				680	static bool ext4_fc_add_tlv(struct super_block sb, u16 tag, u16 len, u8 val,
				681	u32 *crc)
				682	{
				683	struct ext4_fc_tl tl;
				684	u8 *dst;
				685
				686	dst = ext4_fc_reserve_space(sb, sizeof(tl) + len, crc);
				687	if (!dst)
				688	return false;
				689
				690	tl.fc_tag = cpu_to_le16(tag);
				691	tl.fc_len = cpu_to_le16(len);
				692
				693	ext4_fc_memcpy(sb, dst, &tl, sizeof(tl), crc);
				694	ext4_fc_memcpy(sb, dst + sizeof(tl), val, len, crc);
				695
				696	return true;
				697	}
				698
				699	/* Same as above, but adds dentry tlv. */
				700	static bool ext4_fc_add_dentry_tlv(struct super_block *sb, u16 tag,
				701	int parent_ino, int ino, int dlen,
				702	const unsigned char *dname,
				703	u32 *crc)
				704	{
				705	struct ext4_fc_dentry_info fcd;
				706	struct ext4_fc_tl tl;
				707	u8 *dst = ext4_fc_reserve_space(sb, sizeof(tl) + sizeof(fcd) + dlen,
				708	crc);
				709
				710	if (!dst)
				711	return false;
				712
				713	fcd.fc_parent_ino = cpu_to_le32(parent_ino);
				714	fcd.fc_ino = cpu_to_le32(ino);
				715	tl.fc_tag = cpu_to_le16(tag);
				716	tl.fc_len = cpu_to_le16(sizeof(fcd) + dlen);
				717	ext4_fc_memcpy(sb, dst, &tl, sizeof(tl), crc);
				718	dst += sizeof(tl);
				719	ext4_fc_memcpy(sb, dst, &fcd, sizeof(fcd), crc);
				720	dst += sizeof(fcd);
				721	ext4_fc_memcpy(sb, dst, dname, dlen, crc);
				722	dst += dlen;
				723
				724	return true;
				725	}
				726
				727	/*
				728	* Writes inode in the fast commit space under TLV with tag @tag.
				729	* Returns 0 on success, error on failure.
				730	*/
				731	static int ext4_fc_write_inode(struct inode inode, u32 crc)
				732	{
				733	struct ext4_inode_info *ei = EXT4_I(inode);
				734	int inode_len = EXT4_GOOD_OLD_INODE_SIZE;
				735	int ret;
				736	struct ext4_iloc iloc;
				737	struct ext4_fc_inode fc_inode;
				738	struct ext4_fc_tl tl;
				739	u8 *dst;
				740
				741	ret = ext4_get_inode_loc(inode, &iloc);
				742	if (ret)
				743	return ret;
				744
				745	if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE)
				746	inode_len += ei->i_extra_isize;
				747
				748	fc_inode.fc_ino = cpu_to_le32(inode->i_ino);
				749	tl.fc_tag = cpu_to_le16(EXT4_FC_TAG_INODE);
				750	tl.fc_len = cpu_to_le16(inode_len + sizeof(fc_inode.fc_ino));
				751
				752	dst = ext4_fc_reserve_space(inode->i_sb,
				753	sizeof(tl) + inode_len + sizeof(fc_inode.fc_ino), crc);
				754	if (!dst)
				755	return -ECANCELED;
				756
				757	if (!ext4_fc_memcpy(inode->i_sb, dst, &tl, sizeof(tl), crc))
				758	return -ECANCELED;
				759	dst += sizeof(tl);
				760	if (!ext4_fc_memcpy(inode->i_sb, dst, &fc_inode, sizeof(fc_inode), crc))
				761	return -ECANCELED;
				762	dst += sizeof(fc_inode);
				763	if (!ext4_fc_memcpy(inode->i_sb, dst, (u8 *)ext4_raw_inode(&iloc),
				764	inode_len, crc))
				765	return -ECANCELED;
				766
				767	return 0;
				768	}
				769
				770	/*
				771	* Writes updated data ranges for the inode in question. Updates CRC.
				772	* Returns 0 on success, error otherwise.
				773	*/
				774	static int ext4_fc_write_inode_data(struct inode inode, u32 crc)
				775	{
				776	ext4_lblk_t old_blk_size, cur_lblk_off, new_blk_size;
				777	struct ext4_inode_info *ei = EXT4_I(inode);
				778	struct ext4_map_blocks map;
				779	struct ext4_fc_add_range fc_ext;
				780	struct ext4_fc_del_range lrange;
				781	struct ext4_extent *ex;
				782	int ret;
				783
				784	mutex_lock(&ei->i_fc_lock);
				785	if (ei->i_fc_lblk_len == 0) {
				786	mutex_unlock(&ei->i_fc_lock);
				787	return 0;
				788	}
				789	old_blk_size = ei->i_fc_lblk_start;
				790	new_blk_size = ei->i_fc_lblk_start + ei->i_fc_lblk_len - 1;
				791	ei->i_fc_lblk_len = 0;
				792	mutex_unlock(&ei->i_fc_lock);
				793
				794	cur_lblk_off = old_blk_size;
				795	jbd_debug(1, "%s: will try writing %d to %d for inode %ld\n",
				796	__func__, cur_lblk_off, new_blk_size, inode->i_ino);
				797
				798	while (cur_lblk_off <= new_blk_size) {
				799	map.m_lblk = cur_lblk_off;
				800	map.m_len = new_blk_size - cur_lblk_off + 1;
				801	ret = ext4_map_blocks(NULL, inode, &map, 0);
				802	if (ret < 0)
				803	return -ECANCELED;
				804
				805	if (map.m_len == 0) {
				806	cur_lblk_off++;
				807	continue;
				808	}
				809
				810	if (ret == 0) {
				811	lrange.fc_ino = cpu_to_le32(inode->i_ino);
				812	lrange.fc_lblk = cpu_to_le32(map.m_lblk);
				813	lrange.fc_len = cpu_to_le32(map.m_len);
				814	if (!ext4_fc_add_tlv(inode->i_sb, EXT4_FC_TAG_DEL_RANGE,
				815	sizeof(lrange), (u8 *)&lrange, crc))
				816	return -ENOSPC;
				817	} else {
				818	fc_ext.fc_ino = cpu_to_le32(inode->i_ino);
				819	ex = (struct ext4_extent *)&fc_ext.fc_ex;
				820	ex->ee_block = cpu_to_le32(map.m_lblk);
				821	ex->ee_len = cpu_to_le16(map.m_len);
				822	ext4_ext_store_pblock(ex, map.m_pblk);
				823	if (map.m_flags & EXT4_MAP_UNWRITTEN)
				824	ext4_ext_mark_unwritten(ex);
				825	else
				826	ext4_ext_mark_initialized(ex);
				827	if (!ext4_fc_add_tlv(inode->i_sb, EXT4_FC_TAG_ADD_RANGE,
				828	sizeof(fc_ext), (u8 *)&fc_ext, crc))
				829	return -ENOSPC;
				830	}
				831
				832	cur_lblk_off += map.m_len;
				833	}
				834
				835	return 0;
				836	}
				837
				838
				839	/* Submit data for all the fast commit inodes */
				840	static int ext4_fc_submit_inode_data_all(journal_t *journal)
				841	{
				842	struct super_block sb = (struct super_block )(journal->j_private);
				843	struct ext4_sb_info *sbi = EXT4_SB(sb);
				844	struct ext4_inode_info *ei;
				845	struct list_head *pos;
				846	int ret = 0;
				847
				848	spin_lock(&sbi->s_fc_lock);
Harshad Shirwadkar	ababea7	2020-10-26 21:49:15 -0700	[diff] [blame]	849	sbi->s_mount_flags \|= EXT4_MF_FC_COMMITTING;
Harshad Shirwadkar	aa75f4d	2020-10-15 13:37:57 -0700	[diff] [blame]	850	list_for_each(pos, &sbi->s_fc_q[FC_Q_MAIN]) {
				851	ei = list_entry(pos, struct ext4_inode_info, i_fc_list);
				852	ext4_set_inode_state(&ei->vfs_inode, EXT4_STATE_FC_COMMITTING);
				853	while (atomic_read(&ei->i_fc_updates)) {
				854	DEFINE_WAIT(wait);
				855
				856	prepare_to_wait(&ei->i_fc_wait, &wait,
				857	TASK_UNINTERRUPTIBLE);
				858	if (atomic_read(&ei->i_fc_updates)) {
				859	spin_unlock(&sbi->s_fc_lock);
				860	schedule();
				861	spin_lock(&sbi->s_fc_lock);
				862	}
				863	finish_wait(&ei->i_fc_wait, &wait);
				864	}
				865	spin_unlock(&sbi->s_fc_lock);
				866	ret = jbd2_submit_inode_data(ei->jinode);
				867	if (ret)
				868	return ret;
				869	spin_lock(&sbi->s_fc_lock);
				870	}
				871	spin_unlock(&sbi->s_fc_lock);
				872
				873	return ret;
				874	}
				875
				876	/* Wait for completion of data for all the fast commit inodes */
				877	static int ext4_fc_wait_inode_data_all(journal_t *journal)
				878	{
				879	struct super_block sb = (struct super_block )(journal->j_private);
				880	struct ext4_sb_info *sbi = EXT4_SB(sb);
				881	struct ext4_inode_info pos, n;
				882	int ret = 0;
				883
				884	spin_lock(&sbi->s_fc_lock);
				885	list_for_each_entry_safe(pos, n, &sbi->s_fc_q[FC_Q_MAIN], i_fc_list) {
				886	if (!ext4_test_inode_state(&pos->vfs_inode,
				887	EXT4_STATE_FC_COMMITTING))
				888	continue;
				889	spin_unlock(&sbi->s_fc_lock);
				890
				891	ret = jbd2_wait_inode_data(journal, pos->jinode);
				892	if (ret)
				893	return ret;
				894	spin_lock(&sbi->s_fc_lock);
				895	}
				896	spin_unlock(&sbi->s_fc_lock);
				897
				898	return 0;
				899	}
				900
				901	/* Commit all the directory entry updates */
				902	static int ext4_fc_commit_dentry_updates(journal_t journal, u32 crc)
				903	{
				904	struct super_block sb = (struct super_block )(journal->j_private);
				905	struct ext4_sb_info *sbi = EXT4_SB(sb);
				906	struct ext4_fc_dentry_update *fc_dentry;
				907	struct inode *inode;
				908	struct list_head pos, n, fcd_pos, fcd_n;
				909	struct ext4_inode_info *ei;
				910	int ret;
				911
				912	if (list_empty(&sbi->s_fc_dentry_q[FC_Q_MAIN]))
				913	return 0;
				914	list_for_each_safe(fcd_pos, fcd_n, &sbi->s_fc_dentry_q[FC_Q_MAIN]) {
				915	fc_dentry = list_entry(fcd_pos, struct ext4_fc_dentry_update,
				916	fcd_list);
				917	if (fc_dentry->fcd_op != EXT4_FC_TAG_CREAT) {
				918	spin_unlock(&sbi->s_fc_lock);
				919	if (!ext4_fc_add_dentry_tlv(
				920	sb, fc_dentry->fcd_op,
				921	fc_dentry->fcd_parent, fc_dentry->fcd_ino,
				922	fc_dentry->fcd_name.len,
				923	fc_dentry->fcd_name.name, crc)) {
				924	ret = -ENOSPC;
				925	goto lock_and_exit;
				926	}
				927	spin_lock(&sbi->s_fc_lock);
				928	continue;
				929	}
				930
				931	inode = NULL;
				932	list_for_each_safe(pos, n, &sbi->s_fc_q[FC_Q_MAIN]) {
				933	ei = list_entry(pos, struct ext4_inode_info, i_fc_list);
				934	if (ei->vfs_inode.i_ino == fc_dentry->fcd_ino) {
				935	inode = &ei->vfs_inode;
				936	break;
				937	}
				938	}
				939	/*
				940	* If we don't find inode in our list, then it was deleted,
				941	* in which case, we don't need to record it's create tag.
				942	*/
				943	if (!inode)
				944	continue;
				945	spin_unlock(&sbi->s_fc_lock);
				946
				947	/*
				948	* We first write the inode and then the create dirent. This
				949	* allows the recovery code to create an unnamed inode first
				950	* and then link it to a directory entry. This allows us
				951	* to use namei.c routines almost as is and simplifies
				952	* the recovery code.
				953	*/
				954	ret = ext4_fc_write_inode(inode, crc);
				955	if (ret)
				956	goto lock_and_exit;
				957
				958	ret = ext4_fc_write_inode_data(inode, crc);
				959	if (ret)
				960	goto lock_and_exit;
				961
				962	if (!ext4_fc_add_dentry_tlv(
				963	sb, fc_dentry->fcd_op,
				964	fc_dentry->fcd_parent, fc_dentry->fcd_ino,
				965	fc_dentry->fcd_name.len,
				966	fc_dentry->fcd_name.name, crc)) {
Harshad Shirwadkar	aa75f4d	2020-10-15 13:37:57 -0700	[diff] [blame]	967	ret = -ENOSPC;
				968	goto lock_and_exit;
				969	}
				970
				971	spin_lock(&sbi->s_fc_lock);
				972	}
				973	return 0;
				974	lock_and_exit:
				975	spin_lock(&sbi->s_fc_lock);
				976	return ret;
				977	}
				978
				979	static int ext4_fc_perform_commit(journal_t *journal)
				980	{
				981	struct super_block sb = (struct super_block )(journal->j_private);
				982	struct ext4_sb_info *sbi = EXT4_SB(sb);
				983	struct ext4_inode_info *iter;
				984	struct ext4_fc_head head;
				985	struct list_head *pos;
				986	struct inode *inode;
				987	struct blk_plug plug;
				988	int ret = 0;
				989	u32 crc = 0;
				990
				991	ret = ext4_fc_submit_inode_data_all(journal);
				992	if (ret)
				993	return ret;
				994
				995	ret = ext4_fc_wait_inode_data_all(journal);
				996	if (ret)
				997	return ret;
				998
				999	blk_start_plug(&plug);
				1000	if (sbi->s_fc_bytes == 0) {
				1001	/*
				1002	* Add a head tag only if this is the first fast commit
				1003	* in this TID.
				1004	*/
				1005	head.fc_features = cpu_to_le32(EXT4_FC_SUPPORTED_FEATURES);
				1006	head.fc_tid = cpu_to_le32(
				1007	sbi->s_journal->j_running_transaction->t_tid);
				1008	if (!ext4_fc_add_tlv(sb, EXT4_FC_TAG_HEAD, sizeof(head),
				1009	(u8 *)&head, &crc))
				1010	goto out;
				1011	}
				1012
				1013	spin_lock(&sbi->s_fc_lock);
				1014	ret = ext4_fc_commit_dentry_updates(journal, &crc);
				1015	if (ret) {
				1016	spin_unlock(&sbi->s_fc_lock);
				1017	goto out;
				1018	}
				1019
				1020	list_for_each(pos, &sbi->s_fc_q[FC_Q_MAIN]) {
				1021	iter = list_entry(pos, struct ext4_inode_info, i_fc_list);
				1022	inode = &iter->vfs_inode;
				1023	if (!ext4_test_inode_state(inode, EXT4_STATE_FC_COMMITTING))
				1024	continue;
				1025
				1026	spin_unlock(&sbi->s_fc_lock);
				1027	ret = ext4_fc_write_inode_data(inode, &crc);
				1028	if (ret)
				1029	goto out;
				1030	ret = ext4_fc_write_inode(inode, &crc);
				1031	if (ret)
				1032	goto out;
				1033	spin_lock(&sbi->s_fc_lock);
				1034	EXT4_I(inode)->i_fc_committed_subtid =
				1035	atomic_read(&sbi->s_fc_subtid);
				1036	}
				1037	spin_unlock(&sbi->s_fc_lock);
				1038
				1039	ret = ext4_fc_write_tail(sb, crc);
				1040
				1041	out:
				1042	blk_finish_plug(&plug);
				1043	return ret;
				1044	}
				1045
				1046	/*
				1047	* The main commit entry point. Performs a fast commit for transaction
				1048	* commit_tid if needed. If it's not possible to perform a fast commit
				1049	* due to various reasons, we fall back to full commit. Returns 0
				1050	* on success, error otherwise.
				1051	*/
				1052	int ext4_fc_commit(journal_t *journal, tid_t commit_tid)
				1053	{
				1054	struct super_block sb = (struct super_block )(journal->j_private);
				1055	struct ext4_sb_info *sbi = EXT4_SB(sb);
				1056	int nblks = 0, ret, bsize = journal->j_blocksize;
				1057	int subtid = atomic_read(&sbi->s_fc_subtid);
				1058	int reason = EXT4_FC_REASON_OK, fc_bufs_before = 0;
				1059	ktime_t start_time, commit_time;
				1060
				1061	trace_ext4_fc_commit_start(sb);
				1062
				1063	start_time = ktime_get();
				1064
				1065	if (!test_opt2(sb, JOURNAL_FAST_COMMIT) \|\|
				1066	(ext4_fc_is_ineligible(sb))) {
				1067	reason = EXT4_FC_REASON_INELIGIBLE;
				1068	goto out;
				1069	}
				1070
				1071	restart_fc:
				1072	ret = jbd2_fc_begin_commit(journal, commit_tid);
				1073	if (ret == -EALREADY) {
				1074	/* There was an ongoing commit, check if we need to restart */
				1075	if (atomic_read(&sbi->s_fc_subtid) <= subtid &&
				1076	commit_tid > journal->j_commit_sequence)
				1077	goto restart_fc;
				1078	reason = EXT4_FC_REASON_ALREADY_COMMITTED;
				1079	goto out;
				1080	} else if (ret) {
				1081	sbi->s_fc_stats.fc_ineligible_reason_count[EXT4_FC_COMMIT_FAILED]++;
				1082	reason = EXT4_FC_REASON_FC_START_FAILED;
				1083	goto out;
				1084	}
				1085
				1086	fc_bufs_before = (sbi->s_fc_bytes + bsize - 1) / bsize;
				1087	ret = ext4_fc_perform_commit(journal);
				1088	if (ret < 0) {
				1089	sbi->s_fc_stats.fc_ineligible_reason_count[EXT4_FC_COMMIT_FAILED]++;
				1090	reason = EXT4_FC_REASON_FC_FAILED;
				1091	goto out;
				1092	}
				1093	nblks = (sbi->s_fc_bytes + bsize - 1) / bsize - fc_bufs_before;
				1094	ret = jbd2_fc_wait_bufs(journal, nblks);
				1095	if (ret < 0) {
				1096	sbi->s_fc_stats.fc_ineligible_reason_count[EXT4_FC_COMMIT_FAILED]++;
				1097	reason = EXT4_FC_REASON_FC_FAILED;
				1098	goto out;
				1099	}
				1100	atomic_inc(&sbi->s_fc_subtid);
				1101	jbd2_fc_end_commit(journal);
				1102	out:
				1103	/* Has any ineligible update happened since we started? */
				1104	if (reason == EXT4_FC_REASON_OK && ext4_fc_is_ineligible(sb)) {
				1105	sbi->s_fc_stats.fc_ineligible_reason_count[EXT4_FC_COMMIT_FAILED]++;
				1106	reason = EXT4_FC_REASON_INELIGIBLE;
				1107	}
				1108
				1109	spin_lock(&sbi->s_fc_lock);
				1110	if (reason != EXT4_FC_REASON_OK &&
				1111	reason != EXT4_FC_REASON_ALREADY_COMMITTED) {
				1112	sbi->s_fc_stats.fc_ineligible_commits++;
				1113	} else {
				1114	sbi->s_fc_stats.fc_num_commits++;
				1115	sbi->s_fc_stats.fc_numblks += nblks;
				1116	}
				1117	spin_unlock(&sbi->s_fc_lock);
				1118	nblks = (reason == EXT4_FC_REASON_OK) ? nblks : 0;
				1119	trace_ext4_fc_commit_stop(sb, nblks, reason);
				1120	commit_time = ktime_to_ns(ktime_sub(ktime_get(), start_time));
				1121	/*
				1122	* weight the commit time higher than the average time so we don't
				1123	* react too strongly to vast changes in the commit time
				1124	*/
				1125	if (likely(sbi->s_fc_avg_commit_time))
				1126	sbi->s_fc_avg_commit_time = (commit_time +
				1127	sbi->s_fc_avg_commit_time * 3) / 4;
				1128	else
				1129	sbi->s_fc_avg_commit_time = commit_time;
				1130	jbd_debug(1,
				1131	"Fast commit ended with blks = %d, reason = %d, subtid - %d",
				1132	nblks, reason, subtid);
				1133	if (reason == EXT4_FC_REASON_FC_FAILED)
				1134	return jbd2_fc_end_commit_fallback(journal, commit_tid);
				1135	if (reason == EXT4_FC_REASON_FC_START_FAILED \|\|
				1136	reason == EXT4_FC_REASON_INELIGIBLE)
				1137	return jbd2_complete_transaction(journal, commit_tid);
				1138	return 0;
				1139	}
				1140
Harshad Shirwadkar	ff780b9	2020-10-15 13:37:56 -0700	[diff] [blame]	1141	/*
				1142	* Fast commit cleanup routine. This is called after every fast commit and
				1143	* full commit. full is true if we are called after a full commit.
				1144	*/
				1145	static void ext4_fc_cleanup(journal_t *journal, int full)
				1146	{
Harshad Shirwadkar	aa75f4d	2020-10-15 13:37:57 -0700	[diff] [blame]	1147	struct super_block *sb = journal->j_private;
				1148	struct ext4_sb_info *sbi = EXT4_SB(sb);
				1149	struct ext4_inode_info *iter;
				1150	struct ext4_fc_dentry_update *fc_dentry;
				1151	struct list_head pos, n;
				1152
				1153	if (full && sbi->s_fc_bh)
				1154	sbi->s_fc_bh = NULL;
				1155
				1156	jbd2_fc_release_bufs(journal);
				1157
				1158	spin_lock(&sbi->s_fc_lock);
				1159	list_for_each_safe(pos, n, &sbi->s_fc_q[FC_Q_MAIN]) {
				1160	iter = list_entry(pos, struct ext4_inode_info, i_fc_list);
				1161	list_del_init(&iter->i_fc_list);
				1162	ext4_clear_inode_state(&iter->vfs_inode,
				1163	EXT4_STATE_FC_COMMITTING);
				1164	ext4_fc_reset_inode(&iter->vfs_inode);
				1165	/* Make sure EXT4_STATE_FC_COMMITTING bit is clear */
				1166	smp_mb();
				1167	#if (BITS_PER_LONG < 64)
				1168	wake_up_bit(&iter->i_state_flags, EXT4_STATE_FC_COMMITTING);
				1169	#else
				1170	wake_up_bit(&iter->i_flags, EXT4_STATE_FC_COMMITTING);
				1171	#endif
				1172	}
				1173
				1174	while (!list_empty(&sbi->s_fc_dentry_q[FC_Q_MAIN])) {
				1175	fc_dentry = list_first_entry(&sbi->s_fc_dentry_q[FC_Q_MAIN],
				1176	struct ext4_fc_dentry_update,
				1177	fcd_list);
				1178	list_del_init(&fc_dentry->fcd_list);
				1179	spin_unlock(&sbi->s_fc_lock);
				1180
				1181	if (fc_dentry->fcd_name.name &&
				1182	fc_dentry->fcd_name.len > DNAME_INLINE_LEN)
				1183	kfree(fc_dentry->fcd_name.name);
				1184	kmem_cache_free(ext4_fc_dentry_cachep, fc_dentry);
				1185	spin_lock(&sbi->s_fc_lock);
				1186	}
				1187
				1188	list_splice_init(&sbi->s_fc_dentry_q[FC_Q_STAGING],
				1189	&sbi->s_fc_dentry_q[FC_Q_MAIN]);
				1190	list_splice_init(&sbi->s_fc_q[FC_Q_STAGING],
				1191	&sbi->s_fc_q[FC_Q_STAGING]);
				1192
Harshad Shirwadkar	ababea7	2020-10-26 21:49:15 -0700	[diff] [blame]	1193	sbi->s_mount_flags &= ~EXT4_MF_FC_COMMITTING;
				1194	sbi->s_mount_flags &= ~EXT4_MF_FC_INELIGIBLE;
Harshad Shirwadkar	aa75f4d	2020-10-15 13:37:57 -0700	[diff] [blame]	1195
				1196	if (full)
				1197	sbi->s_fc_bytes = 0;
				1198	spin_unlock(&sbi->s_fc_lock);
				1199	trace_ext4_fc_stats(sb);
Harshad Shirwadkar	ff780b9	2020-10-15 13:37:56 -0700	[diff] [blame]	1200	}
Harshad Shirwadkar	6866d7b	2020-10-15 13:37:55 -0700	[diff] [blame]	1201
Harshad Shirwadkar	8016e29	2020-10-15 13:37:59 -0700	[diff] [blame]	1202	/* Ext4 Replay Path Routines */
				1203
				1204	/* Get length of a particular tlv */
				1205	static inline int ext4_fc_tag_len(struct ext4_fc_tl *tl)
				1206	{
				1207	return le16_to_cpu(tl->fc_len);
				1208	}
				1209
				1210	/* Get a pointer to "value" of a tlv */
				1211	static inline u8 ext4_fc_tag_val(struct ext4_fc_tl tl)
				1212	{
				1213	return (u8 )tl + sizeof(tl);
				1214	}
				1215
				/* Decoded form of a dentry tag, shared by the dentry replay routines */
				struct dentry_info_args {
					int parent_ino;		/* inode number of the parent directory */
					int dname_len;		/* length of dname */
					int ino;		/* inode number the entry refers to */
					int inode_len;
					char *dname;		/* entry name */
				};
				1221
				1222	static inline void tl_to_darg(struct dentry_info_args *darg,
				1223	struct ext4_fc_tl *tl)
				1224	{
				1225	struct ext4_fc_dentry_info *fcd;
				1226
				1227	fcd = (struct ext4_fc_dentry_info *)ext4_fc_tag_val(tl);
				1228
				1229	darg->parent_ino = le32_to_cpu(fcd->fc_parent_ino);
				1230	darg->ino = le32_to_cpu(fcd->fc_ino);
				1231	darg->dname = fcd->fc_dname;
				1232	darg->dname_len = ext4_fc_tag_len(tl) -
				1233	sizeof(struct ext4_fc_dentry_info);
				1234	}
				1235
				1236	/* Unlink replay function */
				1237	static int ext4_fc_replay_unlink(struct super_block sb, struct ext4_fc_tl tl)
				1238	{
				1239	struct inode inode, old_parent;
				1240	struct qstr entry;
				1241	struct dentry_info_args darg;
				1242	int ret = 0;
				1243
				1244	tl_to_darg(&darg, tl);
				1245
				1246	trace_ext4_fc_replay(sb, EXT4_FC_TAG_UNLINK, darg.ino,
				1247	darg.parent_ino, darg.dname_len);
				1248
				1249	entry.name = darg.dname;
				1250	entry.len = darg.dname_len;
				1251	inode = ext4_iget(sb, darg.ino, EXT4_IGET_NORMAL);
				1252
				1253	if (IS_ERR_OR_NULL(inode)) {
				1254	jbd_debug(1, "Inode %d not found", darg.ino);
				1255	return 0;
				1256	}
				1257
				1258	old_parent = ext4_iget(sb, darg.parent_ino,
				1259	EXT4_IGET_NORMAL);
				1260	if (IS_ERR_OR_NULL(old_parent)) {
				1261	jbd_debug(1, "Dir with inode %d not found", darg.parent_ino);
				1262	iput(inode);
				1263	return 0;
				1264	}
				1265
				1266	ret = __ext4_unlink(old_parent, &entry, inode);
				1267	/* -ENOENT ok coz it might not exist anymore. */
				1268	if (ret == -ENOENT)
				1269	ret = 0;
				1270	iput(old_parent);
				1271	iput(inode);
				1272	return ret;
				1273	}
				1274
				1275	static int ext4_fc_replay_link_internal(struct super_block *sb,
				1276	struct dentry_info_args *darg,
				1277	struct inode *inode)
				1278	{
				1279	struct inode *dir = NULL;
				1280	struct dentry dentry_dir = NULL, dentry_inode = NULL;
				1281	struct qstr qstr_dname = QSTR_INIT(darg->dname, darg->dname_len);
				1282	int ret = 0;
				1283
				1284	dir = ext4_iget(sb, darg->parent_ino, EXT4_IGET_NORMAL);
				1285	if (IS_ERR(dir)) {
				1286	jbd_debug(1, "Dir with inode %d not found.", darg->parent_ino);
				1287	dir = NULL;
				1288	goto out;
				1289	}
				1290
				1291	dentry_dir = d_obtain_alias(dir);
				1292	if (IS_ERR(dentry_dir)) {
				1293	jbd_debug(1, "Failed to obtain dentry");
				1294	dentry_dir = NULL;
				1295	goto out;
				1296	}
				1297
				1298	dentry_inode = d_alloc(dentry_dir, &qstr_dname);
				1299	if (!dentry_inode) {
				1300	jbd_debug(1, "Inode dentry not created.");
				1301	ret = -ENOMEM;
				1302	goto out;
				1303	}
				1304
				1305	ret = __ext4_link(dir, inode, dentry_inode);
				1306	/*
				1307	* It's possible that link already existed since data blocks
				1308	* for the dir in question got persisted before we crashed OR
				1309	* we replayed this tag and crashed before the entire replay
				1310	* could complete.
				1311	*/
				1312	if (ret && ret != -EEXIST) {
				1313	jbd_debug(1, "Failed to link\n");
				1314	goto out;
				1315	}
				1316
				1317	ret = 0;
				1318	out:
				1319	if (dentry_dir) {
				1320	d_drop(dentry_dir);
				1321	dput(dentry_dir);
				1322	} else if (dir) {
				1323	iput(dir);
				1324	}
				1325	if (dentry_inode) {
				1326	d_drop(dentry_inode);
				1327	dput(dentry_inode);
				1328	}
				1329
				1330	return ret;
				1331	}
				1332
				1333	/* Link replay function */
				1334	static int ext4_fc_replay_link(struct super_block sb, struct ext4_fc_tl tl)
				1335	{
				1336	struct inode *inode;
				1337	struct dentry_info_args darg;
				1338	int ret = 0;
				1339
				1340	tl_to_darg(&darg, tl);
				1341	trace_ext4_fc_replay(sb, EXT4_FC_TAG_LINK, darg.ino,
				1342	darg.parent_ino, darg.dname_len);
				1343
				1344	inode = ext4_iget(sb, darg.ino, EXT4_IGET_NORMAL);
				1345	if (IS_ERR_OR_NULL(inode)) {
				1346	jbd_debug(1, "Inode not found.");
				1347	return 0;
				1348	}
				1349
				1350	ret = ext4_fc_replay_link_internal(sb, &darg, inode);
				1351	iput(inode);
				1352	return ret;
				1353	}
				1354
				1355	/*
				1356	* Record all the modified inodes during replay. We use this later to setup
				1357	* block bitmaps correctly.
				1358	*/
				1359	static int ext4_fc_record_modified_inode(struct super_block *sb, int ino)
				1360	{
				1361	struct ext4_fc_replay_state *state;
				1362	int i;
				1363
				1364	state = &EXT4_SB(sb)->s_fc_replay_state;
				1365	for (i = 0; i < state->fc_modified_inodes_used; i++)
				1366	if (state->fc_modified_inodes[i] == ino)
				1367	return 0;
				1368	if (state->fc_modified_inodes_used == state->fc_modified_inodes_size) {
				1369	state->fc_modified_inodes_size +=
				1370	EXT4_FC_REPLAY_REALLOC_INCREMENT;
				1371	state->fc_modified_inodes = krealloc(
				1372	state->fc_modified_inodes, sizeof(int) *
				1373	state->fc_modified_inodes_size,
				1374	GFP_KERNEL);
				1375	if (!state->fc_modified_inodes)
				1376	return -ENOMEM;
				1377	}
				1378	state->fc_modified_inodes[state->fc_modified_inodes_used++] = ino;
				1379	return 0;
				1380	}
				1381
				1382	/*
				1383	* Inode replay function
				1384	*/
				1385	static int ext4_fc_replay_inode(struct super_block sb, struct ext4_fc_tl tl)
				1386	{
				1387	struct ext4_fc_inode *fc_inode;
				1388	struct ext4_inode *raw_inode;
				1389	struct ext4_inode *raw_fc_inode;
				1390	struct inode *inode = NULL;
				1391	struct ext4_iloc iloc;
				1392	int inode_len, ino, ret, tag = le16_to_cpu(tl->fc_tag);
				1393	struct ext4_extent_header *eh;
				1394
				1395	fc_inode = (struct ext4_fc_inode *)ext4_fc_tag_val(tl);
				1396
				1397	ino = le32_to_cpu(fc_inode->fc_ino);
				1398	trace_ext4_fc_replay(sb, tag, ino, 0, 0);
				1399
				1400	inode = ext4_iget(sb, ino, EXT4_IGET_NORMAL);
				1401	if (!IS_ERR_OR_NULL(inode)) {
				1402	ext4_ext_clear_bb(inode);
				1403	iput(inode);
				1404	}
				1405
				1406	ext4_fc_record_modified_inode(sb, ino);
				1407
				1408	raw_fc_inode = (struct ext4_inode *)fc_inode->fc_raw_inode;
				1409	ret = ext4_get_fc_inode_loc(sb, ino, &iloc);
				1410	if (ret)
				1411	goto out;
				1412
				1413	inode_len = ext4_fc_tag_len(tl) - sizeof(struct ext4_fc_inode);
				1414	raw_inode = ext4_raw_inode(&iloc);
				1415
				1416	memcpy(raw_inode, raw_fc_inode, offsetof(struct ext4_inode, i_block));
				1417	memcpy(&raw_inode->i_generation, &raw_fc_inode->i_generation,
				1418	inode_len - offsetof(struct ext4_inode, i_generation));
				1419	if (le32_to_cpu(raw_inode->i_flags) & EXT4_EXTENTS_FL) {
				1420	eh = (struct ext4_extent_header *)(&raw_inode->i_block[0]);
				1421	if (eh->eh_magic != EXT4_EXT_MAGIC) {
				1422	memset(eh, 0, sizeof(*eh));
				1423	eh->eh_magic = EXT4_EXT_MAGIC;
				1424	eh->eh_max = cpu_to_le16(
				1425	(sizeof(raw_inode->i_block) -
				1426	sizeof(struct ext4_extent_header))
				1427	/ sizeof(struct ext4_extent));
				1428	}
				1429	} else if (le32_to_cpu(raw_inode->i_flags) & EXT4_INLINE_DATA_FL) {
				1430	memcpy(raw_inode->i_block, raw_fc_inode->i_block,
				1431	sizeof(raw_inode->i_block));
				1432	}
				1433
				1434	/* Immediately update the inode on disk. */
				1435	ret = ext4_handle_dirty_metadata(NULL, NULL, iloc.bh);
				1436	if (ret)
				1437	goto out;
				1438	ret = sync_dirty_buffer(iloc.bh);
				1439	if (ret)
				1440	goto out;
				1441	ret = ext4_mark_inode_used(sb, ino);
				1442	if (ret)
				1443	goto out;
				1444
				1445	/* Given that we just wrote the inode on disk, this SHOULD succeed. */
				1446	inode = ext4_iget(sb, ino, EXT4_IGET_NORMAL);
				1447	if (IS_ERR_OR_NULL(inode)) {
				1448	jbd_debug(1, "Inode not found.");
				1449	return -EFSCORRUPTED;
				1450	}
				1451
				1452	/*
				1453	* Our allocator could have made different decisions than before
				1454	* crashing. This should be fixed but until then, we calculate
				1455	* the number of blocks the inode.
				1456	*/
				1457	ext4_ext_replay_set_iblocks(inode);
				1458
				1459	inode->i_generation = le32_to_cpu(ext4_raw_inode(&iloc)->i_generation);
				1460	ext4_reset_inode_seed(inode);
				1461
				1462	ext4_inode_csum_set(inode, ext4_raw_inode(&iloc), EXT4_I(inode));
				1463	ret = ext4_handle_dirty_metadata(NULL, NULL, iloc.bh);
				1464	sync_dirty_buffer(iloc.bh);
				1465	brelse(iloc.bh);
				1466	out:
				1467	iput(inode);
				1468	if (!ret)
				1469	blkdev_issue_flush(sb->s_bdev, GFP_KERNEL);
				1470
				1471	return 0;
				1472	}
				1473
				1474	/*
				1475	* Dentry create replay function.
				1476	*
				1477	* EXT4_FC_TAG_CREAT is preceded by EXT4_FC_TAG_INODE_FULL. Which means, the
				1478	* inode for which we are trying to create a dentry here, should already have
				1479	* been replayed before we start here.
				1480	*/
				1481	static int ext4_fc_replay_create(struct super_block sb, struct ext4_fc_tl tl)
				1482	{
				1483	int ret = 0;
				1484	struct inode *inode = NULL;
				1485	struct inode *dir = NULL;
				1486	struct dentry_info_args darg;
				1487
				1488	tl_to_darg(&darg, tl);
				1489
				1490	trace_ext4_fc_replay(sb, EXT4_FC_TAG_CREAT, darg.ino,
				1491	darg.parent_ino, darg.dname_len);
				1492
				1493	/* This takes care of update group descriptor and other metadata */
				1494	ret = ext4_mark_inode_used(sb, darg.ino);
				1495	if (ret)
				1496	goto out;
				1497
				1498	inode = ext4_iget(sb, darg.ino, EXT4_IGET_NORMAL);
				1499	if (IS_ERR_OR_NULL(inode)) {
				1500	jbd_debug(1, "inode %d not found.", darg.ino);
				1501	inode = NULL;
				1502	ret = -EINVAL;
				1503	goto out;
				1504	}
				1505
				1506	if (S_ISDIR(inode->i_mode)) {
				1507	/*
				1508	* If we are creating a directory, we need to make sure that the
				1509	* dot and dot dot dirents are setup properly.
				1510	*/
				1511	dir = ext4_iget(sb, darg.parent_ino, EXT4_IGET_NORMAL);
				1512	if (IS_ERR_OR_NULL(dir)) {
				1513	jbd_debug(1, "Dir %d not found.", darg.ino);
				1514	goto out;
				1515	}
				1516	ret = ext4_init_new_dir(NULL, dir, inode);
				1517	iput(dir);
				1518	if (ret) {
				1519	ret = 0;
				1520	goto out;
				1521	}
				1522	}
				1523	ret = ext4_fc_replay_link_internal(sb, &darg, inode);
				1524	if (ret)
				1525	goto out;
				1526	set_nlink(inode, 1);
				1527	ext4_mark_inode_dirty(NULL, inode);
				1528	out:
				1529	if (inode)
				1530	iput(inode);
				1531	return ret;
				1532	}
				1533
				1534	/*
				1535	* Record physical disk regions which are in use as per fast commit area. Our
				1536	* simple replay phase allocator excludes these regions from allocation.
				1537	*/
				1538	static int ext4_fc_record_regions(struct super_block *sb, int ino,
				1539	ext4_lblk_t lblk, ext4_fsblk_t pblk, int len)
				1540	{
				1541	struct ext4_fc_replay_state *state;
				1542	struct ext4_fc_alloc_region *region;
				1543
				1544	state = &EXT4_SB(sb)->s_fc_replay_state;
				1545	if (state->fc_regions_used == state->fc_regions_size) {
				1546	state->fc_regions_size +=
				1547	EXT4_FC_REPLAY_REALLOC_INCREMENT;
				1548	state->fc_regions = krealloc(
				1549	state->fc_regions,
				1550	state->fc_regions_size *
				1551	sizeof(struct ext4_fc_alloc_region),
				1552	GFP_KERNEL);
				1553	if (!state->fc_regions)
				1554	return -ENOMEM;
				1555	}
				1556	region = &state->fc_regions[state->fc_regions_used++];
				1557	region->ino = ino;
				1558	region->lblk = lblk;
				1559	region->pblk = pblk;
				1560	region->len = len;
				1561
				1562	return 0;
				1563	}
				1564
				1565	/* Replay add range tag */
				1566	static int ext4_fc_replay_add_range(struct super_block *sb,
				1567	struct ext4_fc_tl *tl)
				1568	{
				1569	struct ext4_fc_add_range *fc_add_ex;
				1570	struct ext4_extent newex, *ex;
				1571	struct inode *inode;
				1572	ext4_lblk_t start, cur;
				1573	int remaining, len;
				1574	ext4_fsblk_t start_pblk;
				1575	struct ext4_map_blocks map;
				1576	struct ext4_ext_path *path = NULL;
				1577	int ret;
				1578
				1579	fc_add_ex = (struct ext4_fc_add_range *)ext4_fc_tag_val(tl);
				1580	ex = (struct ext4_extent *)&fc_add_ex->fc_ex;
				1581
				1582	trace_ext4_fc_replay(sb, EXT4_FC_TAG_ADD_RANGE,
				1583	le32_to_cpu(fc_add_ex->fc_ino), le32_to_cpu(ex->ee_block),
				1584	ext4_ext_get_actual_len(ex));
				1585
				1586	inode = ext4_iget(sb, le32_to_cpu(fc_add_ex->fc_ino),
				1587	EXT4_IGET_NORMAL);
				1588	if (IS_ERR_OR_NULL(inode)) {
				1589	jbd_debug(1, "Inode not found.");
				1590	return 0;
				1591	}
				1592
				1593	ret = ext4_fc_record_modified_inode(sb, inode->i_ino);
				1594
				1595	start = le32_to_cpu(ex->ee_block);
				1596	start_pblk = ext4_ext_pblock(ex);
				1597	len = ext4_ext_get_actual_len(ex);
				1598
				1599	cur = start;
				1600	remaining = len;
				1601	jbd_debug(1, "ADD_RANGE, lblk %d, pblk %lld, len %d, unwritten %d, inode %ld\n",
				1602	start, start_pblk, len, ext4_ext_is_unwritten(ex),
				1603	inode->i_ino);
				1604
				1605	while (remaining > 0) {
				1606	map.m_lblk = cur;
				1607	map.m_len = remaining;
				1608	map.m_pblk = 0;
				1609	ret = ext4_map_blocks(NULL, inode, &map, 0);
				1610
				1611	if (ret < 0) {
				1612	iput(inode);
				1613	return 0;
				1614	}
				1615
				1616	if (ret == 0) {
				1617	/* Range is not mapped */
				1618	path = ext4_find_extent(inode, cur, NULL, 0);
Harshad Shirwadkar	8c9be1e	2020-10-27 13:43:42 -0700	[diff] [blame]	1619	if (IS_ERR(path)) {
				1620	iput(inode);
				1621	return 0;
				1622	}
Harshad Shirwadkar	8016e29	2020-10-15 13:37:59 -0700	[diff] [blame]	1623	memset(&newex, 0, sizeof(newex));
				1624	newex.ee_block = cpu_to_le32(cur);
				1625	ext4_ext_store_pblock(
				1626	&newex, start_pblk + cur - start);
				1627	newex.ee_len = cpu_to_le16(map.m_len);
				1628	if (ext4_ext_is_unwritten(ex))
				1629	ext4_ext_mark_unwritten(&newex);
				1630	down_write(&EXT4_I(inode)->i_data_sem);
				1631	ret = ext4_ext_insert_extent(
				1632	NULL, inode, &path, &newex, 0);
				1633	up_write((&EXT4_I(inode)->i_data_sem));
				1634	ext4_ext_drop_refs(path);
				1635	kfree(path);
				1636	if (ret) {
				1637	iput(inode);
				1638	return 0;
				1639	}
				1640	goto next;
				1641	}
				1642
				1643	if (start_pblk + cur - start != map.m_pblk) {
				1644	/*
				1645	* Logical to physical mapping changed. This can happen
				1646	* if this range was removed and then reallocated to
				1647	* map to new physical blocks during a fast commit.
				1648	*/
				1649	ret = ext4_ext_replay_update_ex(inode, cur, map.m_len,
				1650	ext4_ext_is_unwritten(ex),
				1651	start_pblk + cur - start);
				1652	if (ret) {
				1653	iput(inode);
				1654	return 0;
				1655	}
				1656	/*
				1657	* Mark the old blocks as free since they aren't used
				1658	* anymore. We maintain an array of all the modified
				1659	* inodes. In case these blocks are still used at either
				1660	* a different logical range in the same inode or in
				1661	* some different inode, we will mark them as allocated
				1662	* at the end of the FC replay using our array of
				1663	* modified inodes.
				1664	*/
				1665	ext4_mb_mark_bb(inode->i_sb, map.m_pblk, map.m_len, 0);
				1666	goto next;
				1667	}
				1668
				1669	/* Range is mapped and needs a state change */
				1670	jbd_debug(1, "Converting from %d to %d %lld",
				1671	map.m_flags & EXT4_MAP_UNWRITTEN,
				1672	ext4_ext_is_unwritten(ex), map.m_pblk);
				1673	ret = ext4_ext_replay_update_ex(inode, cur, map.m_len,
				1674	ext4_ext_is_unwritten(ex), map.m_pblk);
				1675	if (ret) {
				1676	iput(inode);
				1677	return 0;
				1678	}
				1679	/*
				1680	* We may have split the extent tree while toggling the state.
				1681	* Try to shrink the extent tree now.
				1682	*/
				1683	ext4_ext_replay_shrink_inode(inode, start + len);
				1684	next:
				1685	cur += map.m_len;
				1686	remaining -= map.m_len;
				1687	}
				1688	ext4_ext_replay_shrink_inode(inode, i_size_read(inode) >>
				1689	sb->s_blocksize_bits);
				1690	iput(inode);
				1691	return 0;
				1692	}
				1693
				1694	/* Replay DEL_RANGE tag */
				1695	static int
				1696	ext4_fc_replay_del_range(struct super_block sb, struct ext4_fc_tl tl)
				1697	{
				1698	struct inode *inode;
				1699	struct ext4_fc_del_range *lrange;
				1700	struct ext4_map_blocks map;
				1701	ext4_lblk_t cur, remaining;
				1702	int ret;
				1703
				1704	lrange = (struct ext4_fc_del_range *)ext4_fc_tag_val(tl);
				1705	cur = le32_to_cpu(lrange->fc_lblk);
				1706	remaining = le32_to_cpu(lrange->fc_len);
				1707
				1708	trace_ext4_fc_replay(sb, EXT4_FC_TAG_DEL_RANGE,
				1709	le32_to_cpu(lrange->fc_ino), cur, remaining);
				1710
				1711	inode = ext4_iget(sb, le32_to_cpu(lrange->fc_ino), EXT4_IGET_NORMAL);
				1712	if (IS_ERR_OR_NULL(inode)) {
				1713	jbd_debug(1, "Inode %d not found", le32_to_cpu(lrange->fc_ino));
				1714	return 0;
				1715	}
				1716
				1717	ret = ext4_fc_record_modified_inode(sb, inode->i_ino);
				1718
				1719	jbd_debug(1, "DEL_RANGE, inode %ld, lblk %d, len %d\n",
				1720	inode->i_ino, le32_to_cpu(lrange->fc_lblk),
				1721	le32_to_cpu(lrange->fc_len));
				1722	while (remaining > 0) {
				1723	map.m_lblk = cur;
				1724	map.m_len = remaining;
				1725
				1726	ret = ext4_map_blocks(NULL, inode, &map, 0);
				1727	if (ret < 0) {
				1728	iput(inode);
				1729	return 0;
				1730	}
				1731	if (ret > 0) {
				1732	remaining -= ret;
				1733	cur += ret;
				1734	ext4_mb_mark_bb(inode->i_sb, map.m_pblk, map.m_len, 0);
				1735	} else {
				1736	remaining -= map.m_len;
				1737	cur += map.m_len;
				1738	}
				1739	}
				1740
				1741	ret = ext4_punch_hole(inode,
				1742	le32_to_cpu(lrange->fc_lblk) << sb->s_blocksize_bits,
				1743	le32_to_cpu(lrange->fc_len) << sb->s_blocksize_bits);
				1744	if (ret)
				1745	jbd_debug(1, "ext4_punch_hole returned %d", ret);
				1746	ext4_ext_replay_shrink_inode(inode,
				1747	i_size_read(inode) >> sb->s_blocksize_bits);
				1748	ext4_mark_inode_dirty(NULL, inode);
				1749	iput(inode);
				1750
				1751	return 0;
				1752	}
				1753
				1754	static inline const char *tag2str(u16 tag)
				1755	{
				1756	switch (tag) {
				1757	case EXT4_FC_TAG_LINK:
				1758	return "TAG_ADD_ENTRY";
				1759	case EXT4_FC_TAG_UNLINK:
				1760	return "TAG_DEL_ENTRY";
				1761	case EXT4_FC_TAG_ADD_RANGE:
				1762	return "TAG_ADD_RANGE";
				1763	case EXT4_FC_TAG_CREAT:
				1764	return "TAG_CREAT_DENTRY";
				1765	case EXT4_FC_TAG_DEL_RANGE:
				1766	return "TAG_DEL_RANGE";
				1767	case EXT4_FC_TAG_INODE:
				1768	return "TAG_INODE";
				1769	case EXT4_FC_TAG_PAD:
				1770	return "TAG_PAD";
				1771	case EXT4_FC_TAG_TAIL:
				1772	return "TAG_TAIL";
				1773	case EXT4_FC_TAG_HEAD:
				1774	return "TAG_HEAD";
				1775	default:
				1776	return "TAG_ERROR";
				1777	}
				1778	}
				1779
				1780	static void ext4_fc_set_bitmaps_and_counters(struct super_block *sb)
				1781	{
				1782	struct ext4_fc_replay_state *state;
				1783	struct inode *inode;
				1784	struct ext4_ext_path *path = NULL;
				1785	struct ext4_map_blocks map;
				1786	int i, ret, j;
				1787	ext4_lblk_t cur, end;
				1788
				1789	state = &EXT4_SB(sb)->s_fc_replay_state;
				1790	for (i = 0; i < state->fc_modified_inodes_used; i++) {
				1791	inode = ext4_iget(sb, state->fc_modified_inodes[i],
				1792	EXT4_IGET_NORMAL);
				1793	if (IS_ERR_OR_NULL(inode)) {
				1794	jbd_debug(1, "Inode %d not found.",
				1795	state->fc_modified_inodes[i]);
				1796	continue;
				1797	}
				1798	cur = 0;
				1799	end = EXT_MAX_BLOCKS;
				1800	while (cur < end) {
				1801	map.m_lblk = cur;
				1802	map.m_len = end - cur;
				1803
				1804	ret = ext4_map_blocks(NULL, inode, &map, 0);
				1805	if (ret < 0)
				1806	break;
				1807
				1808	if (ret > 0) {
				1809	path = ext4_find_extent(inode, map.m_lblk, NULL, 0);
				1810	if (!IS_ERR_OR_NULL(path)) {
				1811	for (j = 0; j < path->p_depth; j++)
				1812	ext4_mb_mark_bb(inode->i_sb,
				1813	path[j].p_block, 1, 1);
				1814	ext4_ext_drop_refs(path);
				1815	kfree(path);
				1816	}
				1817	cur += ret;
				1818	ext4_mb_mark_bb(inode->i_sb, map.m_pblk,
				1819	map.m_len, 1);
				1820	} else {
				1821	cur = cur + (map.m_len ? map.m_len : 1);
				1822	}
				1823	}
				1824	iput(inode);
				1825	}
				1826	}
				1827
				1828	/*
				1829	* Check if block is in excluded regions for block allocation. The simple
				1830	* allocator that runs during replay phase is calls this function to see
				1831	* if it is okay to use a block.
				1832	*/
				1833	bool ext4_fc_replay_check_excluded(struct super_block *sb, ext4_fsblk_t blk)
				1834	{
				1835	int i;
				1836	struct ext4_fc_replay_state *state;
				1837
				1838	state = &EXT4_SB(sb)->s_fc_replay_state;
				1839	for (i = 0; i < state->fc_regions_valid; i++) {
				1840	if (state->fc_regions[i].ino == 0 \|\|
				1841	state->fc_regions[i].len == 0)
				1842	continue;
				1843	if (blk >= state->fc_regions[i].pblk &&
				1844	blk < state->fc_regions[i].pblk + state->fc_regions[i].len)
				1845	return true;
				1846	}
				1847	return false;
				1848	}
				1849
				1850	/* Cleanup function called after replay */
				1851	void ext4_fc_replay_cleanup(struct super_block *sb)
				1852	{
				1853	struct ext4_sb_info *sbi = EXT4_SB(sb);
				1854
				1855	sbi->s_mount_state &= ~EXT4_FC_REPLAY;
				1856	kfree(sbi->s_fc_replay_state.fc_regions);
				1857	kfree(sbi->s_fc_replay_state.fc_modified_inodes);
				1858	}
				1859
				1860	/*
				1861	* Recovery Scan phase handler
				1862	*
				1863	* This function is called during the scan phase and is responsible
				1864	* for doing following things:
				1865	* - Make sure the fast commit area has valid tags for replay
				1866	* - Count number of tags that need to be replayed by the replay handler
				1867	* - Verify CRC
				1868	* - Create a list of excluded blocks for allocation during replay phase
				1869	*
				1870	* This function returns JBD2_FC_REPLAY_CONTINUE to indicate that SCAN is
				1871	* incomplete and JBD2 should send more blocks. It returns JBD2_FC_REPLAY_STOP
				1872	* to indicate that scan has finished and JBD2 can now start replay phase.
				1873	* It returns a negative error to indicate that there was an error. At the end
				1874	* of a successful scan phase, sbi->s_fc_replay_state.fc_replay_num_tags is set
				1875	* to indicate the number of tags that need to replayed during the replay phase.
				1876	*/
				1877	static int ext4_fc_replay_scan(journal_t *journal,
				1878	struct buffer_head *bh, int off,
				1879	tid_t expected_tid)
				1880	{
				1881	struct super_block *sb = journal->j_private;
				1882	struct ext4_sb_info *sbi = EXT4_SB(sb);
				1883	struct ext4_fc_replay_state *state;
				1884	int ret = JBD2_FC_REPLAY_CONTINUE;
				1885	struct ext4_fc_add_range *ext;
				1886	struct ext4_fc_tl *tl;
				1887	struct ext4_fc_tail *tail;
				1888	__u8 start, end;
				1889	struct ext4_fc_head *head;
				1890	struct ext4_extent *ex;
				1891
				1892	state = &sbi->s_fc_replay_state;
				1893
				1894	start = (u8 *)bh->b_data;
				1895	end = (__u8 *)bh->b_data + journal->j_blocksize - 1;
				1896
				1897	if (state->fc_replay_expected_off == 0) {
				1898	state->fc_cur_tag = 0;
				1899	state->fc_replay_num_tags = 0;
				1900	state->fc_crc = 0;
				1901	state->fc_regions = NULL;
				1902	state->fc_regions_valid = state->fc_regions_used =
				1903	state->fc_regions_size = 0;
				1904	/* Check if we can stop early */
				1905	if (le16_to_cpu(((struct ext4_fc_tl *)start)->fc_tag)
				1906	!= EXT4_FC_TAG_HEAD)
				1907	return 0;
				1908	}
				1909
				1910	if (off != state->fc_replay_expected_off) {
				1911	ret = -EFSCORRUPTED;
				1912	goto out_err;
				1913	}
				1914
				1915	state->fc_replay_expected_off++;
				1916	fc_for_each_tl(start, end, tl) {
				1917	jbd_debug(3, "Scan phase, tag:%s, blk %lld\n",
				1918	tag2str(le16_to_cpu(tl->fc_tag)), bh->b_blocknr);
				1919	switch (le16_to_cpu(tl->fc_tag)) {
				1920	case EXT4_FC_TAG_ADD_RANGE:
				1921	ext = (struct ext4_fc_add_range *)ext4_fc_tag_val(tl);
				1922	ex = (struct ext4_extent *)&ext->fc_ex;
				1923	ret = ext4_fc_record_regions(sb,
				1924	le32_to_cpu(ext->fc_ino),
				1925	le32_to_cpu(ex->ee_block), ext4_ext_pblock(ex),
				1926	ext4_ext_get_actual_len(ex));
				1927	if (ret < 0)
				1928	break;
				1929	ret = JBD2_FC_REPLAY_CONTINUE;
				1930	fallthrough;
				1931	case EXT4_FC_TAG_DEL_RANGE:
				1932	case EXT4_FC_TAG_LINK:
				1933	case EXT4_FC_TAG_UNLINK:
				1934	case EXT4_FC_TAG_CREAT:
				1935	case EXT4_FC_TAG_INODE:
				1936	case EXT4_FC_TAG_PAD:
				1937	state->fc_cur_tag++;
				1938	state->fc_crc = ext4_chksum(sbi, state->fc_crc, tl,
				1939	sizeof(*tl) + ext4_fc_tag_len(tl));
				1940	break;
				1941	case EXT4_FC_TAG_TAIL:
				1942	state->fc_cur_tag++;
				1943	tail = (struct ext4_fc_tail *)ext4_fc_tag_val(tl);
				1944	state->fc_crc = ext4_chksum(sbi, state->fc_crc, tl,
				1945	sizeof(*tl) +
				1946	offsetof(struct ext4_fc_tail,
				1947	fc_crc));
				1948	if (le32_to_cpu(tail->fc_tid) == expected_tid &&
				1949	le32_to_cpu(tail->fc_crc) == state->fc_crc) {
				1950	state->fc_replay_num_tags = state->fc_cur_tag;
				1951	state->fc_regions_valid =
				1952	state->fc_regions_used;
				1953	} else {
				1954	ret = state->fc_replay_num_tags ?
				1955	JBD2_FC_REPLAY_STOP : -EFSBADCRC;
				1956	}
				1957	state->fc_crc = 0;
				1958	break;
				1959	case EXT4_FC_TAG_HEAD:
				1960	head = (struct ext4_fc_head *)ext4_fc_tag_val(tl);
				1961	if (le32_to_cpu(head->fc_features) &
				1962	~EXT4_FC_SUPPORTED_FEATURES) {
				1963	ret = -EOPNOTSUPP;
				1964	break;
				1965	}
				1966	if (le32_to_cpu(head->fc_tid) != expected_tid) {
				1967	ret = JBD2_FC_REPLAY_STOP;
				1968	break;
				1969	}
				1970	state->fc_cur_tag++;
				1971	state->fc_crc = ext4_chksum(sbi, state->fc_crc, tl,
				1972	sizeof(*tl) + ext4_fc_tag_len(tl));
				1973	break;
				1974	default:
				1975	ret = state->fc_replay_num_tags ?
				1976	JBD2_FC_REPLAY_STOP : -ECANCELED;
				1977	}
				1978	if (ret < 0 \|\| ret == JBD2_FC_REPLAY_STOP)
				1979	break;
				1980	}
				1981
				1982	out_err:
				1983	trace_ext4_fc_replay_scan(sb, ret, off);
				1984	return ret;
				1985	}
				1986
Harshad Shirwadkar	5b849b5	2020-10-15 13:37:58 -0700	[diff] [blame]	1987	/*
				1988	* Main recovery path entry point.
Harshad Shirwadkar	8016e29	2020-10-15 13:37:59 -0700	[diff] [blame]	1989	* The meaning of return codes is similar as above.
Harshad Shirwadkar	5b849b5	2020-10-15 13:37:58 -0700	[diff] [blame]	1990	*/
				1991	static int ext4_fc_replay(journal_t journal, struct buffer_head bh,
				1992	enum passtype pass, int off, tid_t expected_tid)
				1993	{
Harshad Shirwadkar	8016e29	2020-10-15 13:37:59 -0700	[diff] [blame]	1994	struct super_block *sb = journal->j_private;
				1995	struct ext4_sb_info *sbi = EXT4_SB(sb);
				1996	struct ext4_fc_tl *tl;
				1997	__u8 start, end;
				1998	int ret = JBD2_FC_REPLAY_CONTINUE;
				1999	struct ext4_fc_replay_state *state = &sbi->s_fc_replay_state;
				2000	struct ext4_fc_tail *tail;
				2001
				2002	if (pass == PASS_SCAN) {
				2003	state->fc_current_pass = PASS_SCAN;
				2004	return ext4_fc_replay_scan(journal, bh, off, expected_tid);
				2005	}
				2006
				2007	if (state->fc_current_pass != pass) {
				2008	state->fc_current_pass = pass;
				2009	sbi->s_mount_state \|= EXT4_FC_REPLAY;
				2010	}
				2011	if (!sbi->s_fc_replay_state.fc_replay_num_tags) {
				2012	jbd_debug(1, "Replay stops\n");
				2013	ext4_fc_set_bitmaps_and_counters(sb);
				2014	return 0;
				2015	}
				2016
				2017	#ifdef CONFIG_EXT4_DEBUG
				2018	if (sbi->s_fc_debug_max_replay && off >= sbi->s_fc_debug_max_replay) {
				2019	pr_warn("Dropping fc block %d because max_replay set\n", off);
				2020	return JBD2_FC_REPLAY_STOP;
				2021	}
				2022	#endif
				2023
				2024	start = (u8 *)bh->b_data;
				2025	end = (__u8 *)bh->b_data + journal->j_blocksize - 1;
				2026
				2027	fc_for_each_tl(start, end, tl) {
				2028	if (state->fc_replay_num_tags == 0) {
				2029	ret = JBD2_FC_REPLAY_STOP;
				2030	ext4_fc_set_bitmaps_and_counters(sb);
				2031	break;
				2032	}
				2033	jbd_debug(3, "Replay phase, tag:%s\n",
				2034	tag2str(le16_to_cpu(tl->fc_tag)));
				2035	state->fc_replay_num_tags--;
				2036	switch (le16_to_cpu(tl->fc_tag)) {
				2037	case EXT4_FC_TAG_LINK:
				2038	ret = ext4_fc_replay_link(sb, tl);
				2039	break;
				2040	case EXT4_FC_TAG_UNLINK:
				2041	ret = ext4_fc_replay_unlink(sb, tl);
				2042	break;
				2043	case EXT4_FC_TAG_ADD_RANGE:
				2044	ret = ext4_fc_replay_add_range(sb, tl);
				2045	break;
				2046	case EXT4_FC_TAG_CREAT:
				2047	ret = ext4_fc_replay_create(sb, tl);
				2048	break;
				2049	case EXT4_FC_TAG_DEL_RANGE:
				2050	ret = ext4_fc_replay_del_range(sb, tl);
				2051	break;
				2052	case EXT4_FC_TAG_INODE:
				2053	ret = ext4_fc_replay_inode(sb, tl);
				2054	break;
				2055	case EXT4_FC_TAG_PAD:
				2056	trace_ext4_fc_replay(sb, EXT4_FC_TAG_PAD, 0,
				2057	ext4_fc_tag_len(tl), 0);
				2058	break;
				2059	case EXT4_FC_TAG_TAIL:
				2060	trace_ext4_fc_replay(sb, EXT4_FC_TAG_TAIL, 0,
				2061	ext4_fc_tag_len(tl), 0);
				2062	tail = (struct ext4_fc_tail *)ext4_fc_tag_val(tl);
				2063	WARN_ON(le32_to_cpu(tail->fc_tid) != expected_tid);
				2064	break;
				2065	case EXT4_FC_TAG_HEAD:
				2066	break;
				2067	default:
				2068	trace_ext4_fc_replay(sb, le16_to_cpu(tl->fc_tag), 0,
				2069	ext4_fc_tag_len(tl), 0);
				2070	ret = -ECANCELED;
				2071	break;
				2072	}
				2073	if (ret < 0)
				2074	break;
				2075	ret = JBD2_FC_REPLAY_CONTINUE;
				2076	}
				2077	return ret;
Harshad Shirwadkar	5b849b5	2020-10-15 13:37:58 -0700	[diff] [blame]	2078	}
				2079
Harshad Shirwadkar	6866d7b	2020-10-15 13:37:55 -0700	[diff] [blame]	2080	void ext4_fc_init(struct super_block sb, journal_t journal)
				2081	{
Harshad Shirwadkar	e029c5f	2020-10-26 21:49:14 -0700	[diff] [blame]	2082	int num_fc_blocks;
				2083
Harshad Shirwadkar	5b849b5	2020-10-15 13:37:58 -0700	[diff] [blame]	2084	/*
				2085	* We set replay callback even if fast commit disabled because we may
				2086	* could still have fast commit blocks that need to be replayed even if
				2087	* fast commit has now been turned off.
				2088	*/
				2089	journal->j_fc_replay_callback = ext4_fc_replay;
Harshad Shirwadkar	6866d7b	2020-10-15 13:37:55 -0700	[diff] [blame]	2090	if (!test_opt2(sb, JOURNAL_FAST_COMMIT))
				2091	return;
Harshad Shirwadkar	ff780b9	2020-10-15 13:37:56 -0700	[diff] [blame]	2092	journal->j_fc_cleanup_callback = ext4_fc_cleanup;
Harshad Shirwadkar	e029c5f	2020-10-26 21:49:14 -0700	[diff] [blame]	2093	if (!buffer_uptodate(journal->j_sb_buffer)
				2094	&& ext4_read_bh_lock(journal->j_sb_buffer, REQ_META \| REQ_PRIO,
				2095	true)) {
				2096	ext4_msg(sb, KERN_ERR, "I/O error on journal");
				2097	return;
				2098	}
				2099	num_fc_blocks = be32_to_cpu(journal->j_superblock->s_num_fc_blks);
				2100	if (jbd2_fc_init(journal, num_fc_blocks ? num_fc_blocks :
				2101	EXT4_NUM_FC_BLKS)) {
Harshad Shirwadkar	6866d7b	2020-10-15 13:37:55 -0700	[diff] [blame]	2102	pr_warn("Error while enabling fast commits, turning off.");
				2103	ext4_clear_feature_fast_commit(sb);
				2104	}
				2105	}
Harshad Shirwadkar	aa75f4d	2020-10-15 13:37:57 -0700	[diff] [blame]	2106
/*
 * Printable descriptions of the reasons a fast commit was ineligible.
 * ext4_fc_info_show() indexes this table with the same index it uses for
 * fc_ineligible_reason_count[], so the order of the entries matters.
 */
const char *fc_ineligible_reasons[] = {
	"Extended attributes changed",
	"Cross rename",
	"Journal flag changed",
	"Insufficient memory",
	"Swap boot",
	"Resize",
	"Dir renamed",
	"Falloc range op",
	"FC Commit Failed",
};
				2118
				2119	int ext4_fc_info_show(struct seq_file seq, void v)
				2120	{
				2121	struct ext4_sb_info sbi = EXT4_SB((struct super_block )seq->private);
				2122	struct ext4_fc_stats *stats = &sbi->s_fc_stats;
				2123	int i;
				2124
				2125	if (v != SEQ_START_TOKEN)
				2126	return 0;
				2127
				2128	seq_printf(seq,
				2129	"fc stats:\n%ld commits\n%ld ineligible\n%ld numblks\n%lluus avg_commit_time\n",
				2130	stats->fc_num_commits, stats->fc_ineligible_commits,
				2131	stats->fc_numblks,
				2132	div_u64(sbi->s_fc_avg_commit_time, 1000));
				2133	seq_puts(seq, "Ineligible reasons:\n");
				2134	for (i = 0; i < EXT4_FC_REASON_MAX; i++)
				2135	seq_printf(seq, "\"%s\":\t%d\n", fc_ineligible_reasons[i],
				2136	stats->fc_ineligible_reason_count[i]);
				2137
				2138	return 0;
				2139	}
				2140
Harshad Shirwadkar	aa75f4d	2020-10-15 13:37:57 -0700	[diff] [blame]	2141	int __init ext4_fc_init_dentry_cache(void)
				2142	{
				2143	ext4_fc_dentry_cachep = KMEM_CACHE(ext4_fc_dentry_update,
				2144	SLAB_RECLAIM_ACCOUNT);
				2145
				2146	if (ext4_fc_dentry_cachep == NULL)
				2147	return -ENOMEM;
				2148
				2149	return 0;
				2150	}