Blame - fs/xfs/xfs_log_recover.c - SHIFTPHONES/mainline/linux

blob: 96c997ed2ec8faa54d4429dd22e338d9e7ebb605 [file] [log] [blame]

Dave Chinner	0b61f8a	2018-06-05 19:42:14 -0700	[diff] [blame]	1	// SPDX-License-Identifier: GPL-2.0
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2	/*
Tim Shimmin	87c199c	2006-06-09 14:56:16 +1000	[diff] [blame]	3	* Copyright (c) 2000-2006 Silicon Graphics, Inc.
Nathan Scott	7b71876	2005-11-02 14:58:39 +1100	[diff] [blame]	4	* All Rights Reserved.
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	5	*/
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	6	#include "xfs.h"
Nathan Scott	a844f45	2005-11-02 14:38:42 +1100	[diff] [blame]	7	#include "xfs_fs.h"
Dave Chinner	70a9883	2013-10-23 10:36:05 +1100	[diff] [blame]	8	#include "xfs_shared.h"
Dave Chinner	239880e	2013-10-23 10:50:10 +1100	[diff] [blame]	9	#include "xfs_format.h"
				10	#include "xfs_log_format.h"
				11	#include "xfs_trans_resv.h"
Nathan Scott	a844f45	2005-11-02 14:38:42 +1100	[diff] [blame]	12	#include "xfs_bit.h"
Nathan Scott	a844f45	2005-11-02 14:38:42 +1100	[diff] [blame]	13	#include "xfs_sb.h"
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	14	#include "xfs_mount.h"
Darrick J. Wong	5099558	2017-11-21 20:53:02 -0800	[diff] [blame]	15	#include "xfs_defer.h"
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	16	#include "xfs_inode.h"
Dave Chinner	239880e	2013-10-23 10:50:10 +1100	[diff] [blame]	17	#include "xfs_trans.h"
Dave Chinner	239880e	2013-10-23 10:50:10 +1100	[diff] [blame]	18	#include "xfs_log.h"
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	19	#include "xfs_log_priv.h"
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	20	#include "xfs_log_recover.h"
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	21	#include "xfs_trans_priv.h"
Dave Chinner	a4fbe6a	2013-10-23 10:51:50 +1100	[diff] [blame]	22	#include "xfs_alloc.h"
				23	#include "xfs_ialloc.h"
Christoph Hellwig	0b1b213	2009-12-14 23:14:59 +0000	[diff] [blame]	24	#include "xfs_trace.h"
Dave Chinner	33479e0	2012-10-08 21:56:11 +1100	[diff] [blame]	25	#include "xfs_icache.h"
Dave Chinner	a4fbe6a	2013-10-23 10:51:50 +1100	[diff] [blame]	26	#include "xfs_error.h"
Brian Foster	60a4a22	2016-09-26 08:34:27 +1000	[diff] [blame]	27	#include "xfs_buf_item.h"
Dave Chinner	9bbafc71	2021-06-02 10:48:24 +1000	[diff] [blame]	28	#include "xfs_ag.h"
Darrick J. Wong	4bc6198	2021-08-08 08:27:13 -0700	[diff] [blame]	29	#include "xfs_quota.h"
Darrick J. Wong	7993f1a	2021-12-15 11:52:23 -0800	[diff] [blame]	30	#include "xfs_reflink.h"
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	31
/*
 * Midpoint of two block numbers, rounded down.  Both arguments are
 * parenthesized so that expressions with lower-precedence operators
 * (shifts, bitwise ops, ?:) are averaged as written by the caller.
 */
#define BLK_AVG(blk1, blk2)	(((blk1) + (blk2)) >> 1)
				33
Mark Tinguely	9a8d2fd	2012-06-14 09:22:16 -0500	[diff] [blame]	34	STATIC int
				35	xlog_find_zeroed(
				36	struct xlog *,
				37	xfs_daddr_t *);
				38	STATIC int
				39	xlog_clear_stale_blocks(
				40	struct xlog *,
				41	xfs_lsn_t);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	42	#if defined(DEBUG)
Mark Tinguely	9a8d2fd	2012-06-14 09:22:16 -0500	[diff] [blame]	43	STATIC void
				44	xlog_recover_check_summary(
				45	struct xlog *);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	46	#else
				47	#define xlog_recover_check_summary(log)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	48	#endif
Brian Foster	7088c41	2016-01-05 07:40:16 +1100	[diff] [blame]	49	STATIC int
				50	xlog_do_recovery_pass(
				51	struct xlog , xfs_daddr_t, xfs_daddr_t, int, xfs_daddr_t );
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	52
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	53	/*
				54	* Sector aligned buffer routines for buffer create/read/write/access
				55	*/
				56
Alex Elder	ff30a62	2010-04-13 15:22:58 +1000	[diff] [blame]	57	/*
Brian Foster	99c2659	2017-10-26 09:31:15 -0700	[diff] [blame]	58	* Verify the log-relative block number and length in basic blocks are valid for
				59	* an operation involving the given XFS log buffer. Returns true if the fields
				60	* are valid, false otherwise.
Alex Elder	ff30a62	2010-04-13 15:22:58 +1000	[diff] [blame]	61	*/
Brian Foster	99c2659	2017-10-26 09:31:15 -0700	[diff] [blame]	62	static inline bool
Christoph Hellwig	6e9b3dd	2019-06-28 19:27:27 -0700	[diff] [blame]	63	xlog_verify_bno(
Mark Tinguely	9a8d2fd	2012-06-14 09:22:16 -0500	[diff] [blame]	64	struct xlog *log,
Brian Foster	99c2659	2017-10-26 09:31:15 -0700	[diff] [blame]	65	xfs_daddr_t blk_no,
Alex Elder	ff30a62	2010-04-13 15:22:58 +1000	[diff] [blame]	66	int bbcount)
				67	{
Brian Foster	99c2659	2017-10-26 09:31:15 -0700	[diff] [blame]	68	if (blk_no < 0 \|\| blk_no >= log->l_logBBsize)
				69	return false;
				70	if (bbcount <= 0 \|\| (blk_no + bbcount) > log->l_logBBsize)
				71	return false;
				72	return true;
Alex Elder	ff30a62	2010-04-13 15:22:58 +1000	[diff] [blame]	73	}
				74
Alex Elder	36adecf	2010-04-13 15:21:13 +1000	[diff] [blame]	75	/*
Christoph Hellwig	6ad5b32	2019-06-28 19:27:26 -0700	[diff] [blame]	76	* Allocate a buffer to hold log data. The buffer needs to be able to map to
				77	* a range of nbblks basic blocks at any valid offset within the log.
Alex Elder	36adecf	2010-04-13 15:21:13 +1000	[diff] [blame]	78	*/
Christoph Hellwig	6ad5b32	2019-06-28 19:27:26 -0700	[diff] [blame]	79	static char *
Christoph Hellwig	6e9b3dd	2019-06-28 19:27:27 -0700	[diff] [blame]	80	xlog_alloc_buffer(
Mark Tinguely	9a8d2fd	2012-06-14 09:22:16 -0500	[diff] [blame]	81	struct xlog *log,
Dave Chinner	3228149	2009-01-22 15:37:47 +1100	[diff] [blame]	82	int nbblks)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	83	{
Brian Foster	99c2659	2017-10-26 09:31:15 -0700	[diff] [blame]	84	/*
				85	* Pass log block 0 since we don't have an addr yet, buffer will be
				86	* verified on read.
				87	*/
Darrick J. Wong	a71895c	2019-11-11 12:53:22 -0800	[diff] [blame]	88	if (XFS_IS_CORRUPT(log->l_mp, !xlog_verify_bno(log, 0, nbblks))) {
Dave Chinner	a0fa2b6	2011-03-07 10:01:35 +1100	[diff] [blame]	89	xfs_warn(log->l_mp, "Invalid block length (0x%x) for buffer",
Alex Elder	ff30a62	2010-04-13 15:22:58 +1000	[diff] [blame]	90	nbblks);
Dave Chinner	3228149	2009-01-22 15:37:47 +1100	[diff] [blame]	91	return NULL;
				92	}
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	93
Alex Elder	36adecf	2010-04-13 15:21:13 +1000	[diff] [blame]	94	/*
Christoph Hellwig	6ad5b32	2019-06-28 19:27:26 -0700	[diff] [blame]	95	* We do log I/O in units of log sectors (a power-of-2 multiple of the
				96	* basic block size), so we round up the requested size to accommodate
				97	* the basic blocks required for complete log sectors.
Alex Elder	36adecf	2010-04-13 15:21:13 +1000	[diff] [blame]	98	*
Christoph Hellwig	6ad5b32	2019-06-28 19:27:26 -0700	[diff] [blame]	99	* In addition, the buffer may be used for a non-sector-aligned block
				100	* offset, in which case an I/O of the requested size could extend
				101	* beyond the end of the buffer. If the requested size is only 1 basic
				102	* block it will never straddle a sector boundary, so this won't be an
				103	* issue. Nor will this be a problem if the log I/O is done in basic
				104	* blocks (sector size 1). But otherwise we extend the buffer by one
				105	* extra log sector to ensure there's space to accommodate this
				106	* possibility.
Alex Elder	36adecf	2010-04-13 15:21:13 +1000	[diff] [blame]	107	*/
Alex Elder	69ce58f	2010-04-20 17:09:59 +1000	[diff] [blame]	108	if (nbblks > 1 && log->l_sectBBsize > 1)
				109	nbblks += log->l_sectBBsize;
				110	nbblks = round_up(nbblks, log->l_sectBBsize);
Dave Chinner	d634525	2021-08-09 10:10:01 -0700	[diff] [blame]	111	return kvzalloc(BBTOB(nbblks), GFP_KERNEL \| __GFP_RETRY_MAYFAIL);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	112	}
				113
Alex Elder	48389ef	2010-04-20 17:10:21 +1000	[diff] [blame]	114	/*
				115	* Return the address of the start of the given block number's data
				116	* in a log buffer. The buffer covers a log sector-aligned region.
				117	*/
Christoph Hellwig	18ffb8c	2019-06-28 19:27:26 -0700	[diff] [blame]	118	static inline unsigned int
Christoph Hellwig	076e6ac	2009-03-16 08:24:13 +0100	[diff] [blame]	119	xlog_align(
Mark Tinguely	9a8d2fd	2012-06-14 09:22:16 -0500	[diff] [blame]	120	struct xlog *log,
Christoph Hellwig	18ffb8c	2019-06-28 19:27:26 -0700	[diff] [blame]	121	xfs_daddr_t blk_no)
Christoph Hellwig	076e6ac	2009-03-16 08:24:13 +0100	[diff] [blame]	122	{
Christoph Hellwig	18ffb8c	2019-06-28 19:27:26 -0700	[diff] [blame]	123	return BBTOB(blk_no & ((xfs_daddr_t)log->l_sectBBsize - 1));
Christoph Hellwig	076e6ac	2009-03-16 08:24:13 +0100	[diff] [blame]	124	}
				125
Christoph Hellwig	6ad5b32	2019-06-28 19:27:26 -0700	[diff] [blame]	126	static int
				127	xlog_do_io(
				128	struct xlog *log,
				129	xfs_daddr_t blk_no,
				130	unsigned int nbblks,
				131	char *data,
				132	unsigned int op)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	133	{
Christoph Hellwig	6ad5b32	2019-06-28 19:27:26 -0700	[diff] [blame]	134	int error;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	135
Darrick J. Wong	a71895c	2019-11-11 12:53:22 -0800	[diff] [blame]	136	if (XFS_IS_CORRUPT(log->l_mp, !xlog_verify_bno(log, blk_no, nbblks))) {
Brian Foster	99c2659	2017-10-26 09:31:15 -0700	[diff] [blame]	137	xfs_warn(log->l_mp,
				138	"Invalid log block/length (0x%llx, 0x%x) for buffer",
				139	blk_no, nbblks);
Dave Chinner	2451337	2014-06-25 14:58:08 +1000	[diff] [blame]	140	return -EFSCORRUPTED;
Dave Chinner	3228149	2009-01-22 15:37:47 +1100	[diff] [blame]	141	}
				142
Alex Elder	69ce58f	2010-04-20 17:09:59 +1000	[diff] [blame]	143	blk_no = round_down(blk_no, log->l_sectBBsize);
				144	nbblks = round_up(nbblks, log->l_sectBBsize);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	145	ASSERT(nbblks > 0);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	146
Christoph Hellwig	6ad5b32	2019-06-28 19:27:26 -0700	[diff] [blame]	147	error = xfs_rw_bdev(log->l_targ->bt_bdev, log->l_logBBstart + blk_no,
				148	BBTOB(nbblks), data, op);
Dave Chinner	2039a27	2021-08-10 17:59:01 -0700	[diff] [blame]	149	if (error && !xlog_is_shutdown(log)) {
Christoph Hellwig	6ad5b32	2019-06-28 19:27:26 -0700	[diff] [blame]	150	xfs_alert(log->l_mp,
				151	"log recovery %s I/O error at daddr 0x%llx len %d error %d",
				152	op == REQ_OP_WRITE ? "write" : "read",
				153	blk_no, nbblks, error);
				154	}
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	155	return error;
				156	}
				157
Christoph Hellwig	076e6ac	2009-03-16 08:24:13 +0100	[diff] [blame]	158	STATIC int
Christoph Hellwig	6ad5b32	2019-06-28 19:27:26 -0700	[diff] [blame]	159	xlog_bread_noalign(
				160	struct xlog *log,
				161	xfs_daddr_t blk_no,
				162	int nbblks,
				163	char *data)
				164	{
				165	return xlog_do_io(log, blk_no, nbblks, data, REQ_OP_READ);
				166	}
				167
				168	STATIC int
Christoph Hellwig	076e6ac	2009-03-16 08:24:13 +0100	[diff] [blame]	169	xlog_bread(
Mark Tinguely	9a8d2fd	2012-06-14 09:22:16 -0500	[diff] [blame]	170	struct xlog *log,
Christoph Hellwig	076e6ac	2009-03-16 08:24:13 +0100	[diff] [blame]	171	xfs_daddr_t blk_no,
				172	int nbblks,
Christoph Hellwig	6ad5b32	2019-06-28 19:27:26 -0700	[diff] [blame]	173	char *data,
Christoph Hellwig	b2a922c	2015-06-22 09:45:10 +1000	[diff] [blame]	174	char **offset)
Christoph Hellwig	076e6ac	2009-03-16 08:24:13 +0100	[diff] [blame]	175	{
				176	int error;
				177
Christoph Hellwig	6ad5b32	2019-06-28 19:27:26 -0700	[diff] [blame]	178	error = xlog_do_io(log, blk_no, nbblks, data, REQ_OP_READ);
				179	if (!error)
				180	*offset = data + xlog_align(log, blk_no);
				181	return error;
Christoph Hellwig	076e6ac	2009-03-16 08:24:13 +0100	[diff] [blame]	182	}
				183
Christoph Hellwig	ba0f32d	2005-06-21 15:36:52 +1000	[diff] [blame]	184	STATIC int
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	185	xlog_bwrite(
Mark Tinguely	9a8d2fd	2012-06-14 09:22:16 -0500	[diff] [blame]	186	struct xlog *log,
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	187	xfs_daddr_t blk_no,
				188	int nbblks,
Christoph Hellwig	6ad5b32	2019-06-28 19:27:26 -0700	[diff] [blame]	189	char *data)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	190	{
Christoph Hellwig	6ad5b32	2019-06-28 19:27:26 -0700	[diff] [blame]	191	return xlog_do_io(log, blk_no, nbblks, data, REQ_OP_WRITE);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	192	}
				193
#ifdef DEBUG
/*
 * Debug-only helper: print the superblock uuid and expected log format
 * next to the uuid and format stored in a log record header.
 */
STATIC void
xlog_header_check_dump(
	xfs_mount_t		*mp,
	xlog_rec_header_t	*head)
{
	/* What this mount expects ... */
	xfs_debug(mp, "%s: SB : uuid = %pU, fmt = %d",
		__func__, &mp->m_sb.sb_uuid, XLOG_FMT);
	/* ... and what the log record header actually carries. */
	xfs_debug(mp, " log : uuid = %pU, fmt = %d",
		&head->h_fs_uuid, be32_to_cpu(head->h_fmt));
}
#else
/* Compiled out entirely in non-debug builds. */
#define xlog_header_check_dump(mp, head)
#endif
				211
				212	/*
				213	* check log record header for recovery
				214	*/
				215	STATIC int
				216	xlog_header_check_recover(
				217	xfs_mount_t *mp,
				218	xlog_rec_header_t *head)
				219	{
Christoph Hellwig	69ef921	2011-07-08 14:36:05 +0200	[diff] [blame]	220	ASSERT(head->h_magicno == cpu_to_be32(XLOG_HEADER_MAGIC_NUM));
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	221
				222	/*
				223	* IRIX doesn't write the h_fmt field and leaves it zeroed
				224	* (XLOG_FMT_UNKNOWN). This stops us from trying to recover
				225	* a dirty log created in IRIX.
				226	*/
Darrick J. Wong	a71895c	2019-11-11 12:53:22 -0800	[diff] [blame]	227	if (XFS_IS_CORRUPT(mp, head->h_fmt != cpu_to_be32(XLOG_FMT))) {
Dave Chinner	a0fa2b6	2011-03-07 10:01:35 +1100	[diff] [blame]	228	xfs_warn(mp,
				229	"dirty log written in incompatible format - can't recover");
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	230	xlog_header_check_dump(mp, head);
Dave Chinner	2451337	2014-06-25 14:58:08 +1000	[diff] [blame]	231	return -EFSCORRUPTED;
Darrick J. Wong	a71895c	2019-11-11 12:53:22 -0800	[diff] [blame]	232	}
				233	if (XFS_IS_CORRUPT(mp, !uuid_equal(&mp->m_sb.sb_uuid,
				234	&head->h_fs_uuid))) {
Dave Chinner	a0fa2b6	2011-03-07 10:01:35 +1100	[diff] [blame]	235	xfs_warn(mp,
				236	"dirty log entry has mismatched uuid - can't recover");
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	237	xlog_header_check_dump(mp, head);
Dave Chinner	2451337	2014-06-25 14:58:08 +1000	[diff] [blame]	238	return -EFSCORRUPTED;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	239	}
				240	return 0;
				241	}
				242
				243	/*
				244	* read the head block of the log and check the header
				245	*/
				246	STATIC int
				247	xlog_header_check_mount(
				248	xfs_mount_t *mp,
				249	xlog_rec_header_t *head)
				250	{
Christoph Hellwig	69ef921	2011-07-08 14:36:05 +0200	[diff] [blame]	251	ASSERT(head->h_magicno == cpu_to_be32(XLOG_HEADER_MAGIC_NUM));
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	252
Amir Goldstein	d905fda	2017-05-04 16:26:23 +0300	[diff] [blame]	253	if (uuid_is_null(&head->h_fs_uuid)) {
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	254	/*
				255	* IRIX doesn't write the h_fs_uuid or h_fmt fields. If
Amir Goldstein	d905fda	2017-05-04 16:26:23 +0300	[diff] [blame]	256	* h_fs_uuid is null, we assume this log was last mounted
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	257	* by IRIX and continue.
				258	*/
Amir Goldstein	d905fda	2017-05-04 16:26:23 +0300	[diff] [blame]	259	xfs_warn(mp, "null uuid in log - IRIX style log");
Darrick J. Wong	a71895c	2019-11-11 12:53:22 -0800	[diff] [blame]	260	} else if (XFS_IS_CORRUPT(mp, !uuid_equal(&mp->m_sb.sb_uuid,
				261	&head->h_fs_uuid))) {
Dave Chinner	a0fa2b6	2011-03-07 10:01:35 +1100	[diff] [blame]	262	xfs_warn(mp, "log has mismatched uuid - can't recover");
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	263	xlog_header_check_dump(mp, head);
Dave Chinner	2451337	2014-06-25 14:58:08 +1000	[diff] [blame]	264	return -EFSCORRUPTED;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	265	}
				266	return 0;
				267	}
				268
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	269	/*
				270	* This routine finds (to an approximation) the first block in the physical
				271	* log which contains the given cycle. It uses a binary search algorithm.
				272	* Note that the algorithm can not be perfect because the disk will not
				273	* necessarily be perfect.
				274	*/
David Chinner	a8272ce	2007-11-23 16:28:09 +1100	[diff] [blame]	275	STATIC int
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	276	xlog_find_cycle_start(
Mark Tinguely	9a8d2fd	2012-06-14 09:22:16 -0500	[diff] [blame]	277	struct xlog *log,
Christoph Hellwig	6e9b3dd	2019-06-28 19:27:27 -0700	[diff] [blame]	278	char *buffer,
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	279	xfs_daddr_t first_blk,
				280	xfs_daddr_t *last_blk,
				281	uint cycle)
				282	{
Christoph Hellwig	b2a922c	2015-06-22 09:45:10 +1000	[diff] [blame]	283	char *offset;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	284	xfs_daddr_t mid_blk;
Alex Elder	e3bb2e3	2010-04-15 18:17:30 +0000	[diff] [blame]	285	xfs_daddr_t end_blk;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	286	uint mid_cycle;
				287	int error;
				288
Alex Elder	e3bb2e3	2010-04-15 18:17:30 +0000	[diff] [blame]	289	end_blk = *last_blk;
				290	mid_blk = BLK_AVG(first_blk, end_blk);
				291	while (mid_blk != first_blk && mid_blk != end_blk) {
Christoph Hellwig	6e9b3dd	2019-06-28 19:27:27 -0700	[diff] [blame]	292	error = xlog_bread(log, mid_blk, 1, buffer, &offset);
Christoph Hellwig	076e6ac	2009-03-16 08:24:13 +0100	[diff] [blame]	293	if (error)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	294	return error;
Christoph Hellwig	03bea6f	2007-10-12 10:58:05 +1000	[diff] [blame]	295	mid_cycle = xlog_get_cycle(offset);
Alex Elder	e3bb2e3	2010-04-15 18:17:30 +0000	[diff] [blame]	296	if (mid_cycle == cycle)
				297	end_blk = mid_blk; /* last_half_cycle == mid_cycle */
				298	else
				299	first_blk = mid_blk; /* first_half_cycle == mid_cycle */
				300	mid_blk = BLK_AVG(first_blk, end_blk);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	301	}
Alex Elder	e3bb2e3	2010-04-15 18:17:30 +0000	[diff] [blame]	302	ASSERT((mid_blk == first_blk && mid_blk+1 == end_blk) \|\|
				303	(mid_blk == end_blk && mid_blk-1 == first_blk));
				304
				305	*last_blk = end_blk;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	306
				307	return 0;
				308	}
				309
				310	/*
Alex Elder	3f943d8	2010-04-15 18:17:34 +0000	[diff] [blame]	311	* Check that a range of blocks does not contain stop_on_cycle_no.
				312	* Fill in *new_blk with the block offset where such a block is
				313	* found, or with -1 (an invalid block number) if there is no such
				314	* block in the range. The scan needs to occur from front to back
				315	* and the pointer into the region must be updated since a later
				316	* routine will need to perform another test.
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	317	*/
				318	STATIC int
				319	xlog_find_verify_cycle(
Mark Tinguely	9a8d2fd	2012-06-14 09:22:16 -0500	[diff] [blame]	320	struct xlog *log,
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	321	xfs_daddr_t start_blk,
				322	int nbblks,
				323	uint stop_on_cycle_no,
				324	xfs_daddr_t *new_blk)
				325	{
				326	xfs_daddr_t i, j;
				327	uint cycle;
Christoph Hellwig	6e9b3dd	2019-06-28 19:27:27 -0700	[diff] [blame]	328	char *buffer;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	329	xfs_daddr_t bufblks;
Christoph Hellwig	b2a922c	2015-06-22 09:45:10 +1000	[diff] [blame]	330	char *buf = NULL;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	331	int error = 0;
				332
Alex Elder	6881a22	2010-04-13 15:22:29 +1000	[diff] [blame]	333	/*
				334	* Greedily allocate a buffer big enough to handle the full
				335	* range of basic blocks we'll be examining. If that fails,
				336	* try a smaller size. We need to be able to read at least
				337	* a log sector, or we're out of luck.
				338	*/
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	339	bufblks = 1 << ffs(nbblks);
Dave Chinner	81158e0	2012-04-27 19:45:22 +1000	[diff] [blame]	340	while (bufblks > log->l_logBBsize)
				341	bufblks >>= 1;
Christoph Hellwig	6e9b3dd	2019-06-28 19:27:27 -0700	[diff] [blame]	342	while (!(buffer = xlog_alloc_buffer(log, bufblks))) {
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	343	bufblks >>= 1;
Alex Elder	69ce58f	2010-04-20 17:09:59 +1000	[diff] [blame]	344	if (bufblks < log->l_sectBBsize)
Dave Chinner	2451337	2014-06-25 14:58:08 +1000	[diff] [blame]	345	return -ENOMEM;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	346	}
				347
				348	for (i = start_blk; i < start_blk + nbblks; i += bufblks) {
				349	int bcount;
				350
				351	bcount = min(bufblks, (start_blk + nbblks - i));
				352
Christoph Hellwig	6e9b3dd	2019-06-28 19:27:27 -0700	[diff] [blame]	353	error = xlog_bread(log, i, bcount, buffer, &buf);
Christoph Hellwig	076e6ac	2009-03-16 08:24:13 +0100	[diff] [blame]	354	if (error)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	355	goto out;
				356
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	357	for (j = 0; j < bcount; j++) {
Christoph Hellwig	03bea6f	2007-10-12 10:58:05 +1000	[diff] [blame]	358	cycle = xlog_get_cycle(buf);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	359	if (cycle == stop_on_cycle_no) {
				360	*new_blk = i+j;
				361	goto out;
				362	}
				363
				364	buf += BBSIZE;
				365	}
				366	}
				367
				368	*new_blk = -1;
				369
				370	out:
Christoph Hellwig	6e9b3dd	2019-06-28 19:27:27 -0700	[diff] [blame]	371	kmem_free(buffer);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	372	return error;
				373	}
				374
Gao Xiang	0c771b9	2020-09-22 09:41:06 -0700	[diff] [blame]	375	static inline int
				376	xlog_logrec_hblks(struct xlog log, struct xlog_rec_header rh)
				377	{
Dave Chinner	38c26bf	2021-08-18 18:46:37 -0700	[diff] [blame]	378	if (xfs_has_logv2(log->l_mp)) {
Gao Xiang	0c771b9	2020-09-22 09:41:06 -0700	[diff] [blame]	379	int h_size = be32_to_cpu(rh->h_size);
				380
				381	if ((be32_to_cpu(rh->h_version) & XLOG_VERSION_2) &&
				382	h_size > XLOG_HEADER_CYCLE_SIZE)
				383	return DIV_ROUND_UP(h_size, XLOG_HEADER_CYCLE_SIZE);
				384	}
				385	return 1;
				386	}
				387
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	388	/*
				389	* Potentially backup over partial log record write.
				390	*
				391	* In the typical case, last_blk is the number of the block directly after
				392	* a good log record. Therefore, we subtract one to get the block number
				393	* of the last block in the given buffer. extra_bblks contains the number
				394	* of blocks we would have read on a previous read. This happens when the
				395	* last log record is split over the end of the physical log.
				396	*
				397	* extra_bblks is the number of blocks potentially verified on a previous
				398	* call to this routine.
				399	*/
				400	STATIC int
				401	xlog_find_verify_log_record(
Mark Tinguely	9a8d2fd	2012-06-14 09:22:16 -0500	[diff] [blame]	402	struct xlog *log,
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	403	xfs_daddr_t start_blk,
				404	xfs_daddr_t *last_blk,
				405	int extra_bblks)
				406	{
				407	xfs_daddr_t i;
Christoph Hellwig	6e9b3dd	2019-06-28 19:27:27 -0700	[diff] [blame]	408	char *buffer;
Christoph Hellwig	b2a922c	2015-06-22 09:45:10 +1000	[diff] [blame]	409	char *offset = NULL;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	410	xlog_rec_header_t *head = NULL;
				411	int error = 0;
				412	int smallmem = 0;
				413	int num_blks = *last_blk - start_blk;
				414	int xhdrs;
				415
				416	ASSERT(start_blk != 0 \|\| *last_blk != start_blk);
				417
Christoph Hellwig	6e9b3dd	2019-06-28 19:27:27 -0700	[diff] [blame]	418	buffer = xlog_alloc_buffer(log, num_blks);
				419	if (!buffer) {
				420	buffer = xlog_alloc_buffer(log, 1);
				421	if (!buffer)
Dave Chinner	2451337	2014-06-25 14:58:08 +1000	[diff] [blame]	422	return -ENOMEM;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	423	smallmem = 1;
				424	} else {
Christoph Hellwig	6e9b3dd	2019-06-28 19:27:27 -0700	[diff] [blame]	425	error = xlog_bread(log, start_blk, num_blks, buffer, &offset);
Christoph Hellwig	076e6ac	2009-03-16 08:24:13 +0100	[diff] [blame]	426	if (error)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	427	goto out;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	428	offset += ((num_blks - 1) << BBSHIFT);
				429	}
				430
				431	for (i = (*last_blk) - 1; i >= 0; i--) {
				432	if (i < start_blk) {
				433	/* valid log record not found */
Dave Chinner	a0fa2b6	2011-03-07 10:01:35 +1100	[diff] [blame]	434	xfs_warn(log->l_mp,
				435	"Log inconsistent (didn't find previous header)");
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	436	ASSERT(0);
Darrick J. Wong	895e196	2019-11-06 09:17:43 -0800	[diff] [blame]	437	error = -EFSCORRUPTED;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	438	goto out;
				439	}
				440
				441	if (smallmem) {
Christoph Hellwig	6e9b3dd	2019-06-28 19:27:27 -0700	[diff] [blame]	442	error = xlog_bread(log, i, 1, buffer, &offset);
Christoph Hellwig	076e6ac	2009-03-16 08:24:13 +0100	[diff] [blame]	443	if (error)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	444	goto out;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	445	}
				446
				447	head = (xlog_rec_header_t *)offset;
				448
Christoph Hellwig	69ef921	2011-07-08 14:36:05 +0200	[diff] [blame]	449	if (head->h_magicno == cpu_to_be32(XLOG_HEADER_MAGIC_NUM))
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	450	break;
				451
				452	if (!smallmem)
				453	offset -= BBSIZE;
				454	}
				455
				456	/*
				457	* We hit the beginning of the physical log & still no header. Return
				458	* to caller. If caller can handle a return of -1, then this routine
				459	* will be called again for the end of the physical log.
				460	*/
				461	if (i == -1) {
Dave Chinner	2451337	2014-06-25 14:58:08 +1000	[diff] [blame]	462	error = 1;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	463	goto out;
				464	}
				465
				466	/*
				467	* We have the final block of the good log (the first block
				468	* of the log record _before_ the head. So we check the uuid.
				469	*/
				470	if ((error = xlog_header_check_mount(log->l_mp, head)))
				471	goto out;
				472
				473	/*
				474	* We may have found a log record header before we expected one.
				475	* last_blk will be the 1st block # with a given cycle #. We may end
				476	* up reading an entire log record. In this case, we don't want to
				477	* reset last_blk. Only when last_blk points in the middle of a log
				478	* record do we update last_blk.
				479	*/
Gao Xiang	0c771b9	2020-09-22 09:41:06 -0700	[diff] [blame]	480	xhdrs = xlog_logrec_hblks(log, head);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	481
Christoph Hellwig	b53e675	2007-10-12 10:59:34 +1000	[diff] [blame]	482	if (*last_blk - i + extra_bblks !=
				483	BTOBB(be32_to_cpu(head->h_len)) + xhdrs)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	484	*last_blk = i;
				485
				486	out:
Christoph Hellwig	6e9b3dd	2019-06-28 19:27:27 -0700	[diff] [blame]	487	kmem_free(buffer);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	488	return error;
				489	}
				490
				491	/*
				492	* Head is defined to be the point of the log where the next log write
Zhi Yong Wu	0a94da2	2013-08-07 10:11:08 +0000	[diff] [blame]	493	* could go. This means that incomplete LR writes at the end are
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	494	* eliminated when calculating the head. We aren't guaranteed that previous
				495	* LR have complete transactions. We only know that a cycle number of
				496	* current cycle number -1 won't be present in the log if we start writing
				497	* from our current block number.
				498	*
				499	* last_blk contains the block number of the first block with a given
				500	* cycle number.
				501	*
				502	* Return: zero if normal, non-zero if error.
				503	*/
Christoph Hellwig	ba0f32d	2005-06-21 15:36:52 +1000	[diff] [blame]	504	STATIC int
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	505	xlog_find_head(
Mark Tinguely	9a8d2fd	2012-06-14 09:22:16 -0500	[diff] [blame]	506	struct xlog *log,
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	507	xfs_daddr_t *return_head_blk)
				508	{
Christoph Hellwig	6e9b3dd	2019-06-28 19:27:27 -0700	[diff] [blame]	509	char *buffer;
Christoph Hellwig	b2a922c	2015-06-22 09:45:10 +1000	[diff] [blame]	510	char *offset;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	511	xfs_daddr_t new_blk, first_blk, start_blk, last_blk, head_blk;
				512	int num_scan_bblks;
				513	uint first_half_cycle, last_half_cycle;
				514	uint stop_on_cycle;
				515	int error, log_bbnum = log->l_logBBsize;
				516
				517	/* Is the end of the log device zeroed? */
Dave Chinner	2451337	2014-06-25 14:58:08 +1000	[diff] [blame]	518	error = xlog_find_zeroed(log, &first_blk);
				519	if (error < 0) {
				520	xfs_warn(log->l_mp, "empty log check failed");
				521	return error;
				522	}
				523	if (error == 1) {
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	524	*return_head_blk = first_blk;
				525
				526	/* Is the whole lot zeroed? */
				527	if (!first_blk) {
				528	/* Linux XFS shouldn't generate totally zeroed logs -
				529	* mkfs etc write a dummy unmount record to a fresh
				530	* log so we can store the uuid in there
				531	*/
Dave Chinner	a0fa2b6	2011-03-07 10:01:35 +1100	[diff] [blame]	532	xfs_warn(log->l_mp, "totally zeroed log");
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	533	}
				534
				535	return 0;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	536	}
				537
				538	first_blk = 0; /* get cycle # of 1st block */
Christoph Hellwig	6e9b3dd	2019-06-28 19:27:27 -0700	[diff] [blame]	539	buffer = xlog_alloc_buffer(log, 1);
				540	if (!buffer)
Dave Chinner	2451337	2014-06-25 14:58:08 +1000	[diff] [blame]	541	return -ENOMEM;
Christoph Hellwig	076e6ac	2009-03-16 08:24:13 +0100	[diff] [blame]	542
Christoph Hellwig	6e9b3dd	2019-06-28 19:27:27 -0700	[diff] [blame]	543	error = xlog_bread(log, 0, 1, buffer, &offset);
Christoph Hellwig	076e6ac	2009-03-16 08:24:13 +0100	[diff] [blame]	544	if (error)
Christoph Hellwig	6e9b3dd	2019-06-28 19:27:27 -0700	[diff] [blame]	545	goto out_free_buffer;
Christoph Hellwig	076e6ac	2009-03-16 08:24:13 +0100	[diff] [blame]	546
Christoph Hellwig	03bea6f	2007-10-12 10:58:05 +1000	[diff] [blame]	547	first_half_cycle = xlog_get_cycle(offset);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	548
				549	last_blk = head_blk = log_bbnum - 1; /* get cycle # of last block */
Christoph Hellwig	6e9b3dd	2019-06-28 19:27:27 -0700	[diff] [blame]	550	error = xlog_bread(log, last_blk, 1, buffer, &offset);
Christoph Hellwig	076e6ac	2009-03-16 08:24:13 +0100	[diff] [blame]	551	if (error)
Christoph Hellwig	6e9b3dd	2019-06-28 19:27:27 -0700	[diff] [blame]	552	goto out_free_buffer;
Christoph Hellwig	076e6ac	2009-03-16 08:24:13 +0100	[diff] [blame]	553
Christoph Hellwig	03bea6f	2007-10-12 10:58:05 +1000	[diff] [blame]	554	last_half_cycle = xlog_get_cycle(offset);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	555	ASSERT(last_half_cycle != 0);
				556
				557	/*
				558	* If the 1st half cycle number is equal to the last half cycle number,
				559	* then the entire log is stamped with the same cycle number. In this
				560	* case, head_blk can't be set to zero (which makes sense). The below
				561	* math doesn't work out properly with head_blk equal to zero. Instead,
				562	* we set it to log_bbnum which is an invalid block number, but this
				563	* value makes the math correct. If head_blk doesn't change through
				564	* all the tests below, *head_blk is set to zero at the very end rather
				565	* than log_bbnum. In a sense, log_bbnum and zero are the same block
				566	* in a circular file.
				567	*/
				568	if (first_half_cycle == last_half_cycle) {
				569	/*
				570	* In this case we believe that the entire log should have
				571	* cycle number last_half_cycle. We need to scan backwards
				572	* from the end verifying that there are no holes still
				573	* containing last_half_cycle - 1. If we find such a hole,
				574	* then the start of that hole will be the new head. The
				575	* simple case looks like
				576	* x | x ... | x - 1 | x
				577	* Another case that fits this picture would be
				578	* x | x + 1 | x ... | x
Nathan Scott	c41564b	2006-03-29 08:55:14 +1000	[diff] [blame]	579	* In this case the head really is somewhere at the end of the
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	580	* log, as one of the latest writes at the beginning was
				581	* incomplete.
				582	* One more case is
				583	* x | x + 1 | x ... | x - 1 | x
				584	* This is really the combination of the above two cases, and
				585	* the head has to end up at the start of the x-1 hole at the
				586	* end of the log.
				587	*
				588	* In the 256k log case, we will read from the beginning to the
				589	* end of the log and search for cycle numbers equal to x-1.
				590	* We don't worry about the x+1 blocks that we encounter,
				591	* because we know that they cannot be the head since the log
				592	* started with x.
				593	*/
				594	head_blk = log_bbnum;
				595	stop_on_cycle = last_half_cycle - 1;
				596	} else {
				597	/*
				598	* In this case we want to find the first block with cycle
				599	* number matching last_half_cycle. We expect the log to be
				600	* some variation on
Alex Elder	3f943d8	2010-04-15 18:17:34 +0000	[diff] [blame]	601	* x + 1 ... \| x ... \| x
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	602	* The first block with cycle number x (last_half_cycle) will
				603	* be where the new head belongs. First we do a binary search
				604	* for the first occurrence of last_half_cycle. The binary
				605	* search may not be totally accurate, so then we scan back
				606	* from there looking for occurrences of last_half_cycle before
				607	* us. If that backwards scan wraps around the beginning of
				608	* the log, then we look for occurrences of last_half_cycle - 1
				609	* at the end of the log. The cases we're looking for look
				610	* like
Alex Elder	3f943d8	2010-04-15 18:17:34 +0000	[diff] [blame]	611	* v binary search stopped here
				612	* x + 1 ... \| x \| x + 1 \| x ... \| x
				613	* ^ but we want to locate this spot
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	614	* or
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	615	* <---------> less than scan distance
Alex Elder	3f943d8	2010-04-15 18:17:34 +0000	[diff] [blame]	616	* x + 1 ... \| x ... \| x - 1 \| x
				617	* ^ we want to locate this spot
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	618	*/
				619	stop_on_cycle = last_half_cycle;
Christoph Hellwig	6e9b3dd	2019-06-28 19:27:27 -0700	[diff] [blame]	620	error = xlog_find_cycle_start(log, buffer, first_blk, &head_blk,
				621	last_half_cycle);
				622	if (error)
				623	goto out_free_buffer;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	624	}
				625
				626	/*
				627	* Now validate the answer. Scan back some number of maximum possible
				628	* blocks and make sure each one has the expected cycle number. The
				629	* maximum is determined by the total possible amount of buffering
				630	* in the in-core log. The following number can be made tighter if
				631	* we actually look at the block size of the filesystem.
				632	*/
Brian Foster	9f2a450	2017-10-26 09:31:16 -0700	[diff] [blame]	633	num_scan_bblks = min_t(int, log_bbnum, XLOG_TOTAL_REC_SHIFT(log));
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	634	if (head_blk >= num_scan_bblks) {
				635	/*
				636	* We are guaranteed that the entire check can be performed
				637	* in one buffer.
				638	*/
				639	start_blk = head_blk - num_scan_bblks;
				640	if ((error = xlog_find_verify_cycle(log,
				641	start_blk, num_scan_bblks,
				642	stop_on_cycle, &new_blk)))
Christoph Hellwig	6e9b3dd	2019-06-28 19:27:27 -0700	[diff] [blame]	643	goto out_free_buffer;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	644	if (new_blk != -1)
				645	head_blk = new_blk;
				646	} else { /* need to read 2 parts of log */
				647	/*
				648	* We are going to scan backwards in the log in two parts.
				649	* First we scan the physical end of the log. In this part
				650	* of the log, we are looking for blocks with cycle number
				651	* last_half_cycle - 1.
				652	* If we find one, then we know that the log starts there, as
				653	* we've found a hole that didn't get written in going around
				654	* the end of the physical log. The simple case for this is
				655	* x + 1 ... \| x ... \| x - 1 \| x
				656	* <---------> less than scan distance
				657	* If all of the blocks at the end of the log have cycle number
				658	* last_half_cycle, then we check the blocks at the start of
				659	* the log looking for occurrences of last_half_cycle. If we
				660	* find one, then our current estimate for the location of the
				661	* first occurrence of last_half_cycle is wrong and we move
				662	* back to the hole we've found. This case looks like
				663	* x + 1 ... \| x \| x + 1 \| x ...
				664	* ^ binary search stopped here
				665	* Another case we need to handle that only occurs in 256k
				666	* logs is
				667	* x + 1 ... \| x ... \| x+1 \| x ...
				668	* ^ binary search stops here
				669	* In a 256k log, the scan at the end of the log will see the
				670	* x + 1 blocks. We need to skip past those since that is
				671	* certainly not the head of the log. By searching for
				672	* last_half_cycle-1 we accomplish that.
				673	*/
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	674	ASSERT(head_blk <= INT_MAX &&
Alex Elder	3f943d8	2010-04-15 18:17:34 +0000	[diff] [blame]	675	(xfs_daddr_t) num_scan_bblks >= head_blk);
				676	start_blk = log_bbnum - (num_scan_bblks - head_blk);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	677	if ((error = xlog_find_verify_cycle(log, start_blk,
				678	num_scan_bblks - (int)head_blk,
				679	(stop_on_cycle - 1), &new_blk)))
Christoph Hellwig	6e9b3dd	2019-06-28 19:27:27 -0700	[diff] [blame]	680	goto out_free_buffer;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	681	if (new_blk != -1) {
				682	head_blk = new_blk;
Alex Elder	9db127e	2010-04-15 18:17:26 +0000	[diff] [blame]	683	goto validate_head;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	684	}
				685
				686	/*
				687	* Scan beginning of log now. The last part of the physical
				688	* log is good. This scan needs to verify that it doesn't find
				689	* the last_half_cycle.
				690	*/
				691	start_blk = 0;
				692	ASSERT(head_blk <= INT_MAX);
				693	if ((error = xlog_find_verify_cycle(log,
				694	start_blk, (int)head_blk,
				695	stop_on_cycle, &new_blk)))
Christoph Hellwig	6e9b3dd	2019-06-28 19:27:27 -0700	[diff] [blame]	696	goto out_free_buffer;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	697	if (new_blk != -1)
				698	head_blk = new_blk;
				699	}
				700
Alex Elder	9db127e	2010-04-15 18:17:26 +0000	[diff] [blame]	701	validate_head:
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	702	/*
				703	* Now we need to make sure head_blk is not pointing to a block in
				704	* the middle of a log record.
				705	*/
				706	num_scan_bblks = XLOG_REC_SHIFT(log);
				707	if (head_blk >= num_scan_bblks) {
				708	start_blk = head_blk - num_scan_bblks; /* don't read head_blk */
				709
				710	/* start ptr at last block ptr before head_blk */
Dave Chinner	2451337	2014-06-25 14:58:08 +1000	[diff] [blame]	711	error = xlog_find_verify_log_record(log, start_blk, &head_blk, 0);
				712	if (error == 1)
				713	error = -EIO;
				714	if (error)
Christoph Hellwig	6e9b3dd	2019-06-28 19:27:27 -0700	[diff] [blame]	715	goto out_free_buffer;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	716	} else {
				717	start_blk = 0;
				718	ASSERT(head_blk <= INT_MAX);
Dave Chinner	2451337	2014-06-25 14:58:08 +1000	[diff] [blame]	719	error = xlog_find_verify_log_record(log, start_blk, &head_blk, 0);
				720	if (error < 0)
Christoph Hellwig	6e9b3dd	2019-06-28 19:27:27 -0700	[diff] [blame]	721	goto out_free_buffer;
Dave Chinner	2451337	2014-06-25 14:58:08 +1000	[diff] [blame]	722	if (error == 1) {
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	723	/* We hit the beginning of the log during our search */
Alex Elder	3f943d8	2010-04-15 18:17:34 +0000	[diff] [blame]	724	start_blk = log_bbnum - (num_scan_bblks - head_blk);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	725	new_blk = log_bbnum;
				726	ASSERT(start_blk <= INT_MAX &&
				727	(xfs_daddr_t) log_bbnum-start_blk >= 0);
				728	ASSERT(head_blk <= INT_MAX);
Dave Chinner	2451337	2014-06-25 14:58:08 +1000	[diff] [blame]	729	error = xlog_find_verify_log_record(log, start_blk,
				730	&new_blk, (int)head_blk);
				731	if (error == 1)
				732	error = -EIO;
				733	if (error)
Christoph Hellwig	6e9b3dd	2019-06-28 19:27:27 -0700	[diff] [blame]	734	goto out_free_buffer;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	735	if (new_blk != log_bbnum)
				736	head_blk = new_blk;
				737	} else if (error)
Christoph Hellwig	6e9b3dd	2019-06-28 19:27:27 -0700	[diff] [blame]	738	goto out_free_buffer;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	739	}
				740
Christoph Hellwig	6e9b3dd	2019-06-28 19:27:27 -0700	[diff] [blame]	741	kmem_free(buffer);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	742	if (head_blk == log_bbnum)
				743	*return_head_blk = 0;
				744	else
				745	*return_head_blk = head_blk;
				746	/*
				747	* When returning here, we have a good block number. Bad block
				748	* means that during a previous crash, we didn't have a clean break
				749	* from cycle number N to cycle number N-1. In this case, we need
				750	* to find the first block with cycle number N-1.
				751	*/
				752	return 0;
				753
Christoph Hellwig	6e9b3dd	2019-06-28 19:27:27 -0700	[diff] [blame]	754	out_free_buffer:
				755	kmem_free(buffer);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	756	if (error)
Dave Chinner	a0fa2b6	2011-03-07 10:01:35 +1100	[diff] [blame]	757	xfs_warn(log->l_mp, "failed to find log head");
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	758	return error;
				759	}
				760
				761	/*
Brian Foster	eed6b46	2016-01-04 15:55:10 +1100	[diff] [blame]	762	* Seek backwards in the log for log record headers.
				763	*
				764	* Given a starting log block, walk backwards until we find the provided number
				765	* of records or hit the provided tail block. The return value is the number of
				766	* records encountered or a negative error code. The log block and buffer
				767	* pointer of the last record seen are returned in rblk and rhead respectively.
				768	*/
				769	STATIC int
				770	xlog_rseek_logrec_hdr(
				771	struct xlog *log,
				772	xfs_daddr_t head_blk,
				773	xfs_daddr_t tail_blk,
				774	int count,
Christoph Hellwig	6e9b3dd	2019-06-28 19:27:27 -0700	[diff] [blame]	775	char *buffer,
Brian Foster	eed6b46	2016-01-04 15:55:10 +1100	[diff] [blame]	776	xfs_daddr_t *rblk,
				777	struct xlog_rec_header **rhead,
				778	bool *wrapped)
				779	{
				780	int i;
				781	int error;
				782	int found = 0;
				783	char *offset = NULL;
				784	xfs_daddr_t end_blk;
				785
				786	*wrapped = false;
				787
				788	/*
				789	* Walk backwards from the head block until we hit the tail or the first
				790	* block in the log.
				791	*/
				792	end_blk = head_blk > tail_blk ? tail_blk : 0;
				793	for (i = (int) head_blk - 1; i >= end_blk; i--) {
Christoph Hellwig	6e9b3dd	2019-06-28 19:27:27 -0700	[diff] [blame]	794	error = xlog_bread(log, i, 1, buffer, &offset);
Brian Foster	eed6b46	2016-01-04 15:55:10 +1100	[diff] [blame]	795	if (error)
				796	goto out_error;
				797
				798	if ((__be32 ) offset == cpu_to_be32(XLOG_HEADER_MAGIC_NUM)) {
				799	*rblk = i;
				800	rhead = (struct xlog_rec_header ) offset;
				801	if (++found == count)
				802	break;
				803	}
				804	}
				805
				806	/*
				807	* If we haven't hit the tail block or the log record header count,
				808	* start looking again from the end of the physical log. Note that
				809	* callers can pass head == tail if the tail is not yet known.
				810	*/
				811	if (tail_blk >= head_blk && found != count) {
				812	for (i = log->l_logBBsize - 1; i >= (int) tail_blk; i--) {
Christoph Hellwig	6e9b3dd	2019-06-28 19:27:27 -0700	[diff] [blame]	813	error = xlog_bread(log, i, 1, buffer, &offset);
Brian Foster	eed6b46	2016-01-04 15:55:10 +1100	[diff] [blame]	814	if (error)
				815	goto out_error;
				816
				817	if ((__be32 )offset ==
				818	cpu_to_be32(XLOG_HEADER_MAGIC_NUM)) {
				819	*wrapped = true;
				820	*rblk = i;
				821	rhead = (struct xlog_rec_header ) offset;
				822	if (++found == count)
				823	break;
				824	}
				825	}
				826	}
				827
				828	return found;
				829
				830	out_error:
				831	return error;
				832	}
				833
				834	/*
Brian Foster	7088c41	2016-01-05 07:40:16 +1100	[diff] [blame]	835	* Seek forward in the log for log record headers.
				836	*
				837	* Given head and tail blocks, walk forward from the tail block until we find
				838	* the provided number of records or hit the head block. The return value is the
				839	* number of records encountered or a negative error code. The log block and
				840	* buffer pointer of the last record seen are returned in rblk and rhead
				841	* respectively.
				842	*/
				843	STATIC int
				844	xlog_seek_logrec_hdr(
				845	struct xlog *log,
				846	xfs_daddr_t head_blk,
				847	xfs_daddr_t tail_blk,
				848	int count,
Christoph Hellwig	6e9b3dd	2019-06-28 19:27:27 -0700	[diff] [blame]	849	char *buffer,
Brian Foster	7088c41	2016-01-05 07:40:16 +1100	[diff] [blame]	850	xfs_daddr_t *rblk,
				851	struct xlog_rec_header **rhead,
				852	bool *wrapped)
				853	{
				854	int i;
				855	int error;
				856	int found = 0;
				857	char *offset = NULL;
				858	xfs_daddr_t end_blk;
				859
				860	*wrapped = false;
				861
				862	/*
				863	* Walk forward from the tail block until we hit the head or the last
				864	* block in the log.
				865	*/
				866	end_blk = head_blk > tail_blk ? head_blk : log->l_logBBsize - 1;
				867	for (i = (int) tail_blk; i <= end_blk; i++) {
Christoph Hellwig	6e9b3dd	2019-06-28 19:27:27 -0700	[diff] [blame]	868	error = xlog_bread(log, i, 1, buffer, &offset);
Brian Foster	7088c41	2016-01-05 07:40:16 +1100	[diff] [blame]	869	if (error)
				870	goto out_error;
				871
				872	if ((__be32 ) offset == cpu_to_be32(XLOG_HEADER_MAGIC_NUM)) {
				873	*rblk = i;
				874	rhead = (struct xlog_rec_header ) offset;
				875	if (++found == count)
				876	break;
				877	}
				878	}
				879
				880	/*
				881	* If we haven't hit the head block or the log record header count,
				882	* start looking again from the start of the physical log.
				883	*/
				884	if (tail_blk > head_blk && found != count) {
				885	for (i = 0; i < (int) head_blk; i++) {
Christoph Hellwig	6e9b3dd	2019-06-28 19:27:27 -0700	[diff] [blame]	886	error = xlog_bread(log, i, 1, buffer, &offset);
Brian Foster	7088c41	2016-01-05 07:40:16 +1100	[diff] [blame]	887	if (error)
				888	goto out_error;
				889
				890	if ((__be32 )offset ==
				891	cpu_to_be32(XLOG_HEADER_MAGIC_NUM)) {
				892	*wrapped = true;
				893	*rblk = i;
				894	rhead = (struct xlog_rec_header ) offset;
				895	if (++found == count)
				896	break;
				897	}
				898	}
				899	}
				900
				901	return found;
				902
				903	out_error:
				904	return error;
				905	}
				906
				907	/*
Brian Foster	4a4f66e	2017-08-08 18:21:52 -0700	[diff] [blame]	908	* Calculate distance from head to tail (i.e., unused space in the log).
				909	*/
				910	static inline int
				911	xlog_tail_distance(
				912	struct xlog *log,
				913	xfs_daddr_t head_blk,
				914	xfs_daddr_t tail_blk)
				915	{
				916	if (head_blk < tail_blk)
				917	return tail_blk - head_blk;
				918
				919	return tail_blk + (log->l_logBBsize - head_blk);
				920	}
				921
				922	/*
				923	* Verify the log tail. This is particularly important when torn or incomplete
				924	* writes have been detected near the front of the log and the head has been
				925	* walked back accordingly.
Brian Foster	7088c41	2016-01-05 07:40:16 +1100	[diff] [blame]	926	*
Brian Foster	4a4f66e	2017-08-08 18:21:52 -0700	[diff] [blame]	927	* We also have to handle the case where the tail was pinned and the head
				928	* blocked behind the tail right before a crash. If the tail had been pushed
				929	* immediately prior to the crash and the subsequent checkpoint was only
				930	* partially written, it's possible it overwrote the last referenced tail in the
				931	* log with garbage. This is not a coherency problem because the tail must have
				932	* been pushed before it can be overwritten, but appears as log corruption to
				933	* recovery because we have no way to know the tail was updated if the
				934	* subsequent checkpoint didn't write successfully.
				935	*
				936	* Therefore, CRC check the log from tail to head. If a failure occurs and the
				937	* offending record is within max iclog bufs from the head, walk the tail
				938	* forward and retry until a valid tail is found or corruption is detected out
				939	* of the range of a possible overwrite.
Brian Foster	7088c41	2016-01-05 07:40:16 +1100	[diff] [blame]	940	*/
				941	STATIC int
				942	xlog_verify_tail(
				943	struct xlog *log,
				944	xfs_daddr_t head_blk,
Brian Foster	4a4f66e	2017-08-08 18:21:52 -0700	[diff] [blame]	945	xfs_daddr_t *tail_blk,
				946	int hsize)
Brian Foster	7088c41	2016-01-05 07:40:16 +1100	[diff] [blame]	947	{
				948	struct xlog_rec_header *thead;
Christoph Hellwig	6e9b3dd	2019-06-28 19:27:27 -0700	[diff] [blame]	949	char *buffer;
Brian Foster	7088c41	2016-01-05 07:40:16 +1100	[diff] [blame]	950	xfs_daddr_t first_bad;
Brian Foster	7088c41	2016-01-05 07:40:16 +1100	[diff] [blame]	951	int error = 0;
				952	bool wrapped;
Brian Foster	4a4f66e	2017-08-08 18:21:52 -0700	[diff] [blame]	953	xfs_daddr_t tmp_tail;
				954	xfs_daddr_t orig_tail = *tail_blk;
Brian Foster	7088c41	2016-01-05 07:40:16 +1100	[diff] [blame]	955
Christoph Hellwig	6e9b3dd	2019-06-28 19:27:27 -0700	[diff] [blame]	956	buffer = xlog_alloc_buffer(log, 1);
				957	if (!buffer)
Brian Foster	7088c41	2016-01-05 07:40:16 +1100	[diff] [blame]	958	return -ENOMEM;
				959
				960	/*
Brian Foster	4a4f66e	2017-08-08 18:21:52 -0700	[diff] [blame]	961	* Make sure the tail points to a record (returns positive count on
				962	* success).
Brian Foster	7088c41	2016-01-05 07:40:16 +1100	[diff] [blame]	963	*/
Christoph Hellwig	6e9b3dd	2019-06-28 19:27:27 -0700	[diff] [blame]	964	error = xlog_seek_logrec_hdr(log, head_blk, *tail_blk, 1, buffer,
Brian Foster	4a4f66e	2017-08-08 18:21:52 -0700	[diff] [blame]	965	&tmp_tail, &thead, &wrapped);
				966	if (error < 0)
Brian Foster	7088c41	2016-01-05 07:40:16 +1100	[diff] [blame]	967	goto out;
Brian Foster	4a4f66e	2017-08-08 18:21:52 -0700	[diff] [blame]	968	if (*tail_blk != tmp_tail)
				969	*tail_blk = tmp_tail;
				970
				971	/*
				972	* Run a CRC check from the tail to the head. We can't just check
				973	* MAX_ICLOGS records past the tail because the tail may point to stale
				974	* blocks cleared during the search for the head/tail. These blocks are
				975	* overwritten with zero-length records and thus record count is not a
				976	* reliable indicator of the iclog state before a crash.
				977	*/
				978	first_bad = 0;
				979	error = xlog_do_recovery_pass(log, head_blk, *tail_blk,
				980	XLOG_RECOVER_CRCPASS, &first_bad);
Brian Foster	a4c9b34	2017-08-08 18:21:53 -0700	[diff] [blame]	981	while ((error == -EFSBADCRC \|\| error == -EFSCORRUPTED) && first_bad) {
Brian Foster	4a4f66e	2017-08-08 18:21:52 -0700	[diff] [blame]	982	int tail_distance;
				983
				984	/*
				985	* Is corruption within range of the head? If so, retry from
				986	* the next record. Otherwise return an error.
				987	*/
				988	tail_distance = xlog_tail_distance(log, head_blk, first_bad);
				989	if (tail_distance > BTOBB(XLOG_MAX_ICLOGS * hsize))
				990	break;
				991
				992	/* skip to the next record; returns positive count on success */
Christoph Hellwig	6e9b3dd	2019-06-28 19:27:27 -0700	[diff] [blame]	993	error = xlog_seek_logrec_hdr(log, head_blk, first_bad, 2,
				994	buffer, &tmp_tail, &thead, &wrapped);
Brian Foster	4a4f66e	2017-08-08 18:21:52 -0700	[diff] [blame]	995	if (error < 0)
				996	goto out;
				997
				998	*tail_blk = tmp_tail;
				999	first_bad = 0;
				1000	error = xlog_do_recovery_pass(log, head_blk, *tail_blk,
				1001	XLOG_RECOVER_CRCPASS, &first_bad);
Brian Foster	7088c41	2016-01-05 07:40:16 +1100	[diff] [blame]	1002	}
				1003
Brian Foster	4a4f66e	2017-08-08 18:21:52 -0700	[diff] [blame]	1004	if (!error && *tail_blk != orig_tail)
				1005	xfs_warn(log->l_mp,
				1006	"Tail block (0x%llx) overwrite detected. Updated to 0x%llx",
				1007	orig_tail, *tail_blk);
Brian Foster	7088c41	2016-01-05 07:40:16 +1100	[diff] [blame]	1008	out:
Christoph Hellwig	6e9b3dd	2019-06-28 19:27:27 -0700	[diff] [blame]	1009	kmem_free(buffer);
Brian Foster	7088c41	2016-01-05 07:40:16 +1100	[diff] [blame]	1010	return error;
				1011	}
				1012
				1013	/*
				1014	* Detect and trim torn writes from the head of the log.
				1015	*
				1016	* Storage without sector atomicity guarantees can result in torn writes in the
				1017	* log in the event of a crash. Our only means to detect this scenario is via
				1018	* CRC verification. While we can't always be certain that CRC verification
				1019	* failure is due to a torn write vs. an unrelated corruption, we do know that
				1020	* only a certain number (XLOG_MAX_ICLOGS) of log records can be written out at
				1021	* one time. Therefore, CRC verify up to XLOG_MAX_ICLOGS records at the head of
				1022	* the log and treat failures in this range as torn writes as a matter of
				1023	* policy. In the event of CRC failure, the head is walked back to the last good
				1024	* record in the log and the tail is updated from that record and verified.
				1025	*/
				1026	STATIC int
				1027	xlog_verify_head(
				1028	struct xlog *log,
				1029	xfs_daddr_t head_blk, / in/out: unverified head */
				1030	xfs_daddr_t tail_blk, / out: tail block */
Christoph Hellwig	6e9b3dd	2019-06-28 19:27:27 -0700	[diff] [blame]	1031	char *buffer,
Brian Foster	7088c41	2016-01-05 07:40:16 +1100	[diff] [blame]	1032	xfs_daddr_t rhead_blk, / start blk of last record */
				1033	struct xlog_rec_header *rhead, / ptr to last record */
				1034	bool wrapped) / last rec. wraps phys. log */
				1035	{
				1036	struct xlog_rec_header *tmp_rhead;
Christoph Hellwig	6e9b3dd	2019-06-28 19:27:27 -0700	[diff] [blame]	1037	char *tmp_buffer;
Brian Foster	7088c41	2016-01-05 07:40:16 +1100	[diff] [blame]	1038	xfs_daddr_t first_bad;
				1039	xfs_daddr_t tmp_rhead_blk;
				1040	int found;
				1041	int error;
				1042	bool tmp_wrapped;
				1043
				1044	/*
Brian Foster	82ff6cc	2016-03-07 08:22:22 +1100	[diff] [blame]	1045	* Check the head of the log for torn writes. Search backwards from the
				1046	* head until we hit the tail or the maximum number of log record I/Os
				1047	* that could have been in flight at one time. Use a temporary buffer so
Christoph Hellwig	6e9b3dd	2019-06-28 19:27:27 -0700	[diff] [blame]	1048	* we don't trash the rhead/buffer pointers from the caller.
Brian Foster	7088c41	2016-01-05 07:40:16 +1100	[diff] [blame]	1049	*/
Christoph Hellwig	6e9b3dd	2019-06-28 19:27:27 -0700	[diff] [blame]	1050	tmp_buffer = xlog_alloc_buffer(log, 1);
				1051	if (!tmp_buffer)
Brian Foster	7088c41	2016-01-05 07:40:16 +1100	[diff] [blame]	1052	return -ENOMEM;
				1053	error = xlog_rseek_logrec_hdr(log, head_blk, tail_blk,
Christoph Hellwig	6e9b3dd	2019-06-28 19:27:27 -0700	[diff] [blame]	1054	XLOG_MAX_ICLOGS, tmp_buffer,
				1055	&tmp_rhead_blk, &tmp_rhead, &tmp_wrapped);
				1056	kmem_free(tmp_buffer);
Brian Foster	7088c41	2016-01-05 07:40:16 +1100	[diff] [blame]	1057	if (error < 0)
				1058	return error;
				1059
				1060	/*
				1061	* Now run a CRC verification pass over the records starting at the
				1062	* block found above to the current head. If a CRC failure occurs, the
				1063	* log block of the first bad record is saved in first_bad.
				1064	*/
				1065	error = xlog_do_recovery_pass(log, *head_blk, tmp_rhead_blk,
				1066	XLOG_RECOVER_CRCPASS, &first_bad);
Brian Foster	a4c9b34	2017-08-08 18:21:53 -0700	[diff] [blame]	1067	if ((error == -EFSBADCRC \|\| error == -EFSCORRUPTED) && first_bad) {
Brian Foster	7088c41	2016-01-05 07:40:16 +1100	[diff] [blame]	1068	/*
				1069	* We've hit a potential torn write. Reset the error and warn
				1070	* about it.
				1071	*/
				1072	error = 0;
				1073	xfs_warn(log->l_mp,
				1074	"Torn write (CRC failure) detected at log block 0x%llx. Truncating head block from 0x%llx.",
				1075	first_bad, *head_blk);
				1076
				1077	/*
				1078	* Get the header block and buffer pointer for the last good
				1079	* record before the bad record.
				1080	*
				1081	* Note that xlog_find_tail() clears the blocks at the new head
				1082	* (i.e., the records with invalid CRC) if the cycle number
Randy Dunlap	b63da6c	2020-08-05 08:49:58 -0700	[diff] [blame]	1083	* matches the current cycle.
Brian Foster	7088c41	2016-01-05 07:40:16 +1100	[diff] [blame]	1084	*/
Christoph Hellwig	6e9b3dd	2019-06-28 19:27:27 -0700	[diff] [blame]	1085	found = xlog_rseek_logrec_hdr(log, first_bad, *tail_blk, 1,
				1086	buffer, rhead_blk, rhead, wrapped);
Brian Foster	7088c41	2016-01-05 07:40:16 +1100	[diff] [blame]	1087	if (found < 0)
				1088	return found;
				1089	if (found == 0) /* XXX: right thing to do here? */
				1090	return -EIO;
				1091
				1092	/*
				1093	* Reset the head block to the starting block of the first bad
				1094	* log record and set the tail block based on the last good
				1095	* record.
				1096	*
				1097	* Bail out if the updated head/tail match as this indicates
				1098	* possible corruption outside of the acceptable
				1099	* (XLOG_MAX_ICLOGS) range. This is a job for xfs_repair...
				1100	*/
				1101	*head_blk = first_bad;
				1102	tail_blk = BLOCK_LSN(be64_to_cpu((rhead)->h_tail_lsn));
				1103	if (head_blk == tail_blk) {
				1104	ASSERT(0);
				1105	return 0;
				1106	}
Brian Foster	7088c41	2016-01-05 07:40:16 +1100	[diff] [blame]	1107	}
Brian Foster	5297ac1	2017-08-08 18:21:51 -0700	[diff] [blame]	1108	if (error)
				1109	return error;
Brian Foster	7088c41	2016-01-05 07:40:16 +1100	[diff] [blame]	1110
Brian Foster	4a4f66e	2017-08-08 18:21:52 -0700	[diff] [blame]	1111	return xlog_verify_tail(log, *head_blk, tail_blk,
				1112	be32_to_cpu((*rhead)->h_size));
Brian Foster	7088c41	2016-01-05 07:40:16 +1100	[diff] [blame]	1113	}
				1114
				1115	/*
Dave Chinner	0703a8e	2018-06-08 09:54:22 -0700	[diff] [blame]	1116	* We need to make sure we handle log wrapping properly, so we can't use the
				1117	* calculated logbno directly. Make sure it wraps to the correct bno inside the
				1118	* log.
				1119	*
				1120	* The log is limited to 32 bit sizes, so we use the appropriate modulus
				1121	* operation here and cast it back to a 64 bit daddr on return.
				1122	*/
				1123	static inline xfs_daddr_t
				1124	xlog_wrap_logbno(
				1125	struct xlog *log,
				1126	xfs_daddr_t bno)
				1127	{
				1128	int mod;
				1129
				1130	div_s64_rem(bno, log->l_logBBsize, &mod);
				1131	return mod;
				1132	}
				1133
				1134	/*
Brian Foster	65b99a0	2016-03-07 08:22:22 +1100	[diff] [blame]	1135	* Check whether the head of the log points to an unmount record. In other
				1136	* words, determine whether the log is clean. If so, update the in-core state
				1137	* appropriately.
				1138	*/
				1139	static int
				1140	xlog_check_unmount_rec(
				1141	struct xlog *log,
				1142	xfs_daddr_t *head_blk,
				1143	xfs_daddr_t *tail_blk,
				1144	struct xlog_rec_header *rhead,
				1145	xfs_daddr_t rhead_blk,
Christoph Hellwig	6e9b3dd	2019-06-28 19:27:27 -0700	[diff] [blame]	1146	char *buffer,
Brian Foster	65b99a0	2016-03-07 08:22:22 +1100	[diff] [blame]	1147	bool *clean)
				1148	{
				1149	struct xlog_op_header *op_head;
				1150	xfs_daddr_t umount_data_blk;
				1151	xfs_daddr_t after_umount_blk;
				1152	int hblks;
				1153	int error;
				1154	char *offset;
				1155
				1156	*clean = false;
				1157
				1158	/*
				1159	* Look for unmount record. If we find it, then we know there was a
				1160	* clean unmount. Since 'i' could be the last block in the physical
				1161	* log, we convert to a log block before comparing to the head_blk.
				1162	*
				1163	* Save the current tail lsn to use to pass to xlog_clear_stale_blocks()
				1164	* below. We won't want to clear the unmount record if there is one, so
				1165	* we pass the lsn of the unmount record rather than the block after it.
				1166	*/
Gao Xiang	0c771b9	2020-09-22 09:41:06 -0700	[diff] [blame]	1167	hblks = xlog_logrec_hblks(log, rhead);
Dave Chinner	0703a8e	2018-06-08 09:54:22 -0700	[diff] [blame]	1168	after_umount_blk = xlog_wrap_logbno(log,
				1169	rhead_blk + hblks + BTOBB(be32_to_cpu(rhead->h_len)));
				1170
Brian Foster	65b99a0	2016-03-07 08:22:22 +1100	[diff] [blame]	1171	if (*head_blk == after_umount_blk &&
				1172	be32_to_cpu(rhead->h_num_logops) == 1) {
Dave Chinner	0703a8e	2018-06-08 09:54:22 -0700	[diff] [blame]	1173	umount_data_blk = xlog_wrap_logbno(log, rhead_blk + hblks);
Christoph Hellwig	6e9b3dd	2019-06-28 19:27:27 -0700	[diff] [blame]	1174	error = xlog_bread(log, umount_data_blk, 1, buffer, &offset);
Brian Foster	65b99a0	2016-03-07 08:22:22 +1100	[diff] [blame]	1175	if (error)
				1176	return error;
				1177
				1178	op_head = (struct xlog_op_header *)offset;
				1179	if (op_head->oh_flags & XLOG_UNMOUNT_TRANS) {
				1180	/*
				1181	* Set tail and last sync so that newly written log
				1182	* records will point recovery to after the current
				1183	* unmount record.
				1184	*/
				1185	xlog_assign_atomic_lsn(&log->l_tail_lsn,
				1186	log->l_curr_cycle, after_umount_blk);
				1187	xlog_assign_atomic_lsn(&log->l_last_sync_lsn,
				1188	log->l_curr_cycle, after_umount_blk);
				1189	*tail_blk = after_umount_blk;
				1190
				1191	*clean = true;
				1192	}
				1193	}
				1194
				1195	return 0;
				1196	}
				1197
Brian Foster	717bc0e	2016-03-07 08:22:22 +1100	[diff] [blame]	1198	static void
				1199	xlog_set_state(
				1200	struct xlog *log,
				1201	xfs_daddr_t head_blk,
				1202	struct xlog_rec_header *rhead,
				1203	xfs_daddr_t rhead_blk,
				1204	bool bump_cycle)
				1205	{
				1206	/*
				1207	* Reset log values according to the state of the log when we
				1208	* crashed. In the case where head_blk == 0, we bump curr_cycle
				1209	* one because the next write starts a new cycle rather than
				1210	* continuing the cycle of the last good log record. At this
				1211	* point we have guaranteed that all partial log records have been
				1212	* accounted for. Therefore, we know that the last good log record
				1213	* written was complete and ended exactly on the end boundary
				1214	* of the physical log.
				1215	*/
				1216	log->l_prev_block = rhead_blk;
				1217	log->l_curr_block = (int)head_blk;
				1218	log->l_curr_cycle = be32_to_cpu(rhead->h_cycle);
				1219	if (bump_cycle)
				1220	log->l_curr_cycle++;
				1221	atomic64_set(&log->l_tail_lsn, be64_to_cpu(rhead->h_tail_lsn));
				1222	atomic64_set(&log->l_last_sync_lsn, be64_to_cpu(rhead->h_lsn));
				1223	xlog_assign_grant_head(&log->l_reserve_head.grant, log->l_curr_cycle,
				1224	BBTOB(log->l_curr_block));
				1225	xlog_assign_grant_head(&log->l_write_head.grant, log->l_curr_cycle,
				1226	BBTOB(log->l_curr_block));
				1227	}
				1228
Brian Foster	65b99a0	2016-03-07 08:22:22 +1100	[diff] [blame]	1229	/*
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1230	* Find the sync block number or the tail of the log.
				1231	*
				1232	* This will be the block number of the last record to have its
				1233	* associated buffers synced to disk. Every log record header has
				1234	* a sync lsn embedded in it. LSNs hold block numbers, so it is easy
				1235	* to get a sync block number. The only concern is to figure out which
				1236	* log record header to believe.
				1237	*
				1238	* The following algorithm uses the log record header with the largest
				1239	* lsn. The entire log record does not need to be valid. We only care
				1240	* that the header is valid.
				1241	*
				1242	* We could speed up search by using current head_blk buffer, but it is not
				1243	* available.
				1244	*/
Eric Sandeen	5d77c0d	2009-11-19 15:52:00 +0000	[diff] [blame]	1245	STATIC int
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1246	xlog_find_tail(
Mark Tinguely	9a8d2fd	2012-06-14 09:22:16 -0500	[diff] [blame]	1247	struct xlog *log,
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1248	xfs_daddr_t *head_blk,
Eric Sandeen	65be605	2006-01-11 15:34:19 +1100	[diff] [blame]	1249	xfs_daddr_t *tail_blk)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1250	{
				1251	xlog_rec_header_t *rhead;
Christoph Hellwig	b2a922c	2015-06-22 09:45:10 +1000	[diff] [blame]	1252	char *offset = NULL;
Christoph Hellwig	6e9b3dd	2019-06-28 19:27:27 -0700	[diff] [blame]	1253	char *buffer;
Brian Foster	7088c41	2016-01-05 07:40:16 +1100	[diff] [blame]	1254	int error;
Brian Foster	7088c41	2016-01-05 07:40:16 +1100	[diff] [blame]	1255	xfs_daddr_t rhead_blk;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1256	xfs_lsn_t tail_lsn;
Brian Foster	eed6b46	2016-01-04 15:55:10 +1100	[diff] [blame]	1257	bool wrapped = false;
Brian Foster	65b99a0	2016-03-07 08:22:22 +1100	[diff] [blame]	1258	bool clean = false;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1259
				1260	/*
				1261	* Find previous log record
				1262	*/
				1263	if ((error = xlog_find_head(log, head_blk)))
				1264	return error;
Brian Foster	82ff6cc	2016-03-07 08:22:22 +1100	[diff] [blame]	1265	ASSERT(*head_blk < INT_MAX);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1266
Christoph Hellwig	6e9b3dd	2019-06-28 19:27:27 -0700	[diff] [blame]	1267	buffer = xlog_alloc_buffer(log, 1);
				1268	if (!buffer)
Dave Chinner	2451337	2014-06-25 14:58:08 +1000	[diff] [blame]	1269	return -ENOMEM;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1270	if (head_blk == 0) { / special case */
Christoph Hellwig	6e9b3dd	2019-06-28 19:27:27 -0700	[diff] [blame]	1271	error = xlog_bread(log, 0, 1, buffer, &offset);
Christoph Hellwig	076e6ac	2009-03-16 08:24:13 +0100	[diff] [blame]	1272	if (error)
Alex Elder	9db127e	2010-04-15 18:17:26 +0000	[diff] [blame]	1273	goto done;
Christoph Hellwig	076e6ac	2009-03-16 08:24:13 +0100	[diff] [blame]	1274
Christoph Hellwig	03bea6f	2007-10-12 10:58:05 +1000	[diff] [blame]	1275	if (xlog_get_cycle(offset) == 0) {
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1276	*tail_blk = 0;
				1277	/* leave all other log inited values alone */
Alex Elder	9db127e	2010-04-15 18:17:26 +0000	[diff] [blame]	1278	goto done;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1279	}
				1280	}
				1281
				1282	/*
Brian Foster	82ff6cc	2016-03-07 08:22:22 +1100	[diff] [blame]	1283	* Search backwards through the log looking for the log record header
				1284	* block. This wraps all the way back around to the head so something is
				1285	* seriously wrong if we can't find it.
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1286	*/
Christoph Hellwig	6e9b3dd	2019-06-28 19:27:27 -0700	[diff] [blame]	1287	error = xlog_rseek_logrec_hdr(log, head_blk, head_blk, 1, buffer,
Brian Foster	82ff6cc	2016-03-07 08:22:22 +1100	[diff] [blame]	1288	&rhead_blk, &rhead, &wrapped);
				1289	if (error < 0)
Darrick J. Wong	050552c	2019-11-14 12:51:34 -0800	[diff] [blame]	1290	goto done;
Brian Foster	82ff6cc	2016-03-07 08:22:22 +1100	[diff] [blame]	1291	if (!error) {
				1292	xfs_warn(log->l_mp, "%s: couldn't find sync record", __func__);
Darrick J. Wong	050552c	2019-11-14 12:51:34 -0800	[diff] [blame]	1293	error = -EFSCORRUPTED;
				1294	goto done;
Brian Foster	82ff6cc	2016-03-07 08:22:22 +1100	[diff] [blame]	1295	}
				1296	*tail_blk = BLOCK_LSN(be64_to_cpu(rhead->h_tail_lsn));
				1297
				1298	/*
Brian Foster	717bc0e	2016-03-07 08:22:22 +1100	[diff] [blame]	1299	* Set the log state based on the current head record.
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1300	*/
Brian Foster	717bc0e	2016-03-07 08:22:22 +1100	[diff] [blame]	1301	xlog_set_state(log, *head_blk, rhead, rhead_blk, wrapped);
Brian Foster	65b99a0	2016-03-07 08:22:22 +1100	[diff] [blame]	1302	tail_lsn = atomic64_read(&log->l_tail_lsn);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1303
				1304	/*
Brian Foster	65b99a0	2016-03-07 08:22:22 +1100	[diff] [blame]	1305	* Look for an unmount record at the head of the log. This sets the log
				1306	* state to determine whether recovery is necessary.
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1307	*/
Brian Foster	65b99a0	2016-03-07 08:22:22 +1100	[diff] [blame]	1308	error = xlog_check_unmount_rec(log, head_blk, tail_blk, rhead,
Christoph Hellwig	6e9b3dd	2019-06-28 19:27:27 -0700	[diff] [blame]	1309	rhead_blk, buffer, &clean);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1310	if (error)
				1311	goto done;
				1312
				1313	/*
Brian Foster	7f6aff3a	2016-03-07 08:22:22 +1100	[diff] [blame]	1314	* Verify the log head if the log is not clean (e.g., we have anything
				1315	* but an unmount record at the head). This uses CRC verification to
				1316	* detect and trim torn writes. If discovered, CRC failures are
				1317	* considered torn writes and the log head is trimmed accordingly.
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1318	*
Brian Foster	7f6aff3a	2016-03-07 08:22:22 +1100	[diff] [blame]	1319	* Note that we can only run CRC verification when the log is dirty
				1320	* because there's no guarantee that the log data behind an unmount
				1321	* record is compatible with the current architecture.
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1322	*/
Brian Foster	7f6aff3a	2016-03-07 08:22:22 +1100	[diff] [blame]	1323	if (!clean) {
				1324	xfs_daddr_t orig_head = *head_blk;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1325
Christoph Hellwig	6e9b3dd	2019-06-28 19:27:27 -0700	[diff] [blame]	1326	error = xlog_verify_head(log, head_blk, tail_blk, buffer,
Brian Foster	7f6aff3a	2016-03-07 08:22:22 +1100	[diff] [blame]	1327	&rhead_blk, &rhead, &wrapped);
Christoph Hellwig	076e6ac	2009-03-16 08:24:13 +0100	[diff] [blame]	1328	if (error)
Alex Elder	9db127e	2010-04-15 18:17:26 +0000	[diff] [blame]	1329	goto done;
Christoph Hellwig	076e6ac	2009-03-16 08:24:13 +0100	[diff] [blame]	1330
Brian Foster	7f6aff3a	2016-03-07 08:22:22 +1100	[diff] [blame]	1331	/* update in-core state again if the head changed */
				1332	if (*head_blk != orig_head) {
				1333	xlog_set_state(log, *head_blk, rhead, rhead_blk,
				1334	wrapped);
				1335	tail_lsn = atomic64_read(&log->l_tail_lsn);
				1336	error = xlog_check_unmount_rec(log, head_blk, tail_blk,
Christoph Hellwig	6e9b3dd	2019-06-28 19:27:27 -0700	[diff] [blame]	1337	rhead, rhead_blk, buffer,
Brian Foster	7f6aff3a	2016-03-07 08:22:22 +1100	[diff] [blame]	1338	&clean);
				1339	if (error)
				1340	goto done;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1341	}
				1342	}
				1343
				1344	/*
Brian Foster	65b99a0	2016-03-07 08:22:22 +1100	[diff] [blame]	1345	* Note that the unmount was clean. If the unmount was not clean, we
				1346	* need to know this to rebuild the superblock counters from the perag
				1347	* headers if we have a filesystem using non-persistent counters.
				1348	*/
				1349	if (clean)
Dave Chinner	2e973b2	2021-08-18 18:46:52 -0700	[diff] [blame]	1350	set_bit(XFS_OPSTATE_CLEAN, &log->l_mp->m_opstate);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1351
				1352	/*
				1353	* Make sure that there are no blocks in front of the head
				1354	* with the same cycle number as the head. This can happen
				1355	* because we allow multiple outstanding log writes concurrently,
				1356	* and the later writes might make it out before earlier ones.
				1357	*
				1358	* We use the lsn from before modifying it so that we'll never
				1359	* overwrite the unmount record after a clean unmount.
				1360	*
				1361	* Do this only if we are going to recover the filesystem
				1362	*
				1363	* NOTE: This used to say "if (!readonly)"
				1364	* However on Linux, we can & do recover a read-only filesystem.
				1365	* We only skip recovery if NORECOVERY is specified on mount,
				1366	* in which case we would not be here.
				1367	*
				1368	* But... if the -device- itself is readonly, just skip this.
				1369	* We can't recover this device anyway, so it won't matter.
				1370	*/
Christoph Hellwig	2d15d2c	2019-06-28 19:27:24 -0700	[diff] [blame]	1371	if (!xfs_readonly_buftarg(log->l_targ))
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1372	error = xlog_clear_stale_blocks(log, tail_lsn);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1373
Alex Elder	9db127e	2010-04-15 18:17:26 +0000	[diff] [blame]	1374	done:
Christoph Hellwig	6e9b3dd	2019-06-28 19:27:27 -0700	[diff] [blame]	1375	kmem_free(buffer);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1376
				1377	if (error)
Dave Chinner	a0fa2b6	2011-03-07 10:01:35 +1100	[diff] [blame]	1378	xfs_warn(log->l_mp, "failed to locate log tail");
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1379	return error;
				1380	}
				1381
				1382	/*
				1383	* Is the log zeroed at all?
				1384	*
				1385	* The last binary search should be changed to perform an X block read
				1386	* once X becomes small enough. You can then search linearly through
				1387	* the X blocks. This will cut down on the number of reads we need to do.
				1388	*
				1389	* If the log is partially zeroed, this routine will pass back the blkno
				1390	* of the first block with cycle number 0. It won't have a complete LR
				1391	* preceding it.
				1392	*
				1393	* Return:
				1394	* 0 => the log is completely written to
Dave Chinner	2451337	2014-06-25 14:58:08 +1000	[diff] [blame]	1395	* 1 => use *blk_no as the first block of the log
				1396	* <0 => error has occurred
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1397	*/
David Chinner	a8272ce	2007-11-23 16:28:09 +1100	[diff] [blame]	1398	STATIC int
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1399	xlog_find_zeroed(
Mark Tinguely	9a8d2fd	2012-06-14 09:22:16 -0500	[diff] [blame]	1400	struct xlog *log,
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1401	xfs_daddr_t *blk_no)
				1402	{
Christoph Hellwig	6e9b3dd	2019-06-28 19:27:27 -0700	[diff] [blame]	1403	char *buffer;
Christoph Hellwig	b2a922c	2015-06-22 09:45:10 +1000	[diff] [blame]	1404	char *offset;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1405	uint first_cycle, last_cycle;
				1406	xfs_daddr_t new_blk, last_blk, start_blk;
				1407	xfs_daddr_t num_scan_bblks;
				1408	int error, log_bbnum = log->l_logBBsize;
				1409
Nathan Scott	6fdf8cc	2006-06-28 10:13:52 +1000	[diff] [blame]	1410	*blk_no = 0;
				1411
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1412	/* check totally zeroed log */
Christoph Hellwig	6e9b3dd	2019-06-28 19:27:27 -0700	[diff] [blame]	1413	buffer = xlog_alloc_buffer(log, 1);
				1414	if (!buffer)
Dave Chinner	2451337	2014-06-25 14:58:08 +1000	[diff] [blame]	1415	return -ENOMEM;
Christoph Hellwig	6e9b3dd	2019-06-28 19:27:27 -0700	[diff] [blame]	1416	error = xlog_bread(log, 0, 1, buffer, &offset);
Christoph Hellwig	076e6ac	2009-03-16 08:24:13 +0100	[diff] [blame]	1417	if (error)
Christoph Hellwig	6e9b3dd	2019-06-28 19:27:27 -0700	[diff] [blame]	1418	goto out_free_buffer;
Christoph Hellwig	076e6ac	2009-03-16 08:24:13 +0100	[diff] [blame]	1419
Christoph Hellwig	03bea6f	2007-10-12 10:58:05 +1000	[diff] [blame]	1420	first_cycle = xlog_get_cycle(offset);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1421	if (first_cycle == 0) { /* completely zeroed log */
				1422	*blk_no = 0;
Christoph Hellwig	6e9b3dd	2019-06-28 19:27:27 -0700	[diff] [blame]	1423	kmem_free(buffer);
Dave Chinner	2451337	2014-06-25 14:58:08 +1000	[diff] [blame]	1424	return 1;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1425	}
				1426
				1427	/* check partially zeroed log */
Christoph Hellwig	6e9b3dd	2019-06-28 19:27:27 -0700	[diff] [blame]	1428	error = xlog_bread(log, log_bbnum-1, 1, buffer, &offset);
Christoph Hellwig	076e6ac	2009-03-16 08:24:13 +0100	[diff] [blame]	1429	if (error)
Christoph Hellwig	6e9b3dd	2019-06-28 19:27:27 -0700	[diff] [blame]	1430	goto out_free_buffer;
Christoph Hellwig	076e6ac	2009-03-16 08:24:13 +0100	[diff] [blame]	1431
Christoph Hellwig	03bea6f	2007-10-12 10:58:05 +1000	[diff] [blame]	1432	last_cycle = xlog_get_cycle(offset);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1433	if (last_cycle != 0) { /* log completely written to */
Christoph Hellwig	6e9b3dd	2019-06-28 19:27:27 -0700	[diff] [blame]	1434	kmem_free(buffer);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1435	return 0;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1436	}
				1437
				1438	/* we have a partially zeroed log */
				1439	last_blk = log_bbnum-1;
Christoph Hellwig	6e9b3dd	2019-06-28 19:27:27 -0700	[diff] [blame]	1440	error = xlog_find_cycle_start(log, buffer, 0, &last_blk, 0);
				1441	if (error)
				1442	goto out_free_buffer;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1443
				1444	/*
				1445	* Validate the answer. Because there is no way to guarantee that
				1446	* the entire log is made up of log records which are the same size,
				1447	* we scan over the defined maximum blocks. At this point, the maximum
				1448	* is not chosen to mean anything special. XXXmiken
				1449	*/
				1450	num_scan_bblks = XLOG_TOTAL_REC_SHIFT(log);
				1451	ASSERT(num_scan_bblks <= INT_MAX);
				1452
				1453	if (last_blk < num_scan_bblks)
				1454	num_scan_bblks = last_blk;
				1455	start_blk = last_blk - num_scan_bblks;
				1456
				1457	/*
				1458	* We search for any instances of cycle number 0 that occur before
				1459	* our current estimate of the head. What we're trying to detect is
				1460	* 1 ... \| 0 \| 1 \| 0...
				1461	* ^ binary search ends here
				1462	*/
				1463	if ((error = xlog_find_verify_cycle(log, start_blk,
				1464	(int)num_scan_bblks, 0, &new_blk)))
Christoph Hellwig	6e9b3dd	2019-06-28 19:27:27 -0700	[diff] [blame]	1465	goto out_free_buffer;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1466	if (new_blk != -1)
				1467	last_blk = new_blk;
				1468
				1469	/*
				1470	* Potentially backup over partial log record write. We don't need
				1471	* to search the end of the log because we know it is zero.
				1472	*/
Dave Chinner	2451337	2014-06-25 14:58:08 +1000	[diff] [blame]	1473	error = xlog_find_verify_log_record(log, start_blk, &last_blk, 0);
				1474	if (error == 1)
				1475	error = -EIO;
				1476	if (error)
Christoph Hellwig	6e9b3dd	2019-06-28 19:27:27 -0700	[diff] [blame]	1477	goto out_free_buffer;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1478
				1479	*blk_no = last_blk;
Christoph Hellwig	6e9b3dd	2019-06-28 19:27:27 -0700	[diff] [blame]	1480	out_free_buffer:
				1481	kmem_free(buffer);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1482	if (error)
				1483	return error;
Dave Chinner	2451337	2014-06-25 14:58:08 +1000	[diff] [blame]	1484	return 1;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1485	}
				1486
				1487	/*
				1488	* These are simple subroutines used by xlog_clear_stale_blocks() below
				1489	* to initialize a buffer full of empty log record headers and write
				1490	* them into the log.
				1491	*/
				1492	STATIC void
				1493	xlog_add_record(
Mark Tinguely	9a8d2fd	2012-06-14 09:22:16 -0500	[diff] [blame]	1494	struct xlog *log,
Christoph Hellwig	b2a922c	2015-06-22 09:45:10 +1000	[diff] [blame]	1495	char *buf,
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1496	int cycle,
				1497	int block,
				1498	int tail_cycle,
				1499	int tail_block)
				1500	{
				1501	xlog_rec_header_t recp = (xlog_rec_header_t )buf;
				1502
				1503	memset(buf, 0, BBSIZE);
Christoph Hellwig	b53e675	2007-10-12 10:59:34 +1000	[diff] [blame]	1504	recp->h_magicno = cpu_to_be32(XLOG_HEADER_MAGIC_NUM);
				1505	recp->h_cycle = cpu_to_be32(cycle);
				1506	recp->h_version = cpu_to_be32(
Dave Chinner	38c26bf	2021-08-18 18:46:37 -0700	[diff] [blame]	1507	xfs_has_logv2(log->l_mp) ? 2 : 1);
Christoph Hellwig	b53e675	2007-10-12 10:59:34 +1000	[diff] [blame]	1508	recp->h_lsn = cpu_to_be64(xlog_assign_lsn(cycle, block));
				1509	recp->h_tail_lsn = cpu_to_be64(xlog_assign_lsn(tail_cycle, tail_block));
				1510	recp->h_fmt = cpu_to_be32(XLOG_FMT);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1511	memcpy(&recp->h_fs_uuid, &log->l_mp->m_sb.sb_uuid, sizeof(uuid_t));
				1512	}
				1513
				1514	STATIC int
				1515	xlog_write_log_records(
Mark Tinguely	9a8d2fd	2012-06-14 09:22:16 -0500	[diff] [blame]	1516	struct xlog *log,
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1517	int cycle,
				1518	int start_block,
				1519	int blocks,
				1520	int tail_cycle,
				1521	int tail_block)
				1522	{
Christoph Hellwig	b2a922c	2015-06-22 09:45:10 +1000	[diff] [blame]	1523	char *offset;
Christoph Hellwig	6e9b3dd	2019-06-28 19:27:27 -0700	[diff] [blame]	1524	char *buffer;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1525	int balign, ealign;
Alex Elder	69ce58f	2010-04-20 17:09:59 +1000	[diff] [blame]	1526	int sectbb = log->l_sectBBsize;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1527	int end_block = start_block + blocks;
				1528	int bufblks;
				1529	int error = 0;
				1530	int i, j = 0;
				1531
Alex Elder	6881a22	2010-04-13 15:22:29 +1000	[diff] [blame]	1532	/*
				1533	* Greedily allocate a buffer big enough to handle the full
				1534	* range of basic blocks to be written. If that fails, try
				1535	* a smaller size. We need to be able to write at least a
				1536	* log sector, or we're out of luck.
				1537	*/
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1538	bufblks = 1 << ffs(blocks);
Dave Chinner	81158e0	2012-04-27 19:45:22 +1000	[diff] [blame]	1539	while (bufblks > log->l_logBBsize)
				1540	bufblks >>= 1;
Christoph Hellwig	6e9b3dd	2019-06-28 19:27:27 -0700	[diff] [blame]	1541	while (!(buffer = xlog_alloc_buffer(log, bufblks))) {
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1542	bufblks >>= 1;
Alex Elder	69ce58f	2010-04-20 17:09:59 +1000	[diff] [blame]	1543	if (bufblks < sectbb)
Dave Chinner	2451337	2014-06-25 14:58:08 +1000	[diff] [blame]	1544	return -ENOMEM;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1545	}
				1546
				1547	/* We may need to do a read at the start to fill in part of
				1548	* the buffer in the starting sector not covered by the first
				1549	* write below.
				1550	*/
Alex Elder	5c17f53	2010-04-13 15:22:48 +1000	[diff] [blame]	1551	balign = round_down(start_block, sectbb);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1552	if (balign != start_block) {
Christoph Hellwig	6e9b3dd	2019-06-28 19:27:27 -0700	[diff] [blame]	1553	error = xlog_bread_noalign(log, start_block, 1, buffer);
Christoph Hellwig	076e6ac	2009-03-16 08:24:13 +0100	[diff] [blame]	1554	if (error)
Christoph Hellwig	6e9b3dd	2019-06-28 19:27:27 -0700	[diff] [blame]	1555	goto out_free_buffer;
Christoph Hellwig	076e6ac	2009-03-16 08:24:13 +0100	[diff] [blame]	1556
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1557	j = start_block - balign;
				1558	}
				1559
				1560	for (i = start_block; i < end_block; i += bufblks) {
				1561	int bcount, endcount;
				1562
				1563	bcount = min(bufblks, end_block - start_block);
				1564	endcount = bcount - j;
				1565
				1566	/* We may need to do a read at the end to fill in part of
				1567	* the buffer in the final sector not covered by the write.
				1568	* If this is the same sector as the above read, skip it.
				1569	*/
Alex Elder	5c17f53	2010-04-13 15:22:48 +1000	[diff] [blame]	1570	ealign = round_down(end_block, sectbb);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1571	if (j == 0 && (start_block + endcount > ealign)) {
Christoph Hellwig	6ad5b32	2019-06-28 19:27:26 -0700	[diff] [blame]	1572	error = xlog_bread_noalign(log, ealign, sectbb,
Christoph Hellwig	6e9b3dd	2019-06-28 19:27:27 -0700	[diff] [blame]	1573	buffer + BBTOB(ealign - start_block));
Christoph Hellwig	076e6ac	2009-03-16 08:24:13 +0100	[diff] [blame]	1574	if (error)
				1575	break;
				1576
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1577	}
				1578
Christoph Hellwig	6e9b3dd	2019-06-28 19:27:27 -0700	[diff] [blame]	1579	offset = buffer + xlog_align(log, start_block);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1580	for (; j < endcount; j++) {
				1581	xlog_add_record(log, offset, cycle, i+j,
				1582	tail_cycle, tail_block);
				1583	offset += BBSIZE;
				1584	}
Christoph Hellwig	6e9b3dd	2019-06-28 19:27:27 -0700	[diff] [blame]	1585	error = xlog_bwrite(log, start_block, endcount, buffer);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1586	if (error)
				1587	break;
				1588	start_block += endcount;
				1589	j = 0;
				1590	}
Christoph Hellwig	076e6ac	2009-03-16 08:24:13 +0100	[diff] [blame]	1591
Christoph Hellwig	6e9b3dd	2019-06-28 19:27:27 -0700	[diff] [blame]	1592	out_free_buffer:
				1593	kmem_free(buffer);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1594	return error;
				1595	}
				1596
				1597	/*
				1598	* This routine is called to blow away any incomplete log writes out
				1599	* in front of the log head. We do this so that we won't become confused
				1600	* if we come up, write only a little bit more, and then crash again.
				1601	* If we leave the partial log records out there, this situation could
				1602	* cause us to think those partial writes are valid blocks since they
				1603	* have the current cycle number. We get rid of them by overwriting them
				1604	* with empty log records with the old cycle number rather than the
				1605	* current one.
				1606	*
				1607	* The tail lsn is passed in rather than taken from
				1608	* the log so that we will not write over the unmount record after a
				1609	* clean unmount in a 512 block log. Doing so would leave the log without
				1610	* any valid log records in it until a new one was written. If we crashed
				1611	* during that time we would not be able to recover.
				1612	*/
				1613	STATIC int
				1614	xlog_clear_stale_blocks(
Mark Tinguely	9a8d2fd	2012-06-14 09:22:16 -0500	[diff] [blame]	1615	struct xlog *log,
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1616	xfs_lsn_t tail_lsn)
				1617	{
				1618	int tail_cycle, head_cycle;
				1619	int tail_block, head_block;
				1620	int tail_distance, max_distance;
				1621	int distance;
				1622	int error;
				1623
				1624	tail_cycle = CYCLE_LSN(tail_lsn);
				1625	tail_block = BLOCK_LSN(tail_lsn);
				1626	head_cycle = log->l_curr_cycle;
				1627	head_block = log->l_curr_block;
				1628
				1629	/*
				1630	* Figure out the distance between the new head of the log
				1631	* and the tail. We want to write over any blocks beyond the
				1632	* head that we may have written just before the crash, but
				1633	* we don't want to overwrite the tail of the log.
				1634	*/
				1635	if (head_cycle == tail_cycle) {
				1636	/*
				1637	* The tail is behind the head in the physical log,
				1638	* so the distance from the head to the tail is the
				1639	* distance from the head to the end of the log plus
				1640	* the distance from the beginning of the log to the
				1641	* tail.
				1642	*/
Darrick J. Wong	a71895c	2019-11-11 12:53:22 -0800	[diff] [blame]	1643	if (XFS_IS_CORRUPT(log->l_mp,
				1644	head_block < tail_block \|\|
				1645	head_block >= log->l_logBBsize))
Dave Chinner	2451337	2014-06-25 14:58:08 +1000	[diff] [blame]	1646	return -EFSCORRUPTED;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1647	tail_distance = tail_block + (log->l_logBBsize - head_block);
				1648	} else {
				1649	/*
				1650	* The head is behind the tail in the physical log,
				1651	* so the distance from the head to the tail is just
				1652	* the tail block minus the head block.
				1653	*/
Darrick J. Wong	a71895c	2019-11-11 12:53:22 -0800	[diff] [blame]	1654	if (XFS_IS_CORRUPT(log->l_mp,
				1655	head_block >= tail_block \|\|
				1656	head_cycle != tail_cycle + 1))
Dave Chinner	2451337	2014-06-25 14:58:08 +1000	[diff] [blame]	1657	return -EFSCORRUPTED;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1658	tail_distance = tail_block - head_block;
				1659	}
				1660
				1661	/*
				1662	* If the head is right up against the tail, we can't clear
				1663	* anything.
				1664	*/
				1665	if (tail_distance <= 0) {
				1666	ASSERT(tail_distance == 0);
				1667	return 0;
				1668	}
				1669
				1670	max_distance = XLOG_TOTAL_REC_SHIFT(log);
				1671	/*
				1672	* Take the smaller of the maximum amount of outstanding I/O
				1673	* we could have and the distance to the tail to clear out.
				1674	* We take the smaller so that we don't overwrite the tail and
				1675	* we don't waste all day writing from the head to the tail
				1676	* for no reason.
				1677	*/
Dave Chinner	9bb54cb	2018-06-07 07:54:02 -0700	[diff] [blame]	1678	max_distance = min(max_distance, tail_distance);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1679
				1680	if ((head_block + max_distance) <= log->l_logBBsize) {
				1681	/*
				1682	* We can stomp all the blocks we need to without
				1683	* wrapping around the end of the log. Just do it
				1684	* in a single write. Use the cycle number of the
				1685	* current cycle minus one so that the log will look like:
				1686	* n ... \| n - 1 ...
				1687	*/
				1688	error = xlog_write_log_records(log, (head_cycle - 1),
				1689	head_block, max_distance, tail_cycle,
				1690	tail_block);
				1691	if (error)
				1692	return error;
				1693	} else {
				1694	/*
				1695	* We need to wrap around the end of the physical log in
				1696	* order to clear all the blocks. Do it in two separate
				1697	* I/Os. The first write should be from the head to the
				1698	* end of the physical log, and it should use the current
				1699	* cycle number minus one just like above.
				1700	*/
				1701	distance = log->l_logBBsize - head_block;
				1702	error = xlog_write_log_records(log, (head_cycle - 1),
				1703	head_block, distance, tail_cycle,
				1704	tail_block);
				1705
				1706	if (error)
				1707	return error;
				1708
				1709	/*
				1710	* Now write the blocks at the start of the physical log.
				1711	* This writes the remainder of the blocks we want to clear.
				1712	* It uses the current cycle number since we're now on the
				1713	* same cycle as the head so that we get:
				1714	* n ... n ... \| n - 1 ...
				1715	* ^^^^^ blocks we're writing
				1716	*/
				1717	distance = max_distance - (log->l_logBBsize - head_block);
				1718	error = xlog_write_log_records(log, head_cycle, 0, distance,
				1719	tail_cycle, tail_block);
				1720	if (error)
				1721	return error;
				1722	}
				1723
				1724	return 0;
				1725	}
				1726
Darrick J. Wong	154c733	2020-05-01 16:00:54 -0700	[diff] [blame]	1727	/*
				1728	* Release the recovered intent item in the AIL that matches the given intent
				1729	* type and intent id.
				1730	*/
				1731	void
				1732	xlog_recover_release_intent(
				1733	struct xlog *log,
				1734	unsigned short intent_type,
				1735	uint64_t intent_id)
				1736	{
				1737	struct xfs_ail_cursor cur;
				1738	struct xfs_log_item *lip;
				1739	struct xfs_ail *ailp = log->l_ailp;
				1740
				1741	spin_lock(&ailp->ail_lock);
				1742	for (lip = xfs_trans_ail_cursor_first(ailp, &cur, 0); lip != NULL;
				1743	lip = xfs_trans_ail_cursor_next(ailp, &cur)) {
				1744	if (lip->li_type != intent_type)
				1745	continue;
				1746	if (!lip->li_ops->iop_match(lip, intent_id))
				1747	continue;
				1748
				1749	spin_unlock(&ailp->ail_lock);
				1750	lip->li_ops->iop_release(lip);
				1751	spin_lock(&ailp->ail_lock);
				1752	break;
				1753	}
				1754
				1755	xfs_trans_ail_cursor_done(&cur);
				1756	spin_unlock(&ailp->ail_lock);
				1757	}
				1758
Darrick J. Wong	4bc6198	2021-08-08 08:27:13 -0700	[diff] [blame]	1759	int
				1760	xlog_recover_iget(
				1761	struct xfs_mount *mp,
				1762	xfs_ino_t ino,
				1763	struct xfs_inode **ipp)
				1764	{
				1765	int error;
				1766
				1767	error = xfs_iget(mp, NULL, ino, 0, 0, ipp);
				1768	if (error)
				1769	return error;
				1770
				1771	error = xfs_qm_dqattach(*ipp);
				1772	if (error) {
				1773	xfs_irele(*ipp);
				1774	return error;
				1775	}
				1776
				1777	if (VFS_I(*ipp)->i_nlink == 0)
				1778	xfs_iflags_set(*ipp, XFS_IRECOVERY);
				1779
				1780	return 0;
				1781	}
				1782
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1783	/******************************************************************************
				1784	*
				1785	* Log recover routines
				1786	*
				1787	******************************************************************************
				1788	*/
Darrick J. Wong	86ffa47	2020-05-01 16:00:45 -0700	[diff] [blame]	1789	static const struct xlog_recover_item_ops *xlog_recover_item_ops[] = {
				1790	&xlog_buf_item_ops,
				1791	&xlog_inode_item_ops,
				1792	&xlog_dquot_item_ops,
				1793	&xlog_quotaoff_item_ops,
				1794	&xlog_icreate_item_ops,
				1795	&xlog_efi_item_ops,
				1796	&xlog_efd_item_ops,
				1797	&xlog_rui_item_ops,
				1798	&xlog_rud_item_ops,
				1799	&xlog_cui_item_ops,
				1800	&xlog_cud_item_ops,
				1801	&xlog_bui_item_ops,
				1802	&xlog_bud_item_ops,
				1803	};
				1804
				1805	static const struct xlog_recover_item_ops *
				1806	xlog_find_item_ops(
				1807	struct xlog_recover_item *item)
				1808	{
				1809	unsigned int i;
				1810
				1811	for (i = 0; i < ARRAY_SIZE(xlog_recover_item_ops); i++)
				1812	if (ITEM_TYPE(item) == xlog_recover_item_ops[i]->item_type)
				1813	return xlog_recover_item_ops[i];
				1814
				1815	return NULL;
				1816	}
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1817
Dave Chinner	f0a7695	2010-01-11 11:49:57 +0000	[diff] [blame]	1818	/*
Dave Chinner	a775ad7	2013-06-05 12:09:07 +1000	[diff] [blame]	1819	* Sort the log items in the transaction.
				1820	*
				1821	* The ordering constraints are defined by the inode allocation and unlink
				1822	* behaviour. The rules are:
				1823	*
				1824	* 1. Every item is only logged once in a given transaction. Hence it
				1825	* represents the last logged state of the item. Hence ordering is
				1826	* dependent on the order in which operations need to be performed so
				1827	* required initial conditions are always met.
				1828	*
				1829	* 2. Cancelled buffers are recorded in pass 1 in a separate table and
				1830	* there's nothing to replay from them so we can simply cull them
				1831	* from the transaction. However, we can't do that until after we've
				1832	* replayed all the other items because they may be dependent on the
				1833	* cancelled buffer and replaying the cancelled buffer can remove it
				1834	* from the cancelled buffer table. Hence they have to be done last.
				1835	*
				1836	* 3. Inode allocation buffers must be replayed before inode items that
Dave Chinner	28c8e41	2013-06-27 16:04:55 +1000	[diff] [blame]	1837	* read the buffer and replay changes into it. For filesystems using the
				1838	* ICREATE transactions, this means XFS_LI_ICREATE objects need to get
				1839	* treated the same as inode allocation buffers as they create and
				1840	* initialise the buffers directly.
Dave Chinner	a775ad7	2013-06-05 12:09:07 +1000	[diff] [blame]	1841	*
				1842	* 4. Inode unlink buffers must be replayed after inode items are replayed.
				1843	* This ensures that inodes are completely flushed to the inode buffer
				1844	* in a "free" state before we remove the unlinked inode list pointer.
				1845	*
				1846	* Hence the ordering needs to be inode allocation buffers first, inode items
				1847	* second, inode unlink buffers third and cancelled buffers last.
				1848	*
				1849	* But there's a problem with that - we can't tell an inode allocation buffer
				1850	* apart from a regular buffer, so we can't separate them. We can, however,
				1851	* tell an inode unlink buffer from the others, and so we can separate them out
				1852	* from all the other buffers and move them to last.
				1853	*
				1854	* Hence, 4 lists, in order from head to tail:
Dave Chinner	28c8e41	2013-06-27 16:04:55 +1000	[diff] [blame]	1855	* - buffer_list for all buffers except cancelled/inode unlink buffers
				1856	* - item_list for all non-buffer items
				1857	* - inode_buffer_list for inode unlink buffers
				1858	* - cancel_list for the cancelled buffers
				1859	*
				1860	* Note that we add objects to the tail of the lists so that first-to-last
				1861	* ordering is preserved within the lists. Adding objects to the head of the
				1862	* list means when we traverse from the head we walk them in last-to-first
				1863	* order. For cancelled buffers and inode unlink buffers this doesn't matter,
				1864	* but for all other items there may be specific ordering that we need to
				1865	* preserve.
Dave Chinner	f0a7695	2010-01-11 11:49:57 +0000	[diff] [blame]	1866	*/
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1867	STATIC int
				1868	xlog_recover_reorder_trans(
Mark Tinguely	ad223e6	2012-06-14 09:22:15 -0500	[diff] [blame]	1869	struct xlog *log,
				1870	struct xlog_recover *trans,
Dave Chinner	9abbc53	2010-04-13 15:06:46 +1000	[diff] [blame]	1871	int pass)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1872	{
Darrick J. Wong	35f4521	2020-04-30 10:45:41 -0700	[diff] [blame]	1873	struct xlog_recover_item item, n;
Mark Tinguely	2a84108	2013-10-02 07:51:12 -0500	[diff] [blame]	1874	int error = 0;
Dave Chinner	f0a7695	2010-01-11 11:49:57 +0000	[diff] [blame]	1875	LIST_HEAD(sort_list);
Dave Chinner	a775ad7	2013-06-05 12:09:07 +1000	[diff] [blame]	1876	LIST_HEAD(cancel_list);
				1877	LIST_HEAD(buffer_list);
				1878	LIST_HEAD(inode_buffer_list);
Christoph Hellwig	5ce70b7	2020-04-27 11:14:59 -0700	[diff] [blame]	1879	LIST_HEAD(item_list);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1880
Dave Chinner	f0a7695	2010-01-11 11:49:57 +0000	[diff] [blame]	1881	list_splice_init(&trans->r_itemq, &sort_list);
				1882	list_for_each_entry_safe(item, n, &sort_list, ri_list) {
Darrick J. Wong	86ffa47	2020-05-01 16:00:45 -0700	[diff] [blame]	1883	enum xlog_recover_reorder fate = XLOG_REORDER_ITEM_LIST;
Dave Chinner	f0a7695	2010-01-11 11:49:57 +0000	[diff] [blame]	1884
Darrick J. Wong	86ffa47	2020-05-01 16:00:45 -0700	[diff] [blame]	1885	item->ri_ops = xlog_find_item_ops(item);
				1886	if (!item->ri_ops) {
Dave Chinner	a0fa2b6	2011-03-07 10:01:35 +1100	[diff] [blame]	1887	xfs_warn(log->l_mp,
Darrick J. Wong	0d2d35a	2020-04-21 14:16:52 -0700	[diff] [blame]	1888	"%s: unrecognized type of log operation (%d)",
				1889	__func__, ITEM_TYPE(item));
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1890	ASSERT(0);
Mark Tinguely	2a84108	2013-10-02 07:51:12 -0500	[diff] [blame]	1891	/*
				1892	* return the remaining items back to the transaction
				1893	* item list so they can be freed in caller.
				1894	*/
				1895	if (!list_empty(&sort_list))
				1896	list_splice_init(&sort_list, &trans->r_itemq);
Darrick J. Wong	86ffa47	2020-05-01 16:00:45 -0700	[diff] [blame]	1897	error = -EFSCORRUPTED;
				1898	break;
				1899	}
				1900
				1901	if (item->ri_ops->reorder)
				1902	fate = item->ri_ops->reorder(item);
				1903
				1904	switch (fate) {
				1905	case XLOG_REORDER_BUFFER_LIST:
				1906	list_move_tail(&item->ri_list, &buffer_list);
				1907	break;
				1908	case XLOG_REORDER_CANCEL_LIST:
				1909	trace_xfs_log_recover_item_reorder_head(log,
				1910	trans, item, pass);
				1911	list_move(&item->ri_list, &cancel_list);
				1912	break;
				1913	case XLOG_REORDER_INODE_BUFFER_LIST:
				1914	list_move(&item->ri_list, &inode_buffer_list);
				1915	break;
				1916	case XLOG_REORDER_ITEM_LIST:
				1917	trace_xfs_log_recover_item_reorder_tail(log,
				1918	trans, item, pass);
				1919	list_move_tail(&item->ri_list, &item_list);
				1920	break;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1921	}
Dave Chinner	f0a7695	2010-01-11 11:49:57 +0000	[diff] [blame]	1922	}
Darrick J. Wong	86ffa47	2020-05-01 16:00:45 -0700	[diff] [blame]	1923
Dave Chinner	f0a7695	2010-01-11 11:49:57 +0000	[diff] [blame]	1924	ASSERT(list_empty(&sort_list));
Dave Chinner	a775ad7	2013-06-05 12:09:07 +1000	[diff] [blame]	1925	if (!list_empty(&buffer_list))
				1926	list_splice(&buffer_list, &trans->r_itemq);
Christoph Hellwig	5ce70b7	2020-04-27 11:14:59 -0700	[diff] [blame]	1927	if (!list_empty(&item_list))
				1928	list_splice_tail(&item_list, &trans->r_itemq);
Dave Chinner	a775ad7	2013-06-05 12:09:07 +1000	[diff] [blame]	1929	if (!list_empty(&inode_buffer_list))
				1930	list_splice_tail(&inode_buffer_list, &trans->r_itemq);
				1931	if (!list_empty(&cancel_list))
				1932	list_splice_tail(&cancel_list, &trans->r_itemq);
Mark Tinguely	2a84108	2013-10-02 07:51:12 -0500	[diff] [blame]	1933	return error;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1934	}
				1935
Darrick J. Wong	8ea5682	2020-05-01 16:00:46 -0700	[diff] [blame]	1936	void
Christoph Hellwig	7d4894b	2020-04-27 18:23:17 -0700	[diff] [blame]	1937	xlog_buf_readahead(
				1938	struct xlog *log,
				1939	xfs_daddr_t blkno,
				1940	uint len,
				1941	const struct xfs_buf_ops *ops)
				1942	{
				1943	if (!xlog_is_buffer_cancelled(log, blkno, len))
				1944	xfs_buf_readahead(log->l_mp->m_ddev_targp, blkno, len, ops);
				1945	}
				1946
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1947	STATIC int
Zhi Yong Wu	00574da	2013-08-14 15:16:03 +0800	[diff] [blame]	1948	xlog_recover_items_pass2(
				1949	struct xlog *log,
				1950	struct xlog_recover *trans,
				1951	struct list_head *buffer_list,
				1952	struct list_head *item_list)
				1953	{
				1954	struct xlog_recover_item *item;
				1955	int error = 0;
				1956
				1957	list_for_each_entry(item, item_list, ri_list) {
Darrick J. Wong	2565a11	2020-05-01 16:00:50 -0700	[diff] [blame]	1958	trace_xfs_log_recover_item_recover(log, trans, item,
				1959	XLOG_RECOVER_PASS2);
				1960
				1961	if (item->ri_ops->commit_pass2)
				1962	error = item->ri_ops->commit_pass2(log, buffer_list,
				1963	item, trans->r_lsn);
Zhi Yong Wu	00574da	2013-08-14 15:16:03 +0800	[diff] [blame]	1964	if (error)
				1965	return error;
				1966	}
				1967
				1968	return error;
				1969	}
				1970
Christoph Hellwig	d045094	2010-12-01 22:06:23 +0000	[diff] [blame]	1971	/*
				1972	* Perform the transaction.
				1973	*
				1974	* If the transaction modifies a buffer or inode, do it now. Otherwise,
				1975	* EFIs and EFDs get queued up by adding entries into the AIL for them.
				1976	*/
				1977	STATIC int
				1978	xlog_recover_commit_trans(
Mark Tinguely	ad223e6	2012-06-14 09:22:15 -0500	[diff] [blame]	1979	struct xlog *log,
Christoph Hellwig	d045094	2010-12-01 22:06:23 +0000	[diff] [blame]	1980	struct xlog_recover *trans,
Brian Foster	12818d2	2016-09-26 08:22:16 +1000	[diff] [blame]	1981	int pass,
				1982	struct list_head *buffer_list)
Christoph Hellwig	d045094	2010-12-01 22:06:23 +0000	[diff] [blame]	1983	{
Zhi Yong Wu	00574da	2013-08-14 15:16:03 +0800	[diff] [blame]	1984	int error = 0;
Zhi Yong Wu	00574da	2013-08-14 15:16:03 +0800	[diff] [blame]	1985	int items_queued = 0;
				1986	struct xlog_recover_item *item;
				1987	struct xlog_recover_item *next;
Zhi Yong Wu	00574da	2013-08-14 15:16:03 +0800	[diff] [blame]	1988	LIST_HEAD (ra_list);
				1989	LIST_HEAD (done_list);
				1990
				1991	#define XLOG_RECOVER_COMMIT_QUEUE_MAX 100
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1992
Brian Foster	3977543	2017-06-24 10:11:41 -0700	[diff] [blame]	1993	hlist_del_init(&trans->r_list);
Christoph Hellwig	d045094	2010-12-01 22:06:23 +0000	[diff] [blame]	1994
				1995	error = xlog_recover_reorder_trans(log, trans, pass);
				1996	if (error)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1997	return error;
Christoph Hellwig	d045094	2010-12-01 22:06:23 +0000	[diff] [blame]	1998
Zhi Yong Wu	00574da	2013-08-14 15:16:03 +0800	[diff] [blame]	1999	list_for_each_entry_safe(item, next, &trans->r_itemq, ri_list) {
Darrick J. Wong	3304a4f	2020-05-01 16:00:46 -0700	[diff] [blame]	2000	trace_xfs_log_recover_item_recover(log, trans, item, pass);
				2001
Christoph Hellwig	43ff212	2012-04-23 15:58:39 +1000	[diff] [blame]	2002	switch (pass) {
				2003	case XLOG_RECOVER_PASS1:
Darrick J. Wong	3304a4f	2020-05-01 16:00:46 -0700	[diff] [blame]	2004	if (item->ri_ops->commit_pass1)
				2005	error = item->ri_ops->commit_pass1(log, item);
Christoph Hellwig	43ff212	2012-04-23 15:58:39 +1000	[diff] [blame]	2006	break;
				2007	case XLOG_RECOVER_PASS2:
Darrick J. Wong	8ea5682	2020-05-01 16:00:46 -0700	[diff] [blame]	2008	if (item->ri_ops->ra_pass2)
				2009	item->ri_ops->ra_pass2(log, item);
Zhi Yong Wu	00574da	2013-08-14 15:16:03 +0800	[diff] [blame]	2010	list_move_tail(&item->ri_list, &ra_list);
				2011	items_queued++;
				2012	if (items_queued >= XLOG_RECOVER_COMMIT_QUEUE_MAX) {
				2013	error = xlog_recover_items_pass2(log, trans,
Brian Foster	12818d2	2016-09-26 08:22:16 +1000	[diff] [blame]	2014	buffer_list, &ra_list);
Zhi Yong Wu	00574da	2013-08-14 15:16:03 +0800	[diff] [blame]	2015	list_splice_tail_init(&ra_list, &done_list);
				2016	items_queued = 0;
				2017	}
				2018
Christoph Hellwig	43ff212	2012-04-23 15:58:39 +1000	[diff] [blame]	2019	break;
				2020	default:
				2021	ASSERT(0);
				2022	}
				2023
Christoph Hellwig	d045094	2010-12-01 22:06:23 +0000	[diff] [blame]	2024	if (error)
Christoph Hellwig	43ff212	2012-04-23 15:58:39 +1000	[diff] [blame]	2025	goto out;
Christoph Hellwig	d045094	2010-12-01 22:06:23 +0000	[diff] [blame]	2026	}
				2027
Zhi Yong Wu	00574da	2013-08-14 15:16:03 +0800	[diff] [blame]	2028	out:
				2029	if (!list_empty(&ra_list)) {
				2030	if (!error)
				2031	error = xlog_recover_items_pass2(log, trans,
Brian Foster	12818d2	2016-09-26 08:22:16 +1000	[diff] [blame]	2032	buffer_list, &ra_list);
Zhi Yong Wu	00574da	2013-08-14 15:16:03 +0800	[diff] [blame]	2033	list_splice_tail_init(&ra_list, &done_list);
				2034	}
				2035
				2036	if (!list_empty(&done_list))
				2037	list_splice_init(&done_list, &trans->r_itemq);
				2038
Brian Foster	12818d2	2016-09-26 08:22:16 +1000	[diff] [blame]	2039	return error;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2040	}
				2041
Dave Chinner	7656066	2014-09-29 09:45:42 +1000	[diff] [blame]	2042	STATIC void
				2043	xlog_recover_add_item(
				2044	struct list_head *head)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2045	{
Darrick J. Wong	35f4521	2020-04-30 10:45:41 -0700	[diff] [blame]	2046	struct xlog_recover_item *item;
Dave Chinner	7656066	2014-09-29 09:45:42 +1000	[diff] [blame]	2047
Darrick J. Wong	35f4521	2020-04-30 10:45:41 -0700	[diff] [blame]	2048	item = kmem_zalloc(sizeof(struct xlog_recover_item), 0);
Dave Chinner	7656066	2014-09-29 09:45:42 +1000	[diff] [blame]	2049	INIT_LIST_HEAD(&item->ri_list);
				2050	list_add_tail(&item->ri_list, head);
				2051	}
				2052
				2053	STATIC int
				2054	xlog_recover_add_to_cont_trans(
				2055	struct xlog *log,
				2056	struct xlog_recover *trans,
Christoph Hellwig	b2a922c	2015-06-22 09:45:10 +1000	[diff] [blame]	2057	char *dp,
Dave Chinner	7656066	2014-09-29 09:45:42 +1000	[diff] [blame]	2058	int len)
				2059	{
Darrick J. Wong	35f4521	2020-04-30 10:45:41 -0700	[diff] [blame]	2060	struct xlog_recover_item *item;
Christoph Hellwig	b2a922c	2015-06-22 09:45:10 +1000	[diff] [blame]	2061	char ptr, old_ptr;
Dave Chinner	7656066	2014-09-29 09:45:42 +1000	[diff] [blame]	2062	int old_len;
				2063
Brian Foster	89cebc84	2015-07-29 11:51:10 +1000	[diff] [blame]	2064	/*
				2065	* If the transaction is empty, the header was split across this and the
				2066	* previous record. Copy the rest of the header.
				2067	*/
Dave Chinner	7656066	2014-09-29 09:45:42 +1000	[diff] [blame]	2068	if (list_empty(&trans->r_itemq)) {
Brian Foster	848ccfc	2015-11-10 10:10:33 +1100	[diff] [blame]	2069	ASSERT(len <= sizeof(struct xfs_trans_header));
Brian Foster	89cebc84	2015-07-29 11:51:10 +1000	[diff] [blame]	2070	if (len > sizeof(struct xfs_trans_header)) {
				2071	xfs_warn(log->l_mp, "%s: bad header length", __func__);
Darrick J. Wong	895e196	2019-11-06 09:17:43 -0800	[diff] [blame]	2072	return -EFSCORRUPTED;
Brian Foster	89cebc84	2015-07-29 11:51:10 +1000	[diff] [blame]	2073	}
				2074
Dave Chinner	7656066	2014-09-29 09:45:42 +1000	[diff] [blame]	2075	xlog_recover_add_item(&trans->r_itemq);
Christoph Hellwig	b2a922c	2015-06-22 09:45:10 +1000	[diff] [blame]	2076	ptr = (char *)&trans->r_theader +
Brian Foster	89cebc84	2015-07-29 11:51:10 +1000	[diff] [blame]	2077	sizeof(struct xfs_trans_header) - len;
Dave Chinner	7656066	2014-09-29 09:45:42 +1000	[diff] [blame]	2078	memcpy(ptr, dp, len);
				2079	return 0;
				2080	}
Brian Foster	89cebc84	2015-07-29 11:51:10 +1000	[diff] [blame]	2081
Dave Chinner	7656066	2014-09-29 09:45:42 +1000	[diff] [blame]	2082	/* take the tail entry */
Darrick J. Wong	35f4521	2020-04-30 10:45:41 -0700	[diff] [blame]	2083	item = list_entry(trans->r_itemq.prev, struct xlog_recover_item,
				2084	ri_list);
Dave Chinner	7656066	2014-09-29 09:45:42 +1000	[diff] [blame]	2085
				2086	old_ptr = item->ri_buf[item->ri_cnt-1].i_addr;
				2087	old_len = item->ri_buf[item->ri_cnt-1].i_len;
				2088
Dave Chinner	de2860f	2021-08-09 10:10:00 -0700	[diff] [blame]	2089	ptr = kvrealloc(old_ptr, old_len, len + old_len, GFP_KERNEL);
				2090	if (!ptr)
				2091	return -ENOMEM;
Dave Chinner	7656066	2014-09-29 09:45:42 +1000	[diff] [blame]	2092	memcpy(&ptr[old_len], dp, len);
				2093	item->ri_buf[item->ri_cnt-1].i_len += len;
				2094	item->ri_buf[item->ri_cnt-1].i_addr = ptr;
				2095	trace_xfs_log_recover_item_add_cont(log, trans, item, 0);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2096	return 0;
				2097	}
				2098
				2099	/*
Dave Chinner	7656066	2014-09-29 09:45:42 +1000	[diff] [blame]	2100	* The next region to add is the start of a new region. It could be
				2101	* a whole region or it could be the first part of a new region. Because
				2102	* of this, the assumption here is that the type and size fields of all
				2103	* format structures fit into the first 32 bits of the structure.
				2104	*
				2105	* This works because all regions must be 32 bit aligned. Therefore, we
				2106	* either have both fields or we have neither field. In the case we have
				2107	* neither field, the data part of the region is zero length. We only have
				2108	* a log_op_header and can throw away the header since a new one will appear
				2109	* later. If we have at least 4 bytes, then we can determine how many regions
				2110	* will appear in the current log item.
				2111	*/
				2112	STATIC int
				2113	xlog_recover_add_to_trans(
				2114	struct xlog *log,
				2115	struct xlog_recover *trans,
Christoph Hellwig	b2a922c	2015-06-22 09:45:10 +1000	[diff] [blame]	2116	char *dp,
Dave Chinner	7656066	2014-09-29 09:45:42 +1000	[diff] [blame]	2117	int len)
				2118	{
Darrick J. Wong	06b1132	2017-10-31 12:04:24 -0700	[diff] [blame]	2119	struct xfs_inode_log_format in_f; / any will do */
Darrick J. Wong	35f4521	2020-04-30 10:45:41 -0700	[diff] [blame]	2120	struct xlog_recover_item *item;
Christoph Hellwig	b2a922c	2015-06-22 09:45:10 +1000	[diff] [blame]	2121	char *ptr;
Dave Chinner	7656066	2014-09-29 09:45:42 +1000	[diff] [blame]	2122
				2123	if (!len)
				2124	return 0;
				2125	if (list_empty(&trans->r_itemq)) {
				2126	/* we need to catch log corruptions here */
				2127	if ((uint )dp != XFS_TRANS_HEADER_MAGIC) {
				2128	xfs_warn(log->l_mp, "%s: bad header magic number",
				2129	__func__);
				2130	ASSERT(0);
Darrick J. Wong	895e196	2019-11-06 09:17:43 -0800	[diff] [blame]	2131	return -EFSCORRUPTED;
Dave Chinner	7656066	2014-09-29 09:45:42 +1000	[diff] [blame]	2132	}
Brian Foster	89cebc84	2015-07-29 11:51:10 +1000	[diff] [blame]	2133
				2134	if (len > sizeof(struct xfs_trans_header)) {
				2135	xfs_warn(log->l_mp, "%s: bad header length", __func__);
				2136	ASSERT(0);
Darrick J. Wong	895e196	2019-11-06 09:17:43 -0800	[diff] [blame]	2137	return -EFSCORRUPTED;
Brian Foster	89cebc84	2015-07-29 11:51:10 +1000	[diff] [blame]	2138	}
				2139
				2140	/*
				2141	* The transaction header can be arbitrarily split across op
				2142	* records. If we don't have the whole thing here, copy what we
				2143	* do have and handle the rest in the next record.
				2144	*/
				2145	if (len == sizeof(struct xfs_trans_header))
Dave Chinner	7656066	2014-09-29 09:45:42 +1000	[diff] [blame]	2146	xlog_recover_add_item(&trans->r_itemq);
				2147	memcpy(&trans->r_theader, dp, len);
				2148	return 0;
				2149	}
				2150
Tetsuo Handa	707e0dd	2019-08-26 12:06:22 -0700	[diff] [blame]	2151	ptr = kmem_alloc(len, 0);
Dave Chinner	7656066	2014-09-29 09:45:42 +1000	[diff] [blame]	2152	memcpy(ptr, dp, len);
Darrick J. Wong	06b1132	2017-10-31 12:04:24 -0700	[diff] [blame]	2153	in_f = (struct xfs_inode_log_format *)ptr;
Dave Chinner	7656066	2014-09-29 09:45:42 +1000	[diff] [blame]	2154
				2155	/* take the tail entry */
Darrick J. Wong	35f4521	2020-04-30 10:45:41 -0700	[diff] [blame]	2156	item = list_entry(trans->r_itemq.prev, struct xlog_recover_item,
				2157	ri_list);
Dave Chinner	7656066	2014-09-29 09:45:42 +1000	[diff] [blame]	2158	if (item->ri_total != 0 &&
				2159	item->ri_total == item->ri_cnt) {
				2160	/* tail item is in use, get a new one */
				2161	xlog_recover_add_item(&trans->r_itemq);
				2162	item = list_entry(trans->r_itemq.prev,
Darrick J. Wong	35f4521	2020-04-30 10:45:41 -0700	[diff] [blame]	2163	struct xlog_recover_item, ri_list);
Dave Chinner	7656066	2014-09-29 09:45:42 +1000	[diff] [blame]	2164	}
				2165
				2166	if (item->ri_total == 0) { /* first region to be added */
				2167	if (in_f->ilf_size == 0 \|\|
				2168	in_f->ilf_size > XLOG_MAX_REGIONS_IN_ITEM) {
				2169	xfs_warn(log->l_mp,
				2170	"bad number of regions (%d) in inode log format",
				2171	in_f->ilf_size);
				2172	ASSERT(0);
				2173	kmem_free(ptr);
Darrick J. Wong	895e196	2019-11-06 09:17:43 -0800	[diff] [blame]	2174	return -EFSCORRUPTED;
Dave Chinner	7656066	2014-09-29 09:45:42 +1000	[diff] [blame]	2175	}
				2176
				2177	item->ri_total = in_f->ilf_size;
				2178	item->ri_buf =
				2179	kmem_zalloc(item->ri_total * sizeof(xfs_log_iovec_t),
Tetsuo Handa	707e0dd	2019-08-26 12:06:22 -0700	[diff] [blame]	2180	0);
Dave Chinner	7656066	2014-09-29 09:45:42 +1000	[diff] [blame]	2181	}
Darrick J. Wong	d6abecb	2019-11-06 09:11:23 -0800	[diff] [blame]	2182
				2183	if (item->ri_total <= item->ri_cnt) {
				2184	xfs_warn(log->l_mp,
				2185	"log item region count (%d) overflowed size (%d)",
				2186	item->ri_cnt, item->ri_total);
				2187	ASSERT(0);
				2188	kmem_free(ptr);
				2189	return -EFSCORRUPTED;
				2190	}
				2191
Dave Chinner	7656066	2014-09-29 09:45:42 +1000	[diff] [blame]	2192	/* Description region is ri_buf[0] */
				2193	item->ri_buf[item->ri_cnt].i_addr = ptr;
				2194	item->ri_buf[item->ri_cnt].i_len = len;
				2195	item->ri_cnt++;
				2196	trace_xfs_log_recover_item_add(log, trans, item, 0);
				2197	return 0;
				2198	}
Dave Chinner	b818cca	2014-09-29 09:45:54 +1000	[diff] [blame]	2199
Dave Chinner	7656066	2014-09-29 09:45:42 +1000	[diff] [blame]	2200	/*
				2201	* Free up any resources allocated by the transaction
				2202	*
				2203	* Remember that EFIs, EFDs, and IUNLINKs are handled later.
				2204	*/
				2205	STATIC void
				2206	xlog_recover_free_trans(
				2207	struct xlog_recover *trans)
				2208	{
Darrick J. Wong	35f4521	2020-04-30 10:45:41 -0700	[diff] [blame]	2209	struct xlog_recover_item item, n;
Dave Chinner	7656066	2014-09-29 09:45:42 +1000	[diff] [blame]	2210	int i;
				2211
Brian Foster	3977543	2017-06-24 10:11:41 -0700	[diff] [blame]	2212	hlist_del_init(&trans->r_list);
				2213
Dave Chinner	7656066	2014-09-29 09:45:42 +1000	[diff] [blame]	2214	list_for_each_entry_safe(item, n, &trans->r_itemq, ri_list) {
				2215	/* Free the regions in the item. */
				2216	list_del(&item->ri_list);
				2217	for (i = 0; i < item->ri_cnt; i++)
				2218	kmem_free(item->ri_buf[i].i_addr);
				2219	/* Free the item itself */
				2220	kmem_free(item->ri_buf);
				2221	kmem_free(item);
				2222	}
				2223	/* Free the transaction recover structure */
				2224	kmem_free(trans);
				2225	}
				2226
Dave Chinner	e9131e50	2014-09-29 09:45:18 +1000	[diff] [blame]	2227	/*
				2228	* On error or completion, trans is freed.
				2229	*/
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2230	STATIC int
Dave Chinner	eeb1168	2014-09-29 09:45:03 +1000	[diff] [blame]	2231	xlog_recovery_process_trans(
				2232	struct xlog *log,
				2233	struct xlog_recover *trans,
Christoph Hellwig	b2a922c	2015-06-22 09:45:10 +1000	[diff] [blame]	2234	char *dp,
Dave Chinner	eeb1168	2014-09-29 09:45:03 +1000	[diff] [blame]	2235	unsigned int len,
				2236	unsigned int flags,
Brian Foster	12818d2	2016-09-26 08:22:16 +1000	[diff] [blame]	2237	int pass,
				2238	struct list_head *buffer_list)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2239	{
Dave Chinner	e9131e50	2014-09-29 09:45:18 +1000	[diff] [blame]	2240	int error = 0;
				2241	bool freeit = false;
Dave Chinner	eeb1168	2014-09-29 09:45:03 +1000	[diff] [blame]	2242
				2243	/* mask off ophdr transaction container flags */
				2244	flags &= ~XLOG_END_TRANS;
				2245	if (flags & XLOG_WAS_CONT_TRANS)
				2246	flags &= ~XLOG_CONTINUE_TRANS;
				2247
Dave Chinner	88b863d	2014-09-29 09:45:32 +1000	[diff] [blame]	2248	/*
				2249	* Callees must not free the trans structure. We'll decide if we need to
				2250	* free it or not based on the operation being done and it's result.
				2251	*/
Dave Chinner	eeb1168	2014-09-29 09:45:03 +1000	[diff] [blame]	2252	switch (flags) {
				2253	/* expected flag values */
				2254	case 0:
				2255	case XLOG_CONTINUE_TRANS:
				2256	error = xlog_recover_add_to_trans(log, trans, dp, len);
				2257	break;
				2258	case XLOG_WAS_CONT_TRANS:
				2259	error = xlog_recover_add_to_cont_trans(log, trans, dp, len);
				2260	break;
				2261	case XLOG_COMMIT_TRANS:
Brian Foster	12818d2	2016-09-26 08:22:16 +1000	[diff] [blame]	2262	error = xlog_recover_commit_trans(log, trans, pass,
				2263	buffer_list);
Dave Chinner	88b863d	2014-09-29 09:45:32 +1000	[diff] [blame]	2264	/* success or fail, we are now done with this transaction. */
				2265	freeit = true;
Dave Chinner	eeb1168	2014-09-29 09:45:03 +1000	[diff] [blame]	2266	break;
				2267
				2268	/* unexpected flag values */
				2269	case XLOG_UNMOUNT_TRANS:
Dave Chinner	e9131e50	2014-09-29 09:45:18 +1000	[diff] [blame]	2270	/* just skip trans */
Dave Chinner	eeb1168	2014-09-29 09:45:03 +1000	[diff] [blame]	2271	xfs_warn(log->l_mp, "%s: Unmount LR", __func__);
Dave Chinner	e9131e50	2014-09-29 09:45:18 +1000	[diff] [blame]	2272	freeit = true;
Dave Chinner	eeb1168	2014-09-29 09:45:03 +1000	[diff] [blame]	2273	break;
				2274	case XLOG_START_TRANS:
Dave Chinner	eeb1168	2014-09-29 09:45:03 +1000	[diff] [blame]	2275	default:
				2276	xfs_warn(log->l_mp, "%s: bad flag 0x%x", __func__, flags);
				2277	ASSERT(0);
Darrick J. Wong	895e196	2019-11-06 09:17:43 -0800	[diff] [blame]	2278	error = -EFSCORRUPTED;
Dave Chinner	eeb1168	2014-09-29 09:45:03 +1000	[diff] [blame]	2279	break;
				2280	}
Dave Chinner	e9131e50	2014-09-29 09:45:18 +1000	[diff] [blame]	2281	if (error \|\| freeit)
				2282	xlog_recover_free_trans(trans);
Dave Chinner	eeb1168	2014-09-29 09:45:03 +1000	[diff] [blame]	2283	return error;
				2284	}
				2285
Dave Chinner	b818cca	2014-09-29 09:45:54 +1000	[diff] [blame]	2286	/*
				2287	* Lookup the transaction recovery structure associated with the ID in the
				2288	* current ophdr. If the transaction doesn't exist and the start flag is set in
				2289	* the ophdr, then allocate a new transaction for future ID matches to find.
				2290	* Either way, return what we found during the lookup - an existing transaction
				2291	* or nothing.
				2292	*/
Dave Chinner	eeb1168	2014-09-29 09:45:03 +1000	[diff] [blame]	2293	STATIC struct xlog_recover *
				2294	xlog_recover_ophdr_to_trans(
				2295	struct hlist_head rhash[],
				2296	struct xlog_rec_header *rhead,
				2297	struct xlog_op_header *ohead)
				2298	{
				2299	struct xlog_recover *trans;
				2300	xlog_tid_t tid;
				2301	struct hlist_head *rhp;
				2302
				2303	tid = be32_to_cpu(ohead->oh_tid);
				2304	rhp = &rhash[XLOG_RHASH(tid)];
Dave Chinner	b818cca	2014-09-29 09:45:54 +1000	[diff] [blame]	2305	hlist_for_each_entry(trans, rhp, r_list) {
				2306	if (trans->r_log_tid == tid)
				2307	return trans;
				2308	}
Dave Chinner	eeb1168	2014-09-29 09:45:03 +1000	[diff] [blame]	2309
				2310	/*
Dave Chinner	b818cca	2014-09-29 09:45:54 +1000	[diff] [blame]	2311	* skip over non-start transaction headers - we could be
				2312	* processing slack space before the next transaction starts
Dave Chinner	eeb1168	2014-09-29 09:45:03 +1000	[diff] [blame]	2313	*/
Dave Chinner	b818cca	2014-09-29 09:45:54 +1000	[diff] [blame]	2314	if (!(ohead->oh_flags & XLOG_START_TRANS))
				2315	return NULL;
				2316
				2317	ASSERT(be32_to_cpu(ohead->oh_len) == 0);
				2318
				2319	/*
				2320	* This is a new transaction so allocate a new recovery container to
				2321	* hold the recovery ops that will follow.
				2322	*/
Tetsuo Handa	707e0dd	2019-08-26 12:06:22 -0700	[diff] [blame]	2323	trans = kmem_zalloc(sizeof(struct xlog_recover), 0);
Dave Chinner	b818cca	2014-09-29 09:45:54 +1000	[diff] [blame]	2324	trans->r_log_tid = tid;
				2325	trans->r_lsn = be64_to_cpu(rhead->h_lsn);
				2326	INIT_LIST_HEAD(&trans->r_itemq);
				2327	INIT_HLIST_NODE(&trans->r_list);
				2328	hlist_add_head(&trans->r_list, rhp);
				2329
				2330	/*
				2331	* Nothing more to do for this ophdr. Items to be added to this new
				2332	* transaction will be in subsequent ophdr containers.
				2333	*/
Dave Chinner	eeb1168	2014-09-29 09:45:03 +1000	[diff] [blame]	2334	return NULL;
				2335	}
				2336
				2337	STATIC int
				2338	xlog_recover_process_ophdr(
				2339	struct xlog *log,
				2340	struct hlist_head rhash[],
				2341	struct xlog_rec_header *rhead,
				2342	struct xlog_op_header *ohead,
Christoph Hellwig	b2a922c	2015-06-22 09:45:10 +1000	[diff] [blame]	2343	char *dp,
				2344	char *end,
Brian Foster	12818d2	2016-09-26 08:22:16 +1000	[diff] [blame]	2345	int pass,
				2346	struct list_head *buffer_list)
Dave Chinner	eeb1168	2014-09-29 09:45:03 +1000	[diff] [blame]	2347	{
				2348	struct xlog_recover *trans;
Dave Chinner	eeb1168	2014-09-29 09:45:03 +1000	[diff] [blame]	2349	unsigned int len;
Brian Foster	12818d2	2016-09-26 08:22:16 +1000	[diff] [blame]	2350	int error;
Dave Chinner	eeb1168	2014-09-29 09:45:03 +1000	[diff] [blame]	2351
				2352	/* Do we understand who wrote this op? */
				2353	if (ohead->oh_clientid != XFS_TRANSACTION &&
				2354	ohead->oh_clientid != XFS_LOG) {
				2355	xfs_warn(log->l_mp, "%s: bad clientid 0x%x",
				2356	__func__, ohead->oh_clientid);
				2357	ASSERT(0);
Darrick J. Wong	895e196	2019-11-06 09:17:43 -0800	[diff] [blame]	2358	return -EFSCORRUPTED;
Dave Chinner	eeb1168	2014-09-29 09:45:03 +1000	[diff] [blame]	2359	}
				2360
				2361	/*
				2362	* Check the ophdr contains all the data it is supposed to contain.
				2363	*/
				2364	len = be32_to_cpu(ohead->oh_len);
				2365	if (dp + len > end) {
				2366	xfs_warn(log->l_mp, "%s: bad length 0x%x", __func__, len);
				2367	WARN_ON(1);
Darrick J. Wong	895e196	2019-11-06 09:17:43 -0800	[diff] [blame]	2368	return -EFSCORRUPTED;
Dave Chinner	eeb1168	2014-09-29 09:45:03 +1000	[diff] [blame]	2369	}
				2370
				2371	trans = xlog_recover_ophdr_to_trans(rhash, rhead, ohead);
				2372	if (!trans) {
				2373	/* nothing to do, so skip over this ophdr */
				2374	return 0;
				2375	}
				2376
Brian Foster	12818d2	2016-09-26 08:22:16 +1000	[diff] [blame]	2377	/*
				2378	* The recovered buffer queue is drained only once we know that all
				2379	* recovery items for the current LSN have been processed. This is
				2380	* required because:
				2381	*
				2382	* - Buffer write submission updates the metadata LSN of the buffer.
				2383	* - Log recovery skips items with a metadata LSN >= the current LSN of
				2384	* the recovery item.
				2385	* - Separate recovery items against the same metadata buffer can share
				2386	* a current LSN. I.e., consider that the LSN of a recovery item is
				2387	* defined as the starting LSN of the first record in which its
				2388	* transaction appears, that a record can hold multiple transactions,
				2389	* and/or that a transaction can span multiple records.
				2390	*
				2391	* In other words, we are allowed to submit a buffer from log recovery
				2392	* once per current LSN. Otherwise, we may incorrectly skip recovery
				2393	* items and cause corruption.
				2394	*
				2395	* We don't know up front whether buffers are updated multiple times per
				2396	* LSN. Therefore, track the current LSN of each commit log record as it
				2397	* is processed and drain the queue when it changes. Use commit records
				2398	* because they are ordered correctly by the logging code.
				2399	*/
				2400	if (log->l_recovery_lsn != trans->r_lsn &&
				2401	ohead->oh_flags & XLOG_COMMIT_TRANS) {
				2402	error = xfs_buf_delwri_submit(buffer_list);
				2403	if (error)
				2404	return error;
				2405	log->l_recovery_lsn = trans->r_lsn;
				2406	}
				2407
Dave Chinner	e9131e50	2014-09-29 09:45:18 +1000	[diff] [blame]	2408	return xlog_recovery_process_trans(log, trans, dp, len,
Brian Foster	12818d2	2016-09-26 08:22:16 +1000	[diff] [blame]	2409	ohead->oh_flags, pass, buffer_list);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2410	}
				2411
				2412	/*
				2413	* There are two valid states of the r_state field. 0 indicates that the
				2414	* transaction structure is in a normal state. We have either seen the
				2415	* start of the transaction or the last operation we added was not a partial
				2416	* operation. If the last operation we added to the transaction was a
				2417	* partial operation, we need to mark r_state with XLOG_WAS_CONT_TRANS.
				2418	*
				2419	* NOTE: skip LRs with 0 data length.
				2420	*/
				2421	STATIC int
				2422	xlog_recover_process_data(
Mark Tinguely	9a8d2fd	2012-06-14 09:22:16 -0500	[diff] [blame]	2423	struct xlog *log,
Dave Chinner	f0a7695	2010-01-11 11:49:57 +0000	[diff] [blame]	2424	struct hlist_head rhash[],
Mark Tinguely	9a8d2fd	2012-06-14 09:22:16 -0500	[diff] [blame]	2425	struct xlog_rec_header *rhead,
Christoph Hellwig	b2a922c	2015-06-22 09:45:10 +1000	[diff] [blame]	2426	char *dp,
Brian Foster	12818d2	2016-09-26 08:22:16 +1000	[diff] [blame]	2427	int pass,
				2428	struct list_head *buffer_list)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2429	{
Dave Chinner	eeb1168	2014-09-29 09:45:03 +1000	[diff] [blame]	2430	struct xlog_op_header *ohead;
Christoph Hellwig	b2a922c	2015-06-22 09:45:10 +1000	[diff] [blame]	2431	char *end;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2432	int num_logops;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2433	int error;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2434
Dave Chinner	eeb1168	2014-09-29 09:45:03 +1000	[diff] [blame]	2435	end = dp + be32_to_cpu(rhead->h_len);
Christoph Hellwig	b53e675	2007-10-12 10:59:34 +1000	[diff] [blame]	2436	num_logops = be32_to_cpu(rhead->h_num_logops);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2437
				2438	/* check the log format matches our own - else we can't recover */
				2439	if (xlog_header_check_recover(log->l_mp, rhead))
Dave Chinner	2451337	2014-06-25 14:58:08 +1000	[diff] [blame]	2440	return -EIO;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2441
Brian Foster	5cd9cee	2016-09-26 08:34:52 +1000	[diff] [blame]	2442	trace_xfs_log_recover_record(log, rhead, pass);
Dave Chinner	eeb1168	2014-09-29 09:45:03 +1000	[diff] [blame]	2443	while ((dp < end) && num_logops) {
				2444
				2445	ohead = (struct xlog_op_header *)dp;
				2446	dp += sizeof(*ohead);
				2447	ASSERT(dp <= end);
				2448
				2449	/* errors will abort recovery */
				2450	error = xlog_recover_process_ophdr(log, rhash, rhead, ohead,
Brian Foster	12818d2	2016-09-26 08:22:16 +1000	[diff] [blame]	2451	dp, end, pass, buffer_list);
Dave Chinner	eeb1168	2014-09-29 09:45:03 +1000	[diff] [blame]	2452	if (error)
				2453	return error;
				2454
Christoph Hellwig	67fcb7b	2007-10-12 10:58:59 +1000	[diff] [blame]	2455	dp += be32_to_cpu(ohead->oh_len);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2456	num_logops--;
				2457	}
				2458	return 0;
				2459	}
				2460
Darrick J. Wong	5099558	2017-11-21 20:53:02 -0800	[diff] [blame]	2461	/* Take all the collected deferred ops and finish them in order. */
				2462	static int
				2463	xlog_finish_defer_ops(
Darrick J. Wong	e6fff81	2020-09-25 17:39:37 -0700	[diff] [blame]	2464	struct xfs_mount *mp,
				2465	struct list_head *capture_list)
Darrick J. Wong	5099558	2017-11-21 20:53:02 -0800	[diff] [blame]	2466	{
Darrick J. Wong	e6fff81	2020-09-25 17:39:37 -0700	[diff] [blame]	2467	struct xfs_defer_capture dfc, next;
Darrick J. Wong	5099558	2017-11-21 20:53:02 -0800	[diff] [blame]	2468	struct xfs_trans *tp;
Darrick J. Wong	e6fff81	2020-09-25 17:39:37 -0700	[diff] [blame]	2469	int error = 0;
Darrick J. Wong	5099558	2017-11-21 20:53:02 -0800	[diff] [blame]	2470
Darrick J. Wong	e6fff81	2020-09-25 17:39:37 -0700	[diff] [blame]	2471	list_for_each_entry_safe(dfc, next, capture_list, dfc_list) {
Darrick J. Wong	929b92f	2020-09-25 17:39:50 -0700	[diff] [blame]	2472	struct xfs_trans_res resv;
Darrick J. Wong	512edfa	2021-09-16 17:28:07 -0700	[diff] [blame]	2473	struct xfs_defer_resources dres;
Darrick J. Wong	929b92f	2020-09-25 17:39:50 -0700	[diff] [blame]	2474
				2475	/*
				2476	* Create a new transaction reservation from the captured
				2477	* information. Set logcount to 1 to force the new transaction
				2478	* to regrant every roll so that we can make forward progress
				2479	* in recovery no matter how full the log might be.
				2480	*/
				2481	resv.tr_logres = dfc->dfc_logres;
				2482	resv.tr_logcount = 1;
				2483	resv.tr_logflags = XFS_TRANS_PERM_LOG_RES;
				2484
				2485	error = xfs_trans_alloc(mp, &resv, dfc->dfc_blkres,
				2486	dfc->dfc_rtxres, XFS_TRANS_RESERVE, &tp);
Darrick J. Wong	4e6b827	2021-06-18 11:57:07 -0700	[diff] [blame]	2487	if (error) {
				2488	xfs_force_shutdown(mp, SHUTDOWN_LOG_IO_ERROR);
Darrick J. Wong	e6fff81	2020-09-25 17:39:37 -0700	[diff] [blame]	2489	return error;
Darrick J. Wong	4e6b827	2021-06-18 11:57:07 -0700	[diff] [blame]	2490	}
Darrick J. Wong	e6fff81	2020-09-25 17:39:37 -0700	[diff] [blame]	2491
				2492	/*
				2493	* Transfer to this new transaction all the dfops we captured
				2494	* from recovering a single intent item.
				2495	*/
				2496	list_del_init(&dfc->dfc_list);
Darrick J. Wong	512edfa	2021-09-16 17:28:07 -0700	[diff] [blame]	2497	xfs_defer_ops_continue(dfc, tp, &dres);
Darrick J. Wong	e6fff81	2020-09-25 17:39:37 -0700	[diff] [blame]	2498	error = xfs_trans_commit(tp);
Darrick J. Wong	512edfa	2021-09-16 17:28:07 -0700	[diff] [blame]	2499	xfs_defer_resources_rele(&dres);
Darrick J. Wong	e6fff81	2020-09-25 17:39:37 -0700	[diff] [blame]	2500	if (error)
				2501	return error;
				2502	}
				2503
				2504	ASSERT(list_empty(capture_list));
				2505	return 0;
Darrick J. Wong	5099558	2017-11-21 20:53:02 -0800	[diff] [blame]	2506	}
				2507
Darrick J. Wong	e6fff81	2020-09-25 17:39:37 -0700	[diff] [blame]	2508	/* Release all the captured defer ops and capture structures in this list. */
				2509	static void
				2510	xlog_abort_defer_ops(
				2511	struct xfs_mount *mp,
				2512	struct list_head *capture_list)
				2513	{
				2514	struct xfs_defer_capture *dfc;
				2515	struct xfs_defer_capture *next;
				2516
				2517	list_for_each_entry_safe(dfc, next, capture_list, dfc_list) {
				2518	list_del_init(&dfc->dfc_list);
Darrick J. Wong	512edfa	2021-09-16 17:28:07 -0700	[diff] [blame]	2519	xfs_defer_ops_capture_free(mp, dfc);
Darrick J. Wong	e6fff81	2020-09-25 17:39:37 -0700	[diff] [blame]	2520	}
				2521	}
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2522	/*
Darrick J. Wong	dc42375	2016-08-03 11:23:49 +1000	[diff] [blame]	2523	* When this is called, all of the log intent items which did not have
				2524	* corresponding log done items should be in the AIL. What we do now
				2525	* is update the data structures associated with each one.
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2526	*
Darrick J. Wong	dc42375	2016-08-03 11:23:49 +1000	[diff] [blame]	2527	* Since we process the log intent items in normal transactions, they
				2528	* will be removed at some point after the commit. This prevents us
				2529	* from just walking down the list processing each one. We'll use a
				2530	* flag in the intent item to skip those that we've already processed
				2531	* and use the AIL iteration mechanism's generation count to try to
				2532	* speed this up at least a bit.
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2533	*
Darrick J. Wong	dc42375	2016-08-03 11:23:49 +1000	[diff] [blame]	2534	* When we start, we know that the intents are the only things in the
				2535	* AIL. As we process them, however, other items are added to the
				2536	* AIL.
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2537	*/
David Chinner	3c1e2bb	2008-04-10 12:21:11 +1000	[diff] [blame]	2538	STATIC int
Darrick J. Wong	dc42375	2016-08-03 11:23:49 +1000	[diff] [blame]	2539	xlog_recover_process_intents(
Brian Foster	f0b2efa	2015-08-19 09:58:36 +1000	[diff] [blame]	2540	struct xlog *log)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2541	{
Darrick J. Wong	e6fff81	2020-09-25 17:39:37 -0700	[diff] [blame]	2542	LIST_HEAD(capture_list);
David Chinner	27d8d5f	2008-10-30 17:38:39 +1100	[diff] [blame]	2543	struct xfs_ail_cursor cur;
Darrick J. Wong	5099558	2017-11-21 20:53:02 -0800	[diff] [blame]	2544	struct xfs_log_item *lip;
David Chinner	a9c21c1	2008-10-30 17:39:35 +1100	[diff] [blame]	2545	struct xfs_ail *ailp;
Darrick J. Wong	e6fff81	2020-09-25 17:39:37 -0700	[diff] [blame]	2546	int error = 0;
Darrick J. Wong	7bf7a19	2017-08-31 15:11:06 -0700	[diff] [blame]	2547	#if defined(DEBUG) \|\| defined(XFS_WARN)
Darrick J. Wong	dc42375	2016-08-03 11:23:49 +1000	[diff] [blame]	2548	xfs_lsn_t last_lsn;
Darrick J. Wong	7bf7a19	2017-08-31 15:11:06 -0700	[diff] [blame]	2549	#endif
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2550
David Chinner	a9c21c1	2008-10-30 17:39:35 +1100	[diff] [blame]	2551	ailp = log->l_ailp;
Matthew Wilcox	57e8095	2018-03-07 14:59:39 -0800	[diff] [blame]	2552	spin_lock(&ailp->ail_lock);
Darrick J. Wong	7bf7a19	2017-08-31 15:11:06 -0700	[diff] [blame]	2553	#if defined(DEBUG) \|\| defined(XFS_WARN)
Darrick J. Wong	dc42375	2016-08-03 11:23:49 +1000	[diff] [blame]	2554	last_lsn = xlog_assign_lsn(log->l_curr_cycle, log->l_curr_block);
Darrick J. Wong	7bf7a19	2017-08-31 15:11:06 -0700	[diff] [blame]	2555	#endif
Darrick J. Wong	e6fff81	2020-09-25 17:39:37 -0700	[diff] [blame]	2556	for (lip = xfs_trans_ail_cursor_first(ailp, &cur, 0);
				2557	lip != NULL;
				2558	lip = xfs_trans_ail_cursor_next(ailp, &cur)) {
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2559	/*
Darrick J. Wong	dc42375	2016-08-03 11:23:49 +1000	[diff] [blame]	2560	* We're done when we see something other than an intent.
				2561	* There should be no intents left in the AIL now.
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2562	*/
Darrick J. Wong	dc42375	2016-08-03 11:23:49 +1000	[diff] [blame]	2563	if (!xlog_item_is_intent(lip)) {
David Chinner	27d8d5f	2008-10-30 17:38:39 +1100	[diff] [blame]	2564	#ifdef DEBUG
David Chinner	a9c21c1	2008-10-30 17:39:35 +1100	[diff] [blame]	2565	for (; lip; lip = xfs_trans_ail_cursor_next(ailp, &cur))
Darrick J. Wong	dc42375	2016-08-03 11:23:49 +1000	[diff] [blame]	2566	ASSERT(!xlog_item_is_intent(lip));
David Chinner	27d8d5f	2008-10-30 17:38:39 +1100	[diff] [blame]	2567	#endif
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2568	break;
				2569	}
				2570
				2571	/*
Darrick J. Wong	dc42375	2016-08-03 11:23:49 +1000	[diff] [blame]	2572	* We should never see a redo item with a LSN higher than
				2573	* the last transaction we found in the log at the start
				2574	* of recovery.
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2575	*/
Darrick J. Wong	dc42375	2016-08-03 11:23:49 +1000	[diff] [blame]	2576	ASSERT(XFS_LSN_CMP(last_lsn, lip->li_lsn) >= 0);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2577
Darrick J. Wong	5099558	2017-11-21 20:53:02 -0800	[diff] [blame]	2578	/*
				2579	* NOTE: If your intent processing routine can create more
Darrick J. Wong	e6fff81	2020-09-25 17:39:37 -0700	[diff] [blame]	2580	* deferred ops, you /must/ attach them to the capture list in
				2581	* the recover routine or else those subsequent intents will be
Darrick J. Wong	5099558	2017-11-21 20:53:02 -0800	[diff] [blame]	2582	* replayed in the wrong order!
				2583	*/
Darrick J. Wong	901219b	2020-09-28 11:01:45 -0700	[diff] [blame]	2584	spin_unlock(&ailp->ail_lock);
Darrick J. Wong	e6fff81	2020-09-25 17:39:37 -0700	[diff] [blame]	2585	error = lip->li_ops->iop_recover(lip, &capture_list);
Darrick J. Wong	901219b	2020-09-28 11:01:45 -0700	[diff] [blame]	2586	spin_lock(&ailp->ail_lock);
Darrick J. Wong	6337032	2020-11-29 16:33:39 -0800	[diff] [blame]	2587	if (error) {
				2588	trace_xlog_intent_recovery_failed(log->l_mp, error,
				2589	lip->li_ops->iop_recover);
Darrick J. Wong	e6fff81	2020-09-25 17:39:37 -0700	[diff] [blame]	2590	break;
Darrick J. Wong	6337032	2020-11-29 16:33:39 -0800	[diff] [blame]	2591	}
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2592	}
Darrick J. Wong	e6fff81	2020-09-25 17:39:37 -0700	[diff] [blame]	2593
Eric Sandeen	e4a1e29	2014-04-14 19:06:05 +1000	[diff] [blame]	2594	xfs_trans_ail_cursor_done(&cur);
Matthew Wilcox	57e8095	2018-03-07 14:59:39 -0800	[diff] [blame]	2595	spin_unlock(&ailp->ail_lock);
Darrick J. Wong	e6fff81	2020-09-25 17:39:37 -0700	[diff] [blame]	2596	if (error)
				2597	goto err;
Darrick J. Wong	5099558	2017-11-21 20:53:02 -0800	[diff] [blame]	2598
Darrick J. Wong	e6fff81	2020-09-25 17:39:37 -0700	[diff] [blame]	2599	error = xlog_finish_defer_ops(log->l_mp, &capture_list);
				2600	if (error)
				2601	goto err;
				2602
				2603	return 0;
				2604	err:
				2605	xlog_abort_defer_ops(log->l_mp, &capture_list);
David Chinner	3c1e2bb	2008-04-10 12:21:11 +1000	[diff] [blame]	2606	return error;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2607	}
				2608
				2609	/*
Darrick J. Wong	dc42375	2016-08-03 11:23:49 +1000	[diff] [blame]	2610	* A cancel occurs when the mount has failed and we're bailing out.
				2611	* Release all pending log intent items so they don't pin the AIL.
Brian Foster	f0b2efa	2015-08-19 09:58:36 +1000	[diff] [blame]	2612	*/
Hariprasad Kelam	a7a9250	2019-07-03 07:34:18 -0700	[diff] [blame]	2613	STATIC void
Darrick J. Wong	dc42375	2016-08-03 11:23:49 +1000	[diff] [blame]	2614	xlog_recover_cancel_intents(
Brian Foster	f0b2efa	2015-08-19 09:58:36 +1000	[diff] [blame]	2615	struct xlog *log)
				2616	{
				2617	struct xfs_log_item *lip;
Brian Foster	f0b2efa	2015-08-19 09:58:36 +1000	[diff] [blame]	2618	struct xfs_ail_cursor cur;
				2619	struct xfs_ail *ailp;
				2620
				2621	ailp = log->l_ailp;
Matthew Wilcox	57e8095	2018-03-07 14:59:39 -0800	[diff] [blame]	2622	spin_lock(&ailp->ail_lock);
Brian Foster	f0b2efa	2015-08-19 09:58:36 +1000	[diff] [blame]	2623	lip = xfs_trans_ail_cursor_first(ailp, &cur, 0);
				2624	while (lip != NULL) {
				2625	/*
Darrick J. Wong	dc42375	2016-08-03 11:23:49 +1000	[diff] [blame]	2626	* We're done when we see something other than an intent.
				2627	* There should be no intents left in the AIL now.
Brian Foster	f0b2efa	2015-08-19 09:58:36 +1000	[diff] [blame]	2628	*/
Darrick J. Wong	dc42375	2016-08-03 11:23:49 +1000	[diff] [blame]	2629	if (!xlog_item_is_intent(lip)) {
Brian Foster	f0b2efa	2015-08-19 09:58:36 +1000	[diff] [blame]	2630	#ifdef DEBUG
				2631	for (; lip; lip = xfs_trans_ail_cursor_next(ailp, &cur))
Darrick J. Wong	dc42375	2016-08-03 11:23:49 +1000	[diff] [blame]	2632	ASSERT(!xlog_item_is_intent(lip));
Brian Foster	f0b2efa	2015-08-19 09:58:36 +1000	[diff] [blame]	2633	#endif
				2634	break;
				2635	}
				2636
Darrick J. Wong	9329ba8	2020-05-01 16:00:52 -0700	[diff] [blame]	2637	spin_unlock(&ailp->ail_lock);
				2638	lip->li_ops->iop_release(lip);
				2639	spin_lock(&ailp->ail_lock);
Brian Foster	f0b2efa	2015-08-19 09:58:36 +1000	[diff] [blame]	2640	lip = xfs_trans_ail_cursor_next(ailp, &cur);
				2641	}
				2642
				2643	xfs_trans_ail_cursor_done(&cur);
Matthew Wilcox	57e8095	2018-03-07 14:59:39 -0800	[diff] [blame]	2644	spin_unlock(&ailp->ail_lock);
Brian Foster	f0b2efa	2015-08-19 09:58:36 +1000	[diff] [blame]	2645	}
				2646
				2647	/*
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2648	* This routine performs a transaction to null out a bad inode pointer
				2649	* in an agi unlinked inode hash bucket.
				2650	*/
				2651	STATIC void
				2652	xlog_recover_clear_agi_bucket(
				2653	xfs_mount_t *mp,
				2654	xfs_agnumber_t agno,
				2655	int bucket)
				2656	{
				2657	xfs_trans_t *tp;
				2658	xfs_agi_t *agi;
Dave Chinner	e822261	2020-12-16 16:07:34 -0800	[diff] [blame]	2659	struct xfs_buf *agibp;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2660	int offset;
				2661	int error;
				2662
Christoph Hellwig	253f491	2016-04-06 09:19:55 +1000	[diff] [blame]	2663	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_clearagi, 0, 0, 0, &tp);
David Chinner	e5720ee	2008-04-10 12:21:18 +1000	[diff] [blame]	2664	if (error)
Christoph Hellwig	253f491	2016-04-06 09:19:55 +1000	[diff] [blame]	2665	goto out_error;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2666
Christoph Hellwig	5e1be0f	2008-11-28 14:23:37 +1100	[diff] [blame]	2667	error = xfs_read_agi(mp, tp, agno, &agibp);
				2668	if (error)
David Chinner	e5720ee	2008-04-10 12:21:18 +1000	[diff] [blame]	2669	goto out_abort;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2670
Christoph Hellwig	370c782	2020-03-10 08:57:29 -0700	[diff] [blame]	2671	agi = agibp->b_addr;
Christoph Hellwig	16259e7	2005-11-02 15:11:25 +1100	[diff] [blame]	2672	agi->agi_unlinked[bucket] = cpu_to_be32(NULLAGINO);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2673	offset = offsetof(xfs_agi_t, agi_unlinked) +
				2674	(sizeof(xfs_agino_t) * bucket);
				2675	xfs_trans_log_buf(tp, agibp, offset,
				2676	(offset + sizeof(xfs_agino_t) - 1));
				2677
Christoph Hellwig	7039331	2015-06-04 13:48:08 +1000	[diff] [blame]	2678	error = xfs_trans_commit(tp);
David Chinner	e5720ee	2008-04-10 12:21:18 +1000	[diff] [blame]	2679	if (error)
				2680	goto out_error;
				2681	return;
				2682
				2683	out_abort:
Christoph Hellwig	4906e21	2015-06-04 13:47:56 +1000	[diff] [blame]	2684	xfs_trans_cancel(tp);
David Chinner	e5720ee	2008-04-10 12:21:18 +1000	[diff] [blame]	2685	out_error:
Dave Chinner	a0fa2b6	2011-03-07 10:01:35 +1100	[diff] [blame]	2686	xfs_warn(mp, "%s: failed to clear agi %d. Continuing.", __func__, agno);
David Chinner	e5720ee	2008-04-10 12:21:18 +1000	[diff] [blame]	2687	return;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2688	}
				2689
Christoph Hellwig	23fac50	2008-11-28 14:23:40 +1100	[diff] [blame]	2690	STATIC xfs_agino_t
				2691	xlog_recover_process_one_iunlink(
				2692	struct xfs_mount *mp,
				2693	xfs_agnumber_t agno,
				2694	xfs_agino_t agino,
				2695	int bucket)
				2696	{
				2697	struct xfs_buf *ibp;
				2698	struct xfs_dinode *dip;
				2699	struct xfs_inode *ip;
				2700	xfs_ino_t ino;
				2701	int error;
				2702
				2703	ino = XFS_AGINO_TO_INO(mp, agno, agino);
Dave Chinner	7b6259e	2010-06-24 11:35:17 +1000	[diff] [blame]	2704	error = xfs_iget(mp, NULL, ino, 0, 0, &ip);
Christoph Hellwig	23fac50	2008-11-28 14:23:40 +1100	[diff] [blame]	2705	if (error)
				2706	goto fail;
				2707
				2708	/*
				2709	* Get the on disk inode to find the next inode in the bucket.
				2710	*/
Christoph Hellwig	af9dcdd	2021-03-29 11:11:37 -0700	[diff] [blame]	2711	error = xfs_imap_to_bp(mp, NULL, &ip->i_imap, &ibp);
Christoph Hellwig	23fac50	2008-11-28 14:23:40 +1100	[diff] [blame]	2712	if (error)
Christoph Hellwig	0e44667	2008-11-28 14:23:42 +1100	[diff] [blame]	2713	goto fail_iput;
Christoph Hellwig	af9dcdd	2021-03-29 11:11:37 -0700	[diff] [blame]	2714	dip = xfs_buf_offset(ibp, ip->i_imap.im_boffset);
Christoph Hellwig	23fac50	2008-11-28 14:23:40 +1100	[diff] [blame]	2715
Darrick J. Wong	17c12bc	2016-10-03 09:11:29 -0700	[diff] [blame]	2716	xfs_iflags_clear(ip, XFS_IRECOVERY);
Dave Chinner	54d7b5c	2016-02-09 16:54:58 +1100	[diff] [blame]	2717	ASSERT(VFS_I(ip)->i_nlink == 0);
Dave Chinner	c19b3b05	2016-02-09 16:54:58 +1100	[diff] [blame]	2718	ASSERT(VFS_I(ip)->i_mode != 0);
Christoph Hellwig	23fac50	2008-11-28 14:23:40 +1100	[diff] [blame]	2719
				2720	/* setup for the next pass */
				2721	agino = be32_to_cpu(dip->di_next_unlinked);
				2722	xfs_buf_relse(ibp);
				2723
Darrick J. Wong	44a8736	2018-07-25 12:52:32 -0700	[diff] [blame]	2724	xfs_irele(ip);
Christoph Hellwig	23fac50	2008-11-28 14:23:40 +1100	[diff] [blame]	2725	return agino;
				2726
Christoph Hellwig	0e44667	2008-11-28 14:23:42 +1100	[diff] [blame]	2727	fail_iput:
Darrick J. Wong	44a8736	2018-07-25 12:52:32 -0700	[diff] [blame]	2728	xfs_irele(ip);
Christoph Hellwig	23fac50	2008-11-28 14:23:40 +1100	[diff] [blame]	2729	fail:
				2730	/*
				2731	* We can't read in the inode this bucket points to, or this inode
				2732	* is messed up. Just ditch this bucket of inodes. We will lose
				2733	* some inodes and space, but at least we won't hang.
				2734	*
				2735	* Call xlog_recover_clear_agi_bucket() to perform a transaction to
				2736	* clear the inode pointer in the bucket.
				2737	*/
				2738	xlog_recover_clear_agi_bucket(mp, agno, bucket);
				2739	return NULLAGINO;
				2740	}
				2741
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2742	/*
Dave Chinner	8ab39f1	2019-09-05 21:35:39 -0700	[diff] [blame]	2743	* Recover AGI unlinked lists
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2744	*
Dave Chinner	8ab39f1	2019-09-05 21:35:39 -0700	[diff] [blame]	2745	* This is called during recovery to process any inodes which we unlinked but
				2746	* not freed when the system crashed. These inodes will be on the lists in the
				2747	* AGI blocks. What we do here is scan all the AGIs and fully truncate and free
				2748	* any inodes found on the lists. Each inode is removed from the lists when it
				2749	* has been fully truncated and is freed. The freeing of the inode and its
				2750	* removal from the list must be atomic.
				2751	*
				2752	* If everything we touch in the agi processing loop is already in memory, this
				2753	* loop can hold the cpu for a long time. It runs without lock contention,
				2754	* memory allocation contention, the need wait for IO, etc, and so will run
				2755	* until we either run out of inodes to process, run low on memory or we run out
				2756	* of log space.
				2757	*
				2758	* This behaviour is bad for latency on single CPU and non-preemptible kernels,
Bhaskar Chowdhury	bd24a4f	2021-03-23 16:59:30 -0700	[diff] [blame]	2759	* and can prevent other filesystem work (such as CIL pushes) from running. This
Dave Chinner	8ab39f1	2019-09-05 21:35:39 -0700	[diff] [blame]	2760	* can lead to deadlocks if the recovery process runs out of log reservation
				2761	* space. Hence we need to yield the CPU when there is other kernel work
				2762	* scheduled on this CPU to ensure other scheduled work can run without undue
				2763	* latency.
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2764	*/
Eric Sandeen	d96f8f8	2009-07-02 00:09:33 -0500	[diff] [blame]	2765	STATIC void
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2766	xlog_recover_process_iunlinks(
Mark Tinguely	9a8d2fd	2012-06-14 09:22:16 -0500	[diff] [blame]	2767	struct xlog *log)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2768	{
Dave Chinner	934933c	2021-06-02 10:48:24 +1000	[diff] [blame]	2769	struct xfs_mount *mp = log->l_mp;
				2770	struct xfs_perag *pag;
				2771	xfs_agnumber_t agno;
				2772	struct xfs_agi *agi;
				2773	struct xfs_buf *agibp;
				2774	xfs_agino_t agino;
				2775	int bucket;
				2776	int error;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2777
Dave Chinner	934933c	2021-06-02 10:48:24 +1000	[diff] [blame]	2778	for_each_perag(mp, agno, pag) {
				2779	error = xfs_read_agi(mp, NULL, pag->pag_agno, &agibp);
Christoph Hellwig	5e1be0f	2008-11-28 14:23:37 +1100	[diff] [blame]	2780	if (error) {
				2781	/*
				2782	* AGI is b0rked. Don't process it.
				2783	*
				2784	* We should probably mark the filesystem as corrupt
				2785	* after we've recovered all the ag's we can....
				2786	*/
				2787	continue;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2788	}
Jan Kara	d97d32e	2012-03-15 09:34:02 +0000	[diff] [blame]	2789	/*
				2790	* Unlock the buffer so that it can be acquired in the normal
				2791	* course of the transaction to truncate and free each inode.
				2792	* Because we are not racing with anyone else here for the AGI
				2793	* buffer, we don't even need to hold it locked to read the
				2794	* initial unlinked bucket entries out of the buffer. We keep
				2795	* buffer reference though, so that it stays pinned in memory
				2796	* while we need the buffer.
				2797	*/
Christoph Hellwig	370c782	2020-03-10 08:57:29 -0700	[diff] [blame]	2798	agi = agibp->b_addr;
Jan Kara	d97d32e	2012-03-15 09:34:02 +0000	[diff] [blame]	2799	xfs_buf_unlock(agibp);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2800
				2801	for (bucket = 0; bucket < XFS_AGI_UNLINKED_BUCKETS; bucket++) {
Christoph Hellwig	16259e7	2005-11-02 15:11:25 +1100	[diff] [blame]	2802	agino = be32_to_cpu(agi->agi_unlinked[bucket]);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2803	while (agino != NULLAGINO) {
Christoph Hellwig	23fac50	2008-11-28 14:23:40 +1100	[diff] [blame]	2804	agino = xlog_recover_process_one_iunlink(mp,
Dave Chinner	934933c	2021-06-02 10:48:24 +1000	[diff] [blame]	2805	pag->pag_agno, agino, bucket);
Dave Chinner	8ab39f1	2019-09-05 21:35:39 -0700	[diff] [blame]	2806	cond_resched();
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2807	}
				2808	}
Jan Kara	d97d32e	2012-03-15 09:34:02 +0000	[diff] [blame]	2809	xfs_buf_rele(agibp);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2810	}
Dave Chinner	ab23a77	2021-08-06 11:05:39 -0700	[diff] [blame]	2811
				2812	/*
				2813	* Flush the pending unlinked inodes to ensure that the inactivations
				2814	* are fully completed on disk and the incore inodes can be reclaimed
				2815	* before we signal that recovery is complete.
				2816	*/
				2817	xfs_inodegc_flush(mp);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2818	}
				2819
Eric Sandeen	9108326	2019-05-01 20:26:30 -0700	[diff] [blame]	2820	STATIC void
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2821	xlog_unpack_data(
Mark Tinguely	9a8d2fd	2012-06-14 09:22:16 -0500	[diff] [blame]	2822	struct xlog_rec_header *rhead,
Christoph Hellwig	b2a922c	2015-06-22 09:45:10 +1000	[diff] [blame]	2823	char *dp,
Mark Tinguely	9a8d2fd	2012-06-14 09:22:16 -0500	[diff] [blame]	2824	struct xlog *log)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2825	{
				2826	int i, j, k;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2827
Christoph Hellwig	b53e675	2007-10-12 10:59:34 +1000	[diff] [blame]	2828	for (i = 0; i < BTOBB(be32_to_cpu(rhead->h_len)) &&
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2829	i < (XLOG_HEADER_CYCLE_SIZE / BBSIZE); i++) {
Christoph Hellwig	b53e675	2007-10-12 10:59:34 +1000	[diff] [blame]	2830	(__be32 )dp = (__be32 )&rhead->h_cycle_data[i];
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2831	dp += BBSIZE;
				2832	}
				2833
Dave Chinner	38c26bf	2021-08-18 18:46:37 -0700	[diff] [blame]	2834	if (xfs_has_logv2(log->l_mp)) {
Christoph Hellwig	b28708d	2008-11-28 14:23:38 +1100	[diff] [blame]	2835	xlog_in_core_2_t xhdr = (xlog_in_core_2_t )rhead;
Christoph Hellwig	b53e675	2007-10-12 10:59:34 +1000	[diff] [blame]	2836	for ( ; i < BTOBB(be32_to_cpu(rhead->h_len)); i++) {
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2837	j = i / (XLOG_HEADER_CYCLE_SIZE / BBSIZE);
				2838	k = i % (XLOG_HEADER_CYCLE_SIZE / BBSIZE);
Christoph Hellwig	b53e675	2007-10-12 10:59:34 +1000	[diff] [blame]	2839	(__be32 )dp = xhdr[j].hic_xheader.xh_cycle_data[k];
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2840	dp += BBSIZE;
				2841	}
				2842	}
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2843	}
				2844
Brian Foster	9d94901	2016-01-04 15:55:10 +1100	[diff] [blame]	2845	/*
Brian Foster	b94fb2d	2016-01-04 15:55:10 +1100	[diff] [blame]	2846	* CRC check, unpack and process a log record.
Brian Foster	9d94901	2016-01-04 15:55:10 +1100	[diff] [blame]	2847	*/
				2848	STATIC int
				2849	xlog_recover_process(
				2850	struct xlog *log,
				2851	struct hlist_head rhash[],
				2852	struct xlog_rec_header *rhead,
				2853	char *dp,
Brian Foster	12818d2	2016-09-26 08:22:16 +1000	[diff] [blame]	2854	int pass,
				2855	struct list_head *buffer_list)
Brian Foster	9d94901	2016-01-04 15:55:10 +1100	[diff] [blame]	2856	{
Dave Chinner	cae028d	2016-12-05 14:40:32 +1100	[diff] [blame]	2857	__le32 old_crc = rhead->h_crc;
Brian Foster	b94fb2d	2016-01-04 15:55:10 +1100	[diff] [blame]	2858	__le32 crc;
				2859
Brian Foster	b94fb2d	2016-01-04 15:55:10 +1100	[diff] [blame]	2860	crc = xlog_cksum(log, rhead, dp, be32_to_cpu(rhead->h_len));
Brian Foster	6528250	2016-01-04 15:55:10 +1100	[diff] [blame]	2861
				2862	/*
				2863	* Nothing else to do if this is a CRC verification pass. Just return
				2864	* if this a record with a non-zero crc. Unfortunately, mkfs always
Dave Chinner	cae028d	2016-12-05 14:40:32 +1100	[diff] [blame]	2865	* sets old_crc to 0 so we must consider this valid even on v5 supers.
Brian Foster	6528250	2016-01-04 15:55:10 +1100	[diff] [blame]	2866	* Otherwise, return EFSBADCRC on failure so the callers up the stack
				2867	* know precisely what failed.
				2868	*/
				2869	if (pass == XLOG_RECOVER_CRCPASS) {
Dave Chinner	cae028d	2016-12-05 14:40:32 +1100	[diff] [blame]	2870	if (old_crc && crc != old_crc)
Brian Foster	6528250	2016-01-04 15:55:10 +1100	[diff] [blame]	2871	return -EFSBADCRC;
				2872	return 0;
				2873	}
				2874
				2875	/*
				2876	* We're in the normal recovery path. Issue a warning if and only if the
				2877	* CRC in the header is non-zero. This is an advisory warning and the
				2878	* zero CRC check prevents warnings from being emitted when upgrading
				2879	* the kernel from one that does not add CRCs by default.
				2880	*/
Dave Chinner	cae028d	2016-12-05 14:40:32 +1100	[diff] [blame]	2881	if (crc != old_crc) {
Dave Chinner	38c26bf	2021-08-18 18:46:37 -0700	[diff] [blame]	2882	if (old_crc \|\| xfs_has_crc(log->l_mp)) {
Brian Foster	b94fb2d	2016-01-04 15:55:10 +1100	[diff] [blame]	2883	xfs_alert(log->l_mp,
				2884	"log record CRC mismatch: found 0x%x, expected 0x%x.",
Dave Chinner	cae028d	2016-12-05 14:40:32 +1100	[diff] [blame]	2885	le32_to_cpu(old_crc),
Brian Foster	b94fb2d	2016-01-04 15:55:10 +1100	[diff] [blame]	2886	le32_to_cpu(crc));
				2887	xfs_hex_dump(dp, 32);
				2888	}
				2889
				2890	/*
				2891	* If the filesystem is CRC enabled, this mismatch becomes a
				2892	* fatal log corruption failure.
				2893	*/
Dave Chinner	38c26bf	2021-08-18 18:46:37 -0700	[diff] [blame]	2894	if (xfs_has_crc(log->l_mp)) {
Darrick J. Wong	a5155b8	2019-11-02 09:40:53 -0700	[diff] [blame]	2895	XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, log->l_mp);
Brian Foster	b94fb2d	2016-01-04 15:55:10 +1100	[diff] [blame]	2896	return -EFSCORRUPTED;
Darrick J. Wong	a5155b8	2019-11-02 09:40:53 -0700	[diff] [blame]	2897	}
Brian Foster	b94fb2d	2016-01-04 15:55:10 +1100	[diff] [blame]	2898	}
Brian Foster	9d94901	2016-01-04 15:55:10 +1100	[diff] [blame]	2899
Eric Sandeen	9108326	2019-05-01 20:26:30 -0700	[diff] [blame]	2900	xlog_unpack_data(rhead, dp, log);
Brian Foster	9d94901	2016-01-04 15:55:10 +1100	[diff] [blame]	2901
Brian Foster	12818d2	2016-09-26 08:22:16 +1000	[diff] [blame]	2902	return xlog_recover_process_data(log, rhash, rhead, dp, pass,
				2903	buffer_list);
Brian Foster	9d94901	2016-01-04 15:55:10 +1100	[diff] [blame]	2904	}
				2905
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2906	STATIC int
				2907	xlog_valid_rec_header(
Mark Tinguely	9a8d2fd	2012-06-14 09:22:16 -0500	[diff] [blame]	2908	struct xlog *log,
				2909	struct xlog_rec_header *rhead,
Gao Xiang	f692d09	2020-09-22 09:41:06 -0700	[diff] [blame]	2910	xfs_daddr_t blkno,
				2911	int bufsize)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2912	{
				2913	int hlen;
				2914
Darrick J. Wong	a71895c	2019-11-11 12:53:22 -0800	[diff] [blame]	2915	if (XFS_IS_CORRUPT(log->l_mp,
				2916	rhead->h_magicno != cpu_to_be32(XLOG_HEADER_MAGIC_NUM)))
Dave Chinner	2451337	2014-06-25 14:58:08 +1000	[diff] [blame]	2917	return -EFSCORRUPTED;
Darrick J. Wong	a71895c	2019-11-11 12:53:22 -0800	[diff] [blame]	2918	if (XFS_IS_CORRUPT(log->l_mp,
				2919	(!rhead->h_version \|\|
				2920	(be32_to_cpu(rhead->h_version) &
				2921	(~XLOG_VERSION_OKBITS))))) {
Dave Chinner	a0fa2b6	2011-03-07 10:01:35 +1100	[diff] [blame]	2922	xfs_warn(log->l_mp, "%s: unrecognised log version (%d).",
Harvey Harrison	34a622b	2008-04-10 12:19:21 +1000	[diff] [blame]	2923	__func__, be32_to_cpu(rhead->h_version));
Darrick J. Wong	895e196	2019-11-06 09:17:43 -0800	[diff] [blame]	2924	return -EFSCORRUPTED;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2925	}
				2926
Gao Xiang	f692d09	2020-09-22 09:41:06 -0700	[diff] [blame]	2927	/*
				2928	* LR body must have data (or it wouldn't have been written)
				2929	* and h_len must not be greater than LR buffer size.
				2930	*/
Christoph Hellwig	b53e675	2007-10-12 10:59:34 +1000	[diff] [blame]	2931	hlen = be32_to_cpu(rhead->h_len);
Gao Xiang	f692d09	2020-09-22 09:41:06 -0700	[diff] [blame]	2932	if (XFS_IS_CORRUPT(log->l_mp, hlen <= 0 \|\| hlen > bufsize))
Dave Chinner	2451337	2014-06-25 14:58:08 +1000	[diff] [blame]	2933	return -EFSCORRUPTED;
Gao Xiang	f692d09	2020-09-22 09:41:06 -0700	[diff] [blame]	2934
Darrick J. Wong	a71895c	2019-11-11 12:53:22 -0800	[diff] [blame]	2935	if (XFS_IS_CORRUPT(log->l_mp,
				2936	blkno > log->l_logBBsize \|\| blkno > INT_MAX))
Dave Chinner	2451337	2014-06-25 14:58:08 +1000	[diff] [blame]	2937	return -EFSCORRUPTED;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2938	return 0;
				2939	}
				2940
				2941	/*
				2942	* Read the log from tail to head and process the log records found.
				2943	* Handle the two cases where the tail and head are in the same cycle
				2944	* and where the active portion of the log wraps around the end of
				2945	* the physical log separately. The pass parameter is passed through
				2946	* to the routines called to process the data and is not looked at
				2947	* here.
				2948	*/
				2949	STATIC int
				2950	xlog_do_recovery_pass(
Mark Tinguely	9a8d2fd	2012-06-14 09:22:16 -0500	[diff] [blame]	2951	struct xlog *log,
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2952	xfs_daddr_t head_blk,
				2953	xfs_daddr_t tail_blk,
Brian Foster	d7f3769	2016-01-04 15:55:10 +1100	[diff] [blame]	2954	int pass,
				2955	xfs_daddr_t first_bad) / out: first bad log rec */
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2956	{
				2957	xlog_rec_header_t *rhead;
Brian Foster	284f1c2	2017-08-08 18:21:51 -0700	[diff] [blame]	2958	xfs_daddr_t blk_no, rblk_no;
Brian Foster	d7f3769	2016-01-04 15:55:10 +1100	[diff] [blame]	2959	xfs_daddr_t rhead_blk;
Christoph Hellwig	b2a922c	2015-06-22 09:45:10 +1000	[diff] [blame]	2960	char *offset;
Christoph Hellwig	6ad5b32	2019-06-28 19:27:26 -0700	[diff] [blame]	2961	char hbp, dbp;
Brian Foster	a70f9fe	2016-01-04 15:55:10 +1100	[diff] [blame]	2962	int error = 0, h_size, h_len;
Brian Foster	12818d2	2016-09-26 08:22:16 +1000	[diff] [blame]	2963	int error2 = 0;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2964	int bblks, split_bblks;
				2965	int hblks, split_hblks, wrapped_hblks;
Brian Foster	3977543	2017-06-24 10:11:41 -0700	[diff] [blame]	2966	int i;
Dave Chinner	f0a7695	2010-01-11 11:49:57 +0000	[diff] [blame]	2967	struct hlist_head rhash[XLOG_RHASH_SIZE];
Brian Foster	12818d2	2016-09-26 08:22:16 +1000	[diff] [blame]	2968	LIST_HEAD (buffer_list);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2969
				2970	ASSERT(head_blk != tail_blk);
Brian Foster	a4c9b34	2017-08-08 18:21:53 -0700	[diff] [blame]	2971	blk_no = rhead_blk = tail_blk;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2972
Brian Foster	3977543	2017-06-24 10:11:41 -0700	[diff] [blame]	2973	for (i = 0; i < XLOG_RHASH_SIZE; i++)
				2974	INIT_HLIST_HEAD(&rhash[i]);
				2975
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2976	/*
				2977	* Read the header of the tail block and get the iclog buffer size from
				2978	* h_size. Use this to tell how many sectors make up the log header.
				2979	*/
Dave Chinner	38c26bf	2021-08-18 18:46:37 -0700	[diff] [blame]	2980	if (xfs_has_logv2(log->l_mp)) {
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2981	/*
				2982	* When using variable length iclogs, read first sector of
				2983	* iclog header and extract the header size from it. Get a
				2984	* new hbp that is the correct size.
				2985	*/
Christoph Hellwig	6e9b3dd	2019-06-28 19:27:27 -0700	[diff] [blame]	2986	hbp = xlog_alloc_buffer(log, 1);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2987	if (!hbp)
Dave Chinner	2451337	2014-06-25 14:58:08 +1000	[diff] [blame]	2988	return -ENOMEM;
Christoph Hellwig	076e6ac	2009-03-16 08:24:13 +0100	[diff] [blame]	2989
				2990	error = xlog_bread(log, tail_blk, 1, hbp, &offset);
				2991	if (error)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2992	goto bread_err1;
Christoph Hellwig	076e6ac	2009-03-16 08:24:13 +0100	[diff] [blame]	2993
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2994	rhead = (xlog_rec_header_t *)offset;
Brian Foster	a70f9fe	2016-01-04 15:55:10 +1100	[diff] [blame]	2995
				2996	/*
				2997	* xfsprogs has a bug where record length is based on lsunit but
				2998	* h_size (iclog size) is hardcoded to 32k. Now that we
				2999	* unconditionally CRC verify the unmount record, this means the
				3000	* log buffer can be too small for the record and cause an
				3001	* overrun.
				3002	*
				3003	* Detect this condition here. Use lsunit for the buffer size as
				3004	* long as this looks like the mkfs case. Otherwise, return an
				3005	* error to avoid a buffer overrun.
				3006	*/
Christoph Hellwig	b53e675	2007-10-12 10:59:34 +1000	[diff] [blame]	3007	h_size = be32_to_cpu(rhead->h_size);
Brian Foster	a70f9fe	2016-01-04 15:55:10 +1100	[diff] [blame]	3008	h_len = be32_to_cpu(rhead->h_len);
Gao Xiang	f692d09	2020-09-22 09:41:06 -0700	[diff] [blame]	3009	if (h_len > h_size && h_len <= log->l_mp->m_logbsize &&
				3010	rhead->h_num_logops == cpu_to_be32(1)) {
				3011	xfs_warn(log->l_mp,
Brian Foster	a70f9fe	2016-01-04 15:55:10 +1100	[diff] [blame]	3012	"invalid iclog size (%d bytes), using lsunit (%d bytes)",
Gao Xiang	f692d09	2020-09-22 09:41:06 -0700	[diff] [blame]	3013	h_size, log->l_mp->m_logbsize);
				3014	h_size = log->l_mp->m_logbsize;
Brian Foster	a70f9fe	2016-01-04 15:55:10 +1100	[diff] [blame]	3015	}
				3016
Gao Xiang	f692d09	2020-09-22 09:41:06 -0700	[diff] [blame]	3017	error = xlog_valid_rec_header(log, rhead, tail_blk, h_size);
				3018	if (error)
				3019	goto bread_err1;
				3020
Gao Xiang	0c771b9	2020-09-22 09:41:06 -0700	[diff] [blame]	3021	hblks = xlog_logrec_hblks(log, rhead);
				3022	if (hblks != 1) {
Christoph Hellwig	6ad5b32	2019-06-28 19:27:26 -0700	[diff] [blame]	3023	kmem_free(hbp);
Christoph Hellwig	6e9b3dd	2019-06-28 19:27:27 -0700	[diff] [blame]	3024	hbp = xlog_alloc_buffer(log, hblks);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	3025	}
				3026	} else {
Alex Elder	69ce58f	2010-04-20 17:09:59 +1000	[diff] [blame]	3027	ASSERT(log->l_sectBBsize == 1);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	3028	hblks = 1;
Christoph Hellwig	6e9b3dd	2019-06-28 19:27:27 -0700	[diff] [blame]	3029	hbp = xlog_alloc_buffer(log, 1);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	3030	h_size = XLOG_BIG_RECORD_BSIZE;
				3031	}
				3032
				3033	if (!hbp)
Dave Chinner	2451337	2014-06-25 14:58:08 +1000	[diff] [blame]	3034	return -ENOMEM;
Christoph Hellwig	6e9b3dd	2019-06-28 19:27:27 -0700	[diff] [blame]	3035	dbp = xlog_alloc_buffer(log, BTOBB(h_size));
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	3036	if (!dbp) {
Christoph Hellwig	6ad5b32	2019-06-28 19:27:26 -0700	[diff] [blame]	3037	kmem_free(hbp);
Dave Chinner	2451337	2014-06-25 14:58:08 +1000	[diff] [blame]	3038	return -ENOMEM;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	3039	}
				3040
				3041	memset(rhash, 0, sizeof(rhash));
Eric Sandeen	970fd3f	2014-09-09 11:57:29 +1000	[diff] [blame]	3042	if (tail_blk > head_blk) {
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	3043	/*
				3044	* Perform recovery around the end of the physical log.
				3045	* When the head is not on the same cycle number as the tail,
Eric Sandeen	970fd3f	2014-09-09 11:57:29 +1000	[diff] [blame]	3046	* we can't do a sequential recovery.
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	3047	*/
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	3048	while (blk_no < log->l_logBBsize) {
				3049	/*
				3050	* Check for header wrapping around physical end-of-log
				3051	*/
Christoph Hellwig	6ad5b32	2019-06-28 19:27:26 -0700	[diff] [blame]	3052	offset = hbp;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	3053	split_hblks = 0;
				3054	wrapped_hblks = 0;
				3055	if (blk_no + hblks <= log->l_logBBsize) {
				3056	/* Read header in one read */
Christoph Hellwig	076e6ac	2009-03-16 08:24:13 +0100	[diff] [blame]	3057	error = xlog_bread(log, blk_no, hblks, hbp,
				3058	&offset);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	3059	if (error)
				3060	goto bread_err2;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	3061	} else {
				3062	/* This LR is split across physical log end */
				3063	if (blk_no != log->l_logBBsize) {
				3064	/* some data before physical log end */
				3065	ASSERT(blk_no <= INT_MAX);
				3066	split_hblks = log->l_logBBsize - (int)blk_no;
				3067	ASSERT(split_hblks > 0);
Christoph Hellwig	076e6ac	2009-03-16 08:24:13 +0100	[diff] [blame]	3068	error = xlog_bread(log, blk_no,
				3069	split_hblks, hbp,
				3070	&offset);
				3071	if (error)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	3072	goto bread_err2;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	3073	}
Christoph Hellwig	076e6ac	2009-03-16 08:24:13 +0100	[diff] [blame]	3074
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	3075	/*
				3076	* Note: this black magic still works with
				3077	* large sector sizes (non-512) only because:
				3078	* - we increased the buffer size originally
				3079	* by 1 sector giving us enough extra space
				3080	* for the second read;
				3081	* - the log start is guaranteed to be sector
				3082	* aligned;
				3083	* - we read the log end (LR header start)
				3084	* _first_, then the log start (LR header end)
				3085	* - order is important.
				3086	*/
David Chinner	234f56a	2008-04-10 12:24:24 +1000	[diff] [blame]	3087	wrapped_hblks = hblks - split_hblks;
Christoph Hellwig	6ad5b32	2019-06-28 19:27:26 -0700	[diff] [blame]	3088	error = xlog_bread_noalign(log, 0,
				3089	wrapped_hblks,
Dave Chinner	4439647	2011-04-21 09:34:27 +0000	[diff] [blame]	3090	offset + BBTOB(split_hblks));
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	3091	if (error)
				3092	goto bread_err2;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	3093	}
				3094	rhead = (xlog_rec_header_t *)offset;
				3095	error = xlog_valid_rec_header(log, rhead,
Gao Xiang	f692d09	2020-09-22 09:41:06 -0700	[diff] [blame]	3096	split_hblks ? blk_no : 0, h_size);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	3097	if (error)
				3098	goto bread_err2;
				3099
Christoph Hellwig	b53e675	2007-10-12 10:59:34 +1000	[diff] [blame]	3100	bblks = (int)BTOBB(be32_to_cpu(rhead->h_len));
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	3101	blk_no += hblks;
				3102
Brian Foster	284f1c2	2017-08-08 18:21:51 -0700	[diff] [blame]	3103	/*
				3104	* Read the log record data in multiple reads if it
				3105	* wraps around the end of the log. Note that if the
				3106	* header already wrapped, blk_no could point past the
				3107	* end of the log. The record data is contiguous in
				3108	* that case.
				3109	*/
				3110	if (blk_no + bblks <= log->l_logBBsize \|\|
				3111	blk_no >= log->l_logBBsize) {
Dave Chinner	0703a8e	2018-06-08 09:54:22 -0700	[diff] [blame]	3112	rblk_no = xlog_wrap_logbno(log, blk_no);
Brian Foster	284f1c2	2017-08-08 18:21:51 -0700	[diff] [blame]	3113	error = xlog_bread(log, rblk_no, bblks, dbp,
Christoph Hellwig	076e6ac	2009-03-16 08:24:13 +0100	[diff] [blame]	3114	&offset);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	3115	if (error)
				3116	goto bread_err2;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	3117	} else {
				3118	/* This log record is split across the
				3119	* physical end of log */
Christoph Hellwig	6ad5b32	2019-06-28 19:27:26 -0700	[diff] [blame]	3120	offset = dbp;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	3121	split_bblks = 0;
				3122	if (blk_no != log->l_logBBsize) {
				3123	/* some data is before the physical
				3124	* end of log */
				3125	ASSERT(!wrapped_hblks);
				3126	ASSERT(blk_no <= INT_MAX);
				3127	split_bblks =
				3128	log->l_logBBsize - (int)blk_no;
				3129	ASSERT(split_bblks > 0);
Christoph Hellwig	076e6ac	2009-03-16 08:24:13 +0100	[diff] [blame]	3130	error = xlog_bread(log, blk_no,
				3131	split_bblks, dbp,
				3132	&offset);
				3133	if (error)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	3134	goto bread_err2;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	3135	}
Christoph Hellwig	076e6ac	2009-03-16 08:24:13 +0100	[diff] [blame]	3136
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	3137	/*
				3138	* Note: this black magic still works with
				3139	* large sector sizes (non-512) only because:
				3140	* - we increased the buffer size originally
				3141	* by 1 sector giving us enough extra space
				3142	* for the second read;
				3143	* - the log start is guaranteed to be sector
				3144	* aligned;
				3145	* - we read the log end (LR header start)
				3146	* _first_, then the log start (LR header end)
				3147	* - order is important.
				3148	*/
Christoph Hellwig	6ad5b32	2019-06-28 19:27:26 -0700	[diff] [blame]	3149	error = xlog_bread_noalign(log, 0,
				3150	bblks - split_bblks,
Dave Chinner	4439647	2011-04-21 09:34:27 +0000	[diff] [blame]	3151	offset + BBTOB(split_bblks));
Christoph Hellwig	076e6ac	2009-03-16 08:24:13 +0100	[diff] [blame]	3152	if (error)
				3153	goto bread_err2;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	3154	}
Christoph Hellwig	0e446be	2012-11-12 22:54:24 +1100	[diff] [blame]	3155
Brian Foster	9d94901	2016-01-04 15:55:10 +1100	[diff] [blame]	3156	error = xlog_recover_process(log, rhash, rhead, offset,
Brian Foster	12818d2	2016-09-26 08:22:16 +1000	[diff] [blame]	3157	pass, &buffer_list);
Christoph Hellwig	0e446be	2012-11-12 22:54:24 +1100	[diff] [blame]	3158	if (error)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	3159	goto bread_err2;
Brian Foster	d7f3769	2016-01-04 15:55:10 +1100	[diff] [blame]	3160
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	3161	blk_no += bblks;
Brian Foster	d7f3769	2016-01-04 15:55:10 +1100	[diff] [blame]	3162	rhead_blk = blk_no;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	3163	}
				3164
				3165	ASSERT(blk_no >= log->l_logBBsize);
				3166	blk_no -= log->l_logBBsize;
Brian Foster	d7f3769	2016-01-04 15:55:10 +1100	[diff] [blame]	3167	rhead_blk = blk_no;
Eric Sandeen	970fd3f	2014-09-09 11:57:29 +1000	[diff] [blame]	3168	}
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	3169
Eric Sandeen	970fd3f	2014-09-09 11:57:29 +1000	[diff] [blame]	3170	/* read first part of physical log */
				3171	while (blk_no < head_blk) {
				3172	error = xlog_bread(log, blk_no, hblks, hbp, &offset);
				3173	if (error)
				3174	goto bread_err2;
Christoph Hellwig	076e6ac	2009-03-16 08:24:13 +0100	[diff] [blame]	3175
Eric Sandeen	970fd3f	2014-09-09 11:57:29 +1000	[diff] [blame]	3176	rhead = (xlog_rec_header_t *)offset;
Gao Xiang	f692d09	2020-09-22 09:41:06 -0700	[diff] [blame]	3177	error = xlog_valid_rec_header(log, rhead, blk_no, h_size);
Eric Sandeen	970fd3f	2014-09-09 11:57:29 +1000	[diff] [blame]	3178	if (error)
				3179	goto bread_err2;
Christoph Hellwig	076e6ac	2009-03-16 08:24:13 +0100	[diff] [blame]	3180
Eric Sandeen	970fd3f	2014-09-09 11:57:29 +1000	[diff] [blame]	3181	/* blocks in data section */
				3182	bblks = (int)BTOBB(be32_to_cpu(rhead->h_len));
				3183	error = xlog_bread(log, blk_no+hblks, bblks, dbp,
				3184	&offset);
				3185	if (error)
				3186	goto bread_err2;
Christoph Hellwig	076e6ac	2009-03-16 08:24:13 +0100	[diff] [blame]	3187
Brian Foster	12818d2	2016-09-26 08:22:16 +1000	[diff] [blame]	3188	error = xlog_recover_process(log, rhash, rhead, offset, pass,
				3189	&buffer_list);
Eric Sandeen	970fd3f	2014-09-09 11:57:29 +1000	[diff] [blame]	3190	if (error)
				3191	goto bread_err2;
Brian Foster	d7f3769	2016-01-04 15:55:10 +1100	[diff] [blame]	3192
Eric Sandeen	970fd3f	2014-09-09 11:57:29 +1000	[diff] [blame]	3193	blk_no += bblks + hblks;
Brian Foster	d7f3769	2016-01-04 15:55:10 +1100	[diff] [blame]	3194	rhead_blk = blk_no;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	3195	}
				3196
				3197	bread_err2:
Christoph Hellwig	6ad5b32	2019-06-28 19:27:26 -0700	[diff] [blame]	3198	kmem_free(dbp);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	3199	bread_err1:
Christoph Hellwig	6ad5b32	2019-06-28 19:27:26 -0700	[diff] [blame]	3200	kmem_free(hbp);
Brian Foster	d7f3769	2016-01-04 15:55:10 +1100	[diff] [blame]	3201
Brian Foster	12818d2	2016-09-26 08:22:16 +1000	[diff] [blame]	3202	/*
				3203	* Submit buffers that have been added from the last record processed,
				3204	* regardless of error status.
				3205	*/
				3206	if (!list_empty(&buffer_list))
				3207	error2 = xfs_buf_delwri_submit(&buffer_list);
				3208
Brian Foster	d7f3769	2016-01-04 15:55:10 +1100	[diff] [blame]	3209	if (error && first_bad)
				3210	*first_bad = rhead_blk;
				3211
Brian Foster	3977543	2017-06-24 10:11:41 -0700	[diff] [blame]	3212	/*
				3213	* Transactions are freed at commit time but transactions without commit
				3214	* records on disk are never committed. Free any that may be left in the
				3215	* hash table.
				3216	*/
				3217	for (i = 0; i < XLOG_RHASH_SIZE; i++) {
				3218	struct hlist_node *tmp;
				3219	struct xlog_recover *trans;
				3220
				3221	hlist_for_each_entry_safe(trans, tmp, &rhash[i], r_list)
				3222	xlog_recover_free_trans(trans);
				3223	}
				3224
Brian Foster	12818d2	2016-09-26 08:22:16 +1000	[diff] [blame]	3225	return error ? error : error2;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	3226	}
				3227
				3228	/*
				3229	* Do the recovery of the log. We actually do this in two phases.
				3230	* The two passes are necessary in order to implement the function
				3231	* of cancelling a record written into the log. The first pass
				3232	* determines those things which have been cancelled, and the
				3233	* second pass replays log items normally except for those which
				3234	* have been cancelled. The handling of the replay and cancellations
				3235	* takes place in the log item type specific routines.
				3236	*
				3237	* The table of items which have cancel records in the log is allocated
				3238	* and freed at this level, since only here do we know when all of
				3239	* the log recovery has been completed.
				3240	*/
				3241	STATIC int
				3242	xlog_do_log_recovery(
Mark Tinguely	9a8d2fd	2012-06-14 09:22:16 -0500	[diff] [blame]	3243	struct xlog *log,
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	3244	xfs_daddr_t head_blk,
				3245	xfs_daddr_t tail_blk)
				3246	{
Christoph Hellwig	d5689ea	2010-12-01 22:06:22 +0000	[diff] [blame]	3247	int error, i;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	3248
				3249	ASSERT(head_blk != tail_blk);
				3250
				3251	/*
				3252	* First do a pass to find all of the cancelled buf log items.
				3253	* Store them in the buf_cancel_table for use in the second pass.
				3254	*/
Christoph Hellwig	d5689ea	2010-12-01 22:06:22 +0000	[diff] [blame]	3255	log->l_buf_cancel_table = kmem_zalloc(XLOG_BC_TABLE_SIZE *
				3256	sizeof(struct list_head),
Tetsuo Handa	707e0dd	2019-08-26 12:06:22 -0700	[diff] [blame]	3257	0);
Christoph Hellwig	d5689ea	2010-12-01 22:06:22 +0000	[diff] [blame]	3258	for (i = 0; i < XLOG_BC_TABLE_SIZE; i++)
				3259	INIT_LIST_HEAD(&log->l_buf_cancel_table[i]);
				3260
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	3261	error = xlog_do_recovery_pass(log, head_blk, tail_blk,
Brian Foster	d7f3769	2016-01-04 15:55:10 +1100	[diff] [blame]	3262	XLOG_RECOVER_PASS1, NULL);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	3263	if (error != 0) {
Denys Vlasenko	f0e2d93	2008-05-19 16:31:57 +1000	[diff] [blame]	3264	kmem_free(log->l_buf_cancel_table);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	3265	log->l_buf_cancel_table = NULL;
				3266	return error;
				3267	}
				3268	/*
				3269	* Then do a second pass to actually recover the items in the log.
				3270	* When it is complete free the table of buf cancel items.
				3271	*/
				3272	error = xlog_do_recovery_pass(log, head_blk, tail_blk,
Brian Foster	d7f3769	2016-01-04 15:55:10 +1100	[diff] [blame]	3273	XLOG_RECOVER_PASS2, NULL);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	3274	#ifdef DEBUG
Tim Shimmin	6d192a9	2006-06-09 14:55:38 +1000	[diff] [blame]	3275	if (!error) {
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	3276	int i;
				3277
				3278	for (i = 0; i < XLOG_BC_TABLE_SIZE; i++)
Christoph Hellwig	d5689ea	2010-12-01 22:06:22 +0000	[diff] [blame]	3279	ASSERT(list_empty(&log->l_buf_cancel_table[i]));
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	3280	}
				3281	#endif /* DEBUG */
				3282
Denys Vlasenko	f0e2d93	2008-05-19 16:31:57 +1000	[diff] [blame]	3283	kmem_free(log->l_buf_cancel_table);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	3284	log->l_buf_cancel_table = NULL;
				3285
				3286	return error;
				3287	}
				3288
				3289	/*
				3290	* Do the actual recovery
				3291	*/
				3292	STATIC int
				3293	xlog_do_recover(
Christoph Hellwig	b3f8e08	2020-09-01 10:55:47 -0700	[diff] [blame]	3294	struct xlog *log,
				3295	xfs_daddr_t head_blk,
				3296	xfs_daddr_t tail_blk)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	3297	{
Christoph Hellwig	b3f8e08	2020-09-01 10:55:47 -0700	[diff] [blame]	3298	struct xfs_mount *mp = log->l_mp;
				3299	struct xfs_buf *bp = mp->m_sb_bp;
				3300	struct xfs_sb *sbp = &mp->m_sb;
				3301	int error;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	3302
Brian Foster	e67d3d4	2017-08-08 18:21:53 -0700	[diff] [blame]	3303	trace_xfs_log_recover(log, head_blk, tail_blk);
				3304
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	3305	/*
				3306	* First replay the images in the log.
				3307	*/
				3308	error = xlog_do_log_recovery(log, head_blk, tail_blk);
Christoph Hellwig	43ff212	2012-04-23 15:58:39 +1000	[diff] [blame]	3309	if (error)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	3310	return error;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	3311
Dave Chinner	2039a27	2021-08-10 17:59:01 -0700	[diff] [blame]	3312	if (xlog_is_shutdown(log))
Dave Chinner	2451337	2014-06-25 14:58:08 +1000	[diff] [blame]	3313	return -EIO;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	3314
				3315	/*
				3316	* We now update the tail_lsn since much of the recovery has completed
				3317	* and there may be space available to use. If there were no extent
				3318	* or iunlinks, we can free up the entire log and set the tail_lsn to
				3319	* be the last_sync_lsn. This was set in xlog_find_tail to be the
				3320	* lsn of the last known good LR on disk. If there are extent frees
				3321	* or iunlinks they will have some entries in the AIL; so we look at
				3322	* the AIL to determine how to set the tail_lsn.
				3323	*/
Dave Chinner	a798011	2016-03-07 08:39:36 +1100	[diff] [blame]	3324	xlog_assign_tail_lsn(mp);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	3325
				3326	/*
Christoph Hellwig	b3f8e08	2020-09-01 10:55:47 -0700	[diff] [blame]	3327	* Now that we've finished replaying all buffer and inode updates,
				3328	* re-read the superblock and reverify it.
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	3329	*/
Christoph Hellwig	b3f8e08	2020-09-01 10:55:47 -0700	[diff] [blame]	3330	xfs_buf_lock(bp);
				3331	xfs_buf_hold(bp);
Christoph Hellwig	26e32875	2020-09-01 10:55:47 -0700	[diff] [blame]	3332	error = _xfs_buf_read(bp, XBF_READ);
David Chinner	d64e31a	2008-04-10 12:22:17 +1000	[diff] [blame]	3333	if (error) {
Dave Chinner	2039a27	2021-08-10 17:59:01 -0700	[diff] [blame]	3334	if (!xlog_is_shutdown(log)) {
Darrick J. Wong	cdbcf82	2020-01-23 17:01:20 -0800	[diff] [blame]	3335	xfs_buf_ioerror_alert(bp, __this_address);
Dave Chinner	595bff7	2014-10-02 09:05:14 +1000	[diff] [blame]	3336	ASSERT(0);
				3337	}
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	3338	xfs_buf_relse(bp);
				3339	return error;
				3340	}
				3341
				3342	/* Convert superblock from on-disk format */
Christoph Hellwig	3e6e8af	2020-03-10 08:57:30 -0700	[diff] [blame]	3343	xfs_sb_from_disk(sbp, bp->b_addr);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	3344	xfs_buf_relse(bp);
				3345
Dave Chinner	a798011	2016-03-07 08:39:36 +1100	[diff] [blame]	3346	/* re-initialise in-core superblock and geometry structures */
Dave Chinner	a1d86e8	2021-08-18 18:46:26 -0700	[diff] [blame]	3347	mp->m_features \|= xfs_sb_version_to_features(sbp);
Dave Chinner	a798011	2016-03-07 08:39:36 +1100	[diff] [blame]	3348	xfs_reinit_percpu_counters(mp);
				3349	error = xfs_initialize_perag(mp, sbp->sb_agcount, &mp->m_maxagi);
				3350	if (error) {
				3351	xfs_warn(mp, "Failed post-recovery per-ag init: %d", error);
				3352	return error;
				3353	}
Darrick J. Wong	5254885	2016-08-03 11:38:24 +1000	[diff] [blame]	3354	mp->m_alloc_set_aside = xfs_alloc_set_aside(mp);
Lachlan McIlroy	5478eea	2007-02-10 18:36:29 +1100	[diff] [blame]	3355
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	3356	xlog_recover_check_summary(log);
				3357
				3358	/* Normal transactions can now occur */
Dave Chinner	e1d06e5	2021-08-10 17:59:02 -0700	[diff] [blame]	3359	clear_bit(XLOG_ACTIVE_RECOVERY, &log->l_opstate);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	3360	return 0;
				3361	}
				3362
				3363	/*
				3364	* Perform recovery and re-initialize some log variables in xlog_find_tail.
				3365	*
				3366	* Return error or zero.
				3367	*/
				3368	int
				3369	xlog_recover(
Mark Tinguely	9a8d2fd	2012-06-14 09:22:16 -0500	[diff] [blame]	3370	struct xlog *log)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	3371	{
				3372	xfs_daddr_t head_blk, tail_blk;
				3373	int error;
				3374
				3375	/* find the tail of the log */
Brian Foster	a45086e	2015-10-12 15:59:25 +1100	[diff] [blame]	3376	error = xlog_find_tail(log, &head_blk, &tail_blk);
				3377	if (error)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	3378	return error;
				3379
Brian Foster	a45086e	2015-10-12 15:59:25 +1100	[diff] [blame]	3380	/*
				3381	* The superblock was read before the log was available and thus the LSN
				3382	* could not be verified. Check the superblock LSN against the current
				3383	* LSN now that it's known.
				3384	*/
Dave Chinner	38c26bf	2021-08-18 18:46:37 -0700	[diff] [blame]	3385	if (xfs_has_crc(log->l_mp) &&
Brian Foster	a45086e	2015-10-12 15:59:25 +1100	[diff] [blame]	3386	!xfs_log_check_lsn(log->l_mp, log->l_mp->m_sb.sb_lsn))
				3387	return -EINVAL;
				3388
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	3389	if (tail_blk != head_blk) {
				3390	/* There used to be a comment here:
				3391	*
				3392	* disallow recovery on read-only mounts. note -- mount
				3393	* checks for ENOSPC and turns it into an intelligent
				3394	* error message.
				3395	* ...but this is no longer true. Now, unless you specify
				3396	* NORECOVERY (in which case this function would never be
				3397	* called), we just go ahead and recover. We do this all
				3398	* under the vfs layer, so we can get away with it unless
				3399	* the device itself is read-only, in which case we fail.
				3400	*/
Utako Kusaka	3a02ee1	2007-05-08 13:50:06 +1000	[diff] [blame]	3401	if ((error = xfs_dev_is_read_only(log->l_mp, "recovery"))) {
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	3402	return error;
				3403	}
				3404
Dave Chinner	e721f50	2013-04-03 16:11:32 +1100	[diff] [blame]	3405	/*
				3406	* Version 5 superblock log feature mask validation. We know the
				3407	* log is dirty so check if there are any unknown log features
				3408	* in what we need to recover. If there are unknown features
				3409	* (e.g. unsupported transactions, then simply reject the
				3410	* attempt at recovery before touching anything.
				3411	*/
Dave Chinner	d6837c1	2021-08-18 18:46:56 -0700	[diff] [blame]	3412	if (xfs_sb_is_v5(&log->l_mp->m_sb) &&
Dave Chinner	e721f50	2013-04-03 16:11:32 +1100	[diff] [blame]	3413	xfs_sb_has_incompat_log_feature(&log->l_mp->m_sb,
				3414	XFS_SB_FEAT_INCOMPAT_LOG_UNKNOWN)) {
				3415	xfs_warn(log->l_mp,
Joe Perches	f41febd	2015-07-29 11:52:04 +1000	[diff] [blame]	3416	"Superblock has unknown incompatible log features (0x%x) enabled.",
Dave Chinner	e721f50	2013-04-03 16:11:32 +1100	[diff] [blame]	3417	(log->l_mp->m_sb.sb_features_log_incompat &
				3418	XFS_SB_FEAT_INCOMPAT_LOG_UNKNOWN));
Joe Perches	f41febd	2015-07-29 11:52:04 +1000	[diff] [blame]	3419	xfs_warn(log->l_mp,
				3420	"The log can not be fully and/or safely recovered by this kernel.");
				3421	xfs_warn(log->l_mp,
				3422	"Please recover the log on a kernel that supports the unknown features.");
Dave Chinner	2451337	2014-06-25 14:58:08 +1000	[diff] [blame]	3423	return -EINVAL;
Dave Chinner	e721f50	2013-04-03 16:11:32 +1100	[diff] [blame]	3424	}
				3425
Brian Foster	2e22717	2014-09-09 11:56:13 +1000	[diff] [blame]	3426	/*
				3427	* Delay log recovery if the debug hook is set. This is debug
Bhaskar Chowdhury	bd24a4f	2021-03-23 16:59:30 -0700	[diff] [blame]	3428	* instrumentation to coordinate simulation of I/O failures with
Brian Foster	2e22717	2014-09-09 11:56:13 +1000	[diff] [blame]	3429	* log recovery.
				3430	*/
				3431	if (xfs_globals.log_recovery_delay) {
				3432	xfs_notice(log->l_mp,
				3433	"Delaying log recovery for %d seconds.",
				3434	xfs_globals.log_recovery_delay);
				3435	msleep(xfs_globals.log_recovery_delay * 1000);
				3436	}
				3437
Dave Chinner	a0fa2b6	2011-03-07 10:01:35 +1100	[diff] [blame]	3438	xfs_notice(log->l_mp, "Starting recovery (logdev: %s)",
				3439	log->l_mp->m_logname ? log->l_mp->m_logname
				3440	: "internal");
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	3441
				3442	error = xlog_do_recover(log, head_blk, tail_blk);
Dave Chinner	e1d06e5	2021-08-10 17:59:02 -0700	[diff] [blame]	3443	set_bit(XLOG_RECOVERY_NEEDED, &log->l_opstate);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	3444	}
				3445	return error;
				3446	}
				3447
				3448	/*
Dave Chinner	fd67d8a	2021-08-10 17:59:02 -0700	[diff] [blame]	3449	* In the first part of recovery we replay inodes and buffers and build up the
				3450	* list of intents which need to be processed. Here we process the intents and
				3451	* clean up the on disk unlinked inode lists. This is separated from the first
				3452	* part of recovery so that the root and real-time bitmap inodes can be read in
				3453	* from disk in between the two stages. This is necessary so that we can free
				3454	* space in the real-time portion of the file system.
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	3455	*/
				3456	int
				3457	xlog_recover_finish(
Mark Tinguely	9a8d2fd	2012-06-14 09:22:16 -0500	[diff] [blame]	3458	struct xlog *log)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	3459	{
Dave Chinner	fd67d8a	2021-08-10 17:59:02 -0700	[diff] [blame]	3460	int error;
				3461
				3462	error = xlog_recover_process_intents(log);
				3463	if (error) {
				3464	/*
				3465	* Cancel all the unprocessed intent items now so that we don't
				3466	* leave them pinned in the AIL. This can cause the AIL to
				3467	* livelock on the pinned item if anyone tries to push the AIL
				3468	* (inode reclaim does this) before we get around to
				3469	* xfs_log_mount_cancel.
				3470	*/
				3471	xlog_recover_cancel_intents(log);
				3472	xfs_alert(log->l_mp, "Failed to recover intents");
				3473	xfs_force_shutdown(log->l_mp, SHUTDOWN_LOG_IO_ERROR);
				3474	return error;
				3475	}
				3476
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	3477	/*
Dave Chinner	fd67d8a	2021-08-10 17:59:02 -0700	[diff] [blame]	3478	* Sync the log to get all the intents out of the AIL. This isn't
				3479	* absolutely necessary, but it helps in case the unlink transactions
				3480	* would have problems pushing the intents out of the way.
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	3481	*/
Dave Chinner	fd67d8a	2021-08-10 17:59:02 -0700	[diff] [blame]	3482	xfs_log_force(log->l_mp, XFS_LOG_SYNC);
				3483
				3484	/*
				3485	* Now that we've recovered the log and all the intents, we can clear
				3486	* the log incompat feature bits in the superblock because there's no
				3487	* longer anything to protect. We rely on the AIL push to write out the
				3488	* updated superblock after everything else.
				3489	*/
				3490	if (xfs_clear_incompat_log_features(log->l_mp)) {
				3491	error = xfs_sync_sb(log->l_mp, false);
				3492	if (error < 0) {
				3493	xfs_alert(log->l_mp,
				3494	"Failed to clear log incompat features on recovery");
David Chinner	3c1e2bb	2008-04-10 12:21:11 +1000	[diff] [blame]	3495	return error;
				3496	}
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	3497	}
Dave Chinner	fd67d8a	2021-08-10 17:59:02 -0700	[diff] [blame]	3498
				3499	xlog_recover_process_iunlinks(log);
				3500	xlog_recover_check_summary(log);
Darrick J. Wong	7993f1a	2021-12-15 11:52:23 -0800	[diff] [blame]	3501
				3502	/*
				3503	* Recover any CoW staging blocks that are still referenced by the
				3504	* ondisk refcount metadata. During mount there cannot be any live
				3505	* staging extents as we have not permitted any user modifications.
				3506	* Therefore, it is safe to free them all right now, even on a
				3507	* read-only mount.
				3508	*/
				3509	error = xfs_reflink_recover_cow(log->l_mp);
				3510	if (error) {
				3511	xfs_alert(log->l_mp,
				3512	"Failed to recover leftover CoW staging extents, err %d.",
				3513	error);
				3514	/*
				3515	* If we get an error here, make sure the log is shut down
				3516	* but return zero so that any log items committed since the
				3517	* end of intents processing can be pushed through the CIL
				3518	* and AIL.
				3519	*/
				3520	xfs_force_shutdown(log->l_mp, SHUTDOWN_LOG_IO_ERROR);
				3521	}
				3522
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	3523	return 0;
				3524	}
				3525
/*
 * Cancel the intent items left over from log recovery.
 *
 * Does nothing unless xlog_recovery_needed() reports true for this log.
 * NOTE(review): presumably that tests the XLOG_RECOVERY_NEEDED opstate bit
 * set by the first recovery stage -- confirm against its definition.
 */
void
xlog_recover_cancel(
	struct xlog	*log)
{
	if (!xlog_recovery_needed(log))
		return;

	xlog_recover_cancel_intents(log);
}
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	3533
				3534	#if defined(DEBUG)
				3535	/*
				3536	* Read all of the agf and agi counters and check that they
				3537	* are consistent with the superblock counters.
				3538	*/
Christoph Hellwig	e89fbb5	2017-11-06 11:54:01 -0800	[diff] [blame]	3539	STATIC void
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	3540	xlog_recover_check_summary(
Dave Chinner	934933c	2021-06-02 10:48:24 +1000	[diff] [blame]	3541	struct xlog *log)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	3542	{
Dave Chinner	934933c	2021-06-02 10:48:24 +1000	[diff] [blame]	3543	struct xfs_mount *mp = log->l_mp;
				3544	struct xfs_perag *pag;
				3545	struct xfs_buf *agfbp;
				3546	struct xfs_buf *agibp;
				3547	xfs_agnumber_t agno;
				3548	uint64_t freeblks;
				3549	uint64_t itotal;
				3550	uint64_t ifree;
				3551	int error;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	3552
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	3553	freeblks = 0LL;
				3554	itotal = 0LL;
				3555	ifree = 0LL;
Dave Chinner	934933c	2021-06-02 10:48:24 +1000	[diff] [blame]	3556	for_each_perag(mp, agno, pag) {
				3557	error = xfs_read_agf(mp, NULL, pag->pag_agno, 0, &agfbp);
From: Christoph Hellwig	4805621	2008-11-28 14:23:38 +1100	[diff] [blame]	3558	if (error) {
Dave Chinner	a0fa2b6	2011-03-07 10:01:35 +1100	[diff] [blame]	3559	xfs_alert(mp, "%s agf read failed agno %d error %d",
Dave Chinner	934933c	2021-06-02 10:48:24 +1000	[diff] [blame]	3560	__func__, pag->pag_agno, error);
From: Christoph Hellwig	4805621	2008-11-28 14:23:38 +1100	[diff] [blame]	3561	} else {
Christoph Hellwig	9798f61	2020-03-10 08:57:29 -0700	[diff] [blame]	3562	struct xfs_agf *agfp = agfbp->b_addr;
				3563
From: Christoph Hellwig	4805621	2008-11-28 14:23:38 +1100	[diff] [blame]	3564	freeblks += be32_to_cpu(agfp->agf_freeblks) +
				3565	be32_to_cpu(agfp->agf_flcount);
				3566	xfs_buf_relse(agfbp);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	3567	}
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	3568
Dave Chinner	934933c	2021-06-02 10:48:24 +1000	[diff] [blame]	3569	error = xfs_read_agi(mp, NULL, pag->pag_agno, &agibp);
Dave Chinner	a0fa2b6	2011-03-07 10:01:35 +1100	[diff] [blame]	3570	if (error) {
				3571	xfs_alert(mp, "%s agi read failed agno %d error %d",
Dave Chinner	934933c	2021-06-02 10:48:24 +1000	[diff] [blame]	3572	__func__, pag->pag_agno, error);
Dave Chinner	a0fa2b6	2011-03-07 10:01:35 +1100	[diff] [blame]	3573	} else {
Christoph Hellwig	370c782	2020-03-10 08:57:29 -0700	[diff] [blame]	3574	struct xfs_agi *agi = agibp->b_addr;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	3575
Christoph Hellwig	5e1be0f	2008-11-28 14:23:37 +1100	[diff] [blame]	3576	itotal += be32_to_cpu(agi->agi_count);
				3577	ifree += be32_to_cpu(agi->agi_freecount);
				3578	xfs_buf_relse(agibp);
				3579	}
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	3580	}
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	3581	}
				3582	#endif /* DEBUG */