// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (c) 2000-2006 Silicon Graphics, Inc.
 * All Rights Reserved.
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_bit.h"
#include "xfs_sb.h"
#include "xfs_mount.h"
#include "xfs_defer.h"
#include "xfs_da_format.h"
#include "xfs_da_btree.h"
#include "xfs_inode.h"
#include "xfs_trans.h"
#include "xfs_log.h"
#include "xfs_log_priv.h"
#include "xfs_log_recover.h"
#include "xfs_inode_item.h"
#include "xfs_extfree_item.h"
#include "xfs_trans_priv.h"
#include "xfs_alloc.h"
#include "xfs_ialloc.h"
#include "xfs_quota.h"
#include "xfs_cksum.h"
#include "xfs_trace.h"
#include "xfs_icache.h"
#include "xfs_bmap_btree.h"
#include "xfs_error.h"
#include "xfs_dir2.h"
#include "xfs_rmap_item.h"
#include "xfs_buf_item.h"
#include "xfs_refcount_item.h"
#include "xfs_bmap_item.h"

#define BLK_AVG(blk1, blk2)	((blk1+blk2) >> 1)
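/*
 * Illustrative note (not in the original source): BLK_AVG() computes the
 * integer midpoint of two block numbers, e.g.
 * BLK_AVG(8, 15) == (8 + 15) >> 1 == 11. The binary searches below use it
 * to bisect ranges of log blocks.
 */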

STATIC int
xlog_find_zeroed(
	struct xlog	*,
	xfs_daddr_t	*);
STATIC int
xlog_clear_stale_blocks(
	struct xlog	*,
	xfs_lsn_t);
#if defined(DEBUG)
STATIC void
xlog_recover_check_summary(
	struct xlog *);
#else
#define	xlog_recover_check_summary(log)
#endif
STATIC int
xlog_do_recovery_pass(
	struct xlog *, xfs_daddr_t, xfs_daddr_t, int, xfs_daddr_t *);

/*
 * This structure is used during recovery to record the buf log items which
 * have been canceled and should not be replayed.
 */
struct xfs_buf_cancel {
	xfs_daddr_t		bc_blkno;
	uint			bc_len;
	int			bc_refcount;
	struct list_head	bc_list;
};

/*
 * Sector aligned buffer routines for buffer create/read/write/access
 */

/*
 * Verify the log-relative block number and length in basic blocks are valid for
 * an operation involving the given XFS log buffer. Returns true if the fields
 * are valid, false otherwise.
 */
static inline bool
xlog_verify_bp(
	struct xlog	*log,
	xfs_daddr_t	blk_no,
	int		bbcount)
{
	if (blk_no < 0 || blk_no >= log->l_logBBsize)
		return false;
	if (bbcount <= 0 || (blk_no + bbcount) > log->l_logBBsize)
		return false;
	return true;
}
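/*
 * Example (illustrative numbers): with a 2048-block log
 * (l_logBBsize == 2048), xlog_verify_bp() accepts (blk_no == 0,
 * bbcount == 2048) and (blk_no == 2047, bbcount == 1), but rejects
 * (blk_no == 2047, bbcount == 2), which would run off the end of the
 * log, and any negative blk_no or non-positive bbcount.
 */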

/*
 * Allocate a buffer to hold log data.  The buffer needs to be able
 * to map to a range of nbblks basic blocks at any valid (basic
 * block) offset within the log.
 */
STATIC xfs_buf_t *
xlog_get_bp(
	struct xlog	*log,
	int		nbblks)
{
	struct xfs_buf	*bp;

	/*
	 * Pass log block 0 since we don't have an addr yet, buffer will be
	 * verified on read.
	 */
	if (!xlog_verify_bp(log, 0, nbblks)) {
		xfs_warn(log->l_mp, "Invalid block length (0x%x) for buffer",
			nbblks);
		XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_HIGH, log->l_mp);
		return NULL;
	}

	/*
	 * We do log I/O in units of log sectors (a power-of-2
	 * multiple of the basic block size), so we round up the
	 * requested size to accommodate the basic blocks required
	 * for complete log sectors.
	 *
	 * In addition, the buffer may be used for a non-sector-
	 * aligned block offset, in which case an I/O of the
	 * requested size could extend beyond the end of the
	 * buffer.  If the requested size is only 1 basic block it
	 * will never straddle a sector boundary, so this won't be
	 * an issue.  Nor will this be a problem if the log I/O is
	 * done in basic blocks (sector size 1).  But otherwise we
	 * extend the buffer by one extra log sector to ensure
	 * there's space to accommodate this possibility.
	 */
	if (nbblks > 1 && log->l_sectBBsize > 1)
		nbblks += log->l_sectBBsize;
	nbblks = round_up(nbblks, log->l_sectBBsize);

	bp = xfs_buf_get_uncached(log->l_targ, nbblks, 0);
	if (bp)
		xfs_buf_unlock(bp);
	return bp;
}
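/*
 * Worked example of the sizing above (hypothetical numbers): with
 * l_sectBBsize == 4 and a request for nbblks == 5, one extra sector is
 * added first (5 + 4 == 9) and the result is then rounded up to a sector
 * multiple, so 12 basic blocks are allocated.
 */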

STATIC void
xlog_put_bp(
	xfs_buf_t	*bp)
{
	xfs_buf_free(bp);
}

/*
 * Return the address of the start of the given block number's data
 * in a log buffer.  The buffer covers a log sector-aligned region.
 */
static inline unsigned int
xlog_align(
	struct xlog	*log,
	xfs_daddr_t	blk_no)
{
	return BBTOB(blk_no & ((xfs_daddr_t)log->l_sectBBsize - 1));
}
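/*
 * Worked example (hypothetical values): on a 4k-sector log device,
 * l_sectBBsize == 8 basic blocks. A read of blk_no 21 is rounded down to
 * sector-aligned block 16 by xlog_bread_noalign(), so the data for block
 * 21 starts at offset BBTOB(21 & 7) == BBTOB(5) == 2560 bytes into the
 * buffer, which is what xlog_align() returns.
 */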

/*
 * nbblks should be uint, but oh well.  Just want to catch that 32-bit length.
 */
STATIC int
xlog_bread_noalign(
	struct xlog	*log,
	xfs_daddr_t	blk_no,
	int		nbblks,
	struct xfs_buf	*bp)
{
	int		error;

	if (!xlog_verify_bp(log, blk_no, nbblks)) {
		xfs_warn(log->l_mp,
			 "Invalid log block/length (0x%llx, 0x%x) for buffer",
			 blk_no, nbblks);
		XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_HIGH, log->l_mp);
		return -EFSCORRUPTED;
	}

	blk_no = round_down(blk_no, log->l_sectBBsize);
	nbblks = round_up(nbblks, log->l_sectBBsize);

	ASSERT(nbblks > 0);
	ASSERT(nbblks <= bp->b_length);

	XFS_BUF_SET_ADDR(bp, log->l_logBBstart + blk_no);
	bp->b_flags |= XBF_READ;
	bp->b_io_length = nbblks;
	bp->b_error = 0;

	error = xfs_buf_submit(bp);
	if (error && !XFS_FORCED_SHUTDOWN(log->l_mp))
		xfs_buf_ioerror_alert(bp, __func__);
	return error;
}

STATIC int
xlog_bread(
	struct xlog	*log,
	xfs_daddr_t	blk_no,
	int		nbblks,
	struct xfs_buf	*bp,
	char		**offset)
{
	int		error;

	error = xlog_bread_noalign(log, blk_no, nbblks, bp);
	if (error)
		return error;

	*offset = bp->b_addr + xlog_align(log, blk_no);
	return 0;
}

/*
 * Read at an offset into the buffer. Returns with the buffer in its original
 * state regardless of the result of the read.
 */
STATIC int
xlog_bread_offset(
	struct xlog	*log,
	xfs_daddr_t	blk_no,		/* block to read from */
	int		nbblks,		/* blocks to read */
	struct xfs_buf	*bp,
	char		*offset)
{
	char		*orig_offset = bp->b_addr;
	int		orig_len = BBTOB(bp->b_length);
	int		error, error2;

	error = xfs_buf_associate_memory(bp, offset, BBTOB(nbblks));
	if (error)
		return error;

	error = xlog_bread_noalign(log, blk_no, nbblks, bp);

	/* must reset buffer pointer even on error */
	error2 = xfs_buf_associate_memory(bp, orig_offset, orig_len);
	if (error)
		return error;
	return error2;
}

/*
 * Write out the buffer at the given block for the given number of blocks.
 * The buffer is kept locked across the write and is returned locked.
 * This can only be used for synchronous log writes.
 */
STATIC int
xlog_bwrite(
	struct xlog	*log,
	xfs_daddr_t	blk_no,
	int		nbblks,
	struct xfs_buf	*bp)
{
	int		error;

	if (!xlog_verify_bp(log, blk_no, nbblks)) {
		xfs_warn(log->l_mp,
			 "Invalid log block/length (0x%llx, 0x%x) for buffer",
			 blk_no, nbblks);
		XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_HIGH, log->l_mp);
		return -EFSCORRUPTED;
	}

	blk_no = round_down(blk_no, log->l_sectBBsize);
	nbblks = round_up(nbblks, log->l_sectBBsize);

	ASSERT(nbblks > 0);
	ASSERT(nbblks <= bp->b_length);

	XFS_BUF_SET_ADDR(bp, log->l_logBBstart + blk_no);
	xfs_buf_hold(bp);
	xfs_buf_lock(bp);
	bp->b_io_length = nbblks;
	bp->b_error = 0;

	error = xfs_bwrite(bp);
	if (error)
		xfs_buf_ioerror_alert(bp, __func__);
	xfs_buf_relse(bp);
	return error;
}

#ifdef DEBUG
/*
 * dump debug superblock and log record information
 */
STATIC void
xlog_header_check_dump(
	xfs_mount_t		*mp,
	xlog_rec_header_t	*head)
{
	xfs_debug(mp, "%s:  SB : uuid = %pU, fmt = %d",
		__func__, &mp->m_sb.sb_uuid, XLOG_FMT);
	xfs_debug(mp, "    log : uuid = %pU, fmt = %d",
		&head->h_fs_uuid, be32_to_cpu(head->h_fmt));
}
#else
#define xlog_header_check_dump(mp, head)
#endif

/*
 * check log record header for recovery
 */
STATIC int
xlog_header_check_recover(
	xfs_mount_t		*mp,
	xlog_rec_header_t	*head)
{
	ASSERT(head->h_magicno == cpu_to_be32(XLOG_HEADER_MAGIC_NUM));

	/*
	 * IRIX doesn't write the h_fmt field and leaves it zeroed
	 * (XLOG_FMT_UNKNOWN). This stops us from trying to recover
	 * a dirty log created in IRIX.
	 */
	if (unlikely(head->h_fmt != cpu_to_be32(XLOG_FMT))) {
		xfs_warn(mp,
	"dirty log written in incompatible format - can't recover");
		xlog_header_check_dump(mp, head);
		XFS_ERROR_REPORT("xlog_header_check_recover(1)",
				 XFS_ERRLEVEL_HIGH, mp);
		return -EFSCORRUPTED;
	} else if (unlikely(!uuid_equal(&mp->m_sb.sb_uuid, &head->h_fs_uuid))) {
		xfs_warn(mp,
	"dirty log entry has mismatched uuid - can't recover");
		xlog_header_check_dump(mp, head);
		XFS_ERROR_REPORT("xlog_header_check_recover(2)",
				 XFS_ERRLEVEL_HIGH, mp);
		return -EFSCORRUPTED;
	}
	return 0;
}

/*
 * read the head block of the log and check the header
 */
STATIC int
xlog_header_check_mount(
	xfs_mount_t		*mp,
	xlog_rec_header_t	*head)
{
	ASSERT(head->h_magicno == cpu_to_be32(XLOG_HEADER_MAGIC_NUM));

	if (uuid_is_null(&head->h_fs_uuid)) {
		/*
		 * IRIX doesn't write the h_fs_uuid or h_fmt fields. If
		 * h_fs_uuid is null, we assume this log was last mounted
		 * by IRIX and continue.
		 */
		xfs_warn(mp, "null uuid in log - IRIX style log");
	} else if (unlikely(!uuid_equal(&mp->m_sb.sb_uuid, &head->h_fs_uuid))) {
		xfs_warn(mp, "log has mismatched uuid - can't recover");
		xlog_header_check_dump(mp, head);
		XFS_ERROR_REPORT("xlog_header_check_mount",
				 XFS_ERRLEVEL_HIGH, mp);
		return -EFSCORRUPTED;
	}
	return 0;
}

STATIC void
xlog_recover_iodone(
	struct xfs_buf	*bp)
{
	if (bp->b_error) {
		/*
		 * We're not going to bother about retrying
		 * this during recovery. One strike!
		 */
		if (!XFS_FORCED_SHUTDOWN(bp->b_target->bt_mount)) {
			xfs_buf_ioerror_alert(bp, __func__);
			xfs_force_shutdown(bp->b_target->bt_mount,
						SHUTDOWN_META_IO_ERROR);
		}
	}

	/*
	 * On v5 supers, a bli could be attached to update the metadata LSN.
	 * Clean it up.
	 */
	if (bp->b_log_item)
		xfs_buf_item_relse(bp);
	ASSERT(bp->b_log_item == NULL);

	bp->b_iodone = NULL;
	xfs_buf_ioend(bp);
}

/*
 * This routine finds (to an approximation) the first block in the physical
 * log which contains the given cycle.  It uses a binary search algorithm.
 * Note that the algorithm cannot be perfect because the on-disk contents
 * will not necessarily be perfect.
 */
STATIC int
xlog_find_cycle_start(
	struct xlog	*log,
	struct xfs_buf	*bp,
	xfs_daddr_t	first_blk,
	xfs_daddr_t	*last_blk,
	uint		cycle)
{
	char		*offset;
	xfs_daddr_t	mid_blk;
	xfs_daddr_t	end_blk;
	uint		mid_cycle;
	int		error;

	end_blk = *last_blk;
	mid_blk = BLK_AVG(first_blk, end_blk);
	while (mid_blk != first_blk && mid_blk != end_blk) {
		error = xlog_bread(log, mid_blk, 1, bp, &offset);
		if (error)
			return error;
		mid_cycle = xlog_get_cycle(offset);
		if (mid_cycle == cycle)
			end_blk = mid_blk;   /* last_half_cycle == mid_cycle */
		else
			first_blk = mid_blk; /* first_half_cycle == mid_cycle */
		mid_blk = BLK_AVG(first_blk, end_blk);
	}
	ASSERT((mid_blk == first_blk && mid_blk+1 == end_blk) ||
	       (mid_blk == end_blk && mid_blk-1 == first_blk));

	*last_blk = end_blk;

	return 0;
}
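/*
 * Illustrative trace of the search above (hypothetical cycle numbers):
 * looking for the first block of cycle 8 in
 *	blk:    0  1  2  3  4  5  6  7
 *	cycle:  9  9  9  8  8  8  8  8
 * with first_blk == 0 and *last_blk == 7, the midpoints visit block 3
 * (cycle 8, so end_blk becomes 3), then block 1 (cycle 9, first_blk
 * becomes 1), then block 2 (cycle 9, first_blk becomes 2), at which
 * point the loop exits and *last_blk == 3, the first block of cycle 8.
 */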

/*
 * Check that a range of blocks does not contain stop_on_cycle_no.
 * Fill in *new_blk with the block offset where such a block is
 * found, or with -1 (an invalid block number) if there is no such
 * block in the range.  The scan needs to occur from front to back
 * and the pointer into the region must be updated since a later
 * routine will need to perform another test.
 */
STATIC int
xlog_find_verify_cycle(
	struct xlog	*log,
	xfs_daddr_t	start_blk,
	int		nbblks,
	uint		stop_on_cycle_no,
	xfs_daddr_t	*new_blk)
{
	xfs_daddr_t	i, j;
	uint		cycle;
	xfs_buf_t	*bp;
	xfs_daddr_t	bufblks;
	char		*buf = NULL;
	int		error = 0;

	/*
	 * Greedily allocate a buffer big enough to handle the full
	 * range of basic blocks we'll be examining.  If that fails,
	 * try a smaller size.  We need to be able to read at least
	 * a log sector, or we're out of luck.
	 */
	bufblks = 1 << ffs(nbblks);
	while (bufblks > log->l_logBBsize)
		bufblks >>= 1;
	while (!(bp = xlog_get_bp(log, bufblks))) {
		bufblks >>= 1;
		if (bufblks < log->l_sectBBsize)
			return -ENOMEM;
	}

	for (i = start_blk; i < start_blk + nbblks; i += bufblks) {
		int	bcount;

		bcount = min(bufblks, (start_blk + nbblks - i));

		error = xlog_bread(log, i, bcount, bp, &buf);
		if (error)
			goto out;

		for (j = 0; j < bcount; j++) {
			cycle = xlog_get_cycle(buf);
			if (cycle == stop_on_cycle_no) {
				*new_blk = i+j;
				goto out;
			}

			buf += BBSIZE;
		}
	}

	*new_blk = -1;

out:
	xlog_put_bp(bp);
	return error;
}

/*
 * Potentially back up over a partial log record write.
 *
 * In the typical case, last_blk is the number of the block directly after
 * a good log record.  Therefore, we subtract one to get the block number
 * of the last block in the given buffer.  extra_bblks contains the number
 * of blocks we would have read on a previous read.  This happens when the
 * last log record is split over the end of the physical log.
 *
 * extra_bblks is the number of blocks potentially verified on a previous
 * call to this routine.
 */
STATIC int
xlog_find_verify_log_record(
	struct xlog	*log,
	xfs_daddr_t	start_blk,
	xfs_daddr_t	*last_blk,
	int		extra_bblks)
{
	xfs_daddr_t	i;
	xfs_buf_t	*bp;
	char		*offset = NULL;
	xlog_rec_header_t *head = NULL;
	int		error = 0;
	int		smallmem = 0;
	int		num_blks = *last_blk - start_blk;
	int		xhdrs;

	ASSERT(start_blk != 0 || *last_blk != start_blk);

	if (!(bp = xlog_get_bp(log, num_blks))) {
		if (!(bp = xlog_get_bp(log, 1)))
			return -ENOMEM;
		smallmem = 1;
	} else {
		error = xlog_bread(log, start_blk, num_blks, bp, &offset);
		if (error)
			goto out;
		offset += ((num_blks - 1) << BBSHIFT);
	}

	for (i = (*last_blk) - 1; i >= 0; i--) {
		if (i < start_blk) {
			/* valid log record not found */
			xfs_warn(log->l_mp,
		"Log inconsistent (didn't find previous header)");
			ASSERT(0);
			error = -EIO;
			goto out;
		}

		if (smallmem) {
			error = xlog_bread(log, i, 1, bp, &offset);
			if (error)
				goto out;
		}

		head = (xlog_rec_header_t *)offset;

		if (head->h_magicno == cpu_to_be32(XLOG_HEADER_MAGIC_NUM))
			break;

		if (!smallmem)
			offset -= BBSIZE;
	}

	/*
	 * We hit the beginning of the physical log & still no header.  Return
	 * to caller.  If caller can handle a return of -1, then this routine
	 * will be called again for the end of the physical log.
	 */
	if (i == -1) {
		error = 1;
		goto out;
	}

	/*
	 * We have the final block of the good log (the first block
	 * of the log record _before_ the head), so we check the uuid.
	 */
	if ((error = xlog_header_check_mount(log->l_mp, head)))
		goto out;

	/*
	 * We may have found a log record header before we expected one.
	 * last_blk will be the 1st block # with a given cycle #.  We may end
	 * up reading an entire log record.  In this case, we don't want to
	 * reset last_blk.  Only when last_blk points in the middle of a log
	 * record do we update last_blk.
	 */
	if (xfs_sb_version_haslogv2(&log->l_mp->m_sb)) {
		uint	h_size = be32_to_cpu(head->h_size);

		xhdrs = h_size / XLOG_HEADER_CYCLE_SIZE;
		if (h_size % XLOG_HEADER_CYCLE_SIZE)
			xhdrs++;
	} else {
		xhdrs = 1;
	}

	if (*last_blk - i + extra_bblks !=
	    BTOBB(be32_to_cpu(head->h_len)) + xhdrs)
		*last_blk = i;

out:
	xlog_put_bp(bp);
	return error;
}
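/*
 * Worked example of the xhdrs calculation above (hypothetical v2 log,
 * assuming a 32k XLOG_HEADER_CYCLE_SIZE): a record with h_size == 32k
 * needs exactly one header block (xhdrs == 1), while h_size == 40k needs
 * 40k / 32k == 1 plus one more for the remainder, i.e. xhdrs == 2.
 */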

/*
 * Head is defined to be the point of the log where the next log write
 * could go.  This means that incomplete LR writes at the end are
 * eliminated when calculating the head.  We aren't guaranteed that previous
 * LRs have complete transactions.  We only know that a cycle number of
 * current cycle number -1 won't be present in the log if we start writing
 * from our current block number.
 *
 * last_blk contains the block number of the first block with a given
 * cycle number.
 *
 * Return: zero if normal, non-zero if error.
 */
STATIC int
xlog_find_head(
	struct xlog	*log,
	xfs_daddr_t	*return_head_blk)
{
	xfs_buf_t	*bp;
	char		*offset;
	xfs_daddr_t	new_blk, first_blk, start_blk, last_blk, head_blk;
	int		num_scan_bblks;
	uint		first_half_cycle, last_half_cycle;
	uint		stop_on_cycle;
	int		error, log_bbnum = log->l_logBBsize;

	/* Is the end of the log device zeroed? */
	error = xlog_find_zeroed(log, &first_blk);
	if (error < 0) {
		xfs_warn(log->l_mp, "empty log check failed");
		return error;
	}
	if (error == 1) {
		*return_head_blk = first_blk;

		/* Is the whole lot zeroed? */
		if (!first_blk) {
			/* Linux XFS shouldn't generate totally zeroed logs -
			 * mkfs etc write a dummy unmount record to a fresh
			 * log so we can store the uuid in there
			 */
			xfs_warn(log->l_mp, "totally zeroed log");
		}

		return 0;
	}

	first_blk = 0;			/* get cycle # of 1st block */
	bp = xlog_get_bp(log, 1);
	if (!bp)
		return -ENOMEM;

	error = xlog_bread(log, 0, 1, bp, &offset);
	if (error)
		goto bp_err;

	first_half_cycle = xlog_get_cycle(offset);

	last_blk = head_blk = log_bbnum - 1;	/* get cycle # of last block */
	error = xlog_bread(log, last_blk, 1, bp, &offset);
	if (error)
		goto bp_err;

	last_half_cycle = xlog_get_cycle(offset);
	ASSERT(last_half_cycle != 0);

	/*
	 * If the 1st half cycle number is equal to the last half cycle number,
	 * then the entire log is stamped with the same cycle number.  In this
	 * case, head_blk can't be set to zero (which makes sense).  The below
	 * math doesn't work out properly with head_blk equal to zero.  Instead,
	 * we set it to log_bbnum which is an invalid block number, but this
	 * value makes the math correct.  If head_blk doesn't change through
	 * all the tests below, *head_blk is set to zero at the very end rather
	 * than log_bbnum.  In a sense, log_bbnum and zero are the same block
	 * in a circular file.
	 */
	if (first_half_cycle == last_half_cycle) {
		/*
		 * In this case we believe that the entire log should have
		 * cycle number last_half_cycle.  We need to scan backwards
		 * from the end verifying that there are no holes still
		 * containing last_half_cycle - 1.  If we find such a hole,
		 * then the start of that hole will be the new head.  The
		 * simple case looks like
		 *        x | x ... | x - 1 | x
		 * Another case that fits this picture would be
		 *        x | x + 1 | x ... | x
		 * In this case the head really is somewhere at the end of the
		 * log, as one of the latest writes at the beginning was
		 * incomplete.
		 * One more case is
		 *        x | x + 1 | x ... | x - 1 | x
		 * This is really the combination of the above two cases, and
		 * the head has to end up at the start of the x-1 hole at the
		 * end of the log.
		 *
		 * In the 256k log case, we will read from the beginning to the
		 * end of the log and search for cycle numbers equal to x-1.
		 * We don't worry about the x+1 blocks that we encounter,
		 * because we know that they cannot be the head since the log
		 * started with x.
		 */
		head_blk = log_bbnum;
		stop_on_cycle = last_half_cycle - 1;
	} else {
		/*
		 * In this case we want to find the first block with cycle
		 * number matching last_half_cycle.  We expect the log to be
		 * some variation on
		 *        x + 1 ... | x ... | x
		 * The first block with cycle number x (last_half_cycle) will
		 * be where the new head belongs.  First we do a binary search
		 * for the first occurrence of last_half_cycle.  The binary
		 * search may not be totally accurate, so then we scan back
		 * from there looking for occurrences of last_half_cycle before
		 * us.  If that backwards scan wraps around the beginning of
		 * the log, then we look for occurrences of last_half_cycle - 1
		 * at the end of the log.  The cases we're looking for look
		 * like
		 *                               v binary search stopped here
		 *        x + 1 ... | x | x + 1 | x ... | x
		 *                   ^ but we want to locate this spot
		 * or
		 *        <---------> less than scan distance
		 *        x + 1 ... | x ... | x - 1 | x
		 *                           ^ we want to locate this spot
		 */
		stop_on_cycle = last_half_cycle;
		if ((error = xlog_find_cycle_start(log, bp, first_blk,
						&head_blk, last_half_cycle)))
			goto bp_err;
	}

	/*
	 * Now validate the answer.  Scan back some number of maximum possible
	 * blocks and make sure each one has the expected cycle number.  The
	 * maximum is determined by the total possible amount of buffering
	 * in the in-core log.  The following number can be made tighter if
	 * we actually look at the block size of the filesystem.
	 */
	num_scan_bblks = min_t(int, log_bbnum, XLOG_TOTAL_REC_SHIFT(log));
	if (head_blk >= num_scan_bblks) {
		/*
		 * We are guaranteed that the entire check can be performed
		 * in one buffer.
		 */
		start_blk = head_blk - num_scan_bblks;
		if ((error = xlog_find_verify_cycle(log,
						start_blk, num_scan_bblks,
						stop_on_cycle, &new_blk)))
			goto bp_err;
		if (new_blk != -1)
			head_blk = new_blk;
	} else {		/* need to read 2 parts of log */
		/*
		 * We are going to scan backwards in the log in two parts.
		 * First we scan the physical end of the log.  In this part
		 * of the log, we are looking for blocks with cycle number
		 * last_half_cycle - 1.
		 * If we find one, then we know that the log starts there, as
		 * we've found a hole that didn't get written in going around
		 * the end of the physical log.  The simple case for this is
		 *        x + 1 ... | x ... | x - 1 | x
		 *        <---------> less than scan distance
		 * If all of the blocks at the end of the log have cycle number
		 * last_half_cycle, then we check the blocks at the start of
		 * the log looking for occurrences of last_half_cycle.  If we
		 * find one, then our current estimate for the location of the
		 * first occurrence of last_half_cycle is wrong and we move
		 * back to the hole we've found.  This case looks like
		 *        x + 1 ... | x | x + 1 | x ...
		 *                       ^ binary search stopped here
		 * Another case we need to handle that only occurs in 256k
		 * logs is
		 *        x + 1 ... | x ... | x+1 | x ...
		 *                   ^ binary search stops here
		 * In a 256k log, the scan at the end of the log will see the
		 * x + 1 blocks.  We need to skip past those since that is
		 * certainly not the head of the log.  By searching for
		 * last_half_cycle-1 we accomplish that.
		 */
		ASSERT(head_blk <= INT_MAX &&
			(xfs_daddr_t) num_scan_bblks >= head_blk);
		start_blk = log_bbnum - (num_scan_bblks - head_blk);
		if ((error = xlog_find_verify_cycle(log, start_blk,
					num_scan_bblks - (int)head_blk,
					(stop_on_cycle - 1), &new_blk)))
			goto bp_err;
		if (new_blk != -1) {
			head_blk = new_blk;
			goto validate_head;
		}

		/*
		 * Scan beginning of log now.  The last part of the physical
		 * log is good.  This scan needs to verify that it doesn't find
		 * the last_half_cycle.
		 */
		start_blk = 0;
		ASSERT(head_blk <= INT_MAX);
		if ((error = xlog_find_verify_cycle(log,
					start_blk, (int)head_blk,
					stop_on_cycle, &new_blk)))
			goto bp_err;
		if (new_blk != -1)
			head_blk = new_blk;
	}

validate_head:
	/*
	 * Now we need to make sure head_blk is not pointing to a block in
	 * the middle of a log record.
	 */
	num_scan_bblks = XLOG_REC_SHIFT(log);
	if (head_blk >= num_scan_bblks) {
		start_blk = head_blk - num_scan_bblks; /* don't read head_blk */

		/* start ptr at last block ptr before head_blk */
		error = xlog_find_verify_log_record(log, start_blk, &head_blk, 0);
		if (error == 1)
			error = -EIO;
		if (error)
			goto bp_err;
	} else {
		start_blk = 0;
		ASSERT(head_blk <= INT_MAX);
		error = xlog_find_verify_log_record(log, start_blk, &head_blk, 0);
		if (error < 0)
			goto bp_err;
		if (error == 1) {
			/* We hit the beginning of the log during our search */
			start_blk = log_bbnum - (num_scan_bblks - head_blk);
			new_blk = log_bbnum;
			ASSERT(start_blk <= INT_MAX &&
				(xfs_daddr_t) log_bbnum-start_blk >= 0);
			ASSERT(head_blk <= INT_MAX);
			error = xlog_find_verify_log_record(log, start_blk,
							&new_blk, (int)head_blk);
			if (error == 1)
				error = -EIO;
			if (error)
				goto bp_err;
			if (new_blk != log_bbnum)
				head_blk = new_blk;
		} else if (error)
			goto bp_err;
	}

	xlog_put_bp(bp);
	if (head_blk == log_bbnum)
		*return_head_blk = 0;
	else
		*return_head_blk = head_blk;
	/*
	 * When returning here, we have a good block number.  Bad block
	 * means that during a previous crash, we didn't have a clean break
	 * from cycle number N to cycle number N-1.  In this case, we need
	 * to find the first block with cycle number N-1.
	 */
	return 0;

 bp_err:
	xlog_put_bp(bp);

	if (error)
		xfs_warn(log->l_mp, "failed to find log head");
	return error;
}

/*
 * Seek backwards in the log for log record headers.
 *
 * Given a starting log block, walk backwards until we find the provided number
 * of records or hit the provided tail block. The return value is the number of
 * records encountered or a negative error code. The log block and buffer
 * pointer of the last record seen are returned in rblk and rhead respectively.
 */
STATIC int
xlog_rseek_logrec_hdr(
	struct xlog		*log,
	xfs_daddr_t		head_blk,
	xfs_daddr_t		tail_blk,
	int			count,
	struct xfs_buf		*bp,
	xfs_daddr_t		*rblk,
	struct xlog_rec_header	**rhead,
	bool			*wrapped)
{
	int			i;
	int			error;
	int			found = 0;
	char			*offset = NULL;
	xfs_daddr_t		end_blk;

	*wrapped = false;

	/*
	 * Walk backwards from the head block until we hit the tail or the first
	 * block in the log.
	 */
	end_blk = head_blk > tail_blk ? tail_blk : 0;
	for (i = (int) head_blk - 1; i >= end_blk; i--) {
		error = xlog_bread(log, i, 1, bp, &offset);
		if (error)
			goto out_error;

		if (*(__be32 *) offset == cpu_to_be32(XLOG_HEADER_MAGIC_NUM)) {
			*rblk = i;
			*rhead = (struct xlog_rec_header *) offset;
			if (++found == count)
				break;
		}
	}

	/*
	 * If we haven't hit the tail block or the log record header count,
	 * start looking again from the end of the physical log. Note that
	 * callers can pass head == tail if the tail is not yet known.
	 */
	if (tail_blk >= head_blk && found != count) {
		for (i = log->l_logBBsize - 1; i >= (int) tail_blk; i--) {
			error = xlog_bread(log, i, 1, bp, &offset);
			if (error)
				goto out_error;

			if (*(__be32 *)offset ==
			    cpu_to_be32(XLOG_HEADER_MAGIC_NUM)) {
				*wrapped = true;
				*rblk = i;
				*rhead = (struct xlog_rec_header *) offset;
				if (++found == count)
					break;
			}
		}
	}

	return found;

out_error:
	return error;
}
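/*
 * Illustrative example (hypothetical geometry): in a 1000-block log with
 * head_blk == 50 and tail_blk == 900, the active range wraps the physical
 * end of the log, so the first loop above scans blocks 49..0 and, if
 * fewer than count headers were found, the second loop continues from
 * block 999 down to 900 with *wrapped set to true.
 */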

/*
 * Seek forward in the log for log record headers.
 *
 * Given head and tail blocks, walk forward from the tail block until we find
 * the provided number of records or hit the head block. The return value is the
 * number of records encountered or a negative error code. The log block and
 * buffer pointer of the last record seen are returned in rblk and rhead
 * respectively.
 */
STATIC int
xlog_seek_logrec_hdr(
	struct xlog		*log,
	xfs_daddr_t		head_blk,
	xfs_daddr_t		tail_blk,
	int			count,
	struct xfs_buf		*bp,
	xfs_daddr_t		*rblk,
	struct xlog_rec_header	**rhead,
	bool			*wrapped)
{
	int			i;
	int			error;
	int			found = 0;
	char			*offset = NULL;
	xfs_daddr_t		end_blk;

	*wrapped = false;

	/*
	 * Walk forward from the tail block until we hit the head or the last
	 * block in the log.
	 */
	end_blk = head_blk > tail_blk ? head_blk : log->l_logBBsize - 1;
	for (i = (int) tail_blk; i <= end_blk; i++) {
		error = xlog_bread(log, i, 1, bp, &offset);
		if (error)
			goto out_error;

		if (*(__be32 *) offset == cpu_to_be32(XLOG_HEADER_MAGIC_NUM)) {
			*rblk = i;
			*rhead = (struct xlog_rec_header *) offset;
			if (++found == count)
				break;
		}
	}

	/*
	 * If we haven't hit the head block or the log record header count,
	 * start looking again from the start of the physical log.
	 */
	if (tail_blk > head_blk && found != count) {
		for (i = 0; i < (int) head_blk; i++) {
			error = xlog_bread(log, i, 1, bp, &offset);
			if (error)
				goto out_error;

			if (*(__be32 *)offset ==
			    cpu_to_be32(XLOG_HEADER_MAGIC_NUM)) {
				*wrapped = true;
				*rblk = i;
				*rhead = (struct xlog_rec_header *) offset;
				if (++found == count)
					break;
			}
		}
	}

	return found;

out_error:
	return error;
}

/*
 * Calculate distance from head to tail (i.e., unused space in the log).
 */
static inline int
xlog_tail_distance(
	struct xlog	*log,
	xfs_daddr_t	head_blk,
	xfs_daddr_t	tail_blk)
{
	if (head_blk < tail_blk)
		return tail_blk - head_blk;

	return tail_blk + (log->l_logBBsize - head_blk);
}
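/*
 * Worked example (hypothetical geometry): in a 1000-block log with
 * head_blk == 900 and tail_blk == 100, the head has wrapped past the
 * physical end, so the unused space is 100 + (1000 - 900) == 200 basic
 * blocks; with head_blk == 100 and tail_blk == 900 it is simply
 * 900 - 100 == 800.
 */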

/*
 * Verify the log tail. This is particularly important when torn or incomplete
 * writes have been detected near the front of the log and the head has been
 * walked back accordingly.
 *
 * We also have to handle the case where the tail was pinned and the head
 * blocked behind the tail right before a crash. If the tail had been pushed
 * immediately prior to the crash and the subsequent checkpoint was only
 * partially written, it's possible it overwrote the last referenced tail in the
 * log with garbage. This is not a coherency problem because the tail must have
 * been pushed before it can be overwritten, but appears as log corruption to
 * recovery because we have no way to know the tail was updated if the
 * subsequent checkpoint didn't write successfully.
 *
 * Therefore, CRC check the log from tail to head. If a failure occurs and the
 * offending record is within max iclog bufs from the head, walk the tail
 * forward and retry until a valid tail is found or corruption is detected out
 * of the range of a possible overwrite.
 */
STATIC int
xlog_verify_tail(
	struct xlog		*log,
	xfs_daddr_t		head_blk,
	xfs_daddr_t		*tail_blk,
	int			hsize)
{
	struct xlog_rec_header	*thead;
	struct xfs_buf		*bp;
	xfs_daddr_t		first_bad;
	int			error = 0;
	bool			wrapped;
	xfs_daddr_t		tmp_tail;
	xfs_daddr_t		orig_tail = *tail_blk;

	bp = xlog_get_bp(log, 1);
	if (!bp)
		return -ENOMEM;

	/*
	 * Make sure the tail points to a record (returns positive count on
	 * success).
	 */
	error = xlog_seek_logrec_hdr(log, head_blk, *tail_blk, 1, bp,
			&tmp_tail, &thead, &wrapped);
	if (error < 0)
		goto out;
	if (*tail_blk != tmp_tail)
		*tail_blk = tmp_tail;

	/*
	 * Run a CRC check from the tail to the head. We can't just check
	 * MAX_ICLOGS records past the tail because the tail may point to stale
	 * blocks cleared during the search for the head/tail. These blocks are
	 * overwritten with zero-length records and thus record count is not a
	 * reliable indicator of the iclog state before a crash.
	 */
	first_bad = 0;
	error = xlog_do_recovery_pass(log, head_blk, *tail_blk,
				      XLOG_RECOVER_CRCPASS, &first_bad);
	while ((error == -EFSBADCRC || error == -EFSCORRUPTED) && first_bad) {
		int	tail_distance;

		/*
		 * Is corruption within range of the head? If so, retry from
		 * the next record. Otherwise return an error.
		 */
		tail_distance = xlog_tail_distance(log, head_blk, first_bad);
		if (tail_distance > BTOBB(XLOG_MAX_ICLOGS * hsize))
			break;

		/* skip to the next record; returns positive count on success */
		error = xlog_seek_logrec_hdr(log, head_blk, first_bad, 2, bp,
				&tmp_tail, &thead, &wrapped);
		if (error < 0)
			goto out;

		*tail_blk = tmp_tail;
		first_bad = 0;
		error = xlog_do_recovery_pass(log, head_blk, *tail_blk,
					      XLOG_RECOVER_CRCPASS, &first_bad);
	}

	if (!error && *tail_blk != orig_tail)
		xfs_warn(log->l_mp,
		"Tail block (0x%llx) overwrite detected. Updated to 0x%llx",
			 orig_tail, *tail_blk);
out:
	xlog_put_bp(bp);
	return error;
}

/*
 * Detect and trim torn writes from the head of the log.
 *
 * Storage without sector atomicity guarantees can result in torn writes in the
 * log in the event of a crash. Our only means to detect this scenario is via
 * CRC verification. While we can't always be certain that CRC verification
 * failure is due to a torn write vs. an unrelated corruption, we do know that
 * only a certain number (XLOG_MAX_ICLOGS) of log records can be written out at
 * one time. Therefore, CRC verify up to XLOG_MAX_ICLOGS records at the head of
 * the log and treat failures in this range as torn writes as a matter of
 * policy. In the event of CRC failure, the head is walked back to the last good
 * record in the log and the tail is updated from that record and verified.
 */
STATIC int
xlog_verify_head(
	struct xlog		*log,
	xfs_daddr_t		*head_blk,	/* in/out: unverified head */
	xfs_daddr_t		*tail_blk,	/* out: tail block */
	struct xfs_buf		*bp,
	xfs_daddr_t		*rhead_blk,	/* start blk of last record */
	struct xlog_rec_header	**rhead,	/* ptr to last record */
	bool			*wrapped)	/* last rec. wraps phys. log */
{
	struct xlog_rec_header	*tmp_rhead;
	struct xfs_buf		*tmp_bp;
	xfs_daddr_t		first_bad;
	xfs_daddr_t		tmp_rhead_blk;
	int			found;
	int			error;
	bool			tmp_wrapped;

	/*
	 * Check the head of the log for torn writes. Search backwards from the
	 * head until we hit the tail or the maximum number of log record I/Os
	 * that could have been in flight at one time. Use a temporary buffer so
	 * we don't trash the rhead/bp pointers from the caller.
	 */
	tmp_bp = xlog_get_bp(log, 1);
	if (!tmp_bp)
		return -ENOMEM;
	error = xlog_rseek_logrec_hdr(log, *head_blk, *tail_blk,
				      XLOG_MAX_ICLOGS, tmp_bp, &tmp_rhead_blk,
				      &tmp_rhead, &tmp_wrapped);
	xlog_put_bp(tmp_bp);
	if (error < 0)
		return error;

	/*
	 * Now run a CRC verification pass over the records starting at the
	 * block found above to the current head. If a CRC failure occurs, the
	 * log block of the first bad record is saved in first_bad.
	 */
	error = xlog_do_recovery_pass(log, *head_blk, tmp_rhead_blk,
				      XLOG_RECOVER_CRCPASS, &first_bad);
	if ((error == -EFSBADCRC || error == -EFSCORRUPTED) && first_bad) {
		/*
		 * We've hit a potential torn write. Reset the error and warn
		 * about it.
		 */
		error = 0;
		xfs_warn(log->l_mp,
"Torn write (CRC failure) detected at log block 0x%llx. Truncating head block from 0x%llx.",
			 first_bad, *head_blk);

		/*
		 * Get the header block and buffer pointer for the last good
		 * record before the bad record.
		 *
		 * Note that xlog_find_tail() clears the blocks at the new head
		 * (i.e., the records with invalid CRC) if the cycle number
		 * matches the current cycle.
		 */
		found = xlog_rseek_logrec_hdr(log, first_bad, *tail_blk, 1, bp,
				rhead_blk, rhead, wrapped);
		if (found < 0)
			return found;
		if (found == 0)		/* XXX: right thing to do here? */
			return -EIO;

		/*
		 * Reset the head block to the starting block of the first bad
		 * log record and set the tail block based on the last good
		 * record.
		 *
		 * Bail out if the updated head/tail match as this indicates
		 * possible corruption outside of the acceptable
		 * (XLOG_MAX_ICLOGS) range. This is a job for xfs_repair...
		 */
		*head_blk = first_bad;
		*tail_blk = BLOCK_LSN(be64_to_cpu((*rhead)->h_tail_lsn));
		if (*head_blk == *tail_blk) {
			ASSERT(0);
			return 0;
		}
	}
	if (error)
		return error;

	return xlog_verify_tail(log, *head_blk, tail_blk,
				be32_to_cpu((*rhead)->h_size));
}

/*
 * We need to make sure we handle log wrapping properly, so we can't use the
 * calculated logbno directly. Make sure it wraps to the correct bno inside the
 * log.
 *
 * The log is limited to 32 bit sizes, so we use the appropriate modulus
 * operation here and cast it back to a 64 bit daddr on return.
 */
static inline xfs_daddr_t
xlog_wrap_logbno(
	struct xlog		*log,
	xfs_daddr_t		bno)
{
	int			mod;

	div_s64_rem(bno, log->l_logBBsize, &mod);
	return mod;
}
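/*
 * For example (illustrative numbers): in a 1000-block log
 * (l_logBBsize == 1000), a computed position of block 1003 wraps to
 * physical block 3, since div_s64_rem(1003, 1000, &mod) leaves mod == 3.
 */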

/*
 * Check whether the head of the log points to an unmount record. In other
 * words, determine whether the log is clean. If so, update the in-core state
 * appropriately.
 */
static int
xlog_check_unmount_rec(
	struct xlog		*log,
	xfs_daddr_t		*head_blk,
	xfs_daddr_t		*tail_blk,
	struct xlog_rec_header	*rhead,
	xfs_daddr_t		rhead_blk,
	struct xfs_buf		*bp,
	bool			*clean)
{
	struct xlog_op_header	*op_head;
	xfs_daddr_t		umount_data_blk;
	xfs_daddr_t		after_umount_blk;
	int			hblks;
	int			error;
	char			*offset;

	*clean = false;

	/*
	 * Look for unmount record. If we find it, then we know there was a
	 * clean unmount. Since 'i' could be the last block in the physical
	 * log, we convert to a log block before comparing to the head_blk.
	 *
	 * Save the current tail lsn to use to pass to xlog_clear_stale_blocks()
	 * below. We won't want to clear the unmount record if there is one, so
	 * we pass the lsn of the unmount record rather than the block after it.
	 */
	if (xfs_sb_version_haslogv2(&log->l_mp->m_sb)) {
		int	h_size = be32_to_cpu(rhead->h_size);
		int	h_version = be32_to_cpu(rhead->h_version);

		if ((h_version & XLOG_VERSION_2) &&
		    (h_size > XLOG_HEADER_CYCLE_SIZE)) {
			hblks = h_size / XLOG_HEADER_CYCLE_SIZE;
			if (h_size % XLOG_HEADER_CYCLE_SIZE)
				hblks++;
		} else {
			hblks = 1;
		}
	} else {
		hblks = 1;
	}

	after_umount_blk = xlog_wrap_logbno(log,
			rhead_blk + hblks + BTOBB(be32_to_cpu(rhead->h_len)));

	if (*head_blk == after_umount_blk &&
	    be32_to_cpu(rhead->h_num_logops) == 1) {
		umount_data_blk = xlog_wrap_logbno(log, rhead_blk + hblks);
		error = xlog_bread(log, umount_data_blk, 1, bp, &offset);
		if (error)
			return error;

		op_head = (struct xlog_op_header *)offset;
		if (op_head->oh_flags & XLOG_UNMOUNT_TRANS) {
			/*
			 * Set tail and last sync so that newly written log
			 * records will point recovery to after the current
			 * unmount record.
			 */
			xlog_assign_atomic_lsn(&log->l_tail_lsn,
					log->l_curr_cycle, after_umount_blk);
			xlog_assign_atomic_lsn(&log->l_last_sync_lsn,
					log->l_curr_cycle, after_umount_blk);
			*tail_blk = after_umount_blk;

			*clean = true;
		}
	}

	return 0;
}
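/*
 * Worked example of the after_umount_blk calculation above (hypothetical
 * numbers): with rhead_blk == 990, hblks == 1 and h_len == 16k
 * (BTOBB(16384) == 32 basic blocks) in a 1000-block log, the record ends
 * at 990 + 1 + 32 == 1023, which xlog_wrap_logbno() wraps to physical
 * block 23; only if *head_blk lands exactly there (and the record holds a
 * single unmount logop) is the log considered cleanly unmounted.
 */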
1329
Brian Foster717bc0e2016-03-07 08:22:22 +11001330static void
1331xlog_set_state(
1332 struct xlog *log,
1333 xfs_daddr_t head_blk,
1334 struct xlog_rec_header *rhead,
1335 xfs_daddr_t rhead_blk,
1336 bool bump_cycle)
1337{
1338 /*
1339 * Reset log values according to the state of the log when we
1340 * crashed. In the case where head_blk == 0, we bump curr_cycle
1341 * one because the next write starts a new cycle rather than
1342 * continuing the cycle of the last good log record. At this
1343 * point we have guaranteed that all partial log records have been
1344 * accounted for. Therefore, we know that the last good log record
1345 * written was complete and ended exactly on the end boundary
1346 * of the physical log.
1347 */
1348 log->l_prev_block = rhead_blk;
1349 log->l_curr_block = (int)head_blk;
1350 log->l_curr_cycle = be32_to_cpu(rhead->h_cycle);
1351 if (bump_cycle)
1352 log->l_curr_cycle++;
1353 atomic64_set(&log->l_tail_lsn, be64_to_cpu(rhead->h_tail_lsn));
1354 atomic64_set(&log->l_last_sync_lsn, be64_to_cpu(rhead->h_lsn));
1355 xlog_assign_grant_head(&log->l_reserve_head.grant, log->l_curr_cycle,
1356 BBTOB(log->l_curr_block));
1357 xlog_assign_grant_head(&log->l_write_head.grant, log->l_curr_cycle,
1358 BBTOB(log->l_curr_block));
1359}
1360
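/*
 * Editor's sketch of the LSN layout that xlog_set_state() relies on: an
 * xfs_lsn_t packs the cycle number into the high 32 bits and the basic
 * block number into the low 32 bits, which is why CYCLE_LSN()/BLOCK_LSN()
 * can split a tail LSN back apart. These helpers are hypothetical
 * illustrations of that packing, not the kernel's own API.
 */
static inline xfs_lsn_t
xlog_example_pack_lsn(
	uint		cycle,
	uint		block)
{
	return ((xfs_lsn_t)cycle << 32) | block;	/* cf. xlog_assign_lsn() */
}

static inline uint
xlog_example_lsn_cycle(
	xfs_lsn_t	lsn)
{
	return (uint)(lsn >> 32);			/* cf. CYCLE_LSN() */
}
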
Brian Foster65b99a02016-03-07 08:22:22 +11001361/*
Linus Torvalds1da177e2005-04-16 15:20:36 -07001362 * Find the sync block number or the tail of the log.
1363 *
1364 * This will be the block number of the last record to have its
1365 * associated buffers synced to disk. Every log record header has
1366 * a sync lsn embedded in it. LSNs hold block numbers, so it is easy
1367 * to get a sync block number. The only concern is to figure out which
1368 * log record header to believe.
1369 *
1370 * The following algorithm uses the log record header with the largest
1371 * lsn. The entire log record does not need to be valid. We only care
1372 * that the header is valid.
1373 *
1374	 * We could speed up the search by using the current head_blk buffer, but
1375	 * it is not available.
1376 */
Eric Sandeen5d77c0d2009-11-19 15:52:00 +00001377STATIC int
Linus Torvalds1da177e2005-04-16 15:20:36 -07001378xlog_find_tail(
Mark Tinguely9a8d2fd2012-06-14 09:22:16 -05001379 struct xlog *log,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001380 xfs_daddr_t *head_blk,
Eric Sandeen65be6052006-01-11 15:34:19 +11001381 xfs_daddr_t *tail_blk)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001382{
1383 xlog_rec_header_t *rhead;
Christoph Hellwigb2a922c2015-06-22 09:45:10 +10001384 char *offset = NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001385 xfs_buf_t *bp;
Brian Foster7088c412016-01-05 07:40:16 +11001386 int error;
Brian Foster7088c412016-01-05 07:40:16 +11001387 xfs_daddr_t rhead_blk;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001388 xfs_lsn_t tail_lsn;
Brian Fostereed6b462016-01-04 15:55:10 +11001389 bool wrapped = false;
Brian Foster65b99a02016-03-07 08:22:22 +11001390 bool clean = false;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001391
1392 /*
1393 * Find previous log record
1394 */
1395 if ((error = xlog_find_head(log, head_blk)))
1396 return error;
Brian Foster82ff6cc2016-03-07 08:22:22 +11001397 ASSERT(*head_blk < INT_MAX);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001398
1399 bp = xlog_get_bp(log, 1);
1400 if (!bp)
Dave Chinner24513372014-06-25 14:58:08 +10001401 return -ENOMEM;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001402 if (*head_blk == 0) { /* special case */
Christoph Hellwig076e6ac2009-03-16 08:24:13 +01001403 error = xlog_bread(log, 0, 1, bp, &offset);
1404 if (error)
Alex Elder9db127e2010-04-15 18:17:26 +00001405 goto done;
Christoph Hellwig076e6ac2009-03-16 08:24:13 +01001406
Christoph Hellwig03bea6f2007-10-12 10:58:05 +10001407 if (xlog_get_cycle(offset) == 0) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001408 *tail_blk = 0;
1409 /* leave all other log inited values alone */
Alex Elder9db127e2010-04-15 18:17:26 +00001410 goto done;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001411 }
1412 }
1413
1414 /*
Brian Foster82ff6cc2016-03-07 08:22:22 +11001415 * Search backwards through the log looking for the log record header
1416 * block. This wraps all the way back around to the head so something is
1417 * seriously wrong if we can't find it.
Linus Torvalds1da177e2005-04-16 15:20:36 -07001418 */
Brian Foster82ff6cc2016-03-07 08:22:22 +11001419 error = xlog_rseek_logrec_hdr(log, *head_blk, *head_blk, 1, bp,
1420 &rhead_blk, &rhead, &wrapped);
1421 if (error < 0)
1422 return error;
1423 if (!error) {
1424 xfs_warn(log->l_mp, "%s: couldn't find sync record", __func__);
1425 return -EIO;
1426 }
1427 *tail_blk = BLOCK_LSN(be64_to_cpu(rhead->h_tail_lsn));
1428
1429 /*
Brian Foster717bc0e2016-03-07 08:22:22 +11001430 * Set the log state based on the current head record.
Linus Torvalds1da177e2005-04-16 15:20:36 -07001431 */
Brian Foster717bc0e2016-03-07 08:22:22 +11001432 xlog_set_state(log, *head_blk, rhead, rhead_blk, wrapped);
Brian Foster65b99a02016-03-07 08:22:22 +11001433 tail_lsn = atomic64_read(&log->l_tail_lsn);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001434
1435 /*
Brian Foster65b99a02016-03-07 08:22:22 +11001436 * Look for an unmount record at the head of the log. This sets the log
1437 * state to determine whether recovery is necessary.
Linus Torvalds1da177e2005-04-16 15:20:36 -07001438 */
Brian Foster65b99a02016-03-07 08:22:22 +11001439 error = xlog_check_unmount_rec(log, head_blk, tail_blk, rhead,
1440 rhead_blk, bp, &clean);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001441 if (error)
1442 goto done;
1443
1444 /*
Brian Foster7f6aff3a2016-03-07 08:22:22 +11001445	 * Verify the log head if the log is not clean (i.e., we have anything
1446 * but an unmount record at the head). This uses CRC verification to
1447 * detect and trim torn writes. If discovered, CRC failures are
1448 * considered torn writes and the log head is trimmed accordingly.
Linus Torvalds1da177e2005-04-16 15:20:36 -07001449 *
Brian Foster7f6aff3a2016-03-07 08:22:22 +11001450 * Note that we can only run CRC verification when the log is dirty
1451 * because there's no guarantee that the log data behind an unmount
1452 * record is compatible with the current architecture.
Linus Torvalds1da177e2005-04-16 15:20:36 -07001453 */
Brian Foster7f6aff3a2016-03-07 08:22:22 +11001454 if (!clean) {
1455 xfs_daddr_t orig_head = *head_blk;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001456
Brian Foster7f6aff3a2016-03-07 08:22:22 +11001457 error = xlog_verify_head(log, head_blk, tail_blk, bp,
1458 &rhead_blk, &rhead, &wrapped);
Christoph Hellwig076e6ac2009-03-16 08:24:13 +01001459 if (error)
Alex Elder9db127e2010-04-15 18:17:26 +00001460 goto done;
Christoph Hellwig076e6ac2009-03-16 08:24:13 +01001461
Brian Foster7f6aff3a2016-03-07 08:22:22 +11001462 /* update in-core state again if the head changed */
1463 if (*head_blk != orig_head) {
1464 xlog_set_state(log, *head_blk, rhead, rhead_blk,
1465 wrapped);
1466 tail_lsn = atomic64_read(&log->l_tail_lsn);
1467 error = xlog_check_unmount_rec(log, head_blk, tail_blk,
1468 rhead, rhead_blk, bp,
1469 &clean);
1470 if (error)
1471 goto done;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001472 }
1473 }
1474
1475 /*
Brian Foster65b99a02016-03-07 08:22:22 +11001476	 * Record that the unmount was clean. If the unmount was not clean, we
1477	 * need to know so that we can rebuild the superblock counters from the
1478	 * perag headers on filesystems using non-persistent counters.
1479 */
1480 if (clean)
1481 log->l_mp->m_flags |= XFS_MOUNT_WAS_CLEAN;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001482
1483 /*
1484 * Make sure that there are no blocks in front of the head
1485 * with the same cycle number as the head. This can happen
1486 * because we allow multiple outstanding log writes concurrently,
1487 * and the later writes might make it out before earlier ones.
1488 *
1489 * We use the lsn from before modifying it so that we'll never
1490 * overwrite the unmount record after a clean unmount.
1491 *
1492 * Do this only if we are going to recover the filesystem
1493 *
1494 * NOTE: This used to say "if (!readonly)"
1495 * However on Linux, we can & do recover a read-only filesystem.
1496 * We only skip recovery if NORECOVERY is specified on mount,
1497 * in which case we would not be here.
1498 *
1499 * But... if the -device- itself is readonly, just skip this.
1500 * We can't recover this device anyway, so it won't matter.
1501 */
Christoph Hellwig2d15d2c2019-06-28 19:27:24 -07001502 if (!xfs_readonly_buftarg(log->l_targ))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001503 error = xlog_clear_stale_blocks(log, tail_lsn);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001504
Alex Elder9db127e2010-04-15 18:17:26 +00001505done:
Linus Torvalds1da177e2005-04-16 15:20:36 -07001506 xlog_put_bp(bp);
1507
1508 if (error)
Dave Chinnera0fa2b62011-03-07 10:01:35 +11001509 xfs_warn(log->l_mp, "failed to locate log tail");
Linus Torvalds1da177e2005-04-16 15:20:36 -07001510 return error;
1511}
1512
1513/*
1514 * Is the log zeroed at all?
1515 *
1516 * The last binary search should be changed to perform an X block read
1517 * once X becomes small enough. You can then search linearly through
1518 * the X blocks. This will cut down on the number of reads we need to do.
1519 *
1520 * If the log is partially zeroed, this routine will pass back the blkno
1521 * of the first block with cycle number 0. It won't have a complete LR
1522 * preceding it.
1523 *
1524 * Return:
1525 * 0 => the log is completely written to
Dave Chinner24513372014-06-25 14:58:08 +10001526 * 1 => use *blk_no as the first block of the log
1527 * <0 => error has occurred
Linus Torvalds1da177e2005-04-16 15:20:36 -07001528 */
David Chinnera8272ce2007-11-23 16:28:09 +11001529STATIC int
Linus Torvalds1da177e2005-04-16 15:20:36 -07001530xlog_find_zeroed(
Mark Tinguely9a8d2fd2012-06-14 09:22:16 -05001531 struct xlog *log,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001532 xfs_daddr_t *blk_no)
1533{
1534 xfs_buf_t *bp;
Christoph Hellwigb2a922c2015-06-22 09:45:10 +10001535 char *offset;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001536 uint first_cycle, last_cycle;
1537 xfs_daddr_t new_blk, last_blk, start_blk;
1538 xfs_daddr_t num_scan_bblks;
1539 int error, log_bbnum = log->l_logBBsize;
1540
Nathan Scott6fdf8cc2006-06-28 10:13:52 +10001541 *blk_no = 0;
1542
Linus Torvalds1da177e2005-04-16 15:20:36 -07001543 /* check totally zeroed log */
1544 bp = xlog_get_bp(log, 1);
1545 if (!bp)
Dave Chinner24513372014-06-25 14:58:08 +10001546 return -ENOMEM;
Christoph Hellwig076e6ac2009-03-16 08:24:13 +01001547 error = xlog_bread(log, 0, 1, bp, &offset);
1548 if (error)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001549 goto bp_err;
Christoph Hellwig076e6ac2009-03-16 08:24:13 +01001550
Christoph Hellwig03bea6f2007-10-12 10:58:05 +10001551 first_cycle = xlog_get_cycle(offset);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001552 if (first_cycle == 0) { /* completely zeroed log */
1553 *blk_no = 0;
1554 xlog_put_bp(bp);
Dave Chinner24513372014-06-25 14:58:08 +10001555 return 1;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001556 }
1557
1558 /* check partially zeroed log */
Christoph Hellwig076e6ac2009-03-16 08:24:13 +01001559 error = xlog_bread(log, log_bbnum-1, 1, bp, &offset);
1560 if (error)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001561 goto bp_err;
Christoph Hellwig076e6ac2009-03-16 08:24:13 +01001562
Christoph Hellwig03bea6f2007-10-12 10:58:05 +10001563 last_cycle = xlog_get_cycle(offset);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001564 if (last_cycle != 0) { /* log completely written to */
1565 xlog_put_bp(bp);
1566 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001567 }
1568
1569 /* we have a partially zeroed log */
1570 last_blk = log_bbnum-1;
1571 if ((error = xlog_find_cycle_start(log, bp, 0, &last_blk, 0)))
1572 goto bp_err;
1573
1574 /*
1575 * Validate the answer. Because there is no way to guarantee that
1576 * the entire log is made up of log records which are the same size,
1577 * we scan over the defined maximum blocks. At this point, the maximum
1578 * is not chosen to mean anything special. XXXmiken
1579 */
1580 num_scan_bblks = XLOG_TOTAL_REC_SHIFT(log);
1581 ASSERT(num_scan_bblks <= INT_MAX);
1582
1583 if (last_blk < num_scan_bblks)
1584 num_scan_bblks = last_blk;
1585 start_blk = last_blk - num_scan_bblks;
1586
1587 /*
1588 * We search for any instances of cycle number 0 that occur before
1589 * our current estimate of the head. What we're trying to detect is
1590 * 1 ... | 0 | 1 | 0...
1591 * ^ binary search ends here
1592 */
1593 if ((error = xlog_find_verify_cycle(log, start_blk,
1594 (int)num_scan_bblks, 0, &new_blk)))
1595 goto bp_err;
1596 if (new_blk != -1)
1597 last_blk = new_blk;
1598
1599 /*
1600	 * Potentially back up over a partial log record write. We don't need
1601 * to search the end of the log because we know it is zero.
1602 */
Dave Chinner24513372014-06-25 14:58:08 +10001603 error = xlog_find_verify_log_record(log, start_blk, &last_blk, 0);
1604 if (error == 1)
1605 error = -EIO;
1606 if (error)
1607 goto bp_err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001608
1609 *blk_no = last_blk;
1610bp_err:
1611 xlog_put_bp(bp);
1612 if (error)
1613 return error;
Dave Chinner24513372014-06-25 14:58:08 +10001614 return 1;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001615}
1616
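/*
 * Editor's sketch of the linear-scan refinement suggested in the comment
 * above xlog_find_zeroed(): once the binary search window is small, read
 * it in one I/O and scan for the first zero-cycle block. Hypothetical
 * helper; bp must be big enough for nbblks (see xlog_get_bp()) and
 * error handling is simplified for illustration.
 */
static int
xlog_example_scan_first_zero(
	struct xlog	*log,
	xfs_daddr_t	start,
	int		nbblks,
	struct xfs_buf	*bp,
	xfs_daddr_t	*first_zero)
{
	char		*offset;
	int		error;
	int		i;

	error = xlog_bread(log, start, nbblks, bp, &offset);
	if (error)
		return error;

	for (i = 0; i < nbblks; i++, offset += BBSIZE) {
		if (xlog_get_cycle(offset) == 0) {
			*first_zero = start + i;
			return 0;
		}
	}
	return -ENOENT;	/* no zero-cycle block in this window */
}
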
1617/*
1618 * These are simple subroutines used by xlog_clear_stale_blocks() below
1619 * to initialize a buffer full of empty log record headers and write
1620 * them into the log.
1621 */
1622STATIC void
1623xlog_add_record(
Mark Tinguely9a8d2fd2012-06-14 09:22:16 -05001624 struct xlog *log,
Christoph Hellwigb2a922c2015-06-22 09:45:10 +10001625 char *buf,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001626 int cycle,
1627 int block,
1628 int tail_cycle,
1629 int tail_block)
1630{
1631 xlog_rec_header_t *recp = (xlog_rec_header_t *)buf;
1632
1633 memset(buf, 0, BBSIZE);
Christoph Hellwigb53e6752007-10-12 10:59:34 +10001634 recp->h_magicno = cpu_to_be32(XLOG_HEADER_MAGIC_NUM);
1635 recp->h_cycle = cpu_to_be32(cycle);
1636 recp->h_version = cpu_to_be32(
Eric Sandeen62118702008-03-06 13:44:28 +11001637 xfs_sb_version_haslogv2(&log->l_mp->m_sb) ? 2 : 1);
Christoph Hellwigb53e6752007-10-12 10:59:34 +10001638 recp->h_lsn = cpu_to_be64(xlog_assign_lsn(cycle, block));
1639 recp->h_tail_lsn = cpu_to_be64(xlog_assign_lsn(tail_cycle, tail_block));
1640 recp->h_fmt = cpu_to_be32(XLOG_FMT);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001641 memcpy(&recp->h_fs_uuid, &log->l_mp->m_sb.sb_uuid, sizeof(uuid_t));
1642}
1643
1644STATIC int
1645xlog_write_log_records(
Mark Tinguely9a8d2fd2012-06-14 09:22:16 -05001646 struct xlog *log,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001647 int cycle,
1648 int start_block,
1649 int blocks,
1650 int tail_cycle,
1651 int tail_block)
1652{
Christoph Hellwigb2a922c2015-06-22 09:45:10 +10001653 char *offset;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001654 xfs_buf_t *bp;
1655 int balign, ealign;
Alex Elder69ce58f2010-04-20 17:09:59 +10001656 int sectbb = log->l_sectBBsize;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001657 int end_block = start_block + blocks;
1658 int bufblks;
1659 int error = 0;
1660 int i, j = 0;
1661
Alex Elder6881a222010-04-13 15:22:29 +10001662 /*
1663 * Greedily allocate a buffer big enough to handle the full
1664 * range of basic blocks to be written. If that fails, try
1665 * a smaller size. We need to be able to write at least a
1666 * log sector, or we're out of luck.
1667 */
Linus Torvalds1da177e2005-04-16 15:20:36 -07001668 bufblks = 1 << ffs(blocks);
Dave Chinner81158e02012-04-27 19:45:22 +10001669 while (bufblks > log->l_logBBsize)
1670 bufblks >>= 1;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001671 while (!(bp = xlog_get_bp(log, bufblks))) {
1672 bufblks >>= 1;
Alex Elder69ce58f2010-04-20 17:09:59 +10001673 if (bufblks < sectbb)
Dave Chinner24513372014-06-25 14:58:08 +10001674 return -ENOMEM;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001675 }
1676
1677 /* We may need to do a read at the start to fill in part of
1678 * the buffer in the starting sector not covered by the first
1679 * write below.
1680 */
Alex Elder5c17f532010-04-13 15:22:48 +10001681 balign = round_down(start_block, sectbb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001682 if (balign != start_block) {
Christoph Hellwig076e6ac2009-03-16 08:24:13 +01001683 error = xlog_bread_noalign(log, start_block, 1, bp);
1684 if (error)
1685 goto out_put_bp;
1686
Linus Torvalds1da177e2005-04-16 15:20:36 -07001687 j = start_block - balign;
1688 }
1689
1690 for (i = start_block; i < end_block; i += bufblks) {
1691 int bcount, endcount;
1692
1693 bcount = min(bufblks, end_block - start_block);
1694 endcount = bcount - j;
1695
1696 /* We may need to do a read at the end to fill in part of
1697 * the buffer in the final sector not covered by the write.
1698 * If this is the same sector as the above read, skip it.
1699 */
Alex Elder5c17f532010-04-13 15:22:48 +10001700 ealign = round_down(end_block, sectbb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001701 if (j == 0 && (start_block + endcount > ealign)) {
Chandra Seetharaman62926042011-07-22 23:40:15 +00001702 offset = bp->b_addr + BBTOB(ealign - start_block);
Dave Chinner44396472011-04-21 09:34:27 +00001703 error = xlog_bread_offset(log, ealign, sectbb,
1704 bp, offset);
Christoph Hellwig076e6ac2009-03-16 08:24:13 +01001705 if (error)
1706 break;
1707
Linus Torvalds1da177e2005-04-16 15:20:36 -07001708 }
1709
Christoph Hellwig18ffb8c2019-06-28 19:27:26 -07001710 offset = bp->b_addr + xlog_align(log, start_block);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001711 for (; j < endcount; j++) {
1712 xlog_add_record(log, offset, cycle, i+j,
1713 tail_cycle, tail_block);
1714 offset += BBSIZE;
1715 }
1716 error = xlog_bwrite(log, start_block, endcount, bp);
1717 if (error)
1718 break;
1719 start_block += endcount;
1720 j = 0;
1721 }
Christoph Hellwig076e6ac2009-03-16 08:24:13 +01001722
1723 out_put_bp:
Linus Torvalds1da177e2005-04-16 15:20:36 -07001724 xlog_put_bp(bp);
1725 return error;
1726}
1727
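/*
 * Editor's note on the greedy sizing in xlog_write_log_records() above:
 * "1 << ffs(blocks)" yields twice the lowest set bit of blocks, e.g.
 * blocks = 12 (0b1100) gives ffs() == 3 and a starting bufblks of 8,
 * while blocks = 8 gives 16. A minimal sketch of the plainer
 * power-of-two round-up it approximates, for comparison only:
 */
static inline int
xlog_example_roundup_bufblks(
	int	blocks)
{
	int	bufblks = 1;

	while (bufblks < blocks)	/* smallest power of two >= blocks */
		bufblks <<= 1;
	return bufblks;
}
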
1728/*
1729 * This routine is called to blow away any incomplete log writes out
1730 * in front of the log head. We do this so that we won't become confused
1731 * if we come up, write only a little bit more, and then crash again.
1732 * If we leave the partial log records out there, this situation could
1733 * cause us to think those partial writes are valid blocks since they
1734 * have the current cycle number. We get rid of them by overwriting them
1735 * with empty log records with the old cycle number rather than the
1736 * current one.
1737 *
1738 * The tail lsn is passed in rather than taken from
1739 * the log so that we will not write over the unmount record after a
1740 * clean unmount in a 512 block log. Doing so would leave the log without
1741 * any valid log records in it until a new one was written. If we crashed
1742 * during that time we would not be able to recover.
1743 */
1744STATIC int
1745xlog_clear_stale_blocks(
Mark Tinguely9a8d2fd2012-06-14 09:22:16 -05001746 struct xlog *log,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001747 xfs_lsn_t tail_lsn)
1748{
1749 int tail_cycle, head_cycle;
1750 int tail_block, head_block;
1751 int tail_distance, max_distance;
1752 int distance;
1753 int error;
1754
1755 tail_cycle = CYCLE_LSN(tail_lsn);
1756 tail_block = BLOCK_LSN(tail_lsn);
1757 head_cycle = log->l_curr_cycle;
1758 head_block = log->l_curr_block;
1759
1760 /*
1761 * Figure out the distance between the new head of the log
1762 * and the tail. We want to write over any blocks beyond the
1763 * head that we may have written just before the crash, but
1764 * we don't want to overwrite the tail of the log.
1765 */
1766 if (head_cycle == tail_cycle) {
1767 /*
1768 * The tail is behind the head in the physical log,
1769 * so the distance from the head to the tail is the
1770 * distance from the head to the end of the log plus
1771 * the distance from the beginning of the log to the
1772 * tail.
1773 */
1774 if (unlikely(head_block < tail_block || head_block >= log->l_logBBsize)) {
1775 XFS_ERROR_REPORT("xlog_clear_stale_blocks(1)",
1776 XFS_ERRLEVEL_LOW, log->l_mp);
Dave Chinner24513372014-06-25 14:58:08 +10001777 return -EFSCORRUPTED;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001778 }
1779 tail_distance = tail_block + (log->l_logBBsize - head_block);
1780 } else {
1781 /*
1782 * The head is behind the tail in the physical log,
1783 * so the distance from the head to the tail is just
1784 * the tail block minus the head block.
1785 */
1786 if (unlikely(head_block >= tail_block || head_cycle != (tail_cycle + 1))){
1787 XFS_ERROR_REPORT("xlog_clear_stale_blocks(2)",
1788 XFS_ERRLEVEL_LOW, log->l_mp);
Dave Chinner24513372014-06-25 14:58:08 +10001789 return -EFSCORRUPTED;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001790 }
1791 tail_distance = tail_block - head_block;
1792 }
1793
1794 /*
1795 * If the head is right up against the tail, we can't clear
1796 * anything.
1797 */
1798 if (tail_distance <= 0) {
1799 ASSERT(tail_distance == 0);
1800 return 0;
1801 }
1802
1803 max_distance = XLOG_TOTAL_REC_SHIFT(log);
1804 /*
1805 * Take the smaller of the maximum amount of outstanding I/O
1806 * we could have and the distance to the tail to clear out.
1807 * We take the smaller so that we don't overwrite the tail and
1808 * we don't waste all day writing from the head to the tail
1809 * for no reason.
1810 */
Dave Chinner9bb54cb2018-06-07 07:54:02 -07001811 max_distance = min(max_distance, tail_distance);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001812
1813 if ((head_block + max_distance) <= log->l_logBBsize) {
1814 /*
1815 * We can stomp all the blocks we need to without
1816 * wrapping around the end of the log. Just do it
1817 * in a single write. Use the cycle number of the
1818 * current cycle minus one so that the log will look like:
1819 * n ... | n - 1 ...
1820 */
1821 error = xlog_write_log_records(log, (head_cycle - 1),
1822 head_block, max_distance, tail_cycle,
1823 tail_block);
1824 if (error)
1825 return error;
1826 } else {
1827 /*
1828 * We need to wrap around the end of the physical log in
1829 * order to clear all the blocks. Do it in two separate
1830 * I/Os. The first write should be from the head to the
1831 * end of the physical log, and it should use the current
1832 * cycle number minus one just like above.
1833 */
1834 distance = log->l_logBBsize - head_block;
1835 error = xlog_write_log_records(log, (head_cycle - 1),
1836 head_block, distance, tail_cycle,
1837 tail_block);
1838
1839 if (error)
1840 return error;
1841
1842 /*
1843 * Now write the blocks at the start of the physical log.
1844 * This writes the remainder of the blocks we want to clear.
1845 * It uses the current cycle number since we're now on the
1846 * same cycle as the head so that we get:
1847 * n ... n ... | n - 1 ...
1848 * ^^^^^ blocks we're writing
1849 */
1850 distance = max_distance - (log->l_logBBsize - head_block);
1851 error = xlog_write_log_records(log, head_cycle, 0, distance,
1852 tail_cycle, tail_block);
1853 if (error)
1854 return error;
1855 }
1856
1857 return 0;
1858}
1859
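/*
 * Editor's worked example for the tail_distance computation in
 * xlog_clear_stale_blocks() above, assuming a hypothetical 1000-block
 * physical log:
 *
 *   same cycle, head_block = 900, tail_block = 100:
 *	tail_distance = 100 + (1000 - 900) = 200
 *   head one cycle ahead, head_block = 50, tail_block = 100:
 *	tail_distance = 100 - 50 = 50
 *
 * In both cases at most tail_distance blocks may be stomped before the
 * head would overwrite the tail.
 */
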
1860/******************************************************************************
1861 *
1862 * Log recover routines
1863 *
1864 ******************************************************************************
1865 */
1866
Dave Chinnerf0a76952010-01-11 11:49:57 +00001867/*
Dave Chinnera775ad72013-06-05 12:09:07 +10001868 * Sort the log items in the transaction.
1869 *
1870 * The ordering constraints are defined by the inode allocation and unlink
1871 * behaviour. The rules are:
1872 *
1873 * 1. Every item is only logged once in a given transaction. Hence it
1874 * represents the last logged state of the item. Hence ordering is
1875 * dependent on the order in which operations need to be performed so
1876 * required initial conditions are always met.
1877 *
1878 * 2. Cancelled buffers are recorded in pass 1 in a separate table and
1879 * there's nothing to replay from them so we can simply cull them
1880 * from the transaction. However, we can't do that until after we've
1881 * replayed all the other items because they may be dependent on the
1882 * cancelled buffer and replaying the cancelled buffer can remove it
1883	 * from the cancelled buffer table. Hence they have to be done last.
1884 *
1885 * 3. Inode allocation buffers must be replayed before inode items that
Dave Chinner28c8e412013-06-27 16:04:55 +10001886 * read the buffer and replay changes into it. For filesystems using the
1887 * ICREATE transactions, this means XFS_LI_ICREATE objects need to get
1888 * treated the same as inode allocation buffers as they create and
1889 * initialise the buffers directly.
Dave Chinnera775ad72013-06-05 12:09:07 +10001890 *
1891 * 4. Inode unlink buffers must be replayed after inode items are replayed.
1892 * This ensures that inodes are completely flushed to the inode buffer
1893 * in a "free" state before we remove the unlinked inode list pointer.
1894 *
1895 * Hence the ordering needs to be inode allocation buffers first, inode items
1896 * second, inode unlink buffers third and cancelled buffers last.
1897 *
1898 * But there's a problem with that - we can't tell an inode allocation buffer
1899 * apart from a regular buffer, so we can't separate them. We can, however,
1900 * tell an inode unlink buffer from the others, and so we can separate them out
1901 * from all the other buffers and move them to last.
1902 *
1903 * Hence, 4 lists, in order from head to tail:
Dave Chinner28c8e412013-06-27 16:04:55 +10001904 * - buffer_list for all buffers except cancelled/inode unlink buffers
1905 * - item_list for all non-buffer items
1906 * - inode_buffer_list for inode unlink buffers
1907 * - cancel_list for the cancelled buffers
1908 *
1909 * Note that we add objects to the tail of the lists so that first-to-last
1910 * ordering is preserved within the lists. Adding objects to the head of the
1911 * list means when we traverse from the head we walk them in last-to-first
1912 * order. For cancelled buffers and inode unlink buffers this doesn't matter,
1913 * but for all other items there may be specific ordering that we need to
1914 * preserve.
Dave Chinnerf0a76952010-01-11 11:49:57 +00001915 */
Linus Torvalds1da177e2005-04-16 15:20:36 -07001916STATIC int
1917xlog_recover_reorder_trans(
Mark Tinguelyad223e62012-06-14 09:22:15 -05001918 struct xlog *log,
1919 struct xlog_recover *trans,
Dave Chinner9abbc532010-04-13 15:06:46 +10001920 int pass)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001921{
Dave Chinnerf0a76952010-01-11 11:49:57 +00001922 xlog_recover_item_t *item, *n;
Mark Tinguely2a841082013-10-02 07:51:12 -05001923 int error = 0;
Dave Chinnerf0a76952010-01-11 11:49:57 +00001924 LIST_HEAD(sort_list);
Dave Chinnera775ad72013-06-05 12:09:07 +10001925 LIST_HEAD(cancel_list);
1926 LIST_HEAD(buffer_list);
1927 LIST_HEAD(inode_buffer_list);
1928 LIST_HEAD(inode_list);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001929
Dave Chinnerf0a76952010-01-11 11:49:57 +00001930 list_splice_init(&trans->r_itemq, &sort_list);
1931 list_for_each_entry_safe(item, n, &sort_list, ri_list) {
Christoph Hellwig4e0d5f92010-06-23 18:11:15 +10001932 xfs_buf_log_format_t *buf_f = item->ri_buf[0].i_addr;
Dave Chinnerf0a76952010-01-11 11:49:57 +00001933
1934 switch (ITEM_TYPE(item)) {
Dave Chinner28c8e412013-06-27 16:04:55 +10001935 case XFS_LI_ICREATE:
1936 list_move_tail(&item->ri_list, &buffer_list);
1937 break;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001938 case XFS_LI_BUF:
Dave Chinnera775ad72013-06-05 12:09:07 +10001939 if (buf_f->blf_flags & XFS_BLF_CANCEL) {
Dave Chinner9abbc532010-04-13 15:06:46 +10001940 trace_xfs_log_recover_item_reorder_head(log,
1941 trans, item, pass);
Dave Chinnera775ad72013-06-05 12:09:07 +10001942 list_move(&item->ri_list, &cancel_list);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001943 break;
1944 }
Dave Chinnera775ad72013-06-05 12:09:07 +10001945 if (buf_f->blf_flags & XFS_BLF_INODE_BUF) {
1946 list_move(&item->ri_list, &inode_buffer_list);
1947 break;
1948 }
1949 list_move_tail(&item->ri_list, &buffer_list);
1950 break;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001951 case XFS_LI_INODE:
Linus Torvalds1da177e2005-04-16 15:20:36 -07001952 case XFS_LI_DQUOT:
1953 case XFS_LI_QUOTAOFF:
1954 case XFS_LI_EFD:
1955 case XFS_LI_EFI:
Darrick J. Wong9e88b5d2016-08-03 12:09:48 +10001956 case XFS_LI_RUI:
1957 case XFS_LI_RUD:
Darrick J. Wongf997ee22016-10-03 09:11:21 -07001958 case XFS_LI_CUI:
1959 case XFS_LI_CUD:
Darrick J. Wong77d61fe2016-10-03 09:11:26 -07001960 case XFS_LI_BUI:
1961 case XFS_LI_BUD:
Dave Chinner9abbc532010-04-13 15:06:46 +10001962 trace_xfs_log_recover_item_reorder_tail(log,
1963 trans, item, pass);
Dave Chinnera775ad72013-06-05 12:09:07 +10001964 list_move_tail(&item->ri_list, &inode_list);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001965 break;
1966 default:
Dave Chinnera0fa2b62011-03-07 10:01:35 +11001967 xfs_warn(log->l_mp,
1968 "%s: unrecognized type of log operation",
1969 __func__);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001970 ASSERT(0);
Mark Tinguely2a841082013-10-02 07:51:12 -05001971 /*
1972 * return the remaining items back to the transaction
1973 * item list so they can be freed in caller.
1974 */
1975 if (!list_empty(&sort_list))
1976 list_splice_init(&sort_list, &trans->r_itemq);
Dave Chinner24513372014-06-25 14:58:08 +10001977 error = -EIO;
Mark Tinguely2a841082013-10-02 07:51:12 -05001978 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001979 }
Dave Chinnerf0a76952010-01-11 11:49:57 +00001980 }
Mark Tinguely2a841082013-10-02 07:51:12 -05001981out:
Dave Chinnerf0a76952010-01-11 11:49:57 +00001982 ASSERT(list_empty(&sort_list));
Dave Chinnera775ad72013-06-05 12:09:07 +10001983 if (!list_empty(&buffer_list))
1984 list_splice(&buffer_list, &trans->r_itemq);
1985 if (!list_empty(&inode_list))
1986 list_splice_tail(&inode_list, &trans->r_itemq);
1987 if (!list_empty(&inode_buffer_list))
1988 list_splice_tail(&inode_buffer_list, &trans->r_itemq);
1989 if (!list_empty(&cancel_list))
1990 list_splice_tail(&cancel_list, &trans->r_itemq);
Mark Tinguely2a841082013-10-02 07:51:12 -05001991 return error;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001992}
1993
1994/*
1995 * Build up the table of buf cancel records so that we don't replay
1996 * cancelled data in the second pass. For buffer records that are
1997 * not cancel records, there is nothing to do here so we just return.
1998 *
1999 * If we get a cancel record which is already in the table, this indicates
2000 * that the buffer was cancelled multiple times. In order to ensure
2001 * that during pass 2 we keep the record in the table until we reach its
2002 * last occurrence in the log, we keep a reference count in the cancel
2003 * record in the table to tell us how many times we expect to see this
2004 * record during the second pass.
2005 */
Christoph Hellwigc9f71f52010-12-01 22:06:24 +00002006STATIC int
2007xlog_recover_buffer_pass1(
Mark Tinguelyad223e62012-06-14 09:22:15 -05002008 struct xlog *log,
2009 struct xlog_recover_item *item)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002010{
Christoph Hellwigc9f71f52010-12-01 22:06:24 +00002011 xfs_buf_log_format_t *buf_f = item->ri_buf[0].i_addr;
Christoph Hellwigd5689ea2010-12-01 22:06:22 +00002012 struct list_head *bucket;
2013 struct xfs_buf_cancel *bcp;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002014
2015 /*
2016 * If this isn't a cancel buffer item, then just return.
2017 */
Christoph Hellwige2714bf2010-12-01 22:06:21 +00002018 if (!(buf_f->blf_flags & XFS_BLF_CANCEL)) {
Dave Chinner9abbc532010-04-13 15:06:46 +10002019 trace_xfs_log_recover_buf_not_cancel(log, buf_f);
Christoph Hellwigc9f71f52010-12-01 22:06:24 +00002020 return 0;
Dave Chinner9abbc532010-04-13 15:06:46 +10002021 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002022
2023 /*
Christoph Hellwigd5689ea2010-12-01 22:06:22 +00002024 * Insert an xfs_buf_cancel record into the hash table of them.
2025 * If there is already an identical record, bump its reference count.
Linus Torvalds1da177e2005-04-16 15:20:36 -07002026 */
Christoph Hellwigd5689ea2010-12-01 22:06:22 +00002027 bucket = XLOG_BUF_CANCEL_BUCKET(log, buf_f->blf_blkno);
2028 list_for_each_entry(bcp, bucket, bc_list) {
2029 if (bcp->bc_blkno == buf_f->blf_blkno &&
2030 bcp->bc_len == buf_f->blf_len) {
2031 bcp->bc_refcount++;
Dave Chinner9abbc532010-04-13 15:06:46 +10002032 trace_xfs_log_recover_buf_cancel_ref_inc(log, buf_f);
Christoph Hellwigc9f71f52010-12-01 22:06:24 +00002033 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002034 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002035 }
Christoph Hellwigd5689ea2010-12-01 22:06:22 +00002036
2037 bcp = kmem_alloc(sizeof(struct xfs_buf_cancel), KM_SLEEP);
2038 bcp->bc_blkno = buf_f->blf_blkno;
2039 bcp->bc_len = buf_f->blf_len;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002040 bcp->bc_refcount = 1;
Christoph Hellwigd5689ea2010-12-01 22:06:22 +00002041 list_add_tail(&bcp->bc_list, bucket);
2042
Dave Chinner9abbc532010-04-13 15:06:46 +10002043 trace_xfs_log_recover_buf_cancel_add(log, buf_f);
Christoph Hellwigc9f71f52010-12-01 22:06:24 +00002044 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002045}
2046
2047/*
2048 * Check to see whether the buffer being recovered has a corresponding
Dave Chinner84a5b732013-08-27 08:10:53 +10002049 * entry in the buffer cancel record table. If it is, return the cancel
2050 * buffer structure to the caller.
Linus Torvalds1da177e2005-04-16 15:20:36 -07002051 */
Dave Chinner84a5b732013-08-27 08:10:53 +10002052STATIC struct xfs_buf_cancel *
2053xlog_peek_buffer_cancelled(
Mark Tinguelyad223e62012-06-14 09:22:15 -05002054 struct xlog *log,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002055 xfs_daddr_t blkno,
2056 uint len,
Darrick J. Wong755c7bf2016-11-08 11:55:48 +11002057 unsigned short flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002058{
Christoph Hellwigd5689ea2010-12-01 22:06:22 +00002059 struct list_head *bucket;
2060 struct xfs_buf_cancel *bcp;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002061
Dave Chinner84a5b732013-08-27 08:10:53 +10002062 if (!log->l_buf_cancel_table) {
2063 /* empty table means no cancelled buffers in the log */
Dave Chinnerc1155412010-05-07 11:05:19 +10002064 ASSERT(!(flags & XFS_BLF_CANCEL));
Dave Chinner84a5b732013-08-27 08:10:53 +10002065 return NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002066 }
2067
Christoph Hellwigd5689ea2010-12-01 22:06:22 +00002068 bucket = XLOG_BUF_CANCEL_BUCKET(log, blkno);
2069 list_for_each_entry(bcp, bucket, bc_list) {
2070 if (bcp->bc_blkno == blkno && bcp->bc_len == len)
Dave Chinner84a5b732013-08-27 08:10:53 +10002071 return bcp;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002072 }
2073
2074 /*
Christoph Hellwigd5689ea2010-12-01 22:06:22 +00002075 * We didn't find a corresponding entry in the table, so return 0 so
2076 * that the buffer is NOT cancelled.
Linus Torvalds1da177e2005-04-16 15:20:36 -07002077 */
Dave Chinnerc1155412010-05-07 11:05:19 +10002078 ASSERT(!(flags & XFS_BLF_CANCEL));
Dave Chinner84a5b732013-08-27 08:10:53 +10002079 return NULL;
2080}
Christoph Hellwigd5689ea2010-12-01 22:06:22 +00002081
Dave Chinner84a5b732013-08-27 08:10:53 +10002082/*
2083 * If the buffer is being cancelled then return 1 so that it will be cancelled,
2084 * otherwise return 0. If the buffer is actually a buffer cancel item
2085 * (XFS_BLF_CANCEL is set), then decrement the refcount on the entry in the
2086 * table and remove it from the table if this is the last reference.
2087 *
2088 * We remove the cancel record from the table when we encounter its last
2089 * occurrence in the log so that if the same buffer is re-used again after its
2090 * last cancellation we actually replay the changes made at that point.
2091 */
2092STATIC int
2093xlog_check_buffer_cancelled(
2094 struct xlog *log,
2095 xfs_daddr_t blkno,
2096 uint len,
Darrick J. Wong755c7bf2016-11-08 11:55:48 +11002097 unsigned short flags)
Dave Chinner84a5b732013-08-27 08:10:53 +10002098{
2099 struct xfs_buf_cancel *bcp;
2100
2101 bcp = xlog_peek_buffer_cancelled(log, blkno, len, flags);
2102 if (!bcp)
2103 return 0;
2104
Christoph Hellwigd5689ea2010-12-01 22:06:22 +00002105 /*
2106	 * We've got a match, so return 1 so that the recovery of this buffer
2107 * is cancelled. If this buffer is actually a buffer cancel log
2108 * item, then decrement the refcount on the one in the table and
2109 * remove it if this is the last reference.
2110 */
2111 if (flags & XFS_BLF_CANCEL) {
2112 if (--bcp->bc_refcount == 0) {
2113 list_del(&bcp->bc_list);
2114 kmem_free(bcp);
2115 }
2116 }
2117 return 1;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002118}
2119
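/*
 * Editor's usage sketch (hypothetical condensed caller): pass 2 buffer
 * recovery asks whether a logged buffer was subsequently cancelled
 * before bothering to read and replay it. Passing the buffer's own
 * blf_flags lets cancel records decrement their refcount as described
 * above.
 */
static bool
xlog_example_buffer_is_cancelled(
	struct xlog			*log,
	struct xfs_buf_log_format	*buf_f)
{
	return xlog_check_buffer_cancelled(log, buf_f->blf_blkno,
					   buf_f->blf_len,
					   buf_f->blf_flags) != 0;
}
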
Linus Torvalds1da177e2005-04-16 15:20:36 -07002120/*
Christoph Hellwige2714bf2010-12-01 22:06:21 +00002121 * Perform recovery for a buffer full of inodes. In these buffers, the only
2122 * data which should be recovered is that which corresponds to the
2123 * di_next_unlinked pointers in the on disk inode structures. The rest of the
2124 * data for the inodes is always logged through the inodes themselves rather
2125 * than the inode buffer and is recovered in xlog_recover_inode_pass2().
Linus Torvalds1da177e2005-04-16 15:20:36 -07002126 *
Christoph Hellwige2714bf2010-12-01 22:06:21 +00002127 * The only time when buffers full of inodes are fully recovered is when the
2128 * buffer is full of newly allocated inodes. In this case the buffer will
2129 * not be marked as an inode buffer and so will be sent to
2130 * xlog_recover_do_reg_buffer() below during recovery.
Linus Torvalds1da177e2005-04-16 15:20:36 -07002131 */
2132STATIC int
2133xlog_recover_do_inode_buffer(
Christoph Hellwige2714bf2010-12-01 22:06:21 +00002134 struct xfs_mount *mp,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002135 xlog_recover_item_t *item,
Christoph Hellwige2714bf2010-12-01 22:06:21 +00002136 struct xfs_buf *bp,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002137 xfs_buf_log_format_t *buf_f)
2138{
2139 int i;
Christoph Hellwige2714bf2010-12-01 22:06:21 +00002140 int item_index = 0;
2141 int bit = 0;
2142 int nbits = 0;
2143 int reg_buf_offset = 0;
2144 int reg_buf_bytes = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002145 int next_unlinked_offset;
2146 int inodes_per_buf;
2147 xfs_agino_t *logged_nextp;
2148 xfs_agino_t *buffer_nextp;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002149
Dave Chinner9abbc532010-04-13 15:06:46 +10002150 trace_xfs_log_recover_buf_inode_buf(mp->m_log, buf_f);
Dave Chinner9222a9c2013-06-12 12:19:06 +10002151
2152 /*
2153 * Post recovery validation only works properly on CRC enabled
2154 * filesystems.
2155 */
2156 if (xfs_sb_version_hascrc(&mp->m_sb))
2157 bp->b_ops = &xfs_inode_buf_ops;
Dave Chinner9abbc532010-04-13 15:06:46 +10002158
Dave Chinneraa0e8832012-04-23 15:58:52 +10002159 inodes_per_buf = BBTOB(bp->b_io_length) >> mp->m_sb.sb_inodelog;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002160 for (i = 0; i < inodes_per_buf; i++) {
2161 next_unlinked_offset = (i * mp->m_sb.sb_inodesize) +
2162 offsetof(xfs_dinode_t, di_next_unlinked);
2163
2164 while (next_unlinked_offset >=
2165 (reg_buf_offset + reg_buf_bytes)) {
2166 /*
2167 * The next di_next_unlinked field is beyond
2168 * the current logged region. Find the next
2169 * logged region that contains or is beyond
2170 * the current di_next_unlinked field.
2171 */
2172 bit += nbits;
Christoph Hellwige2714bf2010-12-01 22:06:21 +00002173 bit = xfs_next_bit(buf_f->blf_data_map,
2174 buf_f->blf_map_size, bit);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002175
2176 /*
2177 * If there are no more logged regions in the
2178 * buffer, then we're done.
2179 */
Christoph Hellwige2714bf2010-12-01 22:06:21 +00002180 if (bit == -1)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002181 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002182
Christoph Hellwige2714bf2010-12-01 22:06:21 +00002183 nbits = xfs_contig_bits(buf_f->blf_data_map,
2184 buf_f->blf_map_size, bit);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002185 ASSERT(nbits > 0);
Dave Chinnerc1155412010-05-07 11:05:19 +10002186 reg_buf_offset = bit << XFS_BLF_SHIFT;
2187 reg_buf_bytes = nbits << XFS_BLF_SHIFT;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002188 item_index++;
2189 }
2190
2191 /*
2192 * If the current logged region starts after the current
2193 * di_next_unlinked field, then move on to the next
2194 * di_next_unlinked field.
2195 */
Christoph Hellwige2714bf2010-12-01 22:06:21 +00002196 if (next_unlinked_offset < reg_buf_offset)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002197 continue;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002198
2199 ASSERT(item->ri_buf[item_index].i_addr != NULL);
Dave Chinnerc1155412010-05-07 11:05:19 +10002200 ASSERT((item->ri_buf[item_index].i_len % XFS_BLF_CHUNK) == 0);
Dave Chinneraa0e8832012-04-23 15:58:52 +10002201 ASSERT((reg_buf_offset + reg_buf_bytes) <=
2202 BBTOB(bp->b_io_length));
Linus Torvalds1da177e2005-04-16 15:20:36 -07002203
2204 /*
2205 * The current logged region contains a copy of the
2206 * current di_next_unlinked field. Extract its value
2207 * and copy it to the buffer copy.
2208 */
Christoph Hellwig4e0d5f92010-06-23 18:11:15 +10002209 logged_nextp = item->ri_buf[item_index].i_addr +
2210 next_unlinked_offset - reg_buf_offset;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002211 if (unlikely(*logged_nextp == 0)) {
Dave Chinnera0fa2b62011-03-07 10:01:35 +11002212 xfs_alert(mp,
Darrick J. Wongc9690042018-01-09 12:02:55 -08002213 "Bad inode buffer log record (ptr = "PTR_FMT", bp = "PTR_FMT"). "
Dave Chinnera0fa2b62011-03-07 10:01:35 +11002214 "Trying to replay bad (0) inode di_next_unlinked field.",
Linus Torvalds1da177e2005-04-16 15:20:36 -07002215 item, bp);
2216 XFS_ERROR_REPORT("xlog_recover_do_inode_buf",
2217 XFS_ERRLEVEL_LOW, mp);
Dave Chinner24513372014-06-25 14:58:08 +10002218 return -EFSCORRUPTED;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002219 }
2220
Christoph Hellwig88ee2df2015-06-22 09:44:29 +10002221 buffer_nextp = xfs_buf_offset(bp, next_unlinked_offset);
Tim Shimmin87c199c2006-06-09 14:56:16 +10002222 *buffer_nextp = *logged_nextp;
Dave Chinner0a32c262013-06-05 12:09:08 +10002223
2224 /*
2225 * If necessary, recalculate the CRC in the on-disk inode. We
2226 * have to leave the inode in a consistent state for whoever
2227 * reads it next....
2228 */
Christoph Hellwig88ee2df2015-06-22 09:44:29 +10002229 xfs_dinode_calc_crc(mp,
Dave Chinner0a32c262013-06-05 12:09:08 +10002230 xfs_buf_offset(bp, i * mp->m_sb.sb_inodesize));
2231
Linus Torvalds1da177e2005-04-16 15:20:36 -07002232 }
2233
2234 return 0;
2235}
2236
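/*
 * Editor's note on the region arithmetic in xlog_recover_do_inode_buffer()
 * above: each bit in blf_data_map covers one XFS_BLF_CHUNK (128 bytes,
 * hence the shift by XFS_BLF_SHIFT == 7). For example, a run starting at
 * bit 3 with nbits = 2 describes the logged region at byte offset
 * 3 << 7 == 384 with length 2 << 7 == 256. The constants are the usual
 * values from xfs_buf_item.h, quoted here for illustration.
 */
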
2237/*
Dave Chinner50d5c8d2013-08-28 21:22:47 +10002238 * V5 filesystems know the age of the buffer on disk being recovered. We can
2239 * have newer objects on disk than we are replaying, and so for these cases we
2240 * don't want to replay the current change as that will make the buffer contents
2241 * temporarily invalid on disk.
2242 *
2243 * The magic number might not match the buffer type we are going to recover
2244 * (e.g. reallocated blocks), so we ignore the xfs_buf_log_format flags. Hence
2245	 * extract the LSN of the existing object in the buffer based on its current
2246	 * magic number. If we don't recognise the magic number in the buffer, then
2247	 * return an LSN of -1 so that the caller knows it was an unrecognised block and
2248 * so can recover the buffer.
Dave Chinner566055d2013-09-24 16:01:16 +10002249 *
2250 * Note: we cannot rely solely on magic number matches to determine that the
2251 * buffer has a valid LSN - we also need to verify that it belongs to this
2252 * filesystem, so we need to extract the object's LSN and compare it to that
2253 * which we read from the superblock. If the UUIDs don't match, then we've got a
2254 * stale metadata block from an old filesystem instance that we need to recover
2255 * over the top of.
Dave Chinner50d5c8d2013-08-28 21:22:47 +10002256 */
2257static xfs_lsn_t
2258xlog_recover_get_buf_lsn(
2259 struct xfs_mount *mp,
2260 struct xfs_buf *bp)
2261{
Darrick J. Wongc8ce5402017-06-16 11:00:05 -07002262 uint32_t magic32;
2263 uint16_t magic16;
2264 uint16_t magicda;
Dave Chinner50d5c8d2013-08-28 21:22:47 +10002265 void *blk = bp->b_addr;
Dave Chinner566055d2013-09-24 16:01:16 +10002266 uuid_t *uuid;
2267 xfs_lsn_t lsn = -1;
Dave Chinner50d5c8d2013-08-28 21:22:47 +10002268
2269 /* v4 filesystems always recover immediately */
2270 if (!xfs_sb_version_hascrc(&mp->m_sb))
2271 goto recover_immediately;
2272
2273 magic32 = be32_to_cpu(*(__be32 *)blk);
2274 switch (magic32) {
2275 case XFS_ABTB_CRC_MAGIC:
2276 case XFS_ABTC_CRC_MAGIC:
2277 case XFS_ABTB_MAGIC:
2278 case XFS_ABTC_MAGIC:
Darrick J. Wonga650e8f2016-08-03 12:17:11 +10002279 case XFS_RMAP_CRC_MAGIC:
Darrick J. Wonga90c00f2016-10-03 09:11:23 -07002280 case XFS_REFC_CRC_MAGIC:
Dave Chinner50d5c8d2013-08-28 21:22:47 +10002281 case XFS_IBT_CRC_MAGIC:
Dave Chinner566055d2013-09-24 16:01:16 +10002282 case XFS_IBT_MAGIC: {
2283 struct xfs_btree_block *btb = blk;
2284
2285 lsn = be64_to_cpu(btb->bb_u.s.bb_lsn);
2286 uuid = &btb->bb_u.s.bb_uuid;
2287 break;
2288 }
Dave Chinner50d5c8d2013-08-28 21:22:47 +10002289 case XFS_BMAP_CRC_MAGIC:
Dave Chinner566055d2013-09-24 16:01:16 +10002290 case XFS_BMAP_MAGIC: {
2291 struct xfs_btree_block *btb = blk;
2292
2293 lsn = be64_to_cpu(btb->bb_u.l.bb_lsn);
2294 uuid = &btb->bb_u.l.bb_uuid;
2295 break;
2296 }
Dave Chinner50d5c8d2013-08-28 21:22:47 +10002297 case XFS_AGF_MAGIC:
Dave Chinner566055d2013-09-24 16:01:16 +10002298 lsn = be64_to_cpu(((struct xfs_agf *)blk)->agf_lsn);
2299 uuid = &((struct xfs_agf *)blk)->agf_uuid;
2300 break;
Dave Chinner50d5c8d2013-08-28 21:22:47 +10002301 case XFS_AGFL_MAGIC:
Dave Chinner566055d2013-09-24 16:01:16 +10002302 lsn = be64_to_cpu(((struct xfs_agfl *)blk)->agfl_lsn);
2303 uuid = &((struct xfs_agfl *)blk)->agfl_uuid;
2304 break;
Dave Chinner50d5c8d2013-08-28 21:22:47 +10002305 case XFS_AGI_MAGIC:
Dave Chinner566055d2013-09-24 16:01:16 +10002306 lsn = be64_to_cpu(((struct xfs_agi *)blk)->agi_lsn);
2307 uuid = &((struct xfs_agi *)blk)->agi_uuid;
2308 break;
Dave Chinner50d5c8d2013-08-28 21:22:47 +10002309 case XFS_SYMLINK_MAGIC:
Dave Chinner566055d2013-09-24 16:01:16 +10002310 lsn = be64_to_cpu(((struct xfs_dsymlink_hdr *)blk)->sl_lsn);
2311 uuid = &((struct xfs_dsymlink_hdr *)blk)->sl_uuid;
2312 break;
Dave Chinner50d5c8d2013-08-28 21:22:47 +10002313 case XFS_DIR3_BLOCK_MAGIC:
2314 case XFS_DIR3_DATA_MAGIC:
2315 case XFS_DIR3_FREE_MAGIC:
Dave Chinner566055d2013-09-24 16:01:16 +10002316 lsn = be64_to_cpu(((struct xfs_dir3_blk_hdr *)blk)->lsn);
2317 uuid = &((struct xfs_dir3_blk_hdr *)blk)->uuid;
2318 break;
Dave Chinner50d5c8d2013-08-28 21:22:47 +10002319 case XFS_ATTR3_RMT_MAGIC:
Dave Chinnere3c32ee2015-07-29 11:48:01 +10002320 /*
2321 * Remote attr blocks are written synchronously, rather than
2322 * being logged. That means they do not contain a valid LSN
2323 * (i.e. transactionally ordered) in them, and hence any time we
2324 * see a buffer to replay over the top of a remote attribute
2325 * block we should simply do so.
2326 */
2327 goto recover_immediately;
Dave Chinner50d5c8d2013-08-28 21:22:47 +10002328 case XFS_SB_MAGIC:
Dave Chinnerfcfbe2c2015-08-19 10:31:54 +10002329 /*
2330	 * Superblock uuids are magic. We may or may not have a
2331 * sb_meta_uuid on disk, but it will be set in the in-core
2332 * superblock. We set the uuid pointer for verification
2333 * according to the superblock feature mask to ensure we check
2334 * the relevant UUID in the superblock.
2335 */
Dave Chinner566055d2013-09-24 16:01:16 +10002336 lsn = be64_to_cpu(((struct xfs_dsb *)blk)->sb_lsn);
Dave Chinnerfcfbe2c2015-08-19 10:31:54 +10002337 if (xfs_sb_version_hasmetauuid(&mp->m_sb))
2338 uuid = &((struct xfs_dsb *)blk)->sb_meta_uuid;
2339 else
2340 uuid = &((struct xfs_dsb *)blk)->sb_uuid;
Dave Chinner566055d2013-09-24 16:01:16 +10002341 break;
Dave Chinner50d5c8d2013-08-28 21:22:47 +10002342 default:
2343 break;
2344 }
2345
Dave Chinner566055d2013-09-24 16:01:16 +10002346 if (lsn != (xfs_lsn_t)-1) {
Dave Chinnerfcfbe2c2015-08-19 10:31:54 +10002347 if (!uuid_equal(&mp->m_sb.sb_meta_uuid, uuid))
Dave Chinner566055d2013-09-24 16:01:16 +10002348 goto recover_immediately;
2349 return lsn;
2350 }
2351
Dave Chinner50d5c8d2013-08-28 21:22:47 +10002352 magicda = be16_to_cpu(((struct xfs_da_blkinfo *)blk)->magic);
2353 switch (magicda) {
2354 case XFS_DIR3_LEAF1_MAGIC:
2355 case XFS_DIR3_LEAFN_MAGIC:
2356 case XFS_DA3_NODE_MAGIC:
Dave Chinner566055d2013-09-24 16:01:16 +10002357 lsn = be64_to_cpu(((struct xfs_da3_blkinfo *)blk)->lsn);
2358 uuid = &((struct xfs_da3_blkinfo *)blk)->uuid;
2359 break;
Dave Chinner50d5c8d2013-08-28 21:22:47 +10002360 default:
2361 break;
2362 }
2363
Dave Chinner566055d2013-09-24 16:01:16 +10002364 if (lsn != (xfs_lsn_t)-1) {
2365 if (!uuid_equal(&mp->m_sb.sb_uuid, uuid))
2366 goto recover_immediately;
2367 return lsn;
2368 }
2369
Dave Chinner50d5c8d2013-08-28 21:22:47 +10002370 /*
2371 * We do individual object checks on dquot and inode buffers as they
2372 * have their own individual LSN records. Also, we could have a stale
2373 * buffer here, so we have to at least recognise these buffer types.
2374 *
2375	 * A noted complexity here is inode unlinked list processing - it logs
2376 * the inode directly in the buffer, but we don't know which inodes have
2377 * been modified, and there is no global buffer LSN. Hence we need to
2378 * recover all inode buffer types immediately. This problem will be
2379 * fixed by logical logging of the unlinked list modifications.
2380 */
2381 magic16 = be16_to_cpu(*(__be16 *)blk);
2382 switch (magic16) {
2383 case XFS_DQUOT_MAGIC:
2384 case XFS_DINODE_MAGIC:
2385 goto recover_immediately;
2386 default:
2387 break;
2388 }
2389
2390 /* unknown buffer contents, recover immediately */
2391
2392recover_immediately:
2393 return (xfs_lsn_t)-1;
2394
2395}
2396
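/*
 * Editor's sketch of how the LSN from xlog_recover_get_buf_lsn() is
 * meant to be consumed (hypothetical condensed form of the pass 2
 * check; the in-tree code also guards against a zero LSN): only replay
 * the logged change when it is strictly newer than the object already
 * on disk.
 */
static bool
xlog_example_should_replay(
	xfs_lsn_t	disk_lsn,
	xfs_lsn_t	current_lsn)
{
	if (disk_lsn == (xfs_lsn_t)-1)
		return true;	/* unrecognised or v4 block: recover */
	return XFS_LSN_CMP(disk_lsn, current_lsn) < 0;
}
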
2397/*
Dave Chinnerd75afeb2013-04-03 16:11:29 +11002398 * Validate the recovered buffer is of the correct type and attach the
2399 * appropriate buffer operations to them for writeback. Magic numbers are in a
2400 * few places:
2401 * the first 16 bits of the buffer (inode buffer, dquot buffer),
2402 * the first 32 bits of the buffer (most blocks),
2403 * inside a struct xfs_da_blkinfo at the start of the buffer.
2404 */
2405static void
Dave Chinner50d5c8d2013-08-28 21:22:47 +10002406xlog_recover_validate_buf_type(
Dave Chinnerd75afeb2013-04-03 16:11:29 +11002407 struct xfs_mount *mp,
2408 struct xfs_buf *bp,
Brian Foster22db9af2016-09-26 08:32:07 +10002409 xfs_buf_log_format_t *buf_f,
2410 xfs_lsn_t current_lsn)
Dave Chinnerd75afeb2013-04-03 16:11:29 +11002411{
2412 struct xfs_da_blkinfo *info = bp->b_addr;
Darrick J. Wongc8ce5402017-06-16 11:00:05 -07002413 uint32_t magic32;
2414 uint16_t magic16;
2415 uint16_t magicda;
Brian Foster040c52c2016-09-26 08:32:50 +10002416 char *warnmsg = NULL;
Dave Chinnerd75afeb2013-04-03 16:11:29 +11002417
Dave Chinner67dc2882014-08-04 12:43:06 +10002418 /*
2419 * We can only do post recovery validation on items on CRC enabled
2420	 * filesystems as we need to know when the buffer was written to be able
2421 * to determine if we should have replayed the item. If we replay old
2422 * metadata over a newer buffer, then it will enter a temporarily
2423 * inconsistent state resulting in verification failures. Hence for now
2424 * just avoid the verification stage for non-crc filesystems
2425 */
2426 if (!xfs_sb_version_hascrc(&mp->m_sb))
2427 return;
2428
Dave Chinnerd75afeb2013-04-03 16:11:29 +11002429 magic32 = be32_to_cpu(*(__be32 *)bp->b_addr);
2430 magic16 = be16_to_cpu(*(__be16*)bp->b_addr);
2431 magicda = be16_to_cpu(info->magic);
Dave Chinner61fe1352013-04-03 16:11:30 +11002432 switch (xfs_blft_from_flags(buf_f)) {
2433 case XFS_BLFT_BTREE_BUF:
Dave Chinnerd75afeb2013-04-03 16:11:29 +11002434 switch (magic32) {
2435 case XFS_ABTB_CRC_MAGIC:
Dave Chinnerd75afeb2013-04-03 16:11:29 +11002436 case XFS_ABTB_MAGIC:
Brian Foster27df4f52019-02-07 10:45:47 -08002437 bp->b_ops = &xfs_bnobt_buf_ops;
2438 break;
2439 case XFS_ABTC_CRC_MAGIC:
Dave Chinnerd75afeb2013-04-03 16:11:29 +11002440 case XFS_ABTC_MAGIC:
Brian Foster27df4f52019-02-07 10:45:47 -08002441 bp->b_ops = &xfs_cntbt_buf_ops;
Dave Chinnerd75afeb2013-04-03 16:11:29 +11002442 break;
2443 case XFS_IBT_CRC_MAGIC:
2444 case XFS_IBT_MAGIC:
2445 bp->b_ops = &xfs_inobt_buf_ops;
2446 break;
Brian Foster01e68f42019-02-07 10:45:46 -08002447 case XFS_FIBT_CRC_MAGIC:
2448 case XFS_FIBT_MAGIC:
2449 bp->b_ops = &xfs_finobt_buf_ops;
2450 break;
Dave Chinnerd75afeb2013-04-03 16:11:29 +11002451 case XFS_BMAP_CRC_MAGIC:
2452 case XFS_BMAP_MAGIC:
2453 bp->b_ops = &xfs_bmbt_buf_ops;
2454 break;
Darrick J. Wonga650e8f2016-08-03 12:17:11 +10002455 case XFS_RMAP_CRC_MAGIC:
2456 bp->b_ops = &xfs_rmapbt_buf_ops;
2457 break;
Darrick J. Wonga90c00f2016-10-03 09:11:23 -07002458 case XFS_REFC_CRC_MAGIC:
2459 bp->b_ops = &xfs_refcountbt_buf_ops;
2460 break;
Dave Chinnerd75afeb2013-04-03 16:11:29 +11002461 default:
Brian Foster040c52c2016-09-26 08:32:50 +10002462 warnmsg = "Bad btree block magic!";
Dave Chinnerd75afeb2013-04-03 16:11:29 +11002463 break;
2464 }
2465 break;
Dave Chinner61fe1352013-04-03 16:11:30 +11002466 case XFS_BLFT_AGF_BUF:
Dave Chinnerd75afeb2013-04-03 16:11:29 +11002467 if (magic32 != XFS_AGF_MAGIC) {
Brian Foster040c52c2016-09-26 08:32:50 +10002468 warnmsg = "Bad AGF block magic!";
Dave Chinnerd75afeb2013-04-03 16:11:29 +11002469 break;
2470 }
2471 bp->b_ops = &xfs_agf_buf_ops;
2472 break;
Dave Chinner61fe1352013-04-03 16:11:30 +11002473 case XFS_BLFT_AGFL_BUF:
Dave Chinnerd75afeb2013-04-03 16:11:29 +11002474 if (magic32 != XFS_AGFL_MAGIC) {
Brian Foster040c52c2016-09-26 08:32:50 +10002475 warnmsg = "Bad AGFL block magic!";
Dave Chinnerd75afeb2013-04-03 16:11:29 +11002476 break;
2477 }
2478 bp->b_ops = &xfs_agfl_buf_ops;
2479 break;
Dave Chinner61fe1352013-04-03 16:11:30 +11002480 case XFS_BLFT_AGI_BUF:
Dave Chinnerd75afeb2013-04-03 16:11:29 +11002481 if (magic32 != XFS_AGI_MAGIC) {
Brian Foster040c52c2016-09-26 08:32:50 +10002482 warnmsg = "Bad AGI block magic!";
Dave Chinnerd75afeb2013-04-03 16:11:29 +11002483 break;
2484 }
2485 bp->b_ops = &xfs_agi_buf_ops;
2486 break;
Dave Chinner61fe1352013-04-03 16:11:30 +11002487 case XFS_BLFT_UDQUOT_BUF:
2488 case XFS_BLFT_PDQUOT_BUF:
2489 case XFS_BLFT_GDQUOT_BUF:
Dave Chinner123887e2013-04-30 21:39:33 +10002490#ifdef CONFIG_XFS_QUOTA
Dave Chinnerd75afeb2013-04-03 16:11:29 +11002491 if (magic16 != XFS_DQUOT_MAGIC) {
			warnmsg = "Bad DQUOT block magic!";
			break;
		}
		bp->b_ops = &xfs_dquot_buf_ops;
#else
		xfs_alert(mp,
	"Trying to recover dquots without QUOTA support built in!");
		ASSERT(0);
#endif
		break;
	case XFS_BLFT_DINO_BUF:
		if (magic16 != XFS_DINODE_MAGIC) {
			warnmsg = "Bad INODE block magic!";
			break;
		}
		bp->b_ops = &xfs_inode_buf_ops;
		break;
	case XFS_BLFT_SYMLINK_BUF:
		if (magic32 != XFS_SYMLINK_MAGIC) {
			warnmsg = "Bad symlink block magic!";
			break;
		}
		bp->b_ops = &xfs_symlink_buf_ops;
		break;
	case XFS_BLFT_DIR_BLOCK_BUF:
		if (magic32 != XFS_DIR2_BLOCK_MAGIC &&
		    magic32 != XFS_DIR3_BLOCK_MAGIC) {
			warnmsg = "Bad dir block magic!";
			break;
		}
		bp->b_ops = &xfs_dir3_block_buf_ops;
		break;
	case XFS_BLFT_DIR_DATA_BUF:
		if (magic32 != XFS_DIR2_DATA_MAGIC &&
		    magic32 != XFS_DIR3_DATA_MAGIC) {
			warnmsg = "Bad dir data magic!";
			break;
		}
		bp->b_ops = &xfs_dir3_data_buf_ops;
		break;
	case XFS_BLFT_DIR_FREE_BUF:
		if (magic32 != XFS_DIR2_FREE_MAGIC &&
		    magic32 != XFS_DIR3_FREE_MAGIC) {
			warnmsg = "Bad dir3 free magic!";
			break;
		}
		bp->b_ops = &xfs_dir3_free_buf_ops;
		break;
	case XFS_BLFT_DIR_LEAF1_BUF:
		if (magicda != XFS_DIR2_LEAF1_MAGIC &&
		    magicda != XFS_DIR3_LEAF1_MAGIC) {
			warnmsg = "Bad dir leaf1 magic!";
			break;
		}
		bp->b_ops = &xfs_dir3_leaf1_buf_ops;
		break;
	case XFS_BLFT_DIR_LEAFN_BUF:
		if (magicda != XFS_DIR2_LEAFN_MAGIC &&
		    magicda != XFS_DIR3_LEAFN_MAGIC) {
			warnmsg = "Bad dir leafn magic!";
			break;
		}
		bp->b_ops = &xfs_dir3_leafn_buf_ops;
		break;
	case XFS_BLFT_DA_NODE_BUF:
		if (magicda != XFS_DA_NODE_MAGIC &&
		    magicda != XFS_DA3_NODE_MAGIC) {
			warnmsg = "Bad da node magic!";
			break;
		}
		bp->b_ops = &xfs_da3_node_buf_ops;
		break;
	case XFS_BLFT_ATTR_LEAF_BUF:
		if (magicda != XFS_ATTR_LEAF_MAGIC &&
		    magicda != XFS_ATTR3_LEAF_MAGIC) {
			warnmsg = "Bad attr leaf magic!";
			break;
		}
		bp->b_ops = &xfs_attr3_leaf_buf_ops;
		break;
	case XFS_BLFT_ATTR_RMT_BUF:
		if (magic32 != XFS_ATTR3_RMT_MAGIC) {
			warnmsg = "Bad attr remote magic!";
			break;
		}
		bp->b_ops = &xfs_attr3_rmt_buf_ops;
		break;
	case XFS_BLFT_SB_BUF:
		if (magic32 != XFS_SB_MAGIC) {
			warnmsg = "Bad SB block magic!";
			break;
		}
		bp->b_ops = &xfs_sb_buf_ops;
		break;
#ifdef CONFIG_XFS_RT
	case XFS_BLFT_RTBITMAP_BUF:
	case XFS_BLFT_RTSUMMARY_BUF:
		/* no magic numbers for verification of RT buffers */
		bp->b_ops = &xfs_rtbuf_ops;
		break;
#endif /* CONFIG_XFS_RT */
	default:
		xfs_warn(mp, "Unknown buffer type %d!",
			 xfs_blft_from_flags(buf_f));
		break;
	}

	/*
	 * Nothing else to do in the case of a NULL current LSN as this means
	 * the buffer is more recent than the change in the log and will be
	 * skipped.
	 */
	if (current_lsn == NULLCOMMITLSN)
		return;

	if (warnmsg) {
		xfs_warn(mp, warnmsg);
		ASSERT(0);
	}

	/*
	 * We must update the metadata LSN of the buffer as it is written out to
	 * ensure that older transactions never replay over this one and corrupt
	 * the buffer. This can occur if log recovery is interrupted at some
	 * point after the current transaction completes, at which point a
	 * subsequent mount starts recovery from the beginning.
	 *
	 * Write verifiers update the metadata LSN from log items attached to
	 * the buffer. Therefore, initialize a bli purely to carry the LSN to
	 * the verifier. We'll clean it up in our ->iodone() callback.
	 */
	if (bp->b_ops) {
		struct xfs_buf_log_item *bip;

		ASSERT(!bp->b_iodone || bp->b_iodone == xlog_recover_iodone);
		bp->b_iodone = xlog_recover_iodone;
		xfs_buf_item_init(bp, mp);
		bip = bp->b_log_item;
		bip->bli_item.li_lsn = current_lsn;
	}
}

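/*
 * Illustrative sketch only (not kernel code): the LSN stamping done above
 * reduces to a simple rule - every metadata block carries the LSN of the
 * last change written into it, and replay skips any logged change that is
 * not strictly newer than that stamp. A stand-alone model of the rule,
 * using hypothetical names:
 *
 *	#include <stdint.h>
 *
 *	typedef int64_t model_lsn_t;		// stands in for xfs_lsn_t
 *
 *	struct model_block {
 *		model_lsn_t	stamped_lsn;	// set by the write verifier
 *	};
 *
 *	static int model_should_replay(const struct model_block *blk,
 *				       model_lsn_t tx_lsn)
 *	{
 *		// equal LSNs mean the change is already on disk
 *		return tx_lsn > blk->stamped_lsn;
 *	}
 *
 *	static void model_write(struct model_block *blk, model_lsn_t tx_lsn)
 *	{
 *		blk->stamped_lsn = tx_lsn;	// what li_lsn feeds the verifier
 *	}
 *
 * The real code compares with XFS_LSN_CMP() on a packed cycle/block pair
 * rather than a plain integer compare (see the sketch further below).
 */
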
/*
 * Perform a 'normal' buffer recovery.  Each logged region of the
 * buffer should be copied over the corresponding region in the
 * given buffer.  The bitmap in the buf log format structure indicates
 * where to place the logged data.
 */
STATIC void
xlog_recover_do_reg_buffer(
	struct xfs_mount	*mp,
	xlog_recover_item_t	*item,
	struct xfs_buf		*bp,
	xfs_buf_log_format_t	*buf_f,
	xfs_lsn_t		current_lsn)
{
	int			i;
	int			bit;
	int			nbits;
	xfs_failaddr_t		fa;

	trace_xfs_log_recover_buf_reg_buf(mp->m_log, buf_f);

	bit = 0;
	i = 1;  /* 0 is the buf format structure */
	while (1) {
		bit = xfs_next_bit(buf_f->blf_data_map,
				   buf_f->blf_map_size, bit);
		if (bit == -1)
			break;
		nbits = xfs_contig_bits(buf_f->blf_data_map,
					buf_f->blf_map_size, bit);
		ASSERT(nbits > 0);
		ASSERT(item->ri_buf[i].i_addr != NULL);
		ASSERT(item->ri_buf[i].i_len % XFS_BLF_CHUNK == 0);
		ASSERT(BBTOB(bp->b_io_length) >=
		       ((uint)bit << XFS_BLF_SHIFT) + (nbits << XFS_BLF_SHIFT));

		/*
		 * The dirty regions logged in the buffer, even though
		 * contiguous, may span multiple chunks. This is because the
		 * dirty region may span a physical page boundary in a buffer
		 * and hence be split into two separate vectors for writing into
		 * the log. Hence we need to trim nbits back to the length of
		 * the current region being copied out of the log.
		 */
		if (item->ri_buf[i].i_len < (nbits << XFS_BLF_SHIFT))
			nbits = item->ri_buf[i].i_len >> XFS_BLF_SHIFT;

		/*
		 * Do a sanity check if this is a dquot buffer. Just checking
		 * the first dquot in the buffer should do. XXX: this is
		 * probably a good thing to do for other buf types also.
		 */
		fa = NULL;
		if (buf_f->blf_flags &
		   (XFS_BLF_UDQUOT_BUF|XFS_BLF_PDQUOT_BUF|XFS_BLF_GDQUOT_BUF)) {
			if (item->ri_buf[i].i_addr == NULL) {
				xfs_alert(mp,
					"XFS: NULL dquot in %s.", __func__);
				goto next;
			}
			if (item->ri_buf[i].i_len < sizeof(xfs_disk_dquot_t)) {
				xfs_alert(mp,
					"XFS: dquot too small (%d) in %s.",
					item->ri_buf[i].i_len, __func__);
				goto next;
			}
			fa = xfs_dquot_verify(mp, item->ri_buf[i].i_addr,
					      -1, 0);
			if (fa) {
				xfs_alert(mp,
	"dquot corrupt at %pS trying to replay into block 0x%llx",
					fa, bp->b_bn);
				goto next;
			}
		}

		memcpy(xfs_buf_offset(bp,
			(uint)bit << XFS_BLF_SHIFT),	/* dest */
			item->ri_buf[i].i_addr,		/* source */
			nbits<<XFS_BLF_SHIFT);		/* length */
 next:
		i++;
		bit += nbits;
	}

	/* Shouldn't be any more regions */
	ASSERT(i == item->ri_total);

	xlog_recover_validate_buf_type(mp, bp, buf_f, current_lsn);
}

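/*
 * Illustrative sketch only (not kernel code): the region walk above treats
 * each set bit in blf_data_map as one dirty XFS_BLF_CHUNK (128 byte)
 * region, with each log vector holding one contiguous run of chunks. A
 * stand-alone version of the same walk over a single 64-bit map word, with
 * hypothetical names:
 *
 *	#include <string.h>
 *
 *	#define CHUNK_SHIFT	7		// 1 << 7 == 128 byte chunks
 *
 *	static void replay_regions(unsigned long long map, const char **vecs,
 *				   const int *vlen, char *buf)
 *	{
 *		int v = 0, bit = 0;
 *
 *		for (;;) {
 *			int nbits = 0;
 *
 *			while (bit < 64 && !((map >> bit) & 1))
 *				bit++;			// xfs_next_bit()
 *			if (bit == 64)
 *				break;
 *			while (bit + nbits < 64 && ((map >> (bit + nbits)) & 1))
 *				nbits++;		// xfs_contig_bits()
 *			// trim to the vector length, as above, for regions
 *			// that were split across two log vectors
 *			if (vlen[v] < (nbits << CHUNK_SHIFT))
 *				nbits = vlen[v] >> CHUNK_SHIFT;
 *			memcpy(buf + ((size_t)bit << CHUNK_SHIFT), vecs[v],
 *			       (size_t)nbits << CHUNK_SHIFT);
 *			v++;
 *			bit += nbits;
 *		}
 *	}
 */
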
/*
 * Perform a dquot buffer recovery.
 * Simple algorithm: if we have found a QUOTAOFF log item of the same type
 * (i.e. USR or GRP), then just toss this buffer away; don't recover it.
 * Else, treat it as a regular buffer and do recovery.
 *
 * Return false if the buffer was tossed and true if we recovered the buffer
 * to indicate to the caller whether the buffer needs writing.
 */
STATIC bool
xlog_recover_do_dquot_buffer(
	struct xfs_mount		*mp,
	struct xlog			*log,
	struct xlog_recover_item	*item,
	struct xfs_buf			*bp,
	struct xfs_buf_log_format	*buf_f)
{
	uint			type;

	trace_xfs_log_recover_buf_dquot_buf(log, buf_f);

	/*
	 * Filesystems are required to send in quota flags at mount time.
	 */
	if (!mp->m_qflags)
		return false;

	type = 0;
	if (buf_f->blf_flags & XFS_BLF_UDQUOT_BUF)
		type |= XFS_DQ_USER;
	if (buf_f->blf_flags & XFS_BLF_PDQUOT_BUF)
		type |= XFS_DQ_PROJ;
	if (buf_f->blf_flags & XFS_BLF_GDQUOT_BUF)
		type |= XFS_DQ_GROUP;
	/*
	 * This type of quotas was turned off, so ignore this buffer
	 */
	if (log->l_quotaoffs_flag & type)
		return false;

	xlog_recover_do_reg_buffer(mp, item, bp, buf_f, NULLCOMMITLSN);
	return true;
}

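/*
 * Illustrative sketch only (not kernel code): the quotaoff filtering above
 * is a plain bitmask intersection. A stand-alone model with hypothetical
 * flag values:
 *
 *	#define DQ_USER		0x1
 *	#define DQ_PROJ		0x2
 *	#define DQ_GROUP	0x4
 *
 *	// quotaoffs_mask accumulates the DQ_* bits recorded from QUOTAOFF
 *	// log items during pass 1; a dquot buffer of a given type is only
 *	// worth recovering in pass 2 if its type bits miss that mask.
 *	static int should_recover_dquot(unsigned int quotaoffs_mask,
 *					unsigned int dquot_type)
 *	{
 *		return (quotaoffs_mask & dquot_type) == 0;
 *	}
 */
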
/*
 * This routine replays a modification made to a buffer at runtime.
 * There are actually two types of buffer, regular and inode, which
 * are handled differently. Inode buffers are special in that we only
 * recover a specific set of data from them, namely the inode
 * di_next_unlinked fields. This is because all other inode data is
 * actually logged via inode records and any data we replay here which
 * overlaps that may be stale.
 *
 * When meta-data buffers are freed at run time we log a buffer item
 * with the XFS_BLF_CANCEL bit set to indicate that previous copies
 * of the buffer in the log should not be replayed at recovery time.
 * This is so that if the blocks covered by the buffer are reused for
 * file data before we crash we don't end up replaying old, freed
 * meta-data into a user's file.
 *
 * To handle the cancellation of buffer log items, we make two passes
 * over the log during recovery.  During the first we build a table of
 * those buffers which have been cancelled, and during the second we
 * only replay those buffers which do not have corresponding cancel
 * records in the table.  See xlog_recover_buffer_pass[1,2] above
 * for more details on the implementation of the table of cancel records.
 */
STATIC int
xlog_recover_buffer_pass2(
	struct xlog			*log,
	struct list_head		*buffer_list,
	struct xlog_recover_item	*item,
	xfs_lsn_t			current_lsn)
{
	xfs_buf_log_format_t	*buf_f = item->ri_buf[0].i_addr;
	xfs_mount_t		*mp = log->l_mp;
	xfs_buf_t		*bp;
	int			error;
	uint			buf_flags;
	xfs_lsn_t		lsn;

	/*
	 * In this pass we only want to recover all the buffers which have
	 * not been cancelled and are not cancellation buffers themselves.
	 */
	if (xlog_check_buffer_cancelled(log, buf_f->blf_blkno,
			buf_f->blf_len, buf_f->blf_flags)) {
		trace_xfs_log_recover_buf_cancel(log, buf_f);
		return 0;
	}

	trace_xfs_log_recover_buf_recover(log, buf_f);

	buf_flags = 0;
	if (buf_f->blf_flags & XFS_BLF_INODE_BUF)
		buf_flags |= XBF_UNMAPPED;

	bp = xfs_buf_read(mp->m_ddev_targp, buf_f->blf_blkno, buf_f->blf_len,
			  buf_flags, NULL);
	if (!bp)
		return -ENOMEM;
	error = bp->b_error;
	if (error) {
		xfs_buf_ioerror_alert(bp, "xlog_recover_do..(read#1)");
		goto out_release;
	}

	/*
	 * Recover the buffer only if we get an LSN from it and it's less than
	 * the lsn of the transaction we are replaying.
	 *
	 * Note that we have to be extremely careful of readahead here.
	 * Readahead does not attach verifiers to the buffers, so if we don't
	 * actually do any replay after readahead because the LSN we found in
	 * the buffer is more recent than the current transaction, then we
	 * need to attach the verifier directly. Failure to do so means future
	 * recovery actions (e.g. EFI and unlinked list recovery) can operate
	 * on the buffers without the verifier attached, which can leave
	 * blocks on disk with the correct content but a stale CRC.
	 *
	 * It is safe to assume these clean buffers are currently up to date.
	 * If the buffer is dirtied by a later transaction being replayed, then
	 * the verifier will be reset to match whatever recovery turns that
	 * buffer into.
	 */
	lsn = xlog_recover_get_buf_lsn(mp, bp);
	if (lsn && lsn != -1 && XFS_LSN_CMP(lsn, current_lsn) >= 0) {
		trace_xfs_log_recover_buf_skip(log, buf_f);
		xlog_recover_validate_buf_type(mp, bp, buf_f, NULLCOMMITLSN);
		goto out_release;
	}

	if (buf_f->blf_flags & XFS_BLF_INODE_BUF) {
		error = xlog_recover_do_inode_buffer(mp, item, bp, buf_f);
		if (error)
			goto out_release;
	} else if (buf_f->blf_flags &
		  (XFS_BLF_UDQUOT_BUF|XFS_BLF_PDQUOT_BUF|XFS_BLF_GDQUOT_BUF)) {
		bool	dirty;

		dirty = xlog_recover_do_dquot_buffer(mp, log, item, bp, buf_f);
		if (!dirty)
			goto out_release;
	} else {
		xlog_recover_do_reg_buffer(mp, item, bp, buf_f, current_lsn);
	}

	/*
	 * Perform delayed write on the buffer.  Asynchronous writes will be
	 * slower when taking into account all the buffers to be flushed.
	 *
	 * Also make sure that only inode buffers with good sizes stay in
	 * the buffer cache.  The kernel moves inodes in buffers of 1 block
	 * or inode_cluster_size bytes, whichever is bigger.  The inode
	 * buffers in the log can be a different size if the log was generated
	 * by an older kernel using unclustered inode buffers or a newer kernel
	 * running with a different inode cluster size.  Regardless, if the
	 * inode buffer size isn't max(blocksize, inode_cluster_size) for
	 * *our* value of inode_cluster_size, then we need to keep the buffer
	 * out of the buffer cache so that the buffer won't overlap with
	 * future reads of those inodes.
	 */
	if (XFS_DINODE_MAGIC ==
	    be16_to_cpu(*((__be16 *)xfs_buf_offset(bp, 0))) &&
	    (BBTOB(bp->b_io_length) != M_IGEO(log->l_mp)->inode_cluster_size)) {
		xfs_buf_stale(bp);
		error = xfs_bwrite(bp);
	} else {
		ASSERT(bp->b_target->bt_mount == mp);
		bp->b_iodone = xlog_recover_iodone;
		xfs_buf_delwri_queue(bp, buffer_list);
	}

out_release:
	xfs_buf_relse(bp);
	return error;
}

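/*
 * Illustrative sketch only (not kernel code): an xfs_lsn_t packs the log
 * cycle number into the high 32 bits and a block offset into the low 32
 * bits, and XFS_LSN_CMP() orders by cycle first, then block. A stand-alone
 * model of that comparison:
 *
 *	#include <stdint.h>
 *
 *	static int model_lsn_cmp(int64_t a, int64_t b)
 *	{
 *		int32_t cycle_a = (int32_t)(a >> 32);
 *		int32_t cycle_b = (int32_t)(b >> 32);
 *
 *		if (cycle_a != cycle_b)
 *			return cycle_a < cycle_b ? -1 : 1;
 *		if ((int32_t)a != (int32_t)b)
 *			return (int32_t)a < (int32_t)b ? -1 : 1;
 *		return 0;
 *	}
 *
 * The skip test above, "lsn && lsn != -1 && XFS_LSN_CMP(lsn, current_lsn)
 * >= 0", therefore reads: if the buffer carries a valid LSN stamp at least
 * as new as the transaction being replayed, do not replay into it.
 */
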
/*
 * Inode fork owner changes
 *
 * If we have been told that we have to reparent the inode fork, it's because an
 * extent swap operation on a CRC enabled filesystem has been done and we are
 * replaying it. We need to walk the BMBT of the appropriate fork and change the
 * owners of it.
 *
 * The complexity here is that we don't have an inode context to work with, so
 * after we've replayed the inode we need to instantiate one. This is where the
 * fun begins.
 *
 * We are in the middle of log recovery, so we can't run transactions. That
 * means we cannot use cache coherent inode instantiation via xfs_iget(), as
 * that will result in the corresponding iput() running the inode through
 * xfs_inactive(). If we've just replayed an inode core that changes the link
 * count to zero (i.e. it's been unlinked), then xfs_inactive() will run
 * transactions (bad!).
 *
 * So, to avoid this, we instantiate an inode directly from the inode core we've
 * just recovered. We have the buffer still locked, and all we really need to
 * instantiate is the inode core and the forks being modified. We can do this
 * manually, then run the inode btree owner change, and then tear down the
 * xfs_inode without having to run any transactions at all.
 *
 * Also, because we don't have a transaction context available here, but do
 * need to gather all the buffers we modify for writeback, we pass the
 * buffer_list to the operation to use instead.
 */

STATIC int
xfs_recover_inode_owner_change(
	struct xfs_mount	*mp,
	struct xfs_dinode	*dip,
	struct xfs_inode_log_format *in_f,
	struct list_head	*buffer_list)
{
	struct xfs_inode	*ip;
	int			error;

	ASSERT(in_f->ilf_fields & (XFS_ILOG_DOWNER|XFS_ILOG_AOWNER));

	ip = xfs_inode_alloc(mp, in_f->ilf_ino);
	if (!ip)
		return -ENOMEM;

	/* instantiate the inode */
	xfs_inode_from_disk(ip, dip);
	ASSERT(ip->i_d.di_version >= 3);

	error = xfs_iformat_fork(ip, dip);
	if (error)
		goto out_free_ip;

	if (!xfs_inode_verify_forks(ip)) {
		error = -EFSCORRUPTED;
		goto out_free_ip;
	}

	if (in_f->ilf_fields & XFS_ILOG_DOWNER) {
		ASSERT(in_f->ilf_fields & XFS_ILOG_DBROOT);
		error = xfs_bmbt_change_owner(NULL, ip, XFS_DATA_FORK,
					      ip->i_ino, buffer_list);
		if (error)
			goto out_free_ip;
	}

	if (in_f->ilf_fields & XFS_ILOG_AOWNER) {
		ASSERT(in_f->ilf_fields & XFS_ILOG_ABROOT);
		error = xfs_bmbt_change_owner(NULL, ip, XFS_ATTR_FORK,
					      ip->i_ino, buffer_list);
		if (error)
			goto out_free_ip;
	}

out_free_ip:
	xfs_inode_free(ip);
	return error;
}

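/*
 * Illustrative sketch only (not kernel code): the function above follows
 * the usual kernel "single exit label" unwind style, where success and
 * failure share one teardown path. Reduced to its shape, with hypothetical
 * names:
 *
 *	static int do_owner_change(void)
 *	{
 *		int error;
 *		struct resource *res = resource_alloc();  // xfs_inode_alloc()
 *
 *		if (!res)
 *			return -1;		// nothing to unwind yet
 *
 *		error = change_data_fork(res);	// first step
 *		if (error)
 *			goto out_free;
 *		error = change_attr_fork(res);	// second step; any error
 *						// falls through to the label
 *	out_free:
 *		resource_free(res);		// xfs_inode_free(), exactly once
 *		return error;
 *	}
 */
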
STATIC int
xlog_recover_inode_pass2(
	struct xlog			*log,
	struct list_head		*buffer_list,
	struct xlog_recover_item	*item,
	xfs_lsn_t			current_lsn)
{
	struct xfs_inode_log_format	*in_f;
	xfs_mount_t		*mp = log->l_mp;
	xfs_buf_t		*bp;
	xfs_dinode_t		*dip;
	int			len;
	char			*src;
	char			*dest;
	int			error;
	int			attr_index;
	uint			fields;
	struct xfs_log_dinode	*ldip;
	uint			isize;
	int			need_free = 0;

	if (item->ri_buf[0].i_len == sizeof(struct xfs_inode_log_format)) {
		in_f = item->ri_buf[0].i_addr;
	} else {
		in_f = kmem_alloc(sizeof(struct xfs_inode_log_format), KM_SLEEP);
		need_free = 1;
		error = xfs_inode_item_format_convert(&item->ri_buf[0], in_f);
		if (error)
			goto error;
	}

	/*
	 * Inode buffers can be freed, look out for it,
	 * and do not replay the inode.
	 */
	if (xlog_check_buffer_cancelled(log, in_f->ilf_blkno,
			in_f->ilf_len, 0)) {
		error = 0;
		trace_xfs_log_recover_inode_cancel(log, in_f);
		goto error;
	}
	trace_xfs_log_recover_inode_recover(log, in_f);

	bp = xfs_buf_read(mp->m_ddev_targp, in_f->ilf_blkno, in_f->ilf_len, 0,
			  &xfs_inode_buf_ops);
	if (!bp) {
		error = -ENOMEM;
		goto error;
	}
	error = bp->b_error;
	if (error) {
		xfs_buf_ioerror_alert(bp, "xlog_recover_do..(read#2)");
		goto out_release;
	}
	ASSERT(in_f->ilf_fields & XFS_ILOG_CORE);
	dip = xfs_buf_offset(bp, in_f->ilf_boffset);

	/*
	 * Make sure the place we're flushing out to really looks
	 * like an inode!
	 */
	if (unlikely(!xfs_verify_magic16(bp, dip->di_magic))) {
		xfs_alert(mp,
	"%s: Bad inode magic number, dip = "PTR_FMT", dino bp = "PTR_FMT", ino = %Ld",
			__func__, dip, bp, in_f->ilf_ino);
		XFS_ERROR_REPORT("xlog_recover_inode_pass2(1)",
				 XFS_ERRLEVEL_LOW, mp);
		error = -EFSCORRUPTED;
		goto out_release;
	}
	ldip = item->ri_buf[1].i_addr;
	if (unlikely(ldip->di_magic != XFS_DINODE_MAGIC)) {
		xfs_alert(mp,
			"%s: Bad inode log record, rec ptr "PTR_FMT", ino %Ld",
			__func__, item, in_f->ilf_ino);
		XFS_ERROR_REPORT("xlog_recover_inode_pass2(2)",
				 XFS_ERRLEVEL_LOW, mp);
		error = -EFSCORRUPTED;
		goto out_release;
	}

	/*
	 * If the inode has an LSN in it, recover the inode only if it's less
	 * than the lsn of the transaction we are replaying. Note: we still
	 * need to replay an owner change even though the inode is more recent
	 * than the transaction, as there is no guarantee that all the btree
	 * blocks are more recent than this transaction, too.
	 */
	if (dip->di_version >= 3) {
		xfs_lsn_t	lsn = be64_to_cpu(dip->di_lsn);

		if (lsn && lsn != -1 && XFS_LSN_CMP(lsn, current_lsn) >= 0) {
			trace_xfs_log_recover_inode_skip(log, in_f);
			error = 0;
			goto out_owner_change;
		}
	}

	/*
	 * di_flushiter is only valid for v1/2 inodes. All changes for v3 inodes
	 * are transactional and if ordering is necessary we can determine that
	 * more accurately by the LSN field in the V3 inode core. Don't trust
	 * the inode versions as we might be changing them here - use the
	 * superblock flag to determine whether we need to look at di_flushiter
	 * to skip replay when the on disk inode is newer than the log one.
	 */
	if (!xfs_sb_version_hascrc(&mp->m_sb) &&
	    ldip->di_flushiter < be16_to_cpu(dip->di_flushiter)) {
		/*
		 * Deal with the wrap case, DI_MAX_FLUSH is less
		 * than smaller numbers
		 */
		if (be16_to_cpu(dip->di_flushiter) == DI_MAX_FLUSH &&
		    ldip->di_flushiter < (DI_MAX_FLUSH >> 1)) {
			/* do nothing */
		} else {
			trace_xfs_log_recover_inode_skip(log, in_f);
			error = 0;
			goto out_release;
		}
	}

	/* Take the opportunity to reset the flush iteration count */
	ldip->di_flushiter = 0;

	if (unlikely(S_ISREG(ldip->di_mode))) {
		if ((ldip->di_format != XFS_DINODE_FMT_EXTENTS) &&
		    (ldip->di_format != XFS_DINODE_FMT_BTREE)) {
			XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(3)",
					     XFS_ERRLEVEL_LOW, mp, ldip,
					     sizeof(*ldip));
			xfs_alert(mp,
		"%s: Bad regular inode log record, rec ptr "PTR_FMT", "
		"ino ptr = "PTR_FMT", ino bp = "PTR_FMT", ino %Ld",
				__func__, item, dip, bp, in_f->ilf_ino);
			error = -EFSCORRUPTED;
			goto out_release;
		}
	} else if (unlikely(S_ISDIR(ldip->di_mode))) {
		if ((ldip->di_format != XFS_DINODE_FMT_EXTENTS) &&
		    (ldip->di_format != XFS_DINODE_FMT_BTREE) &&
		    (ldip->di_format != XFS_DINODE_FMT_LOCAL)) {
			XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(4)",
					     XFS_ERRLEVEL_LOW, mp, ldip,
					     sizeof(*ldip));
			xfs_alert(mp,
		"%s: Bad dir inode log record, rec ptr "PTR_FMT", "
		"ino ptr = "PTR_FMT", ino bp = "PTR_FMT", ino %Ld",
				__func__, item, dip, bp, in_f->ilf_ino);
			error = -EFSCORRUPTED;
			goto out_release;
		}
	}
	if (unlikely(ldip->di_nextents + ldip->di_anextents > ldip->di_nblocks)) {
		XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(5)",
				     XFS_ERRLEVEL_LOW, mp, ldip,
				     sizeof(*ldip));
		xfs_alert(mp,
	"%s: Bad inode log record, rec ptr "PTR_FMT", dino ptr "PTR_FMT", "
	"dino bp "PTR_FMT", ino %Ld, total extents = %d, nblocks = %Ld",
			__func__, item, dip, bp, in_f->ilf_ino,
			ldip->di_nextents + ldip->di_anextents,
			ldip->di_nblocks);
		error = -EFSCORRUPTED;
		goto out_release;
	}
	if (unlikely(ldip->di_forkoff > mp->m_sb.sb_inodesize)) {
		XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(6)",
				     XFS_ERRLEVEL_LOW, mp, ldip,
				     sizeof(*ldip));
		xfs_alert(mp,
	"%s: Bad inode log record, rec ptr "PTR_FMT", dino ptr "PTR_FMT", "
	"dino bp "PTR_FMT", ino %Ld, forkoff 0x%x", __func__,
			item, dip, bp, in_f->ilf_ino, ldip->di_forkoff);
		error = -EFSCORRUPTED;
		goto out_release;
	}
	isize = xfs_log_dinode_size(ldip->di_version);
	if (unlikely(item->ri_buf[1].i_len > isize)) {
		XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(7)",
				     XFS_ERRLEVEL_LOW, mp, ldip,
				     sizeof(*ldip));
		xfs_alert(mp,
			"%s: Bad inode log record length %d, rec ptr "PTR_FMT,
			__func__, item->ri_buf[1].i_len, item);
		error = -EFSCORRUPTED;
		goto out_release;
	}

	/* recover the log dinode into the on disk inode */
	xfs_log_dinode_to_disk(ldip, dip);

	fields = in_f->ilf_fields;
	if (fields & XFS_ILOG_DEV)
		xfs_dinode_put_rdev(dip, in_f->ilf_u.ilfu_rdev);

	if (in_f->ilf_size == 2)
		goto out_owner_change;
	len = item->ri_buf[2].i_len;
	src = item->ri_buf[2].i_addr;
	ASSERT(in_f->ilf_size <= 4);
	ASSERT((in_f->ilf_size == 3) || (fields & XFS_ILOG_AFORK));
	ASSERT(!(fields & XFS_ILOG_DFORK) ||
	       (len == in_f->ilf_dsize));

	switch (fields & XFS_ILOG_DFORK) {
	case XFS_ILOG_DDATA:
	case XFS_ILOG_DEXT:
		memcpy(XFS_DFORK_DPTR(dip), src, len);
		break;

	case XFS_ILOG_DBROOT:
		xfs_bmbt_to_bmdr(mp, (struct xfs_btree_block *)src, len,
				 (xfs_bmdr_block_t *)XFS_DFORK_DPTR(dip),
				 XFS_DFORK_DSIZE(dip, mp));
		break;

	default:
		/*
		 * There are no data fork flags set.
		 */
		ASSERT((fields & XFS_ILOG_DFORK) == 0);
		break;
	}

	/*
	 * If we logged any attribute data, recover it.  There may or
	 * may not have been any other non-core data logged in this
	 * transaction.
	 */
	if (in_f->ilf_fields & XFS_ILOG_AFORK) {
		if (in_f->ilf_fields & XFS_ILOG_DFORK) {
			attr_index = 3;
		} else {
			attr_index = 2;
		}
		len = item->ri_buf[attr_index].i_len;
		src = item->ri_buf[attr_index].i_addr;
		ASSERT(len == in_f->ilf_asize);

		switch (in_f->ilf_fields & XFS_ILOG_AFORK) {
		case XFS_ILOG_ADATA:
		case XFS_ILOG_AEXT:
			dest = XFS_DFORK_APTR(dip);
			ASSERT(len <= XFS_DFORK_ASIZE(dip, mp));
			memcpy(dest, src, len);
			break;

		case XFS_ILOG_ABROOT:
			dest = XFS_DFORK_APTR(dip);
			xfs_bmbt_to_bmdr(mp, (struct xfs_btree_block *)src,
					 len, (xfs_bmdr_block_t *)dest,
					 XFS_DFORK_ASIZE(dip, mp));
			break;

		default:
			xfs_warn(log->l_mp, "%s: Invalid flag", __func__);
			ASSERT(0);
			error = -EIO;
			goto out_release;
		}
	}

out_owner_change:
	/* Recover the swapext owner change unless inode has been deleted */
	if ((in_f->ilf_fields & (XFS_ILOG_DOWNER|XFS_ILOG_AOWNER)) &&
	    (dip->di_mode != 0))
		error = xfs_recover_inode_owner_change(mp, dip, in_f,
						       buffer_list);
	/* re-generate the checksum. */
	xfs_dinode_calc_crc(log->l_mp, dip);

	ASSERT(bp->b_target->bt_mount == mp);
	bp->b_iodone = xlog_recover_iodone;
	xfs_buf_delwri_queue(bp, buffer_list);

out_release:
	xfs_buf_relse(bp);
error:
	if (need_free)
		kmem_free(in_f);
	return error;
}

/*
 * Recover QUOTAOFF records. We simply make a note of it in the xlog
 * structure, so that we know not to do any dquot item or dquot buffer
 * recovery of that type.
 */
STATIC int
xlog_recover_quotaoff_pass1(
	struct xlog			*log,
	struct xlog_recover_item	*item)
{
	xfs_qoff_logformat_t	*qoff_f = item->ri_buf[0].i_addr;
	ASSERT(qoff_f);

	/*
	 * The logitem format's flag tells us if this was user quotaoff,
	 * group/project quotaoff or both.
	 */
	if (qoff_f->qf_flags & XFS_UQUOTA_ACCT)
		log->l_quotaoffs_flag |= XFS_DQ_USER;
	if (qoff_f->qf_flags & XFS_PQUOTA_ACCT)
		log->l_quotaoffs_flag |= XFS_DQ_PROJ;
	if (qoff_f->qf_flags & XFS_GQUOTA_ACCT)
		log->l_quotaoffs_flag |= XFS_DQ_GROUP;

	return 0;
}

/*
 * Recover a dquot record
 */
STATIC int
xlog_recover_dquot_pass2(
	struct xlog			*log,
	struct list_head		*buffer_list,
	struct xlog_recover_item	*item,
	xfs_lsn_t			current_lsn)
{
	xfs_mount_t		*mp = log->l_mp;
	xfs_buf_t		*bp;
	struct xfs_disk_dquot	*ddq, *recddq;
	xfs_failaddr_t		fa;
	int			error;
	xfs_dq_logformat_t	*dq_f;
	uint			type;

	/*
	 * Filesystems are required to send in quota flags at mount time.
	 */
	if (mp->m_qflags == 0)
		return 0;

	recddq = item->ri_buf[1].i_addr;
	if (recddq == NULL) {
		xfs_alert(log->l_mp, "NULL dquot in %s.", __func__);
		return -EIO;
	}
	if (item->ri_buf[1].i_len < sizeof(xfs_disk_dquot_t)) {
		xfs_alert(log->l_mp, "dquot too small (%d) in %s.",
			item->ri_buf[1].i_len, __func__);
		return -EIO;
	}

	/*
	 * This type of quotas was turned off, so ignore this record.
	 */
	type = recddq->d_flags & (XFS_DQ_USER | XFS_DQ_PROJ | XFS_DQ_GROUP);
	ASSERT(type);
	if (log->l_quotaoffs_flag & type)
		return 0;

	/*
	 * At this point we know that quota was _not_ turned off.
	 * Since the mount flags are not indicating to us otherwise, this
	 * must mean that quota is on, and the dquot needs to be replayed.
	 * Remember that we may not have fully recovered the superblock yet,
	 * so we can't do the usual trick of looking at the SB quota bits.
	 *
	 * The other possibility, of course, is that the quota subsystem was
	 * removed since the last mount - ENOSYS.
	 */
	dq_f = item->ri_buf[0].i_addr;
	ASSERT(dq_f);
	fa = xfs_dquot_verify(mp, recddq, dq_f->qlf_id, 0);
	if (fa) {
		xfs_alert(mp, "corrupt dquot ID 0x%x in log at %pS",
				dq_f->qlf_id, fa);
		return -EIO;
	}
	ASSERT(dq_f->qlf_len == 1);

	/*
	 * At this point we are assuming that the dquots have been allocated
	 * and hence the buffer has valid dquots stamped in it. It should,
	 * therefore, pass verifier validation. If the dquot is bad, then
	 * we'll return an error here, so we don't need to specifically check
	 * the dquot in the buffer after the verifier has run.
	 */
	error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp, dq_f->qlf_blkno,
				   XFS_FSB_TO_BB(mp, dq_f->qlf_len), 0, &bp,
				   &xfs_dquot_buf_ops);
	if (error)
		return error;

	ASSERT(bp);
	ddq = xfs_buf_offset(bp, dq_f->qlf_boffset);

	/*
	 * If the dquot has an LSN in it, recover the dquot only if it's less
	 * than the lsn of the transaction we are replaying.
	 */
	if (xfs_sb_version_hascrc(&mp->m_sb)) {
		struct xfs_dqblk *dqb = (struct xfs_dqblk *)ddq;
		xfs_lsn_t	lsn = be64_to_cpu(dqb->dd_lsn);

		if (lsn && lsn != -1 && XFS_LSN_CMP(lsn, current_lsn) >= 0) {
			goto out_release;
		}
	}

	memcpy(ddq, recddq, item->ri_buf[1].i_len);
	if (xfs_sb_version_hascrc(&mp->m_sb)) {
		xfs_update_cksum((char *)ddq, sizeof(struct xfs_dqblk),
				 XFS_DQUOT_CRC_OFF);
	}

	ASSERT(dq_f->qlf_size == 2);
	ASSERT(bp->b_target->bt_mount == mp);
	bp->b_iodone = xlog_recover_iodone;
	xfs_buf_delwri_queue(bp, buffer_list);

out_release:
	xfs_buf_relse(bp);
	return 0;
}

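/*
 * Illustrative sketch only (not kernel code): xfs_update_cksum() above
 * recomputes the CRC of the on-disk dquot block after the new contents are
 * copied in. The general "CRC field embedded in the block it protects"
 * pattern is: checksum the block as if the CRC field were zero, then store
 * the result at the known offset. A stand-alone model with a hypothetical
 * crc32c() routine (the kernel helper differs in seed and finalization
 * details):
 *
 *	#include <stdint.h>
 *	#include <string.h>
 *
 *	uint32_t crc32c(uint32_t crc, const void *buf, size_t len);
 *
 *	static void update_cksum(char *blk, size_t len, size_t crc_off)
 *	{
 *		static const char zeroes[4];
 *		uint32_t crc;
 *
 *		crc = crc32c(~0u, blk, crc_off);		// before the field
 *		crc = crc32c(crc, zeroes, sizeof(zeroes));	// field as zero
 *		crc = crc32c(crc, blk + crc_off + 4,
 *			     len - crc_off - 4);		// after the field
 *		memcpy(blk + crc_off, &crc, sizeof(crc));	// stamp it
 *	}
 *
 * The read-side verifier repeats the computation and compares the result
 * against the stored value.
 */
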
/*
 * This routine is called to create an in-core extent free intent
 * item from the efi format structure which was logged on disk.
 * It allocates an in-core efi, copies the extents from the format
 * structure into it, and adds the efi to the AIL with the given
 * LSN.
 */
STATIC int
xlog_recover_efi_pass2(
	struct xlog			*log,
	struct xlog_recover_item	*item,
	xfs_lsn_t			lsn)
{
	int				error;
	struct xfs_mount		*mp = log->l_mp;
	struct xfs_efi_log_item		*efip;
	struct xfs_efi_log_format	*efi_formatp;

	efi_formatp = item->ri_buf[0].i_addr;

	efip = xfs_efi_init(mp, efi_formatp->efi_nextents);
	error = xfs_efi_copy_format(&item->ri_buf[0], &efip->efi_format);
	if (error) {
		xfs_efi_item_free(efip);
		return error;
	}
	atomic_set(&efip->efi_next_extent, efi_formatp->efi_nextents);

	spin_lock(&log->l_ailp->ail_lock);
	/*
	 * The EFI has two references. One for the EFD and one for EFI to ensure
	 * it makes it into the AIL. Insert the EFI into the AIL directly and
	 * drop the EFI reference. Note that xfs_trans_ail_update() drops the
	 * AIL lock.
	 */
	xfs_trans_ail_update(log->l_ailp, &efip->efi_item, lsn);
	xfs_efi_release(efip);
	return 0;
}

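/*
 * Illustrative sketch only (not kernel code): the intent item above is
 * created with two references - one for this insertion path and one that
 * the matching "done" (EFD) item drops later. A stand-alone model of that
 * lifetime in plain C11, with hypothetical names:
 *
 *	#include <stdatomic.h>
 *	#include <stdlib.h>
 *
 *	struct intent {
 *		atomic_int	refs;
 *	};
 *
 *	static void intent_release(struct intent *in)
 *	{
 *		if (atomic_fetch_sub(&in->refs, 1) == 1)
 *			free(in);		// last reference gone
 *	}
 *
 *	static void intent_recover(struct intent *in)
 *	{
 *		atomic_init(&in->refs, 2);	// done-item ref + insertion ref
 *		// ...link into the pending list (the AIL) here...
 *		intent_release(in);		// insertion path is done with
 *						// it; the done item drops the
 *						// remaining reference
 *	}
 */
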
/*
 * This routine is called when an EFD format structure is found in a committed
 * transaction in the log. Its purpose is to cancel the corresponding EFI if it
 * was still in the log. To do this it searches the AIL for the EFI with an id
 * equal to that in the EFD format structure. If we find it we drop the EFD
 * reference, which removes the EFI from the AIL and frees it.
 */
STATIC int
xlog_recover_efd_pass2(
	struct xlog			*log,
	struct xlog_recover_item	*item)
{
	xfs_efd_log_format_t	*efd_formatp;
	xfs_efi_log_item_t	*efip = NULL;
	xfs_log_item_t		*lip;
	uint64_t		efi_id;
	struct xfs_ail_cursor	cur;
	struct xfs_ail		*ailp = log->l_ailp;

	efd_formatp = item->ri_buf[0].i_addr;
	ASSERT((item->ri_buf[0].i_len == (sizeof(xfs_efd_log_format_32_t) +
		((efd_formatp->efd_nextents - 1) * sizeof(xfs_extent_32_t)))) ||
	       (item->ri_buf[0].i_len == (sizeof(xfs_efd_log_format_64_t) +
		((efd_formatp->efd_nextents - 1) * sizeof(xfs_extent_64_t)))));
	efi_id = efd_formatp->efd_efi_id;

	/*
	 * Search for the EFI with the id in the EFD format structure in the
	 * AIL.
	 */
	spin_lock(&ailp->ail_lock);
	lip = xfs_trans_ail_cursor_first(ailp, &cur, 0);
	while (lip != NULL) {
		if (lip->li_type == XFS_LI_EFI) {
			efip = (xfs_efi_log_item_t *)lip;
			if (efip->efi_format.efi_id == efi_id) {
				/*
				 * Drop the EFD reference to the EFI. This
				 * removes the EFI from the AIL and frees it.
				 */
				spin_unlock(&ailp->ail_lock);
				xfs_efi_release(efip);
				spin_lock(&ailp->ail_lock);
				break;
			}
		}
		lip = xfs_trans_ail_cursor_next(ailp, &cur);
	}

	xfs_trans_ail_cursor_done(&cur);
	spin_unlock(&ailp->ail_lock);

	return 0;
}

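/*
 * Illustrative sketch only (not kernel code): each of the "done" handlers
 * in this file (EFD/RUD/CUD/BUD) is the same walk - scan the AIL for an
 * intent of the matching type and id, and release it. A stand-alone model
 * over a plain singly linked list, with hypothetical names:
 *
 *	struct intent {
 *		int			type;
 *		unsigned long long	id;
 *		struct intent		*next;
 *	};
 *
 *	// Returns the matched intent after unlinking it, or NULL. The
 *	// kernel version additionally holds the AIL lock and walks via a
 *	// cursor so the list may safely change while the lock is dropped.
 *	static struct intent *cancel_intent(struct intent **head,
 *					    int type, unsigned long long id)
 *	{
 *		struct intent **pp;
 *
 *		for (pp = head; *pp; pp = &(*pp)->next) {
 *			if ((*pp)->type == type && (*pp)->id == id) {
 *				struct intent *found = *pp;
 *
 *				*pp = found->next;	// unlink
 *				return found;		// caller releases
 *			}
 *		}
 *		return NULL;
 *	}
 */
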
/*
 * This routine is called to create an in-core extent rmap update
 * item from the rui format structure which was logged on disk.
 * It allocates an in-core rui, copies the extents from the format
 * structure into it, and adds the rui to the AIL with the given
 * LSN.
 */
STATIC int
xlog_recover_rui_pass2(
	struct xlog			*log,
	struct xlog_recover_item	*item,
	xfs_lsn_t			lsn)
{
	int				error;
	struct xfs_mount		*mp = log->l_mp;
	struct xfs_rui_log_item		*ruip;
	struct xfs_rui_log_format	*rui_formatp;

	rui_formatp = item->ri_buf[0].i_addr;

	ruip = xfs_rui_init(mp, rui_formatp->rui_nextents);
	error = xfs_rui_copy_format(&item->ri_buf[0], &ruip->rui_format);
	if (error) {
		xfs_rui_item_free(ruip);
		return error;
	}
	atomic_set(&ruip->rui_next_extent, rui_formatp->rui_nextents);

	spin_lock(&log->l_ailp->ail_lock);
	/*
	 * The RUI has two references. One for the RUD and one for RUI to ensure
	 * it makes it into the AIL. Insert the RUI into the AIL directly and
	 * drop the RUI reference. Note that xfs_trans_ail_update() drops the
	 * AIL lock.
	 */
	xfs_trans_ail_update(log->l_ailp, &ruip->rui_item, lsn);
	xfs_rui_release(ruip);
	return 0;
}

/*
 * This routine is called when an RUD format structure is found in a committed
 * transaction in the log. Its purpose is to cancel the corresponding RUI if it
 * was still in the log. To do this it searches the AIL for the RUI with an id
 * equal to that in the RUD format structure. If we find it we drop the RUD
 * reference, which removes the RUI from the AIL and frees it.
 */
STATIC int
xlog_recover_rud_pass2(
	struct xlog			*log,
	struct xlog_recover_item	*item)
{
	struct xfs_rud_log_format	*rud_formatp;
	struct xfs_rui_log_item		*ruip = NULL;
	struct xfs_log_item		*lip;
	uint64_t			rui_id;
	struct xfs_ail_cursor		cur;
	struct xfs_ail			*ailp = log->l_ailp;

	rud_formatp = item->ri_buf[0].i_addr;
	ASSERT(item->ri_buf[0].i_len == sizeof(struct xfs_rud_log_format));
	rui_id = rud_formatp->rud_rui_id;

	/*
	 * Search for the RUI with the id in the RUD format structure in the
	 * AIL.
	 */
	spin_lock(&ailp->ail_lock);
	lip = xfs_trans_ail_cursor_first(ailp, &cur, 0);
	while (lip != NULL) {
		if (lip->li_type == XFS_LI_RUI) {
			ruip = (struct xfs_rui_log_item *)lip;
			if (ruip->rui_format.rui_id == rui_id) {
				/*
				 * Drop the RUD reference to the RUI. This
				 * removes the RUI from the AIL and frees it.
				 */
				spin_unlock(&ailp->ail_lock);
				xfs_rui_release(ruip);
				spin_lock(&ailp->ail_lock);
				break;
			}
		}
		lip = xfs_trans_ail_cursor_next(ailp, &cur);
	}

	xfs_trans_ail_cursor_done(&cur);
	spin_unlock(&ailp->ail_lock);

	return 0;
}

/*
 * Copy a CUI format buffer from the given buf, and into the destination
 * CUI format structure.  The CUI/CUD items were designed not to need any
 * special alignment handling.
 */
static int
xfs_cui_copy_format(
	struct xfs_log_iovec		*buf,
	struct xfs_cui_log_format	*dst_cui_fmt)
{
	struct xfs_cui_log_format	*src_cui_fmt;
	uint				len;

	src_cui_fmt = buf->i_addr;
	len = xfs_cui_log_format_sizeof(src_cui_fmt->cui_nextents);

	if (buf->i_len == len) {
		memcpy(dst_cui_fmt, src_cui_fmt, len);
		return 0;
	}
	return -EFSCORRUPTED;
}

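/*
 * Illustrative sketch only (not kernel code): the length check above guards
 * a structure that ends in a variable-length extent array, so the only
 * valid on-disk size is the header plus exactly nextents array entries.
 * A stand-alone model with hypothetical names:
 *
 *	#include <stddef.h>
 *	#include <string.h>
 *
 *	struct fmt {
 *		unsigned int		nextents;
 *		unsigned long long	extents[];	// flexible array member
 *	};
 *
 *	static inline size_t fmt_sizeof(unsigned int nextents)
 *	{
 *		return offsetof(struct fmt, extents) +
 *		       nextents * sizeof(unsigned long long);
 *	}
 *
 *	static int fmt_copy(const void *src, size_t src_len, struct fmt *dst)
 *	{
 *		const struct fmt *f = src;
 *
 *		// reject anything that isn't exactly the computed size -
 *		// a short buffer would read past the log vector, a long
 *		// one indicates a corrupt or mismatched record
 *		if (src_len != fmt_sizeof(f->nextents))
 *			return -1;
 *		memcpy(dst, f, src_len);
 *		return 0;
 *	}
 */
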
/*
 * This routine is called to create an in-core extent refcount update
 * item from the cui format structure which was logged on disk.
 * It allocates an in-core cui, copies the extents from the format
 * structure into it, and adds the cui to the AIL with the given
 * LSN.
 */
STATIC int
xlog_recover_cui_pass2(
	struct xlog			*log,
	struct xlog_recover_item	*item,
	xfs_lsn_t			lsn)
{
	int				error;
	struct xfs_mount		*mp = log->l_mp;
	struct xfs_cui_log_item		*cuip;
	struct xfs_cui_log_format	*cui_formatp;

	cui_formatp = item->ri_buf[0].i_addr;

	cuip = xfs_cui_init(mp, cui_formatp->cui_nextents);
	error = xfs_cui_copy_format(&item->ri_buf[0], &cuip->cui_format);
	if (error) {
		xfs_cui_item_free(cuip);
		return error;
	}
	atomic_set(&cuip->cui_next_extent, cui_formatp->cui_nextents);

	spin_lock(&log->l_ailp->ail_lock);
	/*
	 * The CUI has two references. One for the CUD and one for CUI to ensure
	 * it makes it into the AIL. Insert the CUI into the AIL directly and
	 * drop the CUI reference. Note that xfs_trans_ail_update() drops the
	 * AIL lock.
	 */
	xfs_trans_ail_update(log->l_ailp, &cuip->cui_item, lsn);
	xfs_cui_release(cuip);
	return 0;
}

3657/*
3658 * This routine is called when a CUD format structure is found in a committed
3659 * transaction in the log. Its purpose is to cancel the corresponding CUI if it
3660 * was still in the log. To do this it searches the AIL for the CUI with an id
3661 * equal to that in the CUD format structure. If we find it we drop the CUD
3662 * reference, which removes the CUI from the AIL and frees it.
3663 */
3664STATIC int
3665xlog_recover_cud_pass2(
3666 struct xlog *log,
3667 struct xlog_recover_item *item)
3668{
3669 struct xfs_cud_log_format *cud_formatp;
3670 struct xfs_cui_log_item *cuip = NULL;
3671 struct xfs_log_item *lip;
Darrick J. Wongc8ce5402017-06-16 11:00:05 -07003672 uint64_t cui_id;
Darrick J. Wongf997ee22016-10-03 09:11:21 -07003673 struct xfs_ail_cursor cur;
3674 struct xfs_ail *ailp = log->l_ailp;
3675
3676 cud_formatp = item->ri_buf[0].i_addr;
3677 if (item->ri_buf[0].i_len != sizeof(struct xfs_cud_log_format))
3678 return -EFSCORRUPTED;
3679 cui_id = cud_formatp->cud_cui_id;
3680
3681 /*
3682 * Search for the CUI with the id in the CUD format structure in the
3683 * AIL.
3684 */
Matthew Wilcox57e80952018-03-07 14:59:39 -08003685 spin_lock(&ailp->ail_lock);
Darrick J. Wongf997ee22016-10-03 09:11:21 -07003686 lip = xfs_trans_ail_cursor_first(ailp, &cur, 0);
3687 while (lip != NULL) {
3688 if (lip->li_type == XFS_LI_CUI) {
3689 cuip = (struct xfs_cui_log_item *)lip;
3690 if (cuip->cui_format.cui_id == cui_id) {
3691 /*
3692 * Drop the CUD reference to the CUI. This
3693 * removes the CUI from the AIL and frees it.
3694 */
Matthew Wilcox57e80952018-03-07 14:59:39 -08003695 spin_unlock(&ailp->ail_lock);
Darrick J. Wongf997ee22016-10-03 09:11:21 -07003696 xfs_cui_release(cuip);
Matthew Wilcox57e80952018-03-07 14:59:39 -08003697 spin_lock(&ailp->ail_lock);
Darrick J. Wongf997ee22016-10-03 09:11:21 -07003698 break;
3699 }
3700 }
3701 lip = xfs_trans_ail_cursor_next(ailp, &cur);
3702 }
3703
3704 xfs_trans_ail_cursor_done(&cur);
Matthew Wilcox57e80952018-03-07 14:59:39 -08003705 spin_unlock(&ailp->ail_lock);
Darrick J. Wongf997ee22016-10-03 09:11:21 -07003706
3707 return 0;
3708}
3709
3710/*
Darrick J. Wong77d61fe2016-10-03 09:11:26 -07003711 * Copy a BUI format buffer from the given buf into the destination
3712 * BUI format structure. The BUI/BUD items were designed not to need any
3713 * special alignment handling.
3714 */
3715static int
3716xfs_bui_copy_format(
3717 struct xfs_log_iovec *buf,
3718 struct xfs_bui_log_format *dst_bui_fmt)
3719{
3720 struct xfs_bui_log_format *src_bui_fmt;
3721 uint len;
3722
3723 src_bui_fmt = buf->i_addr;
3724 len = xfs_bui_log_format_sizeof(src_bui_fmt->bui_nextents);
3725
3726 if (buf->i_len == len) {
3727 memcpy(dst_bui_fmt, src_bui_fmt, len);
3728 return 0;
3729 }
3730 return -EFSCORRUPTED;
3731}
3732
3733/*
3734 * This routine is called to create an in-core extent bmap update
3735 * item from the bui format structure which was logged on disk.
3736 * It allocates an in-core bui, copies the extents from the format
3737 * structure into it, and adds the bui to the AIL with the given
3738 * LSN.
3739 */
3740STATIC int
3741xlog_recover_bui_pass2(
3742 struct xlog *log,
3743 struct xlog_recover_item *item,
3744 xfs_lsn_t lsn)
3745{
3746 int error;
3747 struct xfs_mount *mp = log->l_mp;
3748 struct xfs_bui_log_item *buip;
3749 struct xfs_bui_log_format *bui_formatp;
3750
3751 bui_formatp = item->ri_buf[0].i_addr;
3752
3753 if (bui_formatp->bui_nextents != XFS_BUI_MAX_FAST_EXTENTS)
3754 return -EFSCORRUPTED;
3755 buip = xfs_bui_init(mp);
3756 error = xfs_bui_copy_format(&item->ri_buf[0], &buip->bui_format);
3757 if (error) {
3758 xfs_bui_item_free(buip);
3759 return error;
3760 }
3761 atomic_set(&buip->bui_next_extent, bui_formatp->bui_nextents);
3762
Matthew Wilcox57e80952018-03-07 14:59:39 -08003763 spin_lock(&log->l_ailp->ail_lock);
Darrick J. Wong77d61fe2016-10-03 09:11:26 -07003764 /*
3765 * The BUI has two references. One for the BUD and one for the BUI to ensure
3766 * it makes it into the AIL. Insert the BUI into the AIL directly and
3767 * drop the BUI reference. Note that xfs_trans_ail_update() drops the
3768 * AIL lock.
3769 */
3770 xfs_trans_ail_update(log->l_ailp, &buip->bui_item, lsn);
3771 xfs_bui_release(buip);
3772 return 0;
3773}
3774
3775
3776/*
3777 * This routine is called when a BUD format structure is found in a committed
3778 * transaction in the log. Its purpose is to cancel the corresponding BUI if it
3779 * was still in the log. To do this it searches the AIL for the BUI with an id
3780 * equal to that in the BUD format structure. If we find it we drop the BUD
3781 * reference, which removes the BUI from the AIL and frees it.
3782 */
3783STATIC int
3784xlog_recover_bud_pass2(
3785 struct xlog *log,
3786 struct xlog_recover_item *item)
3787{
3788 struct xfs_bud_log_format *bud_formatp;
3789 struct xfs_bui_log_item *buip = NULL;
3790 struct xfs_log_item *lip;
Darrick J. Wongc8ce5402017-06-16 11:00:05 -07003791 uint64_t bui_id;
Darrick J. Wong77d61fe2016-10-03 09:11:26 -07003792 struct xfs_ail_cursor cur;
3793 struct xfs_ail *ailp = log->l_ailp;
3794
3795 bud_formatp = item->ri_buf[0].i_addr;
3796 if (item->ri_buf[0].i_len != sizeof(struct xfs_bud_log_format))
3797 return -EFSCORRUPTED;
3798 bui_id = bud_formatp->bud_bui_id;
3799
3800 /*
3801 * Search for the BUI with the id in the BUD format structure in the
3802 * AIL.
3803 */
Matthew Wilcox57e80952018-03-07 14:59:39 -08003804 spin_lock(&ailp->ail_lock);
Darrick J. Wong77d61fe2016-10-03 09:11:26 -07003805 lip = xfs_trans_ail_cursor_first(ailp, &cur, 0);
3806 while (lip != NULL) {
3807 if (lip->li_type == XFS_LI_BUI) {
3808 buip = (struct xfs_bui_log_item *)lip;
3809 if (buip->bui_format.bui_id == bui_id) {
3810 /*
3811 * Drop the BUD reference to the BUI. This
3812 * removes the BUI from the AIL and frees it.
3813 */
Matthew Wilcox57e80952018-03-07 14:59:39 -08003814 spin_unlock(&ailp->ail_lock);
Darrick J. Wong77d61fe2016-10-03 09:11:26 -07003815 xfs_bui_release(buip);
Matthew Wilcox57e80952018-03-07 14:59:39 -08003816 spin_lock(&ailp->ail_lock);
Darrick J. Wong77d61fe2016-10-03 09:11:26 -07003817 break;
3818 }
3819 }
3820 lip = xfs_trans_ail_cursor_next(ailp, &cur);
3821 }
3822
3823 xfs_trans_ail_cursor_done(&cur);
Matthew Wilcox57e80952018-03-07 14:59:39 -08003824 spin_unlock(&ailp->ail_lock);
Darrick J. Wong77d61fe2016-10-03 09:11:26 -07003825
3826 return 0;
3827}
3828
3829/*
Dave Chinner28c8e412013-06-27 16:04:55 +10003830 * This routine is called when an inode create format structure is found in a
3831 * committed transaction in the log. Its purpose is to initialise the inodes
3832 * being allocated on disk. This requires us to get inode cluster buffers that
Masahiro Yamada6e7c2b42017-05-08 15:57:53 -07003833 * match the range to be initialised, stamped with inode templates and written
Dave Chinner28c8e412013-06-27 16:04:55 +10003834 * by delayed write so that subsequent modifications will hit the cached buffer
3835 * and only need writing out at the end of recovery.
3836 */
3837STATIC int
3838xlog_recover_do_icreate_pass2(
3839 struct xlog *log,
3840 struct list_head *buffer_list,
3841 xlog_recover_item_t *item)
3842{
3843 struct xfs_mount *mp = log->l_mp;
3844 struct xfs_icreate_log *icl;
Darrick J. Wongef325952019-06-05 11:19:34 -07003845 struct xfs_ino_geometry *igeo = M_IGEO(mp);
Dave Chinner28c8e412013-06-27 16:04:55 +10003846 xfs_agnumber_t agno;
3847 xfs_agblock_t agbno;
3848 unsigned int count;
3849 unsigned int isize;
3850 xfs_agblock_t length;
Brian Fosterfc0d1652015-08-19 09:59:38 +10003851 int bb_per_cluster;
3852 int cancel_count;
3853 int nbufs;
3854 int i;
Dave Chinner28c8e412013-06-27 16:04:55 +10003855
3856 icl = (struct xfs_icreate_log *)item->ri_buf[0].i_addr;
3857 if (icl->icl_type != XFS_LI_ICREATE) {
3858 xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad type");
Dave Chinner24513372014-06-25 14:58:08 +10003859 return -EINVAL;
Dave Chinner28c8e412013-06-27 16:04:55 +10003860 }
3861
3862 if (icl->icl_size != 1) {
3863 xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad icl size");
Dave Chinner24513372014-06-25 14:58:08 +10003864 return -EINVAL;
Dave Chinner28c8e412013-06-27 16:04:55 +10003865 }
3866
3867 agno = be32_to_cpu(icl->icl_ag);
3868 if (agno >= mp->m_sb.sb_agcount) {
3869 xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad agno");
Dave Chinner24513372014-06-25 14:58:08 +10003870 return -EINVAL;
Dave Chinner28c8e412013-06-27 16:04:55 +10003871 }
3872 agbno = be32_to_cpu(icl->icl_agbno);
3873 if (!agbno || agbno == NULLAGBLOCK || agbno >= mp->m_sb.sb_agblocks) {
3874 xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad agbno");
Dave Chinner24513372014-06-25 14:58:08 +10003875 return -EINVAL;
Dave Chinner28c8e412013-06-27 16:04:55 +10003876 }
3877 isize = be32_to_cpu(icl->icl_isize);
3878 if (isize != mp->m_sb.sb_inodesize) {
3879 xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad isize");
Dave Chinner24513372014-06-25 14:58:08 +10003880 return -EINVAL;
Dave Chinner28c8e412013-06-27 16:04:55 +10003881 }
3882 count = be32_to_cpu(icl->icl_count);
3883 if (!count) {
3884 xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad count");
Dave Chinner24513372014-06-25 14:58:08 +10003885 return -EINVAL;
Dave Chinner28c8e412013-06-27 16:04:55 +10003886 }
3887 length = be32_to_cpu(icl->icl_length);
3888 if (!length || length >= mp->m_sb.sb_agblocks) {
3889 xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad length");
Dave Chinner24513372014-06-25 14:58:08 +10003890 return -EINVAL;
Dave Chinner28c8e412013-06-27 16:04:55 +10003891 }
3892
Brian Foster7f43c902015-05-29 09:06:30 +10003893 /*
3894 * The inode chunk is either full or sparse and we only support
Darrick J. Wongef325952019-06-05 11:19:34 -07003895 * m_ino_geo.ialloc_min_blks sized sparse allocations at this time.
Brian Foster7f43c902015-05-29 09:06:30 +10003896 */
Darrick J. Wongef325952019-06-05 11:19:34 -07003897 if (length != igeo->ialloc_blks &&
3898 length != igeo->ialloc_min_blks) {
Brian Foster7f43c902015-05-29 09:06:30 +10003899 xfs_warn(log->l_mp,
3900 "%s: unsupported chunk length", __FUNCTION__);
3901 return -EINVAL;
3902 }
3903
3904 /* verify inode count is consistent with extent length */
3905 if ((count >> mp->m_sb.sb_inopblog) != length) {
3906 xfs_warn(log->l_mp,
3907 "%s: inconsistent inode count and chunk length",
3908 __func__);
Dave Chinner24513372014-06-25 14:58:08 +10003909 return -EINVAL;
Dave Chinner28c8e412013-06-27 16:04:55 +10003910 }
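	/*
	 * Editor's note (worked example with assumed geometry, not from the
	 * original source): with 4096-byte blocks and 512-byte inodes there
	 * are 8 inodes per block, so sb_inopblog == 3. A 64-inode chunk must
	 * then span 64 >> 3 == 8 blocks; any other icl_length fails the
	 * consistency check above.
	 */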
3911
3912 /*
Brian Fosterfc0d1652015-08-19 09:59:38 +10003913 * The icreate transaction can cover multiple cluster buffers and these
3914 * buffers could have been freed and reused. Check the individual
3915 * buffers for cancellation so we don't overwrite anything written after
3916 * a cancellation.
Dave Chinner28c8e412013-06-27 16:04:55 +10003917 */
Darrick J. Wongef325952019-06-05 11:19:34 -07003918 bb_per_cluster = XFS_FSB_TO_BB(mp, igeo->blocks_per_cluster);
3919 nbufs = length / igeo->blocks_per_cluster;
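	/*
	 * Editor's note (illustrative figures, not from the original
	 * source): if blocks_per_cluster were 4 on a 4096-byte-block
	 * filesystem, bb_per_cluster would be XFS_FSB_TO_BB(mp, 4) == 32
	 * basic (512-byte) blocks, and a 16-block chunk would be covered by
	 * nbufs == 16 / 4 == 4 cluster buffers, each checked for
	 * cancellation below.
	 */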
Brian Fosterfc0d1652015-08-19 09:59:38 +10003920 for (i = 0, cancel_count = 0; i < nbufs; i++) {
3921 xfs_daddr_t daddr;
Dave Chinner28c8e412013-06-27 16:04:55 +10003922
Brian Fosterfc0d1652015-08-19 09:59:38 +10003923 daddr = XFS_AGB_TO_DADDR(mp, agno,
Darrick J. Wongef325952019-06-05 11:19:34 -07003924 agbno + i * igeo->blocks_per_cluster);
Brian Fosterfc0d1652015-08-19 09:59:38 +10003925 if (xlog_check_buffer_cancelled(log, daddr, bb_per_cluster, 0))
3926 cancel_count++;
3927 }
3928
3929 /*
3930 * We currently only use icreate for a single allocation at a time. This
3931 * means we should expect either all or none of the buffers to be
3932 * cancelled. Be conservative and skip replay if at least one buffer is
3933 * cancelled, but warn the user that something is awry if the buffers
3934 * are not consistent.
3935 *
3936 * XXX: This must be refined to only skip cancelled clusters once we use
3937 * icreate for multiple chunk allocations.
3938 */
3939 ASSERT(!cancel_count || cancel_count == nbufs);
3940 if (cancel_count) {
3941 if (cancel_count != nbufs)
3942 xfs_warn(mp,
3943 "WARNING: partial inode chunk cancellation, skipped icreate.");
Brian Foster78d57e42015-08-19 09:58:48 +10003944 trace_xfs_log_recover_icreate_cancel(log, icl);
Dave Chinner28c8e412013-06-27 16:04:55 +10003945 return 0;
Brian Foster78d57e42015-08-19 09:58:48 +10003946 }
Dave Chinner28c8e412013-06-27 16:04:55 +10003947
Brian Foster78d57e42015-08-19 09:58:48 +10003948 trace_xfs_log_recover_icreate_recover(log, icl);
Brian Fosterfc0d1652015-08-19 09:59:38 +10003949 return xfs_ialloc_inode_init(mp, NULL, buffer_list, count, agno, agbno,
3950 length, be32_to_cpu(icl->icl_gen));
Dave Chinner28c8e412013-06-27 16:04:55 +10003951}
3952
Zhi Yong Wu00574da2013-08-14 15:16:03 +08003953STATIC void
3954xlog_recover_buffer_ra_pass2(
3955 struct xlog *log,
3956 struct xlog_recover_item *item)
3957{
3958 struct xfs_buf_log_format *buf_f = item->ri_buf[0].i_addr;
3959 struct xfs_mount *mp = log->l_mp;
3960
Dave Chinner84a5b732013-08-27 08:10:53 +10003961 if (xlog_peek_buffer_cancelled(log, buf_f->blf_blkno,
Zhi Yong Wu00574da2013-08-14 15:16:03 +08003962 buf_f->blf_len, buf_f->blf_flags)) {
3963 return;
3964 }
3965
3966 xfs_buf_readahead(mp->m_ddev_targp, buf_f->blf_blkno,
3967 buf_f->blf_len, NULL);
3968}
3969
3970STATIC void
3971xlog_recover_inode_ra_pass2(
3972 struct xlog *log,
3973 struct xlog_recover_item *item)
3974{
3975 struct xfs_inode_log_format ilf_buf;
3976 struct xfs_inode_log_format *ilfp;
3977 struct xfs_mount *mp = log->l_mp;
3978 int error;
3979
3980 if (item->ri_buf[0].i_len == sizeof(struct xfs_inode_log_format)) {
3981 ilfp = item->ri_buf[0].i_addr;
3982 } else {
3983 ilfp = &ilf_buf;
3984 memset(ilfp, 0, sizeof(*ilfp));
3985 error = xfs_inode_item_format_convert(&item->ri_buf[0], ilfp);
3986 if (error)
3987 return;
3988 }
3989
Dave Chinner84a5b732013-08-27 08:10:53 +10003990 if (xlog_peek_buffer_cancelled(log, ilfp->ilf_blkno, ilfp->ilf_len, 0))
Zhi Yong Wu00574da2013-08-14 15:16:03 +08003991 return;
3992
3993 xfs_buf_readahead(mp->m_ddev_targp, ilfp->ilf_blkno,
Dave Chinnerd8914002013-08-27 11:39:37 +10003994 ilfp->ilf_len, &xfs_inode_buf_ra_ops);
Zhi Yong Wu00574da2013-08-14 15:16:03 +08003995}
3996
3997STATIC void
3998xlog_recover_dquot_ra_pass2(
3999 struct xlog *log,
4000 struct xlog_recover_item *item)
4001{
4002 struct xfs_mount *mp = log->l_mp;
4003 struct xfs_disk_dquot *recddq;
4004 struct xfs_dq_logformat *dq_f;
4005 uint type;
Dave Chinner7d6a13f2016-01-12 07:04:01 +11004006 int len;
Zhi Yong Wu00574da2013-08-14 15:16:03 +08004007
4008
4009 if (mp->m_qflags == 0)
4010 return;
4011
4012 recddq = item->ri_buf[1].i_addr;
4013 if (recddq == NULL)
4014 return;
4015 if (item->ri_buf[1].i_len < sizeof(struct xfs_disk_dquot))
4016 return;
4017
4018 type = recddq->d_flags & (XFS_DQ_USER | XFS_DQ_PROJ | XFS_DQ_GROUP);
4019 ASSERT(type);
4020 if (log->l_quotaoffs_flag & type)
4021 return;
4022
4023 dq_f = item->ri_buf[0].i_addr;
4024 ASSERT(dq_f);
4025 ASSERT(dq_f->qlf_len == 1);
4026
Dave Chinner7d6a13f2016-01-12 07:04:01 +11004027 len = XFS_FSB_TO_BB(mp, dq_f->qlf_len);
4028 if (xlog_peek_buffer_cancelled(log, dq_f->qlf_blkno, len, 0))
4029 return;
4030
4031 xfs_buf_readahead(mp->m_ddev_targp, dq_f->qlf_blkno, len,
4032 &xfs_dquot_buf_ra_ops);
Zhi Yong Wu00574da2013-08-14 15:16:03 +08004033}
4034
4035STATIC void
4036xlog_recover_ra_pass2(
4037 struct xlog *log,
4038 struct xlog_recover_item *item)
4039{
4040 switch (ITEM_TYPE(item)) {
4041 case XFS_LI_BUF:
4042 xlog_recover_buffer_ra_pass2(log, item);
4043 break;
4044 case XFS_LI_INODE:
4045 xlog_recover_inode_ra_pass2(log, item);
4046 break;
4047 case XFS_LI_DQUOT:
4048 xlog_recover_dquot_ra_pass2(log, item);
4049 break;
4050 case XFS_LI_EFI:
4051 case XFS_LI_EFD:
4052 case XFS_LI_QUOTAOFF:
Darrick J. Wong9e88b5d2016-08-03 12:09:48 +10004053 case XFS_LI_RUI:
4054 case XFS_LI_RUD:
Darrick J. Wongf997ee22016-10-03 09:11:21 -07004055 case XFS_LI_CUI:
4056 case XFS_LI_CUD:
Darrick J. Wong77d61fe2016-10-03 09:11:26 -07004057 case XFS_LI_BUI:
4058 case XFS_LI_BUD:
Zhi Yong Wu00574da2013-08-14 15:16:03 +08004059 default:
4060 break;
4061 }
4062}
4063
Linus Torvalds1da177e2005-04-16 15:20:36 -07004064STATIC int
Christoph Hellwigc9f71f52010-12-01 22:06:24 +00004065xlog_recover_commit_pass1(
Mark Tinguelyad223e62012-06-14 09:22:15 -05004066 struct xlog *log,
4067 struct xlog_recover *trans,
4068 struct xlog_recover_item *item)
Linus Torvalds1da177e2005-04-16 15:20:36 -07004069{
Christoph Hellwigc9f71f52010-12-01 22:06:24 +00004070 trace_xfs_log_recover_item_recover(log, trans, item, XLOG_RECOVER_PASS1);
Christoph Hellwigd0450942010-12-01 22:06:23 +00004071
4072 switch (ITEM_TYPE(item)) {
4073 case XFS_LI_BUF:
Christoph Hellwigc9f71f52010-12-01 22:06:24 +00004074 return xlog_recover_buffer_pass1(log, item);
Christoph Hellwigd0450942010-12-01 22:06:23 +00004075 case XFS_LI_QUOTAOFF:
Christoph Hellwigc9f71f52010-12-01 22:06:24 +00004076 return xlog_recover_quotaoff_pass1(log, item);
4077 case XFS_LI_INODE:
4078 case XFS_LI_EFI:
4079 case XFS_LI_EFD:
4080 case XFS_LI_DQUOT:
Dave Chinner28c8e412013-06-27 16:04:55 +10004081 case XFS_LI_ICREATE:
Darrick J. Wong9e88b5d2016-08-03 12:09:48 +10004082 case XFS_LI_RUI:
4083 case XFS_LI_RUD:
Darrick J. Wongf997ee22016-10-03 09:11:21 -07004084 case XFS_LI_CUI:
4085 case XFS_LI_CUD:
Darrick J. Wong77d61fe2016-10-03 09:11:26 -07004086 case XFS_LI_BUI:
4087 case XFS_LI_BUD:
Christoph Hellwigc9f71f52010-12-01 22:06:24 +00004088 /* nothing to do in pass 1 */
4089 return 0;
Christoph Hellwigd0450942010-12-01 22:06:23 +00004090 default:
Dave Chinnera0fa2b62011-03-07 10:01:35 +11004091 xfs_warn(log->l_mp, "%s: invalid item type (%d)",
4092 __func__, ITEM_TYPE(item));
Christoph Hellwigc9f71f52010-12-01 22:06:24 +00004093 ASSERT(0);
Dave Chinner24513372014-06-25 14:58:08 +10004094 return -EIO;
Christoph Hellwigc9f71f52010-12-01 22:06:24 +00004095 }
4096}
4097
4098STATIC int
4099xlog_recover_commit_pass2(
Mark Tinguelyad223e62012-06-14 09:22:15 -05004100 struct xlog *log,
4101 struct xlog_recover *trans,
4102 struct list_head *buffer_list,
4103 struct xlog_recover_item *item)
Christoph Hellwigc9f71f52010-12-01 22:06:24 +00004104{
4105 trace_xfs_log_recover_item_recover(log, trans, item, XLOG_RECOVER_PASS2);
4106
4107 switch (ITEM_TYPE(item)) {
4108 case XFS_LI_BUF:
Dave Chinner50d5c8d2013-08-28 21:22:47 +10004109 return xlog_recover_buffer_pass2(log, buffer_list, item,
4110 trans->r_lsn);
Christoph Hellwigc9f71f52010-12-01 22:06:24 +00004111 case XFS_LI_INODE:
Dave Chinner50d5c8d2013-08-28 21:22:47 +10004112 return xlog_recover_inode_pass2(log, buffer_list, item,
4113 trans->r_lsn);
Christoph Hellwigc9f71f52010-12-01 22:06:24 +00004114 case XFS_LI_EFI:
4115 return xlog_recover_efi_pass2(log, item, trans->r_lsn);
4116 case XFS_LI_EFD:
4117 return xlog_recover_efd_pass2(log, item);
Darrick J. Wong9e88b5d2016-08-03 12:09:48 +10004118 case XFS_LI_RUI:
4119 return xlog_recover_rui_pass2(log, item, trans->r_lsn);
4120 case XFS_LI_RUD:
4121 return xlog_recover_rud_pass2(log, item);
Darrick J. Wongf997ee22016-10-03 09:11:21 -07004122 case XFS_LI_CUI:
4123 return xlog_recover_cui_pass2(log, item, trans->r_lsn);
4124 case XFS_LI_CUD:
4125 return xlog_recover_cud_pass2(log, item);
Darrick J. Wong77d61fe2016-10-03 09:11:26 -07004126 case XFS_LI_BUI:
4127 return xlog_recover_bui_pass2(log, item, trans->r_lsn);
4128 case XFS_LI_BUD:
4129 return xlog_recover_bud_pass2(log, item);
Christoph Hellwigc9f71f52010-12-01 22:06:24 +00004130 case XFS_LI_DQUOT:
Dave Chinner50d5c8d2013-08-28 21:22:47 +10004131 return xlog_recover_dquot_pass2(log, buffer_list, item,
4132 trans->r_lsn);
Dave Chinner28c8e412013-06-27 16:04:55 +10004133 case XFS_LI_ICREATE:
4134 return xlog_recover_do_icreate_pass2(log, buffer_list, item);
Christoph Hellwigc9f71f52010-12-01 22:06:24 +00004135 case XFS_LI_QUOTAOFF:
4136 /* nothing to do in pass2 */
4137 return 0;
4138 default:
Dave Chinnera0fa2b62011-03-07 10:01:35 +11004139 xfs_warn(log->l_mp, "%s: invalid item type (%d)",
4140 __func__, ITEM_TYPE(item));
Christoph Hellwigd0450942010-12-01 22:06:23 +00004141 ASSERT(0);
Dave Chinner24513372014-06-25 14:58:08 +10004142 return -EIO;
Christoph Hellwigd0450942010-12-01 22:06:23 +00004143 }
4144}
4145
Zhi Yong Wu00574da2013-08-14 15:16:03 +08004146STATIC int
4147xlog_recover_items_pass2(
4148 struct xlog *log,
4149 struct xlog_recover *trans,
4150 struct list_head *buffer_list,
4151 struct list_head *item_list)
4152{
4153 struct xlog_recover_item *item;
4154 int error = 0;
4155
4156 list_for_each_entry(item, item_list, ri_list) {
4157 error = xlog_recover_commit_pass2(log, trans,
4158 buffer_list, item);
4159 if (error)
4160 return error;
4161 }
4162
4163 return error;
4164}
4165
Christoph Hellwigd0450942010-12-01 22:06:23 +00004166/*
4167 * Perform the transaction.
4168 *
4169 * If the transaction modifies a buffer or inode, do it now. Otherwise,
4170 * EFIs and EFDs get queued up by adding entries into the AIL for them.
4171 */
4172STATIC int
4173xlog_recover_commit_trans(
Mark Tinguelyad223e62012-06-14 09:22:15 -05004174 struct xlog *log,
Christoph Hellwigd0450942010-12-01 22:06:23 +00004175 struct xlog_recover *trans,
Brian Foster12818d22016-09-26 08:22:16 +10004176 int pass,
4177 struct list_head *buffer_list)
Christoph Hellwigd0450942010-12-01 22:06:23 +00004178{
Zhi Yong Wu00574da2013-08-14 15:16:03 +08004179 int error = 0;
Zhi Yong Wu00574da2013-08-14 15:16:03 +08004180 int items_queued = 0;
4181 struct xlog_recover_item *item;
4182 struct xlog_recover_item *next;
Zhi Yong Wu00574da2013-08-14 15:16:03 +08004183 LIST_HEAD (ra_list);
4184 LIST_HEAD (done_list);
4185
4186 #define XLOG_RECOVER_COMMIT_QUEUE_MAX 100
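	/*
	 * Editor's note (summary of the pass 2 batching below, not from the
	 * original source): each item first has readahead issued for its
	 * backing buffer and is queued on ra_list; once
	 * XLOG_RECOVER_COMMIT_QUEUE_MAX items are queued, the whole batch is
	 * replayed via xlog_recover_items_pass2(), overlapping buffer I/O
	 * with item processing.
	 */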
Linus Torvalds1da177e2005-04-16 15:20:36 -07004187
Brian Foster39775432017-06-24 10:11:41 -07004188 hlist_del_init(&trans->r_list);
Christoph Hellwigd0450942010-12-01 22:06:23 +00004189
4190 error = xlog_recover_reorder_trans(log, trans, pass);
4191 if (error)
Linus Torvalds1da177e2005-04-16 15:20:36 -07004192 return error;
Christoph Hellwigd0450942010-12-01 22:06:23 +00004193
Zhi Yong Wu00574da2013-08-14 15:16:03 +08004194 list_for_each_entry_safe(item, next, &trans->r_itemq, ri_list) {
Christoph Hellwig43ff2122012-04-23 15:58:39 +10004195 switch (pass) {
4196 case XLOG_RECOVER_PASS1:
Christoph Hellwigc9f71f52010-12-01 22:06:24 +00004197 error = xlog_recover_commit_pass1(log, trans, item);
Christoph Hellwig43ff2122012-04-23 15:58:39 +10004198 break;
4199 case XLOG_RECOVER_PASS2:
Zhi Yong Wu00574da2013-08-14 15:16:03 +08004200 xlog_recover_ra_pass2(log, item);
4201 list_move_tail(&item->ri_list, &ra_list);
4202 items_queued++;
4203 if (items_queued >= XLOG_RECOVER_COMMIT_QUEUE_MAX) {
4204 error = xlog_recover_items_pass2(log, trans,
Brian Foster12818d22016-09-26 08:22:16 +10004205 buffer_list, &ra_list);
Zhi Yong Wu00574da2013-08-14 15:16:03 +08004206 list_splice_tail_init(&ra_list, &done_list);
4207 items_queued = 0;
4208 }
4209
Christoph Hellwig43ff2122012-04-23 15:58:39 +10004210 break;
4211 default:
4212 ASSERT(0);
4213 }
4214
Christoph Hellwigd0450942010-12-01 22:06:23 +00004215 if (error)
Christoph Hellwig43ff2122012-04-23 15:58:39 +10004216 goto out;
Christoph Hellwigd0450942010-12-01 22:06:23 +00004217 }
4218
Zhi Yong Wu00574da2013-08-14 15:16:03 +08004219out:
4220 if (!list_empty(&ra_list)) {
4221 if (!error)
4222 error = xlog_recover_items_pass2(log, trans,
Brian Foster12818d22016-09-26 08:22:16 +10004223 buffer_list, &ra_list);
Zhi Yong Wu00574da2013-08-14 15:16:03 +08004224 list_splice_tail_init(&ra_list, &done_list);
4225 }
4226
4227 if (!list_empty(&done_list))
4228 list_splice_init(&done_list, &trans->r_itemq);
4229
Brian Foster12818d22016-09-26 08:22:16 +10004230 return error;
Linus Torvalds1da177e2005-04-16 15:20:36 -07004231}
4232
Dave Chinner76560662014-09-29 09:45:42 +10004233STATIC void
4234xlog_recover_add_item(
4235 struct list_head *head)
Linus Torvalds1da177e2005-04-16 15:20:36 -07004236{
Dave Chinner76560662014-09-29 09:45:42 +10004237 xlog_recover_item_t *item;
4238
4239 item = kmem_zalloc(sizeof(xlog_recover_item_t), KM_SLEEP);
4240 INIT_LIST_HEAD(&item->ri_list);
4241 list_add_tail(&item->ri_list, head);
4242}
4243
4244STATIC int
4245xlog_recover_add_to_cont_trans(
4246 struct xlog *log,
4247 struct xlog_recover *trans,
Christoph Hellwigb2a922c2015-06-22 09:45:10 +10004248 char *dp,
Dave Chinner76560662014-09-29 09:45:42 +10004249 int len)
4250{
4251 xlog_recover_item_t *item;
Christoph Hellwigb2a922c2015-06-22 09:45:10 +10004252 char *ptr, *old_ptr;
Dave Chinner76560662014-09-29 09:45:42 +10004253 int old_len;
4254
Brian Foster89cebc842015-07-29 11:51:10 +10004255 /*
4256 * If the transaction is empty, the header was split across this and the
4257 * previous record. Copy the rest of the header.
4258 */
Dave Chinner76560662014-09-29 09:45:42 +10004259 if (list_empty(&trans->r_itemq)) {
Brian Foster848ccfc2015-11-10 10:10:33 +11004260 ASSERT(len <= sizeof(struct xfs_trans_header));
Brian Foster89cebc842015-07-29 11:51:10 +10004261 if (len > sizeof(struct xfs_trans_header)) {
4262 xfs_warn(log->l_mp, "%s: bad header length", __func__);
4263 return -EIO;
4264 }
4265
Dave Chinner76560662014-09-29 09:45:42 +10004266 xlog_recover_add_item(&trans->r_itemq);
Christoph Hellwigb2a922c2015-06-22 09:45:10 +10004267 ptr = (char *)&trans->r_theader +
Brian Foster89cebc842015-07-29 11:51:10 +10004268 sizeof(struct xfs_trans_header) - len;
Dave Chinner76560662014-09-29 09:45:42 +10004269 memcpy(ptr, dp, len);
4270 return 0;
4271 }
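	/*
	 * Editor's note (illustration, not from the original source): the
	 * destination offset used above works because the previous record
	 * already copied the first sizeof(struct xfs_trans_header) - len
	 * bytes into r_theader; the len bytes arriving now are the tail of
	 * the header and land exactly where the earlier copy stopped.
	 */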
Brian Foster89cebc842015-07-29 11:51:10 +10004272
Dave Chinner76560662014-09-29 09:45:42 +10004273 /* take the tail entry */
4274 item = list_entry(trans->r_itemq.prev, xlog_recover_item_t, ri_list);
4275
4276 old_ptr = item->ri_buf[item->ri_cnt-1].i_addr;
4277 old_len = item->ri_buf[item->ri_cnt-1].i_len;
4278
Christoph Hellwig664b60f2016-04-06 09:47:01 +10004279 ptr = kmem_realloc(old_ptr, len + old_len, KM_SLEEP);
Dave Chinner76560662014-09-29 09:45:42 +10004280 memcpy(&ptr[old_len], dp, len);
4281 item->ri_buf[item->ri_cnt-1].i_len += len;
4282 item->ri_buf[item->ri_cnt-1].i_addr = ptr;
4283 trace_xfs_log_recover_item_add_cont(log, trans, item, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004284 return 0;
4285}
4286
4287/*
Dave Chinner76560662014-09-29 09:45:42 +10004288 * The next region to add is the start of a new region. It could be
4289 * a whole region or it could be the first part of a new region. Because
4290 * of this, the assumption here is that the type and size fields of all
4291 * format structures fit into the first 32 bits of the structure.
4292 *
4293 * This works because all regions must be 32-bit aligned. Therefore, we
4294 * either have both fields or we have neither field. In the case where we have
4295 * neither field, the data part of the region is zero length. We only have
4296 * a log_op_header and can throw away the header since a new one will appear
4297 * later. If we have at least 4 bytes, then we can determine how many regions
4298 * will appear in the current log item.
4299 */
4300STATIC int
4301xlog_recover_add_to_trans(
4302 struct xlog *log,
4303 struct xlog_recover *trans,
Christoph Hellwigb2a922c2015-06-22 09:45:10 +10004304 char *dp,
Dave Chinner76560662014-09-29 09:45:42 +10004305 int len)
4306{
Darrick J. Wong06b11322017-10-31 12:04:24 -07004307 struct xfs_inode_log_format *in_f; /* any will do */
Dave Chinner76560662014-09-29 09:45:42 +10004308 xlog_recover_item_t *item;
Christoph Hellwigb2a922c2015-06-22 09:45:10 +10004309 char *ptr;
Dave Chinner76560662014-09-29 09:45:42 +10004310
4311 if (!len)
4312 return 0;
4313 if (list_empty(&trans->r_itemq)) {
4314 /* we need to catch log corruptions here */
4315 if (*(uint *)dp != XFS_TRANS_HEADER_MAGIC) {
4316 xfs_warn(log->l_mp, "%s: bad header magic number",
4317 __func__);
4318 ASSERT(0);
4319 return -EIO;
4320 }
Brian Foster89cebc842015-07-29 11:51:10 +10004321
4322 if (len > sizeof(struct xfs_trans_header)) {
4323 xfs_warn(log->l_mp, "%s: bad header length", __func__);
4324 ASSERT(0);
4325 return -EIO;
4326 }
4327
4328 /*
4329 * The transaction header can be arbitrarily split across op
4330 * records. If we don't have the whole thing here, copy what we
4331 * do have and handle the rest in the next record.
4332 */
4333 if (len == sizeof(struct xfs_trans_header))
Dave Chinner76560662014-09-29 09:45:42 +10004334 xlog_recover_add_item(&trans->r_itemq);
4335 memcpy(&trans->r_theader, dp, len);
4336 return 0;
4337 }
4338
4339 ptr = kmem_alloc(len, KM_SLEEP);
4340 memcpy(ptr, dp, len);
Darrick J. Wong06b11322017-10-31 12:04:24 -07004341 in_f = (struct xfs_inode_log_format *)ptr;
Dave Chinner76560662014-09-29 09:45:42 +10004342
4343 /* take the tail entry */
4344 item = list_entry(trans->r_itemq.prev, xlog_recover_item_t, ri_list);
4345 if (item->ri_total != 0 &&
4346 item->ri_total == item->ri_cnt) {
4347 /* tail item is in use, get a new one */
4348 xlog_recover_add_item(&trans->r_itemq);
4349 item = list_entry(trans->r_itemq.prev,
4350 xlog_recover_item_t, ri_list);
4351 }
4352
4353 if (item->ri_total == 0) { /* first region to be added */
4354 if (in_f->ilf_size == 0 ||
4355 in_f->ilf_size > XLOG_MAX_REGIONS_IN_ITEM) {
4356 xfs_warn(log->l_mp,
4357 "bad number of regions (%d) in inode log format",
4358 in_f->ilf_size);
4359 ASSERT(0);
4360 kmem_free(ptr);
4361 return -EIO;
4362 }
4363
4364 item->ri_total = in_f->ilf_size;
4365 item->ri_buf =
4366 kmem_zalloc(item->ri_total * sizeof(xfs_log_iovec_t),
4367 KM_SLEEP);
4368 }
4369 ASSERT(item->ri_total > item->ri_cnt);
4370 /* Description region is ri_buf[0] */
4371 item->ri_buf[item->ri_cnt].i_addr = ptr;
4372 item->ri_buf[item->ri_cnt].i_len = len;
4373 item->ri_cnt++;
4374 trace_xfs_log_recover_item_add(log, trans, item, 0);
4375 return 0;
4376}
Dave Chinnerb818cca2014-09-29 09:45:54 +10004377
Dave Chinner76560662014-09-29 09:45:42 +10004378/*
4379 * Free up any resources allocated by the transaction
4380 *
4381 * Remember that EFIs, EFDs, and IUNLINKs are handled later.
4382 */
4383STATIC void
4384xlog_recover_free_trans(
4385 struct xlog_recover *trans)
4386{
4387 xlog_recover_item_t *item, *n;
4388 int i;
4389
Brian Foster39775432017-06-24 10:11:41 -07004390 hlist_del_init(&trans->r_list);
4391
Dave Chinner76560662014-09-29 09:45:42 +10004392 list_for_each_entry_safe(item, n, &trans->r_itemq, ri_list) {
4393 /* Free the regions in the item. */
4394 list_del(&item->ri_list);
4395 for (i = 0; i < item->ri_cnt; i++)
4396 kmem_free(item->ri_buf[i].i_addr);
4397 /* Free the item itself */
4398 kmem_free(item->ri_buf);
4399 kmem_free(item);
4400 }
4401 /* Free the transaction recover structure */
4402 kmem_free(trans);
4403}
4404
Dave Chinnere9131e502014-09-29 09:45:18 +10004405/*
4406 * On error or completion, trans is freed.
4407 */
Linus Torvalds1da177e2005-04-16 15:20:36 -07004408STATIC int
Dave Chinnereeb11682014-09-29 09:45:03 +10004409xlog_recovery_process_trans(
4410 struct xlog *log,
4411 struct xlog_recover *trans,
Christoph Hellwigb2a922c2015-06-22 09:45:10 +10004412 char *dp,
Dave Chinnereeb11682014-09-29 09:45:03 +10004413 unsigned int len,
4414 unsigned int flags,
Brian Foster12818d22016-09-26 08:22:16 +10004415 int pass,
4416 struct list_head *buffer_list)
Linus Torvalds1da177e2005-04-16 15:20:36 -07004417{
Dave Chinnere9131e502014-09-29 09:45:18 +10004418 int error = 0;
4419 bool freeit = false;
Dave Chinnereeb11682014-09-29 09:45:03 +10004420
4421 /* mask off ophdr transaction container flags */
4422 flags &= ~XLOG_END_TRANS;
4423 if (flags & XLOG_WAS_CONT_TRANS)
4424 flags &= ~XLOG_CONTINUE_TRANS;
4425
Dave Chinner88b863d2014-09-29 09:45:32 +10004426 /*
4427 * Callees must not free the trans structure. We'll decide if we need to
4428 * free it or not based on the operation being done and its result.
4429 */
Dave Chinnereeb11682014-09-29 09:45:03 +10004430 switch (flags) {
4431 /* expected flag values */
4432 case 0:
4433 case XLOG_CONTINUE_TRANS:
4434 error = xlog_recover_add_to_trans(log, trans, dp, len);
4435 break;
4436 case XLOG_WAS_CONT_TRANS:
4437 error = xlog_recover_add_to_cont_trans(log, trans, dp, len);
4438 break;
4439 case XLOG_COMMIT_TRANS:
Brian Foster12818d22016-09-26 08:22:16 +10004440 error = xlog_recover_commit_trans(log, trans, pass,
4441 buffer_list);
Dave Chinner88b863d2014-09-29 09:45:32 +10004442 /* success or fail, we are now done with this transaction. */
4443 freeit = true;
Dave Chinnereeb11682014-09-29 09:45:03 +10004444 break;
4445
4446 /* unexpected flag values */
4447 case XLOG_UNMOUNT_TRANS:
Dave Chinnere9131e502014-09-29 09:45:18 +10004448 /* just skip trans */
Dave Chinnereeb11682014-09-29 09:45:03 +10004449 xfs_warn(log->l_mp, "%s: Unmount LR", __func__);
Dave Chinnere9131e502014-09-29 09:45:18 +10004450 freeit = true;
Dave Chinnereeb11682014-09-29 09:45:03 +10004451 break;
4452 case XLOG_START_TRANS:
Dave Chinnereeb11682014-09-29 09:45:03 +10004453 default:
4454 xfs_warn(log->l_mp, "%s: bad flag 0x%x", __func__, flags);
4455 ASSERT(0);
Dave Chinnere9131e502014-09-29 09:45:18 +10004456 error = -EIO;
Dave Chinnereeb11682014-09-29 09:45:03 +10004457 break;
4458 }
Dave Chinnere9131e502014-09-29 09:45:18 +10004459 if (error || freeit)
4460 xlog_recover_free_trans(trans);
Dave Chinnereeb11682014-09-29 09:45:03 +10004461 return error;
4462}
4463
Dave Chinnerb818cca2014-09-29 09:45:54 +10004464/*
4465 * Lookup the transaction recovery structure associated with the ID in the
4466 * current ophdr. If the transaction doesn't exist and the start flag is set in
4467 * the ophdr, then allocate a new transaction for future ID matches to find.
4468 * Either way, return what we found during the lookup - an existing transaction
4469 * or nothing.
4470 */
Dave Chinnereeb11682014-09-29 09:45:03 +10004471STATIC struct xlog_recover *
4472xlog_recover_ophdr_to_trans(
4473 struct hlist_head rhash[],
4474 struct xlog_rec_header *rhead,
4475 struct xlog_op_header *ohead)
4476{
4477 struct xlog_recover *trans;
4478 xlog_tid_t tid;
4479 struct hlist_head *rhp;
4480
4481 tid = be32_to_cpu(ohead->oh_tid);
4482 rhp = &rhash[XLOG_RHASH(tid)];
Dave Chinnerb818cca2014-09-29 09:45:54 +10004483 hlist_for_each_entry(trans, rhp, r_list) {
4484 if (trans->r_log_tid == tid)
4485 return trans;
4486 }
Dave Chinnereeb11682014-09-29 09:45:03 +10004487
4488 /*
Dave Chinnerb818cca2014-09-29 09:45:54 +10004489 * skip over non-start transaction headers - we could be
4490 * processing slack space before the next transaction starts
Dave Chinnereeb11682014-09-29 09:45:03 +10004491 */
Dave Chinnerb818cca2014-09-29 09:45:54 +10004492 if (!(ohead->oh_flags & XLOG_START_TRANS))
4493 return NULL;
4494
4495 ASSERT(be32_to_cpu(ohead->oh_len) == 0);
4496
4497 /*
4498 * This is a new transaction so allocate a new recovery container to
4499 * hold the recovery ops that will follow.
4500 */
4501 trans = kmem_zalloc(sizeof(struct xlog_recover), KM_SLEEP);
4502 trans->r_log_tid = tid;
4503 trans->r_lsn = be64_to_cpu(rhead->h_lsn);
4504 INIT_LIST_HEAD(&trans->r_itemq);
4505 INIT_HLIST_NODE(&trans->r_list);
4506 hlist_add_head(&trans->r_list, rhp);
4507
4508 /*
4509 * Nothing more to do for this ophdr. Items to be added to this new
4510 * transaction will be in subsequent ophdr containers.
4511 */
Dave Chinnereeb11682014-09-29 09:45:03 +10004512 return NULL;
4513}
4514
4515STATIC int
4516xlog_recover_process_ophdr(
4517 struct xlog *log,
4518 struct hlist_head rhash[],
4519 struct xlog_rec_header *rhead,
4520 struct xlog_op_header *ohead,
Christoph Hellwigb2a922c2015-06-22 09:45:10 +10004521 char *dp,
4522 char *end,
Brian Foster12818d22016-09-26 08:22:16 +10004523 int pass,
4524 struct list_head *buffer_list)
Dave Chinnereeb11682014-09-29 09:45:03 +10004525{
4526 struct xlog_recover *trans;
Dave Chinnereeb11682014-09-29 09:45:03 +10004527 unsigned int len;
Brian Foster12818d22016-09-26 08:22:16 +10004528 int error;
Dave Chinnereeb11682014-09-29 09:45:03 +10004529
4530 /* Do we understand who wrote this op? */
4531 if (ohead->oh_clientid != XFS_TRANSACTION &&
4532 ohead->oh_clientid != XFS_LOG) {
4533 xfs_warn(log->l_mp, "%s: bad clientid 0x%x",
4534 __func__, ohead->oh_clientid);
4535 ASSERT(0);
4536 return -EIO;
4537 }
4538
4539 /*
4540 * Check that the ophdr contains all the data it is supposed to contain.
4541 */
4542 len = be32_to_cpu(ohead->oh_len);
4543 if (dp + len > end) {
4544 xfs_warn(log->l_mp, "%s: bad length 0x%x", __func__, len);
4545 WARN_ON(1);
4546 return -EIO;
4547 }
4548
4549 trans = xlog_recover_ophdr_to_trans(rhash, rhead, ohead);
4550 if (!trans) {
4551 /* nothing to do, so skip over this ophdr */
4552 return 0;
4553 }
4554
Brian Foster12818d22016-09-26 08:22:16 +10004555 /*
4556 * The recovered buffer queue is drained only once we know that all
4557 * recovery items for the current LSN have been processed. This is
4558 * required because:
4559 *
4560 * - Buffer write submission updates the metadata LSN of the buffer.
4561 * - Log recovery skips items with a metadata LSN >= the current LSN of
4562 * the recovery item.
4563 * - Separate recovery items against the same metadata buffer can share
4564 * a current LSN. I.e., consider that the LSN of a recovery item is
4565 * defined as the starting LSN of the first record in which its
4566 * transaction appears, that a record can hold multiple transactions,
4567 * and/or that a transaction can span multiple records.
4568 *
4569 * In other words, we are allowed to submit a buffer from log recovery
4570 * once per current LSN. Otherwise, we may incorrectly skip recovery
4571 * items and cause corruption.
4572 *
4573 * We don't know up front whether buffers are updated multiple times per
4574 * LSN. Therefore, track the current LSN of each commit log record as it
4575 * is processed and drain the queue when it changes. Use commit records
4576 * because they are ordered correctly by the logging code.
4577 */
4578 if (log->l_recovery_lsn != trans->r_lsn &&
4579 ohead->oh_flags & XLOG_COMMIT_TRANS) {
4580 error = xfs_buf_delwri_submit(buffer_list);
4581 if (error)
4582 return error;
4583 log->l_recovery_lsn = trans->r_lsn;
4584 }
4585
Dave Chinnere9131e502014-09-29 09:45:18 +10004586 return xlog_recovery_process_trans(log, trans, dp, len,
Brian Foster12818d22016-09-26 08:22:16 +10004587 ohead->oh_flags, pass, buffer_list);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004588}
4589
4590/*
4591 * There are two valid states of the r_state field. 0 indicates that the
4592 * transaction structure is in a normal state. We have either seen the
4593 * start of the transaction or the last operation we added was not a partial
4594 * operation. If the last operation we added to the transaction was a
4595 * partial operation, we need to mark r_state with XLOG_WAS_CONT_TRANS.
4596 *
4597 * NOTE: skip LRs with 0 data length.
4598 */
4599STATIC int
4600xlog_recover_process_data(
Mark Tinguely9a8d2fd2012-06-14 09:22:16 -05004601 struct xlog *log,
Dave Chinnerf0a76952010-01-11 11:49:57 +00004602 struct hlist_head rhash[],
Mark Tinguely9a8d2fd2012-06-14 09:22:16 -05004603 struct xlog_rec_header *rhead,
Christoph Hellwigb2a922c2015-06-22 09:45:10 +10004604 char *dp,
Brian Foster12818d22016-09-26 08:22:16 +10004605 int pass,
4606 struct list_head *buffer_list)
Linus Torvalds1da177e2005-04-16 15:20:36 -07004607{
Dave Chinnereeb11682014-09-29 09:45:03 +10004608 struct xlog_op_header *ohead;
Christoph Hellwigb2a922c2015-06-22 09:45:10 +10004609 char *end;
Linus Torvalds1da177e2005-04-16 15:20:36 -07004610 int num_logops;
Linus Torvalds1da177e2005-04-16 15:20:36 -07004611 int error;
Linus Torvalds1da177e2005-04-16 15:20:36 -07004612
Dave Chinnereeb11682014-09-29 09:45:03 +10004613 end = dp + be32_to_cpu(rhead->h_len);
Christoph Hellwigb53e6752007-10-12 10:59:34 +10004614 num_logops = be32_to_cpu(rhead->h_num_logops);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004615
4616 /* check the log format matches our own - else we can't recover */
4617 if (xlog_header_check_recover(log->l_mp, rhead))
Dave Chinner24513372014-06-25 14:58:08 +10004618 return -EIO;
Linus Torvalds1da177e2005-04-16 15:20:36 -07004619
Brian Foster5cd9cee2016-09-26 08:34:52 +10004620 trace_xfs_log_recover_record(log, rhead, pass);
Dave Chinnereeb11682014-09-29 09:45:03 +10004621 while ((dp < end) && num_logops) {
4622
4623 ohead = (struct xlog_op_header *)dp;
4624 dp += sizeof(*ohead);
4625 ASSERT(dp <= end);
4626
4627 /* errors will abort recovery */
4628 error = xlog_recover_process_ophdr(log, rhash, rhead, ohead,
Brian Foster12818d22016-09-26 08:22:16 +10004629 dp, end, pass, buffer_list);
Dave Chinnereeb11682014-09-29 09:45:03 +10004630 if (error)
4631 return error;
4632
Christoph Hellwig67fcb7b2007-10-12 10:58:59 +10004633 dp += be32_to_cpu(ohead->oh_len);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004634 num_logops--;
4635 }
4636 return 0;
4637}
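/*
 * Editor's note (layout sketch derived from the loop above, not from the
 * original source): the portion of a log record walked here looks
 * roughly like
 *
 *	[xlog_op_header][oh_len payload bytes]
 *	[xlog_op_header][oh_len payload bytes]
 *	...	(h_num_logops ophdrs in total)
 *
 * dp starts at the record data and advances by sizeof(*ohead) plus
 * oh_len per operation until it reaches end.
 */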
4638
Darrick J. Wongdc423752016-08-03 11:23:49 +10004639/* Recover the EFI if necessary. */
David Chinner3c1e2bb2008-04-10 12:21:11 +10004640STATIC int
Linus Torvalds1da177e2005-04-16 15:20:36 -07004641xlog_recover_process_efi(
Darrick J. Wongdc423752016-08-03 11:23:49 +10004642 struct xfs_mount *mp,
4643 struct xfs_ail *ailp,
4644 struct xfs_log_item *lip)
Linus Torvalds1da177e2005-04-16 15:20:36 -07004645{
Darrick J. Wongdc423752016-08-03 11:23:49 +10004646 struct xfs_efi_log_item *efip;
4647 int error;
Linus Torvalds1da177e2005-04-16 15:20:36 -07004648
4649 /*
Darrick J. Wongdc423752016-08-03 11:23:49 +10004650 * Skip EFIs that we've already processed.
Linus Torvalds1da177e2005-04-16 15:20:36 -07004651 */
Darrick J. Wongdc423752016-08-03 11:23:49 +10004652 efip = container_of(lip, struct xfs_efi_log_item, efi_item);
4653 if (test_bit(XFS_EFI_RECOVERED, &efip->efi_flags))
4654 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07004655
Matthew Wilcox57e80952018-03-07 14:59:39 -08004656 spin_unlock(&ailp->ail_lock);
Darrick J. Wongdc423752016-08-03 11:23:49 +10004657 error = xfs_efi_recover(mp, efip);
Matthew Wilcox57e80952018-03-07 14:59:39 -08004658 spin_lock(&ailp->ail_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004659
David Chinnerfc6149d2008-04-10 12:21:53 +10004660 return error;
Linus Torvalds1da177e2005-04-16 15:20:36 -07004661}
4662
Darrick J. Wongdc423752016-08-03 11:23:49 +10004663/* Release the EFI since we're cancelling everything. */
4664STATIC void
4665xlog_recover_cancel_efi(
4666 struct xfs_mount *mp,
4667 struct xfs_ail *ailp,
4668 struct xfs_log_item *lip)
4669{
4670 struct xfs_efi_log_item *efip;
4671
4672 efip = container_of(lip, struct xfs_efi_log_item, efi_item);
4673
Matthew Wilcox57e80952018-03-07 14:59:39 -08004674 spin_unlock(&ailp->ail_lock);
Darrick J. Wongdc423752016-08-03 11:23:49 +10004675 xfs_efi_release(efip);
Matthew Wilcox57e80952018-03-07 14:59:39 -08004676 spin_lock(&ailp->ail_lock);
Darrick J. Wongdc423752016-08-03 11:23:49 +10004677}
4678
Darrick J. Wong9e88b5d2016-08-03 12:09:48 +10004679/* Recover the RUI if necessary. */
4680STATIC int
4681xlog_recover_process_rui(
4682 struct xfs_mount *mp,
4683 struct xfs_ail *ailp,
4684 struct xfs_log_item *lip)
4685{
4686 struct xfs_rui_log_item *ruip;
4687 int error;
4688
4689 /*
4690 * Skip RUIs that we've already processed.
4691 */
4692 ruip = container_of(lip, struct xfs_rui_log_item, rui_item);
4693 if (test_bit(XFS_RUI_RECOVERED, &ruip->rui_flags))
4694 return 0;
4695
Matthew Wilcox57e80952018-03-07 14:59:39 -08004696 spin_unlock(&ailp->ail_lock);
Darrick J. Wong9e88b5d2016-08-03 12:09:48 +10004697 error = xfs_rui_recover(mp, ruip);
Matthew Wilcox57e80952018-03-07 14:59:39 -08004698 spin_lock(&ailp->ail_lock);
Darrick J. Wong9e88b5d2016-08-03 12:09:48 +10004699
4700 return error;
4701}
4702
4703/* Release the RUI since we're cancelling everything. */
4704STATIC void
4705xlog_recover_cancel_rui(
4706 struct xfs_mount *mp,
4707 struct xfs_ail *ailp,
4708 struct xfs_log_item *lip)
4709{
4710 struct xfs_rui_log_item *ruip;
4711
4712 ruip = container_of(lip, struct xfs_rui_log_item, rui_item);
4713
Matthew Wilcox57e80952018-03-07 14:59:39 -08004714 spin_unlock(&ailp->ail_lock);
Darrick J. Wong9e88b5d2016-08-03 12:09:48 +10004715 xfs_rui_release(ruip);
Matthew Wilcox57e80952018-03-07 14:59:39 -08004716 spin_lock(&ailp->ail_lock);
Darrick J. Wong9e88b5d2016-08-03 12:09:48 +10004717}
4718
Darrick J. Wongf997ee22016-10-03 09:11:21 -07004719/* Recover the CUI if necessary. */
4720STATIC int
4721xlog_recover_process_cui(
Brian Fosterfbfa9772018-08-01 07:20:29 -07004722 struct xfs_trans *parent_tp,
Darrick J. Wongf997ee22016-10-03 09:11:21 -07004723 struct xfs_ail *ailp,
Brian Fosterfbfa9772018-08-01 07:20:29 -07004724 struct xfs_log_item *lip)
Darrick J. Wongf997ee22016-10-03 09:11:21 -07004725{
4726 struct xfs_cui_log_item *cuip;
4727 int error;
4728
4729 /*
4730 * Skip CUIs that we've already processed.
4731 */
4732 cuip = container_of(lip, struct xfs_cui_log_item, cui_item);
4733 if (test_bit(XFS_CUI_RECOVERED, &cuip->cui_flags))
4734 return 0;
4735
Matthew Wilcox57e80952018-03-07 14:59:39 -08004736 spin_unlock(&ailp->ail_lock);
Brian Fosterfbfa9772018-08-01 07:20:29 -07004737 error = xfs_cui_recover(parent_tp, cuip);
Matthew Wilcox57e80952018-03-07 14:59:39 -08004738 spin_lock(&ailp->ail_lock);
Darrick J. Wongf997ee22016-10-03 09:11:21 -07004739
4740 return error;
4741}
4742
4743/* Release the CUI since we're cancelling everything. */
4744STATIC void
4745xlog_recover_cancel_cui(
4746 struct xfs_mount *mp,
4747 struct xfs_ail *ailp,
4748 struct xfs_log_item *lip)
4749{
4750 struct xfs_cui_log_item *cuip;
4751
4752 cuip = container_of(lip, struct xfs_cui_log_item, cui_item);
4753
Matthew Wilcox57e80952018-03-07 14:59:39 -08004754 spin_unlock(&ailp->ail_lock);
Darrick J. Wongf997ee22016-10-03 09:11:21 -07004755 xfs_cui_release(cuip);
Matthew Wilcox57e80952018-03-07 14:59:39 -08004756 spin_lock(&ailp->ail_lock);
Darrick J. Wongf997ee22016-10-03 09:11:21 -07004757}
4758
Darrick J. Wong77d61fe2016-10-03 09:11:26 -07004759/* Recover the BUI if necessary. */
4760STATIC int
4761xlog_recover_process_bui(
Brian Fosterfbfa9772018-08-01 07:20:29 -07004762 struct xfs_trans *parent_tp,
Darrick J. Wong77d61fe2016-10-03 09:11:26 -07004763 struct xfs_ail *ailp,
Brian Fosterfbfa9772018-08-01 07:20:29 -07004764 struct xfs_log_item *lip)
Darrick J. Wong77d61fe2016-10-03 09:11:26 -07004765{
4766 struct xfs_bui_log_item *buip;
4767 int error;
4768
4769 /*
4770 * Skip BUIs that we've already processed.
4771 */
4772 buip = container_of(lip, struct xfs_bui_log_item, bui_item);
4773 if (test_bit(XFS_BUI_RECOVERED, &buip->bui_flags))
4774 return 0;
4775
Matthew Wilcox57e80952018-03-07 14:59:39 -08004776 spin_unlock(&ailp->ail_lock);
Brian Fosterfbfa9772018-08-01 07:20:29 -07004777 error = xfs_bui_recover(parent_tp, buip);
Matthew Wilcox57e80952018-03-07 14:59:39 -08004778 spin_lock(&ailp->ail_lock);
Darrick J. Wong77d61fe2016-10-03 09:11:26 -07004779
4780 return error;
4781}
4782
4783/* Release the BUI since we're cancelling everything. */
4784STATIC void
4785xlog_recover_cancel_bui(
4786 struct xfs_mount *mp,
4787 struct xfs_ail *ailp,
4788 struct xfs_log_item *lip)
4789{
4790 struct xfs_bui_log_item *buip;
4791
4792 buip = container_of(lip, struct xfs_bui_log_item, bui_item);
4793
Matthew Wilcox57e80952018-03-07 14:59:39 -08004794 spin_unlock(&ailp->ail_lock);
Darrick J. Wong77d61fe2016-10-03 09:11:26 -07004795 xfs_bui_release(buip);
Matthew Wilcox57e80952018-03-07 14:59:39 -08004796 spin_lock(&ailp->ail_lock);
Darrick J. Wong77d61fe2016-10-03 09:11:26 -07004797}
4798
Darrick J. Wongdc423752016-08-03 11:23:49 +10004799/* Is this log item a deferred action intent? */
4800static inline bool xlog_item_is_intent(struct xfs_log_item *lip)
4801{
4802 switch (lip->li_type) {
4803 case XFS_LI_EFI:
Darrick J. Wong9e88b5d2016-08-03 12:09:48 +10004804 case XFS_LI_RUI:
Darrick J. Wongf997ee22016-10-03 09:11:21 -07004805 case XFS_LI_CUI:
Darrick J. Wong77d61fe2016-10-03 09:11:26 -07004806 case XFS_LI_BUI:
Darrick J. Wongdc423752016-08-03 11:23:49 +10004807 return true;
4808 default:
4809 return false;
4810 }
4811}
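/*
 * Editor's note (pairing summary drawn from the recovery handlers in this
 * file, not new information): each intent type above has a matching
 * "done" item that cancels it during recovery: EFI/EFD (extent free),
 * RUI/RUD (reverse mapping update), CUI/CUD (refcount update) and
 * BUI/BUD (block mapping update).
 */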
4812
Darrick J. Wong50995582017-11-21 20:53:02 -08004813/* Take all the collected deferred ops and finish them in order. */
4814static int
4815xlog_finish_defer_ops(
Brian Fosterfbfa9772018-08-01 07:20:29 -07004816 struct xfs_trans *parent_tp)
Darrick J. Wong50995582017-11-21 20:53:02 -08004817{
Brian Fosterfbfa9772018-08-01 07:20:29 -07004818 struct xfs_mount *mp = parent_tp->t_mountp;
Darrick J. Wong50995582017-11-21 20:53:02 -08004819 struct xfs_trans *tp;
4820 int64_t freeblks;
4821 uint resblks;
4822 int error;
4823
4824 /*
4825 * We're finishing the defer_ops that accumulated as a result of
4826 * recovering unfinished intent items during log recovery. We
4827 * reserve an itruncate transaction because it is the largest
4828 * permanent transaction type. Since we're the only user of the fs
4829 * right now, take 93% (15/16) of the available free blocks. Use
4830 * weird math to avoid a 64-bit division.
4831 */
4832 freeblks = percpu_counter_sum(&mp->m_fdblocks);
4833 if (freeblks <= 0)
4834 return -ENOSPC;
4835 resblks = min_t(int64_t, UINT_MAX, freeblks);
4836 resblks = (resblks * 15) >> 4;
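	/*
	 * Editor's note (worked example, not from the original source):
	 * multiplying by 15 and shifting right by 4 computes 15/16 of
	 * resblks without a 64-bit divide; e.g. for resblks == 1048576 the
	 * result is (1048576 * 15) >> 4 == 983040, i.e. 93.75%.
	 */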
4837 error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, resblks,
4838 0, XFS_TRANS_RESERVE, &tp);
4839 if (error)
4840 return error;
Brian Foster91ef75b2018-07-24 13:43:13 -07004841 /* transfer all collected dfops to this transaction */
Brian Fosterce356d62018-08-01 07:20:30 -07004842 xfs_defer_move(tp, parent_tp);
Darrick J. Wong50995582017-11-21 20:53:02 -08004843
Darrick J. Wong50995582017-11-21 20:53:02 -08004844 return xfs_trans_commit(tp);
Darrick J. Wong50995582017-11-21 20:53:02 -08004845}
4846
Linus Torvalds1da177e2005-04-16 15:20:36 -07004847/*
Darrick J. Wongdc423752016-08-03 11:23:49 +10004848 * When this is called, all of the log intent items which did not have
4849 * corresponding log done items should be in the AIL. What we do now
4850 * is update the data structures associated with each one.
Linus Torvalds1da177e2005-04-16 15:20:36 -07004851 *
Darrick J. Wongdc423752016-08-03 11:23:49 +10004852 * Since we process the log intent items in normal transactions, they
4853 * will be removed at some point after the commit. This prevents us
4854 * from just walking down the list processing each one. We'll use a
4855 * flag in the intent item to skip those that we've already processed
4856 * and use the AIL iteration mechanism's generation count to try to
4857 * speed this up at least a bit.
Linus Torvalds1da177e2005-04-16 15:20:36 -07004858 *
Darrick J. Wongdc423752016-08-03 11:23:49 +10004859 * When we start, we know that the intents are the only things in the
4860 * AIL. As we process them, however, other items are added to the
4861 * AIL.
Linus Torvalds1da177e2005-04-16 15:20:36 -07004862 */
David Chinner3c1e2bb2008-04-10 12:21:11 +10004863STATIC int
Darrick J. Wongdc423752016-08-03 11:23:49 +10004864xlog_recover_process_intents(
Brian Fosterf0b2efa2015-08-19 09:58:36 +10004865 struct xlog *log)
Linus Torvalds1da177e2005-04-16 15:20:36 -07004866{
Brian Fosterfbfa9772018-08-01 07:20:29 -07004867 struct xfs_trans *parent_tp;
David Chinner27d8d5f2008-10-30 17:38:39 +11004868 struct xfs_ail_cursor cur;
Darrick J. Wong50995582017-11-21 20:53:02 -08004869 struct xfs_log_item *lip;
David Chinnera9c21c12008-10-30 17:39:35 +11004870 struct xfs_ail *ailp;
Brian Fosterfbfa9772018-08-01 07:20:29 -07004871 int error;
Darrick J. Wong7bf7a192017-08-31 15:11:06 -07004872#if defined(DEBUG) || defined(XFS_WARN)
Darrick J. Wongdc423752016-08-03 11:23:49 +10004873 xfs_lsn_t last_lsn;
Darrick J. Wong7bf7a192017-08-31 15:11:06 -07004874#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -07004875
Brian Fosterfbfa9772018-08-01 07:20:29 -07004876 /*
4877 * The intent recovery handlers commit transactions to complete recovery
4878 * for individual intents, but any new deferred operations that are
4879 * queued during that process are held off until the very end. The
4880 * purpose of this transaction is to serve as a container for deferred
4881 * operations. Each intent recovery handler must transfer dfops here
4882 * before its local transaction commits, and we'll finish the entire
4883 * list below.
4884 */
4885 error = xfs_trans_alloc_empty(log->l_mp, &parent_tp);
4886 if (error)
4887 return error;
4888
David Chinnera9c21c12008-10-30 17:39:35 +11004889 ailp = log->l_ailp;
Matthew Wilcox57e80952018-03-07 14:59:39 -08004890 spin_lock(&ailp->ail_lock);
David Chinnera9c21c12008-10-30 17:39:35 +11004891 lip = xfs_trans_ail_cursor_first(ailp, &cur, 0);
Darrick J. Wong7bf7a192017-08-31 15:11:06 -07004892#if defined(DEBUG) || defined(XFS_WARN)
Darrick J. Wongdc423752016-08-03 11:23:49 +10004893 last_lsn = xlog_assign_lsn(log->l_curr_cycle, log->l_curr_block);
Darrick J. Wong7bf7a192017-08-31 15:11:06 -07004894#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -07004895 while (lip != NULL) {
4896 /*
Darrick J. Wongdc423752016-08-03 11:23:49 +10004897 * We're done when we see something other than an intent.
4898 * There should be no intents left in the AIL now.
Linus Torvalds1da177e2005-04-16 15:20:36 -07004899 */
Darrick J. Wongdc423752016-08-03 11:23:49 +10004900 if (!xlog_item_is_intent(lip)) {
David Chinner27d8d5f2008-10-30 17:38:39 +11004901#ifdef DEBUG
David Chinnera9c21c12008-10-30 17:39:35 +11004902 for (; lip; lip = xfs_trans_ail_cursor_next(ailp, &cur))
Darrick J. Wongdc423752016-08-03 11:23:49 +10004903 ASSERT(!xlog_item_is_intent(lip));
David Chinner27d8d5f2008-10-30 17:38:39 +11004904#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -07004905 break;
4906 }
4907
4908 /*
Darrick J. Wongdc423752016-08-03 11:23:49 +10004909 * We should never see a redo item with a LSN higher than
4910 * the last transaction we found in the log at the start
4911 * of recovery.
Linus Torvalds1da177e2005-04-16 15:20:36 -07004912 */
Darrick J. Wongdc423752016-08-03 11:23:49 +10004913 ASSERT(XFS_LSN_CMP(last_lsn, lip->li_lsn) >= 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004914
Darrick J. Wong50995582017-11-21 20:53:02 -08004915 /*
4916 * NOTE: If your intent processing routine can create more
4917 * deferred ops, you /must/ attach them to the dfops in this
4918 * routine or else those subsequent intents will get
4919 * replayed in the wrong order!
4920 */
Darrick J. Wongdc423752016-08-03 11:23:49 +10004921 switch (lip->li_type) {
4922 case XFS_LI_EFI:
4923 error = xlog_recover_process_efi(log->l_mp, ailp, lip);
4924 break;
Darrick J. Wong9e88b5d2016-08-03 12:09:48 +10004925 case XFS_LI_RUI:
4926 error = xlog_recover_process_rui(log->l_mp, ailp, lip);
4927 break;
Darrick J. Wongf997ee22016-10-03 09:11:21 -07004928 case XFS_LI_CUI:
Brian Fosterfbfa9772018-08-01 07:20:29 -07004929 error = xlog_recover_process_cui(parent_tp, ailp, lip);
Darrick J. Wongf997ee22016-10-03 09:11:21 -07004930 break;
Darrick J. Wong77d61fe2016-10-03 09:11:26 -07004931 case XFS_LI_BUI:
Brian Fosterfbfa9772018-08-01 07:20:29 -07004932 error = xlog_recover_process_bui(parent_tp, ailp, lip);
Darrick J. Wong77d61fe2016-10-03 09:11:26 -07004933 break;
Darrick J. Wongdc423752016-08-03 11:23:49 +10004934 }
David Chinner27d8d5f2008-10-30 17:38:39 +11004935 if (error)
4936 goto out;
David Chinnera9c21c12008-10-30 17:39:35 +11004937 lip = xfs_trans_ail_cursor_next(ailp, &cur);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004938 }
David Chinner27d8d5f2008-10-30 17:38:39 +11004939out:
Eric Sandeene4a1e292014-04-14 19:06:05 +10004940 xfs_trans_ail_cursor_done(&cur);
Matthew Wilcox57e80952018-03-07 14:59:39 -08004941 spin_unlock(&ailp->ail_lock);
Brian Fosterfbfa9772018-08-01 07:20:29 -07004942 if (!error)
4943 error = xlog_finish_defer_ops(parent_tp);
4944 xfs_trans_cancel(parent_tp);
Darrick J. Wong50995582017-11-21 20:53:02 -08004945
David Chinner3c1e2bb2008-04-10 12:21:11 +10004946 return error;
Linus Torvalds1da177e2005-04-16 15:20:36 -07004947}
4948
/*
 * A cancel occurs when the mount has failed and we're bailing out.
 * Release all pending log intent items so they don't pin the AIL.
 */
STATIC int
xlog_recover_cancel_intents(
	struct xlog		*log)
{
	struct xfs_log_item	*lip;
	int			error = 0;
	struct xfs_ail_cursor	cur;
	struct xfs_ail		*ailp;

	ailp = log->l_ailp;
	spin_lock(&ailp->ail_lock);
	lip = xfs_trans_ail_cursor_first(ailp, &cur, 0);
	while (lip != NULL) {
		/*
		 * We're done when we see something other than an intent.
		 * There should be no intents left in the AIL now.
		 */
		if (!xlog_item_is_intent(lip)) {
#ifdef DEBUG
			for (; lip; lip = xfs_trans_ail_cursor_next(ailp, &cur))
				ASSERT(!xlog_item_is_intent(lip));
#endif
			break;
		}

		switch (lip->li_type) {
		case XFS_LI_EFI:
			xlog_recover_cancel_efi(log->l_mp, ailp, lip);
			break;
		case XFS_LI_RUI:
			xlog_recover_cancel_rui(log->l_mp, ailp, lip);
			break;
		case XFS_LI_CUI:
			xlog_recover_cancel_cui(log->l_mp, ailp, lip);
			break;
		case XFS_LI_BUI:
			xlog_recover_cancel_bui(log->l_mp, ailp, lip);
			break;
		}

		lip = xfs_trans_ail_cursor_next(ailp, &cur);
	}

	xfs_trans_ail_cursor_done(&cur);
	spin_unlock(&ailp->ail_lock);
	return error;
}

/*
 * This routine performs a transaction to null out a bad inode pointer
 * in an agi unlinked inode hash bucket.
 */
STATIC void
xlog_recover_clear_agi_bucket(
	xfs_mount_t	*mp,
	xfs_agnumber_t	agno,
	int		bucket)
{
	xfs_trans_t	*tp;
	xfs_agi_t	*agi;
	xfs_buf_t	*agibp;
	int		offset;
	int		error;

	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_clearagi, 0, 0, 0, &tp);
	if (error)
		goto out_error;

	error = xfs_read_agi(mp, tp, agno, &agibp);
	if (error)
		goto out_abort;

	agi = XFS_BUF_TO_AGI(agibp);
	agi->agi_unlinked[bucket] = cpu_to_be32(NULLAGINO);
	offset = offsetof(xfs_agi_t, agi_unlinked) +
		 (sizeof(xfs_agino_t) * bucket);
	xfs_trans_log_buf(tp, agibp, offset,
			  (offset + sizeof(xfs_agino_t) - 1));

	error = xfs_trans_commit(tp);
	if (error)
		goto out_error;
	return;

out_abort:
	xfs_trans_cancel(tp);
out_error:
	xfs_warn(mp, "%s: failed to clear agi %d. Continuing.", __func__, agno);
	return;
}

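/*
 * Process one inode from an AGI unlinked-list bucket: grab a reference
 * to the inode so that dropping it triggers inactivation (truncate and
 * free), pull the next agino in the chain out of the on-disk inode,
 * and hand that back to the caller.  On any failure the whole bucket
 * is cleared and NULLAGINO is returned so the walk terminates.
 */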
STATIC xfs_agino_t
xlog_recover_process_one_iunlink(
	struct xfs_mount		*mp,
	xfs_agnumber_t			agno,
	xfs_agino_t			agino,
	int				bucket)
{
	struct xfs_buf			*ibp;
	struct xfs_dinode		*dip;
	struct xfs_inode		*ip;
	xfs_ino_t			ino;
	int				error;

	ino = XFS_AGINO_TO_INO(mp, agno, agino);
	error = xfs_iget(mp, NULL, ino, 0, 0, &ip);
	if (error)
		goto fail;

	/*
	 * Get the on-disk inode to find the next inode in the bucket.
	 */
	error = xfs_imap_to_bp(mp, NULL, &ip->i_imap, &dip, &ibp, 0, 0);
	if (error)
		goto fail_iput;

	xfs_iflags_clear(ip, XFS_IRECOVERY);
	ASSERT(VFS_I(ip)->i_nlink == 0);
	ASSERT(VFS_I(ip)->i_mode != 0);

	/* setup for the next pass */
	agino = be32_to_cpu(dip->di_next_unlinked);
	xfs_buf_relse(ibp);

	/*
	 * Prevent any DMAPI event from being sent when the reference on
	 * the inode is dropped.
	 */
	ip->i_d.di_dmevmask = 0;

	xfs_irele(ip);
	return agino;

 fail_iput:
	xfs_irele(ip);
 fail:
	/*
	 * We can't read in the inode this bucket points to, or this inode
	 * is messed up.  Just ditch this bucket of inodes.  We will lose
	 * some inodes and space, but at least we won't hang.
	 *
	 * Call xlog_recover_clear_agi_bucket() to perform a transaction to
	 * clear the inode pointer in the bucket.
	 */
	xlog_recover_clear_agi_bucket(mp, agno, bucket);
	return NULLAGINO;
}

/*
 * xlog_recover_process_iunlinks
 *
 * This is called during recovery to process any inodes which were
 * unlinked but not freed when the system crashed.  These inodes will
 * be on the lists in the AGI blocks.  What we do here is scan all the
 * AGIs and fully truncate and free any inodes found on the lists.
 * Each inode is removed from the lists when it has been fully
 * truncated and is freed.  The freeing of the inode and its removal
 * from the list must be atomic.
 */
STATIC void
xlog_recover_process_iunlinks(
	struct xlog	*log)
{
	xfs_mount_t	*mp;
	xfs_agnumber_t	agno;
	xfs_agi_t	*agi;
	xfs_buf_t	*agibp;
	xfs_agino_t	agino;
	int		bucket;
	int		error;

	mp = log->l_mp;

	for (agno = 0; agno < mp->m_sb.sb_agcount; agno++) {
		/*
		 * Find the AGI for this AG.
		 */
		error = xfs_read_agi(mp, NULL, agno, &agibp);
		if (error) {
			/*
			 * The AGI is corrupt.  Don't process it.
			 *
			 * We should probably mark the filesystem as corrupt
			 * after we've recovered all the AGs we can....
			 */
			continue;
		}
		/*
		 * Unlock the buffer so that it can be acquired in the normal
		 * course of the transaction to truncate and free each inode.
		 * Because we are not racing with anyone else here for the AGI
		 * buffer, we don't even need to hold it locked to read the
		 * initial unlinked bucket entries out of the buffer.  We keep
		 * a buffer reference, though, so that it stays pinned in
		 * memory while we need the buffer.
		 */
		agi = XFS_BUF_TO_AGI(agibp);
		xfs_buf_unlock(agibp);

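		/*
		 * Drain each of the AGI's unlinked hash buckets; each call
		 * frees one inode and returns the next agino in the chain,
		 * or NULLAGINO once the chain is empty or unreadable.
		 */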
		for (bucket = 0; bucket < XFS_AGI_UNLINKED_BUCKETS; bucket++) {
			agino = be32_to_cpu(agi->agi_unlinked[bucket]);
			while (agino != NULLAGINO) {
				agino = xlog_recover_process_one_iunlink(mp,
							agno, agino, bucket);
			}
		}
		xfs_buf_rele(agibp);
	}
}

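/*
 * Undo the packing that xlog_pack_data() applied at write time: the
 * first four bytes of every 512-byte basic block in the record body
 * were replaced with the record's cycle number, and the overwritten
 * words were stashed in h_cycle_data[] (and in the extended headers
 * for v2 logs with records larger than 32k).  Restore those words so
 * the record body can be replayed.
 */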
STATIC void
xlog_unpack_data(
	struct xlog_rec_header	*rhead,
	char			*dp,
	struct xlog		*log)
{
	int			i, j, k;

	for (i = 0; i < BTOBB(be32_to_cpu(rhead->h_len)) &&
		  i < (XLOG_HEADER_CYCLE_SIZE / BBSIZE); i++) {
		*(__be32 *)dp = *(__be32 *)&rhead->h_cycle_data[i];
		dp += BBSIZE;
	}

	if (xfs_sb_version_haslogv2(&log->l_mp->m_sb)) {
		xlog_in_core_2_t *xhdr = (xlog_in_core_2_t *)rhead;
		for ( ; i < BTOBB(be32_to_cpu(rhead->h_len)); i++) {
			j = i / (XLOG_HEADER_CYCLE_SIZE / BBSIZE);
			k = i % (XLOG_HEADER_CYCLE_SIZE / BBSIZE);
			*(__be32 *)dp = xhdr[j].hic_xheader.xh_cycle_data[k];
			dp += BBSIZE;
		}
	}
}

/*
 * CRC check, unpack and process a log record.
 */
STATIC int
xlog_recover_process(
	struct xlog		*log,
	struct hlist_head	rhash[],
	struct xlog_rec_header	*rhead,
	char			*dp,
	int			pass,
	struct list_head	*buffer_list)
{
	__le32			old_crc = rhead->h_crc;
	__le32			crc;

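	/* Recompute the CRC over the record as it sits on disk, before unpacking. */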
	crc = xlog_cksum(log, rhead, dp, be32_to_cpu(rhead->h_len));

	/*
	 * Nothing else to do if this is a CRC verification pass.  Only verify
	 * records with a non-zero CRC; unfortunately, mkfs always sets
	 * old_crc to 0, so we must consider a zero CRC valid even on v5
	 * supers.  Otherwise, return EFSBADCRC on failure so the callers up
	 * the stack know precisely what failed.
	 */
	if (pass == XLOG_RECOVER_CRCPASS) {
		if (old_crc && crc != old_crc)
			return -EFSBADCRC;
		return 0;
	}

	/*
	 * We're in the normal recovery path.  Issue a warning if and only if
	 * the CRC in the header is non-zero.  This is an advisory warning and
	 * the zero CRC check prevents warnings from being emitted when
	 * upgrading the kernel from one that does not add CRCs by default.
	 */
	if (crc != old_crc) {
		if (old_crc || xfs_sb_version_hascrc(&log->l_mp->m_sb)) {
			xfs_alert(log->l_mp,
		"log record CRC mismatch: found 0x%x, expected 0x%x.",
				le32_to_cpu(old_crc),
				le32_to_cpu(crc));
			xfs_hex_dump(dp, 32);
		}

		/*
		 * If the filesystem is CRC enabled, this mismatch becomes a
		 * fatal log corruption failure.
		 */
		if (xfs_sb_version_hascrc(&log->l_mp->m_sb))
			return -EFSCORRUPTED;
	}

	xlog_unpack_data(rhead, dp, log);

	return xlog_recover_process_data(log, rhash, rhead, dp, pass,
					 buffer_list);
}

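/*
 * Sanity-check a log record header before trusting any of its fields:
 * the magic number must match, the version bits must all be ones we
 * know how to recover, and the record length and block number must lie
 * within the bounds of the log.
 */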
STATIC int
xlog_valid_rec_header(
	struct xlog		*log,
	struct xlog_rec_header	*rhead,
	xfs_daddr_t		blkno)
{
	int			hlen;

	if (unlikely(rhead->h_magicno != cpu_to_be32(XLOG_HEADER_MAGIC_NUM))) {
		XFS_ERROR_REPORT("xlog_valid_rec_header(1)",
				XFS_ERRLEVEL_LOW, log->l_mp);
		return -EFSCORRUPTED;
	}
	if (unlikely(
	    (!rhead->h_version ||
	    (be32_to_cpu(rhead->h_version) & (~XLOG_VERSION_OKBITS))))) {
		xfs_warn(log->l_mp, "%s: unrecognised log version (%d).",
			__func__, be32_to_cpu(rhead->h_version));
		return -EIO;
	}

	/* LR body must have data or it wouldn't have been written */
	hlen = be32_to_cpu(rhead->h_len);
	if (unlikely(hlen <= 0 || hlen > INT_MAX)) {
		XFS_ERROR_REPORT("xlog_valid_rec_header(2)",
				XFS_ERRLEVEL_LOW, log->l_mp);
		return -EFSCORRUPTED;
	}
	if (unlikely(blkno > log->l_logBBsize || blkno > INT_MAX)) {
		XFS_ERROR_REPORT("xlog_valid_rec_header(3)",
				XFS_ERRLEVEL_LOW, log->l_mp);
		return -EFSCORRUPTED;
	}
	return 0;
}

/*
 * Read the log from tail to head and process the log records found.
 * Handle the two cases where the tail and head are in the same cycle
 * and where the active portion of the log wraps around the end of
 * the physical log separately.  The pass parameter is passed through
 * to the routines called to process the data and is not looked at
 * here.
 */
STATIC int
xlog_do_recovery_pass(
	struct xlog		*log,
	xfs_daddr_t		head_blk,
	xfs_daddr_t		tail_blk,
	int			pass,
	xfs_daddr_t		*first_bad)	/* out: first bad log rec */
{
	xlog_rec_header_t	*rhead;
	xfs_daddr_t		blk_no, rblk_no;
	xfs_daddr_t		rhead_blk;
	char			*offset;
	xfs_buf_t		*hbp, *dbp;
	int			error = 0, h_size, h_len;
	int			error2 = 0;
	int			bblks, split_bblks;
	int			hblks, split_hblks, wrapped_hblks;
	int			i;
	struct hlist_head	rhash[XLOG_RHASH_SIZE];
	LIST_HEAD		(buffer_list);

	ASSERT(head_blk != tail_blk);
	blk_no = rhead_blk = tail_blk;

	for (i = 0; i < XLOG_RHASH_SIZE; i++)
		INIT_HLIST_HEAD(&rhash[i]);

	/*
	 * Read the header of the tail block and get the iclog buffer size from
	 * h_size.  Use this to tell how many sectors make up the log header.
	 */
	if (xfs_sb_version_haslogv2(&log->l_mp->m_sb)) {
		/*
		 * When using variable length iclogs, read first sector of
		 * iclog header and extract the header size from it.  Get a
		 * new hbp that is the correct size.
		 */
		hbp = xlog_get_bp(log, 1);
		if (!hbp)
			return -ENOMEM;

		error = xlog_bread(log, tail_blk, 1, hbp, &offset);
		if (error)
			goto bread_err1;

		rhead = (xlog_rec_header_t *)offset;
		error = xlog_valid_rec_header(log, rhead, tail_blk);
		if (error)
			goto bread_err1;

		/*
		 * xfsprogs has a bug where record length is based on lsunit
		 * but h_size (iclog size) is hardcoded to 32k.  Now that we
		 * unconditionally CRC verify the unmount record, this means
		 * the log buffer can be too small for the record and cause an
		 * overrun.
		 *
		 * Detect this condition here.  Use lsunit for the buffer size
		 * as long as this looks like the mkfs case.  Otherwise, return
		 * an error to avoid a buffer overrun.
		 */
		h_size = be32_to_cpu(rhead->h_size);
		h_len = be32_to_cpu(rhead->h_len);
		if (h_len > h_size) {
			if (h_len <= log->l_mp->m_logbsize &&
			    be32_to_cpu(rhead->h_num_logops) == 1) {
				xfs_warn(log->l_mp,
		"invalid iclog size (%d bytes), using lsunit (%d bytes)",
					 h_size, log->l_mp->m_logbsize);
				h_size = log->l_mp->m_logbsize;
			} else
				return -EFSCORRUPTED;
		}

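		/*
		 * A v2 log with an iclog size above 32k stores its cycle data
		 * in extended record headers, one per 32k of record payload;
		 * size the header buffer to cover every header sector.
		 */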
		if ((be32_to_cpu(rhead->h_version) & XLOG_VERSION_2) &&
		    (h_size > XLOG_HEADER_CYCLE_SIZE)) {
			hblks = h_size / XLOG_HEADER_CYCLE_SIZE;
			if (h_size % XLOG_HEADER_CYCLE_SIZE)
				hblks++;
			xlog_put_bp(hbp);
			hbp = xlog_get_bp(log, hblks);
		} else {
			hblks = 1;
		}
	} else {
		ASSERT(log->l_sectBBsize == 1);
		hblks = 1;
		hbp = xlog_get_bp(log, 1);
		h_size = XLOG_BIG_RECORD_BSIZE;
	}

	if (!hbp)
		return -ENOMEM;
	dbp = xlog_get_bp(log, BTOBB(h_size));
	if (!dbp) {
		xlog_put_bp(hbp);
		return -ENOMEM;
	}

	memset(rhash, 0, sizeof(rhash));
	if (tail_blk > head_blk) {
		/*
		 * Perform recovery around the end of the physical log.
		 * When the head is not on the same cycle number as the tail,
		 * we can't do a sequential recovery.
		 */
		while (blk_no < log->l_logBBsize) {
			/*
			 * Check for header wrapping around physical end-of-log
			 */
			offset = hbp->b_addr;
			split_hblks = 0;
			wrapped_hblks = 0;
			if (blk_no + hblks <= log->l_logBBsize) {
				/* Read header in one read */
				error = xlog_bread(log, blk_no, hblks, hbp,
						   &offset);
				if (error)
					goto bread_err2;
			} else {
				/* This LR is split across physical log end */
				if (blk_no != log->l_logBBsize) {
					/* some data before physical log end */
					ASSERT(blk_no <= INT_MAX);
					split_hblks = log->l_logBBsize - (int)blk_no;
					ASSERT(split_hblks > 0);
					error = xlog_bread(log, blk_no,
							   split_hblks, hbp,
							   &offset);
					if (error)
						goto bread_err2;
				}

				/*
				 * Note: this black magic still works with
				 * large sector sizes (non-512) only because:
				 * - we increased the buffer size originally
				 *   by 1 sector giving us enough extra space
				 *   for the second read;
				 * - the log start is guaranteed to be sector
				 *   aligned;
				 * - we read the log end (LR header start)
				 *   _first_, then the log start (LR header end)
				 *   - order is important.
				 */
				wrapped_hblks = hblks - split_hblks;
				error = xlog_bread_offset(log, 0,
						wrapped_hblks, hbp,
						offset + BBTOB(split_hblks));
				if (error)
					goto bread_err2;
			}
			rhead = (xlog_rec_header_t *)offset;
			error = xlog_valid_rec_header(log, rhead,
						split_hblks ? blk_no : 0);
			if (error)
				goto bread_err2;

			bblks = (int)BTOBB(be32_to_cpu(rhead->h_len));
			blk_no += hblks;

			/*
			 * Read the log record data in multiple reads if it
			 * wraps around the end of the log.  Note that if the
			 * header already wrapped, blk_no could point past the
			 * end of the log.  The record data is contiguous in
			 * that case.
			 */
			if (blk_no + bblks <= log->l_logBBsize ||
			    blk_no >= log->l_logBBsize) {
				rblk_no = xlog_wrap_logbno(log, blk_no);
				error = xlog_bread(log, rblk_no, bblks, dbp,
						   &offset);
				if (error)
					goto bread_err2;
			} else {
				/* This log record is split across the
				 * physical end of log */
				offset = dbp->b_addr;
				split_bblks = 0;
				if (blk_no != log->l_logBBsize) {
					/* some data is before the physical
					 * end of log */
					ASSERT(!wrapped_hblks);
					ASSERT(blk_no <= INT_MAX);
					split_bblks =
						log->l_logBBsize - (int)blk_no;
					ASSERT(split_bblks > 0);
					error = xlog_bread(log, blk_no,
							split_bblks, dbp,
							&offset);
					if (error)
						goto bread_err2;
				}

				/*
				 * Note: this black magic still works with
				 * large sector sizes (non-512) only because:
				 * - we increased the buffer size originally
				 *   by 1 sector giving us enough extra space
				 *   for the second read;
				 * - the log start is guaranteed to be sector
				 *   aligned;
				 * - we read the log end (LR header start)
				 *   _first_, then the log start (LR header end)
				 *   - order is important.
				 */
				error = xlog_bread_offset(log, 0,
						bblks - split_bblks, dbp,
						offset + BBTOB(split_bblks));
				if (error)
					goto bread_err2;
			}

			error = xlog_recover_process(log, rhash, rhead, offset,
						     pass, &buffer_list);
			if (error)
				goto bread_err2;

			blk_no += bblks;
			rhead_blk = blk_no;
		}

		ASSERT(blk_no >= log->l_logBBsize);
		blk_no -= log->l_logBBsize;
		rhead_blk = blk_no;
	}

	/* read first part of physical log */
	while (blk_no < head_blk) {
		error = xlog_bread(log, blk_no, hblks, hbp, &offset);
		if (error)
			goto bread_err2;

		rhead = (xlog_rec_header_t *)offset;
		error = xlog_valid_rec_header(log, rhead, blk_no);
		if (error)
			goto bread_err2;

		/* blocks in data section */
		bblks = (int)BTOBB(be32_to_cpu(rhead->h_len));
		error = xlog_bread(log, blk_no + hblks, bblks, dbp,
				   &offset);
		if (error)
			goto bread_err2;

		error = xlog_recover_process(log, rhash, rhead, offset, pass,
					     &buffer_list);
		if (error)
			goto bread_err2;

		blk_no += bblks + hblks;
		rhead_blk = blk_no;
	}

 bread_err2:
	xlog_put_bp(dbp);
 bread_err1:
	xlog_put_bp(hbp);

	/*
	 * Submit buffers that have been added from the last record processed,
	 * regardless of error status.
	 */
	if (!list_empty(&buffer_list))
		error2 = xfs_buf_delwri_submit(&buffer_list);

	if (error && first_bad)
		*first_bad = rhead_blk;

	/*
	 * Transactions are freed at commit time but transactions without
	 * commit records on disk are never committed.  Free any that may be
	 * left in the hash table.
	 */
	for (i = 0; i < XLOG_RHASH_SIZE; i++) {
		struct hlist_node	*tmp;
		struct xlog_recover	*trans;

		hlist_for_each_entry_safe(trans, tmp, &rhash[i], r_list)
			xlog_recover_free_trans(trans);
	}

	return error ? error : error2;
}

/*
 * Do the recovery of the log.  We actually do this in two phases.
 * The two passes are necessary in order to implement the function
 * of cancelling a record written into the log.  The first pass
 * determines those things which have been cancelled, and the
 * second pass replays log items normally except for those which
 * have been cancelled.  The handling of the replay and cancellations
 * takes place in the log item type specific routines.
 *
 * The table of items which have cancel records in the log is allocated
 * and freed at this level, since only here do we know when all of
 * the log recovery has been completed.
 */
STATIC int
xlog_do_log_recovery(
	struct xlog	*log,
	xfs_daddr_t	head_blk,
	xfs_daddr_t	tail_blk)
{
	int		error, i;

	ASSERT(head_blk != tail_blk);

	/*
	 * First do a pass to find all of the cancelled buf log items.
	 * Store them in the buf_cancel_table for use in the second pass.
	 */
	log->l_buf_cancel_table = kmem_zalloc(XLOG_BC_TABLE_SIZE *
						 sizeof(struct list_head),
						 KM_SLEEP);
	for (i = 0; i < XLOG_BC_TABLE_SIZE; i++)
		INIT_LIST_HEAD(&log->l_buf_cancel_table[i]);

	error = xlog_do_recovery_pass(log, head_blk, tail_blk,
				      XLOG_RECOVER_PASS1, NULL);
	if (error != 0) {
		kmem_free(log->l_buf_cancel_table);
		log->l_buf_cancel_table = NULL;
		return error;
	}
	/*
	 * Then do a second pass to actually recover the items in the log.
	 * When it is complete free the table of buf cancel items.
	 */
	error = xlog_do_recovery_pass(log, head_blk, tail_blk,
				      XLOG_RECOVER_PASS2, NULL);
#ifdef DEBUG
	if (!error) {
		int	i;

		for (i = 0; i < XLOG_BC_TABLE_SIZE; i++)
			ASSERT(list_empty(&log->l_buf_cancel_table[i]));
	}
#endif	/* DEBUG */

	kmem_free(log->l_buf_cancel_table);
	log->l_buf_cancel_table = NULL;

	return error;
}

/*
 * Do the actual recovery
 */
STATIC int
xlog_do_recover(
	struct xlog	*log,
	xfs_daddr_t	head_blk,
	xfs_daddr_t	tail_blk)
{
	struct xfs_mount *mp = log->l_mp;
	int		error;
	xfs_buf_t	*bp;
	xfs_sb_t	*sbp;

	trace_xfs_log_recover(log, head_blk, tail_blk);

	/*
	 * First replay the images in the log.
	 */
	error = xlog_do_log_recovery(log, head_blk, tail_blk);
	if (error)
		return error;

	/*
	 * If IO errors happened during recovery, bail out.
	 */
	if (XFS_FORCED_SHUTDOWN(mp)) {
		return -EIO;
	}

	/*
	 * We now update the tail_lsn since much of the recovery has completed
	 * and there may be space available to use.  If there were no extent
	 * frees or iunlinks, we can free up the entire log and set the
	 * tail_lsn to be the last_sync_lsn.  This was set in xlog_find_tail
	 * to be the lsn of the last known good LR on disk.  If there are
	 * extent frees or iunlinks they will have some entries in the AIL;
	 * so we look at the AIL to determine how to set the tail_lsn.
	 */
	xlog_assign_tail_lsn(mp);

	/*
	 * Now that we've finished replaying all buffer and inode
	 * updates, re-read in the superblock and reverify it.
	 */
	bp = xfs_getsb(mp);
	bp->b_flags &= ~(XBF_DONE | XBF_ASYNC);
	ASSERT(!(bp->b_flags & XBF_WRITE));
	bp->b_flags |= XBF_READ;
	bp->b_ops = &xfs_sb_buf_ops;

	error = xfs_buf_submit(bp);
	if (error) {
		if (!XFS_FORCED_SHUTDOWN(mp)) {
			xfs_buf_ioerror_alert(bp, __func__);
			ASSERT(0);
		}
		xfs_buf_relse(bp);
		return error;
	}

	/* Convert superblock from on-disk format */
	sbp = &mp->m_sb;
	xfs_sb_from_disk(sbp, XFS_BUF_TO_SBP(bp));
	xfs_buf_relse(bp);

	/* re-initialise in-core superblock and geometry structures */
	xfs_reinit_percpu_counters(mp);
	error = xfs_initialize_perag(mp, sbp->sb_agcount, &mp->m_maxagi);
	if (error) {
		xfs_warn(mp, "Failed post-recovery per-ag init: %d", error);
		return error;
	}
	mp->m_alloc_set_aside = xfs_alloc_set_aside(mp);

	xlog_recover_check_summary(log);

	/* Normal transactions can now occur */
	log->l_flags &= ~XLOG_ACTIVE_RECOVERY;
	return 0;
}

/*
 * Perform recovery and re-initialize some log variables in xlog_find_tail.
 *
 * Return error or zero.
 */
int
xlog_recover(
	struct xlog	*log)
{
	xfs_daddr_t	head_blk, tail_blk;
	int		error;

	/* find the tail of the log */
	error = xlog_find_tail(log, &head_blk, &tail_blk);
	if (error)
		return error;

	/*
	 * The superblock was read before the log was available and thus the
	 * LSN could not be verified.  Check the superblock LSN against the
	 * current LSN now that it's known.
	 */
	if (xfs_sb_version_hascrc(&log->l_mp->m_sb) &&
	    !xfs_log_check_lsn(log->l_mp, log->l_mp->m_sb.sb_lsn))
		return -EINVAL;

	if (tail_blk != head_blk) {
		/* There used to be a comment here:
		 *
		 * disallow recovery on read-only mounts. note -- mount
		 * checks for ENOSPC and turns it into an intelligent
		 * error message.
		 * ...but this is no longer true.  Now, unless you specify
		 * NORECOVERY (in which case this function would never be
		 * called), we just go ahead and recover.  We do this all
		 * under the vfs layer, so we can get away with it unless
		 * the device itself is read-only, in which case we fail.
		 */
		error = xfs_dev_is_read_only(log->l_mp, "recovery");
		if (error)
			return error;

		/*
		 * Version 5 superblock log feature mask validation.  We know
		 * the log is dirty so check if there are any unknown log
		 * features in what we need to recover.  If there are unknown
		 * features (e.g. unsupported transactions), then simply
		 * reject the attempt at recovery before touching anything.
		 */
		if (XFS_SB_VERSION_NUM(&log->l_mp->m_sb) == XFS_SB_VERSION_5 &&
		    xfs_sb_has_incompat_log_feature(&log->l_mp->m_sb,
					XFS_SB_FEAT_INCOMPAT_LOG_UNKNOWN)) {
			xfs_warn(log->l_mp,
"Superblock has unknown incompatible log features (0x%x) enabled.",
				(log->l_mp->m_sb.sb_features_log_incompat &
					XFS_SB_FEAT_INCOMPAT_LOG_UNKNOWN));
			xfs_warn(log->l_mp,
"The log can not be fully and/or safely recovered by this kernel.");
			xfs_warn(log->l_mp,
"Please recover the log on a kernel that supports the unknown features.");
			return -EINVAL;
		}

		/*
		 * Delay log recovery if the debug hook is set.  This is debug
		 * instrumentation to coordinate simulation of I/O failures
		 * with log recovery.
		 */
		if (xfs_globals.log_recovery_delay) {
			xfs_notice(log->l_mp,
				"Delaying log recovery for %d seconds.",
				xfs_globals.log_recovery_delay);
			msleep(xfs_globals.log_recovery_delay * 1000);
		}

		xfs_notice(log->l_mp, "Starting recovery (logdev: %s)",
				log->l_mp->m_logname ? log->l_mp->m_logname
						     : "internal");

		error = xlog_do_recover(log, head_blk, tail_blk);
		log->l_flags |= XLOG_RECOVERY_NEEDED;
	}
	return error;
}

/*
 * In the first part of recovery we replay inodes and buffers and build
 * up the list of extent free items which need to be processed.  Here
 * we process the extent free items and clean up the on-disk unlinked
 * inode lists.  This is separated from the first part of recovery so
 * that the root and real-time bitmap inodes can be read in from disk in
 * between the two stages.  This is necessary so that we can free space
 * in the real-time portion of the file system.
 */
int
xlog_recover_finish(
	struct xlog	*log)
{
	/*
	 * Now we're ready to do the transactions needed for the
	 * rest of recovery.  Start with completing all the extent
	 * free intent records and then process the unlinked inode
	 * lists.  At this point, we essentially run in normal mode
	 * except that we're still performing recovery actions
	 * rather than accepting new requests.
	 */
	if (log->l_flags & XLOG_RECOVERY_NEEDED) {
		int	error;

		error = xlog_recover_process_intents(log);
		if (error) {
			xfs_alert(log->l_mp, "Failed to recover intents");
			return error;
		}

		/*
		 * Sync the log to get all the intents out of the AIL.
		 * This isn't absolutely necessary, but it helps in
		 * case the unlink transactions would have problems
		 * pushing the intents out of the way.
		 */
		xfs_log_force(log->l_mp, XFS_LOG_SYNC);

		xlog_recover_process_iunlinks(log);

		xlog_recover_check_summary(log);

		xfs_notice(log->l_mp, "Ending recovery (logdev: %s)",
				log->l_mp->m_logname ? log->l_mp->m_logname
						     : "internal");
		log->l_flags &= ~XLOG_RECOVERY_NEEDED;
	} else {
		xfs_info(log->l_mp, "Ending clean mount");
	}
	return 0;
}

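/*
 * The mount failed while recovery was still pending, so release any
 * intent items that the first recovery phase left in the AIL; nothing
 * else will ever process them and they must not pin the AIL during
 * teardown.
 */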
int
xlog_recover_cancel(
	struct xlog	*log)
{
	int		error = 0;

	if (log->l_flags & XLOG_RECOVERY_NEEDED)
		error = xlog_recover_cancel_intents(log);

	return error;
}

#if defined(DEBUG)
/*
 * Read all of the AGF and AGI counters and check that they
 * are consistent with the superblock counters.
 */
STATIC void
xlog_recover_check_summary(
	struct xlog	*log)
{
	xfs_mount_t	*mp;
	xfs_agf_t	*agfp;
	xfs_buf_t	*agfbp;
	xfs_buf_t	*agibp;
	xfs_agnumber_t	agno;
	uint64_t	freeblks;
	uint64_t	itotal;
	uint64_t	ifree;
	int		error;

	mp = log->l_mp;

	freeblks = 0LL;
	itotal = 0LL;
	ifree = 0LL;
	for (agno = 0; agno < mp->m_sb.sb_agcount; agno++) {
		error = xfs_read_agf(mp, NULL, agno, 0, &agfbp);
		if (error) {
			xfs_alert(mp, "%s agf read failed agno %d error %d",
						__func__, agno, error);
		} else {
			agfp = XFS_BUF_TO_AGF(agfbp);
			freeblks += be32_to_cpu(agfp->agf_freeblks) +
				    be32_to_cpu(agfp->agf_flcount);
			xfs_buf_relse(agfbp);
		}

		error = xfs_read_agi(mp, NULL, agno, &agibp);
		if (error) {
			xfs_alert(mp, "%s agi read failed agno %d error %d",
						__func__, agno, error);
		} else {
			struct xfs_agi	*agi = XFS_BUF_TO_AGI(agibp);

			itotal += be32_to_cpu(agi->agi_count);
			ifree += be32_to_cpu(agi->agi_freecount);
			xfs_buf_relse(agibp);
		}
	}
}
#endif	/* DEBUG */