Blame - fs/xfs/xfs_health.c - SHIFTPHONES/kernel/common

blob: 21728228e08b92acd29025bb9611e2e96759a58c [file] [log] [blame]

Darrick J. Wong	6772c1f	2019-04-12 07:40:25 -0700	[diff] [blame]	1	// SPDX-License-Identifier: GPL-2.0+
				2	/*
				3	* Copyright (C) 2019 Oracle. All Rights Reserved.
				4	* Author: Darrick J. Wong <darrick.wong@oracle.com>
				5	*/
				6	#include "xfs.h"
				7	#include "xfs_fs.h"
				8	#include "xfs_shared.h"
				9	#include "xfs_format.h"
				10	#include "xfs_log_format.h"
				11	#include "xfs_trans_resv.h"
				12	#include "xfs_bit.h"
				13	#include "xfs_sb.h"
				14	#include "xfs_mount.h"
				15	#include "xfs_defer.h"
				16	#include "xfs_da_format.h"
				17	#include "xfs_da_btree.h"
				18	#include "xfs_inode.h"
				19	#include "xfs_trace.h"
				20	#include "xfs_health.h"
				21
Darrick J. Wong	519841c	2019-04-12 07:41:16 -0700	[diff] [blame]	22	/*
				23	* Warn about metadata corruption that we detected but haven't fixed, and
				24	* make sure we're not sitting on anything that would get in the way of
				25	* recovery.
				26	*/
				27	void
				28	xfs_health_unmount(
				29	struct xfs_mount *mp)
				30	{
				31	struct xfs_perag *pag;
				32	xfs_agnumber_t agno;
				33	unsigned int sick = 0;
				34	unsigned int checked = 0;
				35	bool warn = false;
				36
				37	if (XFS_FORCED_SHUTDOWN(mp))
				38	return;
				39
				40	/* Measure AG corruption levels. */
				41	for (agno = 0; agno < mp->m_sb.sb_agcount; agno++) {
				42	pag = xfs_perag_get(mp, agno);
				43	xfs_ag_measure_sickness(pag, &sick, &checked);
				44	if (sick) {
				45	trace_xfs_ag_unfixed_corruption(mp, agno, sick);
				46	warn = true;
				47	}
				48	xfs_perag_put(pag);
				49	}
				50
				51	/* Measure realtime volume corruption levels. */
				52	xfs_rt_measure_sickness(mp, &sick, &checked);
				53	if (sick) {
				54	trace_xfs_rt_unfixed_corruption(mp, sick);
				55	warn = true;
				56	}
				57
				58	/*
				59	* Measure fs corruption and keep the sample around for the warning.
				60	* See the note below for why we exempt FS_COUNTERS.
				61	*/
				62	xfs_fs_measure_sickness(mp, &sick, &checked);
				63	if (sick & ~XFS_SICK_FS_COUNTERS) {
				64	trace_xfs_fs_unfixed_corruption(mp, sick);
				65	warn = true;
				66	}
				67
				68	if (warn) {
				69	xfs_warn(mp,
				70	"Uncorrected metadata errors detected; please run xfs_repair.");
				71
				72	/*
				73	* We discovered uncorrected metadata problems at some point
				74	* during this filesystem mount and have advised the
				75	* administrator to run repair once the unmount completes.
				76	*
				77	* However, we must be careful -- when FSCOUNTERS are flagged
				78	* unhealthy, the unmount procedure omits writing the clean
				79	* unmount record to the log so that the next mount will run
				80	* recovery and recompute the summary counters. In other
				81	* words, we leave a dirty log to get the counters fixed.
				82	*
				83	* Unfortunately, xfs_repair cannot recover dirty logs, so if
				84	* there were filesystem problems, FSCOUNTERS was flagged, and
				85	* the administrator takes our advice to run xfs_repair,
				86	* they'll have to zap the log before repairing structures.
				87	* We don't really want to encourage this, so we mark the
				88	* FSCOUNTERS healthy so that a subsequent repair run won't see
				89	* a dirty log.
				90	*/
				91	if (sick & XFS_SICK_FS_COUNTERS)
				92	xfs_fs_mark_healthy(mp, XFS_SICK_FS_COUNTERS);
				93	}
				94	}
				95
Darrick J. Wong	6772c1f	2019-04-12 07:40:25 -0700	[diff] [blame]	96	/* Mark unhealthy per-fs metadata. */
				97	void
				98	xfs_fs_mark_sick(
				99	struct xfs_mount *mp,
				100	unsigned int mask)
				101	{
				102	ASSERT(!(mask & ~XFS_SICK_FS_PRIMARY));
				103	trace_xfs_fs_mark_sick(mp, mask);
				104
				105	spin_lock(&mp->m_sb_lock);
				106	mp->m_fs_sick \|= mask;
				107	mp->m_fs_checked \|= mask;
				108	spin_unlock(&mp->m_sb_lock);
				109	}
				110
				111	/* Mark a per-fs metadata healed. */
				112	void
				113	xfs_fs_mark_healthy(
				114	struct xfs_mount *mp,
				115	unsigned int mask)
				116	{
				117	ASSERT(!(mask & ~XFS_SICK_FS_PRIMARY));
				118	trace_xfs_fs_mark_healthy(mp, mask);
				119
				120	spin_lock(&mp->m_sb_lock);
				121	mp->m_fs_sick &= ~mask;
				122	mp->m_fs_checked \|= mask;
				123	spin_unlock(&mp->m_sb_lock);
				124	}
				125
				126	/* Sample which per-fs metadata are unhealthy. */
				127	void
				128	xfs_fs_measure_sickness(
				129	struct xfs_mount *mp,
				130	unsigned int *sick,
				131	unsigned int *checked)
				132	{
				133	spin_lock(&mp->m_sb_lock);
				134	*sick = mp->m_fs_sick;
				135	*checked = mp->m_fs_checked;
				136	spin_unlock(&mp->m_sb_lock);
				137	}
				138
				139	/* Mark unhealthy realtime metadata. */
				140	void
				141	xfs_rt_mark_sick(
				142	struct xfs_mount *mp,
				143	unsigned int mask)
				144	{
				145	ASSERT(!(mask & ~XFS_SICK_RT_PRIMARY));
				146	trace_xfs_rt_mark_sick(mp, mask);
				147
				148	spin_lock(&mp->m_sb_lock);
				149	mp->m_rt_sick \|= mask;
				150	mp->m_rt_checked \|= mask;
				151	spin_unlock(&mp->m_sb_lock);
				152	}
				153
				154	/* Mark a realtime metadata healed. */
				155	void
				156	xfs_rt_mark_healthy(
				157	struct xfs_mount *mp,
				158	unsigned int mask)
				159	{
				160	ASSERT(!(mask & ~XFS_SICK_RT_PRIMARY));
				161	trace_xfs_rt_mark_healthy(mp, mask);
				162
				163	spin_lock(&mp->m_sb_lock);
				164	mp->m_rt_sick &= ~mask;
				165	mp->m_rt_checked \|= mask;
				166	spin_unlock(&mp->m_sb_lock);
				167	}
				168
				169	/* Sample which realtime metadata are unhealthy. */
				170	void
				171	xfs_rt_measure_sickness(
				172	struct xfs_mount *mp,
				173	unsigned int *sick,
				174	unsigned int *checked)
				175	{
				176	spin_lock(&mp->m_sb_lock);
				177	*sick = mp->m_rt_sick;
				178	*checked = mp->m_rt_checked;
				179	spin_unlock(&mp->m_sb_lock);
				180	}
				181
				182	/* Mark unhealthy per-ag metadata. */
				183	void
				184	xfs_ag_mark_sick(
				185	struct xfs_perag *pag,
				186	unsigned int mask)
				187	{
				188	ASSERT(!(mask & ~XFS_SICK_AG_PRIMARY));
				189	trace_xfs_ag_mark_sick(pag->pag_mount, pag->pag_agno, mask);
				190
				191	spin_lock(&pag->pag_state_lock);
				192	pag->pag_sick \|= mask;
				193	pag->pag_checked \|= mask;
				194	spin_unlock(&pag->pag_state_lock);
				195	}
				196
				197	/* Mark per-ag metadata ok. */
				198	void
				199	xfs_ag_mark_healthy(
				200	struct xfs_perag *pag,
				201	unsigned int mask)
				202	{
				203	ASSERT(!(mask & ~XFS_SICK_AG_PRIMARY));
				204	trace_xfs_ag_mark_healthy(pag->pag_mount, pag->pag_agno, mask);
				205
				206	spin_lock(&pag->pag_state_lock);
				207	pag->pag_sick &= ~mask;
				208	pag->pag_checked \|= mask;
				209	spin_unlock(&pag->pag_state_lock);
				210	}
				211
				212	/* Sample which per-ag metadata are unhealthy. */
				213	void
				214	xfs_ag_measure_sickness(
				215	struct xfs_perag *pag,
				216	unsigned int *sick,
				217	unsigned int *checked)
				218	{
				219	spin_lock(&pag->pag_state_lock);
				220	*sick = pag->pag_sick;
				221	*checked = pag->pag_checked;
				222	spin_unlock(&pag->pag_state_lock);
				223	}
				224
				225	/* Mark the unhealthy parts of an inode. */
				226	void
				227	xfs_inode_mark_sick(
				228	struct xfs_inode *ip,
				229	unsigned int mask)
				230	{
				231	ASSERT(!(mask & ~XFS_SICK_INO_PRIMARY));
				232	trace_xfs_inode_mark_sick(ip, mask);
				233
				234	spin_lock(&ip->i_flags_lock);
				235	ip->i_sick \|= mask;
				236	ip->i_checked \|= mask;
				237	spin_unlock(&ip->i_flags_lock);
				238	}
				239
				240	/* Mark parts of an inode healed. */
				241	void
				242	xfs_inode_mark_healthy(
				243	struct xfs_inode *ip,
				244	unsigned int mask)
				245	{
				246	ASSERT(!(mask & ~XFS_SICK_INO_PRIMARY));
				247	trace_xfs_inode_mark_healthy(ip, mask);
				248
				249	spin_lock(&ip->i_flags_lock);
				250	ip->i_sick &= ~mask;
				251	ip->i_checked \|= mask;
				252	spin_unlock(&ip->i_flags_lock);
				253	}
				254
				255	/* Sample which parts of an inode are unhealthy. */
				256	void
				257	xfs_inode_measure_sickness(
				258	struct xfs_inode *ip,
				259	unsigned int *sick,
				260	unsigned int *checked)
				261	{
				262	spin_lock(&ip->i_flags_lock);
				263	*sick = ip->i_sick;
				264	*checked = ip->i_checked;
				265	spin_unlock(&ip->i_flags_lock);
				266	}