xfs: flatten the dquot lock ordering
Introduce a new XFS_DQ_FREEING flag that tells lookup and mplist walks
to skip a dquot that is beeing freed, and use this avoid the trylock
on the hash and mplist locks in xfs_qm_dqreclaim_one. Also simplify
xfs_dqpurge by moving the inodes to a dispose list after marking them
XFS_DQ_FREEING and avoid the locker ordering constraints.
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Ben Myers <bpm@sgi.com>
diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c
index 35d2b8a..d06d2a6 100644
--- a/fs/xfs/xfs_dquot.c
+++ b/fs/xfs/xfs_dquot.c
@@ -728,6 +728,12 @@
trace_xfs_dqlookup_found(dqp);
xfs_dqlock(dqp);
+ if (dqp->dq_flags & XFS_DQ_FREEING) {
+ *O_dqpp = NULL;
+ xfs_dqunlock(dqp);
+ return -1;
+ }
+
XFS_DQHOLD(dqp);
/*
@@ -781,11 +787,7 @@
return (EIO);
}
}
-#endif
- again:
-
-#ifdef DEBUG
ASSERT(type == XFS_DQ_USER ||
type == XFS_DQ_PROJ ||
type == XFS_DQ_GROUP);
@@ -797,13 +799,21 @@
ASSERT(ip->i_gdquot == NULL);
}
#endif
+
+restart:
mutex_lock(&h->qh_lock);
/*
* Look in the cache (hashtable).
* The chain is kept locked during lookup.
*/
- if (xfs_qm_dqlookup(mp, id, h, O_dqpp) == 0) {
+ switch (xfs_qm_dqlookup(mp, id, h, O_dqpp)) {
+ case -1:
+ XQM_STATS_INC(xqmstats.xs_qm_dquot_dups);
+ mutex_unlock(&h->qh_lock);
+ delay(1);
+ goto restart;
+ case 0:
XQM_STATS_INC(xqmstats.xs_qm_dqcachehits);
/*
* The dquot was found, moved to the front of the chain,
@@ -814,9 +824,11 @@
ASSERT(XFS_DQ_IS_LOCKED(*O_dqpp));
mutex_unlock(&h->qh_lock);
trace_xfs_dqget_hit(*O_dqpp);
- return (0); /* success */
+ return 0; /* success */
+ default:
+ XQM_STATS_INC(xqmstats.xs_qm_dqcachemisses);
+ break;
}
- XQM_STATS_INC(xqmstats.xs_qm_dqcachemisses);
/*
* Dquot cache miss. We don't want to keep the inode lock across
@@ -913,16 +925,21 @@
* lock order between the two dquots here since dqp isn't
* on any findable lists yet.
*/
- if (xfs_qm_dqlookup(mp, id, h, &tmpdqp) == 0) {
+ switch (xfs_qm_dqlookup(mp, id, h, &tmpdqp)) {
+ case 0:
+ case -1:
/*
- * Duplicate found. Just throw away the new dquot
- * and start over.
+ * Duplicate found, either in cache or on its way out.
+ * Just throw away the new dquot and start over.
*/
- xfs_qm_dqput(tmpdqp);
+ if (tmpdqp)
+ xfs_qm_dqput(tmpdqp);
mutex_unlock(&h->qh_lock);
xfs_qm_dqdestroy(dqp);
XQM_STATS_INC(xqmstats.xs_qm_dquot_dups);
- goto again;
+ goto restart;
+ default:
+ break;
}
}
@@ -1250,51 +1267,18 @@
}
}
-
/*
- * Take a dquot out of the mount's dqlist as well as the hashlist.
- * This is called via unmount as well as quotaoff, and the purge
- * will always succeed unless there are soft (temp) references
- * outstanding.
- *
- * This returns 0 if it was purged, 1 if it wasn't. It's not an error code
- * that we're returning! XXXsup - not cool.
+ * Take a dquot out of the mount's dqlist as well as the hashlist. This is
+ * called via unmount as well as quotaoff, and the purge will always succeed.
*/
-/* ARGSUSED */
-int
+void
xfs_qm_dqpurge(
- xfs_dquot_t *dqp)
+ struct xfs_dquot *dqp)
{
- xfs_dqhash_t *qh = dqp->q_hash;
- xfs_mount_t *mp = dqp->q_mount;
-
- ASSERT(mutex_is_locked(&mp->m_quotainfo->qi_dqlist_lock));
- ASSERT(mutex_is_locked(&dqp->q_hash->qh_lock));
-
- /*
- * XXX(hch): horrible locking order, will get cleaned up ASAP.
- */
- if (!mutex_trylock(&xfs_Gqm->qm_dqfrlist_lock)) {
- mutex_unlock(&dqp->q_hash->qh_lock);
- return 1;
- }
+ struct xfs_mount *mp = dqp->q_mount;
+ struct xfs_dqhash *qh = dqp->q_hash;
xfs_dqlock(dqp);
- /*
- * We really can't afford to purge a dquot that is
- * referenced, because these are hard refs.
- * It shouldn't happen in general because we went thru _all_ inodes in
- * dqrele_all_inodes before calling this and didn't let the mountlock go.
- * However it is possible that we have dquots with temporary
- * references that are not attached to an inode. e.g. see xfs_setattr().
- */
- if (dqp->q_nrefs != 0) {
- xfs_dqunlock(dqp);
- mutex_unlock(&dqp->q_hash->qh_lock);
- return (1);
- }
-
- ASSERT(!list_empty(&dqp->q_freelist));
/*
* If we're turning off quotas, we have to make sure that, for
@@ -1313,19 +1297,14 @@
}
/*
- * XXXIf we're turning this type of quotas off, we don't care
+ * If we are turning this type of quotas off, we don't care
* about the dirty metadata sitting in this dquot. OTOH, if
* we're unmounting, we do care, so we flush it and wait.
*/
if (XFS_DQ_IS_DIRTY(dqp)) {
int error;
- /* dqflush unlocks dqflock */
/*
- * Given that dqpurge is a very rare occurrence, it is OK
- * that we're holding the hashlist and mplist locks
- * across the disk write. But, ... XXXsup
- *
* We don't care about getting disk errors here. We need
* to purge this dquot anyway, so we go ahead regardless.
*/
@@ -1335,28 +1314,36 @@
__func__, dqp);
xfs_dqflock(dqp);
}
+
ASSERT(atomic_read(&dqp->q_pincount) == 0);
ASSERT(XFS_FORCED_SHUTDOWN(mp) ||
!(dqp->q_logitem.qli_item.li_flags & XFS_LI_IN_AIL));
- list_del_init(&dqp->q_hashlist);
- qh->qh_version++;
-
- list_del_init(&dqp->q_mplist);
- mp->m_quotainfo->qi_dqreclaims++;
- mp->m_quotainfo->qi_dquots--;
-
- list_del_init(&dqp->q_freelist);
- xfs_Gqm->qm_dqfrlist_cnt--;
-
xfs_dqfunlock(dqp);
xfs_dqunlock(dqp);
- mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock);
+ mutex_lock(&qh->qh_lock);
+ list_del_init(&dqp->q_hashlist);
+ qh->qh_version++;
mutex_unlock(&qh->qh_lock);
+ mutex_lock(&mp->m_quotainfo->qi_dqlist_lock);
+ list_del_init(&dqp->q_mplist);
+ mp->m_quotainfo->qi_dqreclaims++;
+ mp->m_quotainfo->qi_dquots--;
+ mutex_unlock(&mp->m_quotainfo->qi_dqlist_lock);
+
+ /*
+ * We move dquots to the freelist as soon as their reference count
+ * hits zero, so it really should be on the freelist here.
+ */
+ mutex_lock(&xfs_Gqm->qm_dqfrlist_lock);
+ ASSERT(!list_empty(&dqp->q_freelist));
+ list_del_init(&dqp->q_freelist);
+ xfs_Gqm->qm_dqfrlist_cnt--;
+ mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock);
+
xfs_qm_dqdestroy(dqp);
- return 0;
}
/*
diff --git a/fs/xfs/xfs_dquot.h b/fs/xfs/xfs_dquot.h
index 0b5d2ae..98488df 100644
--- a/fs/xfs/xfs_dquot.h
+++ b/fs/xfs/xfs_dquot.h
@@ -133,7 +133,7 @@
extern void xfs_qm_dqdestroy(xfs_dquot_t *);
extern int xfs_qm_dqflush(xfs_dquot_t *, uint);
-extern int xfs_qm_dqpurge(xfs_dquot_t *);
+extern void xfs_qm_dqpurge(xfs_dquot_t *);
extern void xfs_qm_dqunpin_wait(xfs_dquot_t *);
extern void xfs_qm_adjust_dqtimers(xfs_mount_t *,
xfs_disk_dquot_t *);
diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c
index 6a0c4f0..f418731 100644
--- a/fs/xfs/xfs_qm.c
+++ b/fs/xfs/xfs_qm.c
@@ -398,7 +398,8 @@
mutex_lock(&q->qi_dqlist_lock);
list_for_each_entry(dqp, &q->qi_dqlist, q_mplist) {
xfs_dqlock(dqp);
- if (! XFS_DQ_IS_DIRTY(dqp)) {
+ if ((dqp->dq_flags & XFS_DQ_FREEING) ||
+ !XFS_DQ_IS_DIRTY(dqp)) {
xfs_dqunlock(dqp);
continue;
}
@@ -437,6 +438,7 @@
/* return ! busy */
return 0;
}
+
/*
* Release the group dquot pointers the user dquots may be
* carrying around as a hint. mplist is locked on entry and exit.
@@ -453,6 +455,13 @@
ASSERT(mutex_is_locked(&q->qi_dqlist_lock));
list_for_each_entry(dqp, &q->qi_dqlist, q_mplist) {
xfs_dqlock(dqp);
+ if (dqp->dq_flags & XFS_DQ_FREEING) {
+ xfs_dqunlock(dqp);
+ mutex_unlock(&q->qi_dqlist_lock);
+ delay(1);
+ mutex_lock(&q->qi_dqlist_lock);
+ goto again;
+ }
if ((gdqp = dqp->q_gdquot)) {
xfs_dqlock(gdqp);
dqp->q_gdquot = NULL;
@@ -489,8 +498,8 @@
struct xfs_quotainfo *q = mp->m_quotainfo;
struct xfs_dquot *dqp, *n;
uint dqtype;
- int nrecl;
- int nmisses;
+ int nmisses = 0;
+ LIST_HEAD (dispose_list);
if (!q)
return 0;
@@ -509,46 +518,26 @@
*/
xfs_qm_detach_gdquots(mp);
- again:
- nmisses = 0;
- ASSERT(mutex_is_locked(&q->qi_dqlist_lock));
/*
- * Try to get rid of all of the unwanted dquots. The idea is to
- * get them off mplist and hashlist, but leave them on freelist.
+ * Try to get rid of all of the unwanted dquots.
*/
list_for_each_entry_safe(dqp, n, &q->qi_dqlist, q_mplist) {
xfs_dqlock(dqp);
- if ((dqp->dq_flags & dqtype) == 0) {
- xfs_dqunlock(dqp);
- continue;
+ if ((dqp->dq_flags & dqtype) != 0 &&
+ !(dqp->dq_flags & XFS_DQ_FREEING)) {
+ if (dqp->q_nrefs == 0) {
+ dqp->dq_flags |= XFS_DQ_FREEING;
+ list_move_tail(&dqp->q_mplist, &dispose_list);
+ } else
+ nmisses++;
}
xfs_dqunlock(dqp);
-
- if (!mutex_trylock(&dqp->q_hash->qh_lock)) {
- nrecl = q->qi_dqreclaims;
- mutex_unlock(&q->qi_dqlist_lock);
- mutex_lock(&dqp->q_hash->qh_lock);
- mutex_lock(&q->qi_dqlist_lock);
-
- /*
- * XXXTheoretically, we can get into a very long
- * ping pong game here.
- * No one can be adding dquots to the mplist at
- * this point, but somebody might be taking things off.
- */
- if (nrecl != q->qi_dqreclaims) {
- mutex_unlock(&dqp->q_hash->qh_lock);
- goto again;
- }
- }
-
- /*
- * Take the dquot off the mplist and hashlist. It may remain on
- * freelist in INACTIVE state.
- */
- nmisses += xfs_qm_dqpurge(dqp);
}
mutex_unlock(&q->qi_dqlist_lock);
+
+ list_for_each_entry_safe(dqp, n, &dispose_list, q_mplist)
+ xfs_qm_dqpurge(dqp);
+
return nmisses;
}
@@ -1667,25 +1656,16 @@
/*
- * Just pop the least recently used dquot off the freelist and
- * recycle it. The returned dquot is locked.
+ * Pop the least recently used dquot off the freelist and recycle it.
*/
-STATIC xfs_dquot_t *
+STATIC struct xfs_dquot *
xfs_qm_dqreclaim_one(void)
{
- xfs_dquot_t *dqpout;
- xfs_dquot_t *dqp;
- int restarts;
- int startagain;
+ struct xfs_dquot *dqp;
+ int restarts = 0;
- restarts = 0;
- dqpout = NULL;
-
- /* lockorder: hashchainlock, freelistlock, mplistlock, dqlock, dqflock */
-again:
- startagain = 0;
mutex_lock(&xfs_Gqm->qm_dqfrlist_lock);
-
+restart:
list_for_each_entry(dqp, &xfs_Gqm->qm_dqfrlist, q_freelist) {
struct xfs_mount *mp = dqp->q_mount;
xfs_dqlock(dqp);
@@ -1701,7 +1681,6 @@
list_del_init(&dqp->q_freelist);
xfs_Gqm->qm_dqfrlist_cnt--;
restarts++;
- startagain = 1;
goto dqunlock;
}
@@ -1737,57 +1716,42 @@
}
goto dqunlock;
}
+ xfs_dqfunlock(dqp);
/*
- * We're trying to get the hashlock out of order. This races
- * with dqlookup; so, we giveup and goto the next dquot if
- * we couldn't get the hashlock. This way, we won't starve
- * a dqlookup process that holds the hashlock that is
- * waiting for the freelist lock.
+ * Prevent lookup now that we are going to reclaim the dquot.
+ * Once XFS_DQ_FREEING is set lookup won't touch the dquot,
+ * thus we can drop the lock now.
*/
- if (!mutex_trylock(&dqp->q_hash->qh_lock)) {
- restarts++;
- goto dqfunlock;
- }
+ dqp->dq_flags |= XFS_DQ_FREEING;
+ xfs_dqunlock(dqp);
- /*
- * This races with dquot allocation code as well as dqflush_all
- * and reclaim code. So, if we failed to grab the mplist lock,
- * giveup everything and start over.
- */
- if (!mutex_trylock(&mp->m_quotainfo->qi_dqlist_lock)) {
- restarts++;
- startagain = 1;
- goto qhunlock;
- }
+ mutex_lock(&dqp->q_hash->qh_lock);
+ list_del_init(&dqp->q_hashlist);
+ dqp->q_hash->qh_version++;
+ mutex_unlock(&dqp->q_hash->qh_lock);
- ASSERT(dqp->q_nrefs == 0);
+ mutex_lock(&mp->m_quotainfo->qi_dqlist_lock);
list_del_init(&dqp->q_mplist);
mp->m_quotainfo->qi_dquots--;
mp->m_quotainfo->qi_dqreclaims++;
- list_del_init(&dqp->q_hashlist);
- dqp->q_hash->qh_version++;
+ mutex_unlock(&mp->m_quotainfo->qi_dqlist_lock);
+
+ ASSERT(dqp->q_nrefs == 0);
list_del_init(&dqp->q_freelist);
xfs_Gqm->qm_dqfrlist_cnt--;
- dqpout = dqp;
- mutex_unlock(&mp->m_quotainfo->qi_dqlist_lock);
-qhunlock:
- mutex_unlock(&dqp->q_hash->qh_lock);
-dqfunlock:
- xfs_dqfunlock(dqp);
+
+ mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock);
+ return dqp;
dqunlock:
xfs_dqunlock(dqp);
- if (dqpout)
- break;
if (restarts >= XFS_QM_RECLAIM_MAX_RESTARTS)
break;
- if (startagain) {
- mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock);
- goto again;
- }
+ goto restart;
}
+
mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock);
- return dqpout;
+ return NULL;
}
/*
diff --git a/fs/xfs/xfs_quota.h b/fs/xfs/xfs_quota.h
index 487653d..b86c62f 100644
--- a/fs/xfs/xfs_quota.h
+++ b/fs/xfs/xfs_quota.h
@@ -87,6 +87,7 @@
#define XFS_DQ_PROJ 0x0002 /* project quota */
#define XFS_DQ_GROUP 0x0004 /* a group quota */
#define XFS_DQ_DIRTY 0x0008 /* dquot is dirty */
+#define XFS_DQ_FREEING 0x0010 /* dquot is beeing torn down */
#define XFS_DQ_ALLTYPES (XFS_DQ_USER|XFS_DQ_PROJ|XFS_DQ_GROUP)
@@ -94,7 +95,8 @@
{ XFS_DQ_USER, "USER" }, \
{ XFS_DQ_PROJ, "PROJ" }, \
{ XFS_DQ_GROUP, "GROUP" }, \
- { XFS_DQ_DIRTY, "DIRTY" }
+ { XFS_DQ_DIRTY, "DIRTY" }, \
+ { XFS_DQ_FREEING, "FREEING" }
/*
* In the worst case, when both user and group quotas are on,