// SPDX-License-Identifier: GPL-2.0-or-later
/* -*- mode: c; c-basic-offset: 8; -*-
 * vim: noexpandtab sw=8 ts=8 sts=0:
 *
 * dlmglue.c
 *
 * Code which implements an OCFS2 specific interface to our DLM.
 *
 * Copyright (C) 2003, 2004 Oracle. All rights reserved.
 */

#include <linux/types.h>
#include <linux/slab.h>
#include <linux/highmem.h>
#include <linux/mm.h>
#include <linux/kthread.h>
#include <linux/pagemap.h>
#include <linux/debugfs.h>
#include <linux/seq_file.h>
#include <linux/time.h>
#include <linux/quotaops.h>
#include <linux/sched/signal.h>

#define MLOG_MASK_PREFIX ML_DLM_GLUE
#include <cluster/masklog.h>

#include "ocfs2.h"
#include "ocfs2_lockingver.h"

#include "alloc.h"
#include "dcache.h"
#include "dlmglue.h"
#include "extent_map.h"
#include "file.h"
#include "heartbeat.h"
#include "inode.h"
#include "journal.h"
#include "stackglue.h"
#include "slot_map.h"
#include "super.h"
#include "uptodate.h"
#include "quota.h"
#include "refcounttree.h"
#include "acl.h"

#include "buffer_head_io.h"

struct ocfs2_mask_waiter {
	struct list_head	mw_item;
	int			mw_status;
	struct completion	mw_complete;
	unsigned long		mw_mask;
	unsigned long		mw_goal;
#ifdef CONFIG_OCFS2_FS_STATS
	ktime_t			mw_lock_start;
#endif
};

static struct ocfs2_super *ocfs2_get_dentry_osb(struct ocfs2_lock_res *lockres);
static struct ocfs2_super *ocfs2_get_inode_osb(struct ocfs2_lock_res *lockres);
static struct ocfs2_super *ocfs2_get_file_osb(struct ocfs2_lock_res *lockres);
static struct ocfs2_super *ocfs2_get_qinfo_osb(struct ocfs2_lock_res *lockres);

/*
 * Return value from ->downconvert_worker functions.
 *
 * These control the precise actions of ocfs2_unblock_lock()
 * and ocfs2_process_blocked_lock()
 *
 */
enum ocfs2_unblock_action {
	UNBLOCK_CONTINUE	= 0, /* Continue downconvert */
	UNBLOCK_CONTINUE_POST	= 1, /* Continue downconvert, fire
				      * ->post_unlock callback */
	UNBLOCK_STOP_POST	= 2, /* Do not downconvert, fire
				      * ->post_unlock() callback. */
};

struct ocfs2_unblock_ctl {
	int requeue;
	enum ocfs2_unblock_action unblock_action;
};

/* Lockdep class keys */
#ifdef CONFIG_DEBUG_LOCK_ALLOC
static struct lock_class_key lockdep_keys[OCFS2_NUM_LOCK_TYPES];
#endif

static int ocfs2_check_meta_downconvert(struct ocfs2_lock_res *lockres,
					int new_level);
static void ocfs2_set_meta_lvb(struct ocfs2_lock_res *lockres);

static int ocfs2_data_convert_worker(struct ocfs2_lock_res *lockres,
				     int blocking);

static int ocfs2_dentry_convert_worker(struct ocfs2_lock_res *lockres,
				       int blocking);

static void ocfs2_dentry_post_unlock(struct ocfs2_super *osb,
				     struct ocfs2_lock_res *lockres);

static void ocfs2_set_qinfo_lvb(struct ocfs2_lock_res *lockres);

static int ocfs2_check_refcount_downconvert(struct ocfs2_lock_res *lockres,
					    int new_level);
static int ocfs2_refcount_convert_worker(struct ocfs2_lock_res *lockres,
					 int blocking);

#define mlog_meta_lvb(__level, __lockres) ocfs2_dump_meta_lvb_info(__level, __PRETTY_FUNCTION__, __LINE__, __lockres)

/* This aids in debugging situations where a bad LVB might be involved. */
static void ocfs2_dump_meta_lvb_info(u64 level,
				     const char *function,
				     unsigned int line,
				     struct ocfs2_lock_res *lockres)
{
	struct ocfs2_meta_lvb *lvb = ocfs2_dlm_lvb(&lockres->l_lksb);

	mlog(level, "LVB information for %s (called from %s:%u):\n",
	     lockres->l_name, function, line);
	mlog(level, "version: %u, clusters: %u, generation: 0x%x\n",
	     lvb->lvb_version, be32_to_cpu(lvb->lvb_iclusters),
	     be32_to_cpu(lvb->lvb_igeneration));
	mlog(level, "size: %llu, uid %u, gid %u, mode 0x%x\n",
	     (unsigned long long)be64_to_cpu(lvb->lvb_isize),
	     be32_to_cpu(lvb->lvb_iuid), be32_to_cpu(lvb->lvb_igid),
	     be16_to_cpu(lvb->lvb_imode));
	mlog(level, "nlink %u, atime_packed 0x%llx, ctime_packed 0x%llx, "
	     "mtime_packed 0x%llx iattr 0x%x\n", be16_to_cpu(lvb->lvb_inlink),
	     (long long)be64_to_cpu(lvb->lvb_iatime_packed),
	     (long long)be64_to_cpu(lvb->lvb_ictime_packed),
	     (long long)be64_to_cpu(lvb->lvb_imtime_packed),
	     be32_to_cpu(lvb->lvb_iattr));
}


/*
 * OCFS2 Lock Resource Operations
 *
 * These fine tune the behavior of the generic dlmglue locking infrastructure.
 *
 * The most basic of lock types can point ->l_priv to their respective
 * struct ocfs2_super and allow the default actions to manage things.
 *
 * Right now, each lock type also needs to implement an init function,
 * and trivial lock/unlock wrappers. ocfs2_simple_drop_lockres()
 * should be called when the lock is no longer needed (i.e., object
 * destruction time).
 */
struct ocfs2_lock_res_ops {
	/*
	 * Translate an ocfs2_lock_res * into an ocfs2_super *. Define
	 * this callback if ->l_priv is not an ocfs2_super pointer
	 */
	struct ocfs2_super * (*get_osb)(struct ocfs2_lock_res *);

	/*
	 * Optionally called in the downconvert thread after a
	 * successful downconvert. The lockres will not be referenced
	 * after this callback is called, so it is safe to free
	 * memory, etc.
	 *
	 * The exact semantics of when this is called are controlled
	 * by ->downconvert_worker()
	 */
	void (*post_unlock)(struct ocfs2_super *, struct ocfs2_lock_res *);

	/*
	 * Allow a lock type to add checks to determine whether it is
	 * safe to downconvert a lock. Return 0 to re-queue the
	 * downconvert at a later time, nonzero to continue.
	 *
	 * For most locks, the default checks that there are no
	 * incompatible holders are sufficient.
	 *
	 * Called with the lockres spinlock held.
	 */
	int (*check_downconvert)(struct ocfs2_lock_res *, int);

	/*
	 * Allows a lock type to populate the lock value block. This
	 * is called on downconvert, and when we drop a lock.
	 *
	 * Locks that want to use this should set LOCK_TYPE_USES_LVB
	 * in the flags field.
	 *
	 * Called with the lockres spinlock held.
	 */
	void (*set_lvb)(struct ocfs2_lock_res *);

	/*
	 * Called from the downconvert thread when it is determined
	 * that a lock will be downconverted. This is called without
	 * any locks held so the function can do work that might
	 * schedule (syncing out data, etc).
	 *
	 * This should return any one of the ocfs2_unblock_action
	 * values, depending on what it wants the thread to do.
	 */
	int (*downconvert_worker)(struct ocfs2_lock_res *, int);

	/*
	 * LOCK_TYPE_* flags which describe the specific requirements
	 * of a lock type. Descriptions of each individual flag follow.
	 */
	int flags;
};

/*
 * Some locks want to "refresh" potentially stale data when a
 * meaningful (PRMODE or EXMODE) lock level is first obtained. If this
 * flag is set, the OCFS2_LOCK_NEEDS_REFRESH flag will be set on the
 * individual lockres l_flags member from the ast function. It is
 * expected that the locking wrapper will clear the
 * OCFS2_LOCK_NEEDS_REFRESH flag when done.
 */
#define LOCK_TYPE_REQUIRES_REFRESH 0x1

/*
 * Indicate that a lock type makes use of the lock value block. The
 * ->set_lvb lock type callback must be defined.
 */
#define LOCK_TYPE_USES_LVB 0x2

static struct ocfs2_lock_res_ops ocfs2_inode_rw_lops = {
	.get_osb	= ocfs2_get_inode_osb,
	.flags		= 0,
};

static struct ocfs2_lock_res_ops ocfs2_inode_inode_lops = {
	.get_osb	= ocfs2_get_inode_osb,
	.check_downconvert = ocfs2_check_meta_downconvert,
	.set_lvb	= ocfs2_set_meta_lvb,
	.downconvert_worker = ocfs2_data_convert_worker,
	.flags		= LOCK_TYPE_REQUIRES_REFRESH|LOCK_TYPE_USES_LVB,
};

static struct ocfs2_lock_res_ops ocfs2_super_lops = {
	.flags		= LOCK_TYPE_REQUIRES_REFRESH,
};

static struct ocfs2_lock_res_ops ocfs2_rename_lops = {
	.flags		= 0,
};

static struct ocfs2_lock_res_ops ocfs2_nfs_sync_lops = {
	.flags		= 0,
};

static struct ocfs2_lock_res_ops ocfs2_trim_fs_lops = {
	.flags		= LOCK_TYPE_REQUIRES_REFRESH|LOCK_TYPE_USES_LVB,
};

static struct ocfs2_lock_res_ops ocfs2_orphan_scan_lops = {
	.flags		= LOCK_TYPE_REQUIRES_REFRESH|LOCK_TYPE_USES_LVB,
};

static struct ocfs2_lock_res_ops ocfs2_dentry_lops = {
	.get_osb	= ocfs2_get_dentry_osb,
	.post_unlock	= ocfs2_dentry_post_unlock,
	.downconvert_worker = ocfs2_dentry_convert_worker,
	.flags		= 0,
};

static struct ocfs2_lock_res_ops ocfs2_inode_open_lops = {
	.get_osb	= ocfs2_get_inode_osb,
	.flags		= 0,
};

static struct ocfs2_lock_res_ops ocfs2_flock_lops = {
	.get_osb	= ocfs2_get_file_osb,
	.flags		= 0,
};

static struct ocfs2_lock_res_ops ocfs2_qinfo_lops = {
	.set_lvb	= ocfs2_set_qinfo_lvb,
	.get_osb	= ocfs2_get_qinfo_osb,
	.flags		= LOCK_TYPE_REQUIRES_REFRESH | LOCK_TYPE_USES_LVB,
};

static struct ocfs2_lock_res_ops ocfs2_refcount_block_lops = {
	.check_downconvert = ocfs2_check_refcount_downconvert,
	.downconvert_worker = ocfs2_refcount_convert_worker,
	.flags		= 0,
};

static inline int ocfs2_is_inode_lock(struct ocfs2_lock_res *lockres)
{
	return lockres->l_type == OCFS2_LOCK_TYPE_META ||
		lockres->l_type == OCFS2_LOCK_TYPE_RW ||
		lockres->l_type == OCFS2_LOCK_TYPE_OPEN;
}

static inline struct ocfs2_lock_res *ocfs2_lksb_to_lock_res(struct ocfs2_dlm_lksb *lksb)
{
	return container_of(lksb, struct ocfs2_lock_res, l_lksb);
}

static inline struct inode *ocfs2_lock_res_inode(struct ocfs2_lock_res *lockres)
{
	BUG_ON(!ocfs2_is_inode_lock(lockres));

	return (struct inode *) lockres->l_priv;
}

static inline struct ocfs2_dentry_lock *ocfs2_lock_res_dl(struct ocfs2_lock_res *lockres)
{
	BUG_ON(lockres->l_type != OCFS2_LOCK_TYPE_DENTRY);

	return (struct ocfs2_dentry_lock *)lockres->l_priv;
}

static inline struct ocfs2_mem_dqinfo *ocfs2_lock_res_qinfo(struct ocfs2_lock_res *lockres)
{
	BUG_ON(lockres->l_type != OCFS2_LOCK_TYPE_QINFO);

	return (struct ocfs2_mem_dqinfo *)lockres->l_priv;
}

static inline struct ocfs2_refcount_tree *
ocfs2_lock_res_refcount_tree(struct ocfs2_lock_res *res)
{
	return container_of(res, struct ocfs2_refcount_tree, rf_lockres);
}

static inline struct ocfs2_super *ocfs2_get_lockres_osb(struct ocfs2_lock_res *lockres)
{
	if (lockres->l_ops->get_osb)
		return lockres->l_ops->get_osb(lockres);

	return (struct ocfs2_super *)lockres->l_priv;
}

static int ocfs2_lock_create(struct ocfs2_super *osb,
			     struct ocfs2_lock_res *lockres,
			     int level,
			     u32 dlm_flags);
static inline int ocfs2_may_continue_on_blocked_lock(struct ocfs2_lock_res *lockres,
						     int wanted);
static void __ocfs2_cluster_unlock(struct ocfs2_super *osb,
				   struct ocfs2_lock_res *lockres,
				   int level, unsigned long caller_ip);
static inline void ocfs2_cluster_unlock(struct ocfs2_super *osb,
					struct ocfs2_lock_res *lockres,
					int level)
{
	__ocfs2_cluster_unlock(osb, lockres, level, _RET_IP_);
}

static inline void ocfs2_generic_handle_downconvert_action(struct ocfs2_lock_res *lockres);
static inline void ocfs2_generic_handle_convert_action(struct ocfs2_lock_res *lockres);
static inline void ocfs2_generic_handle_attach_action(struct ocfs2_lock_res *lockres);
static int ocfs2_generic_handle_bast(struct ocfs2_lock_res *lockres, int level);
static void ocfs2_schedule_blocked_lock(struct ocfs2_super *osb,
					struct ocfs2_lock_res *lockres);
static inline void ocfs2_recover_from_dlm_error(struct ocfs2_lock_res *lockres,
						int convert);
#define ocfs2_log_dlm_error(_func, _err, _lockres) do {				\
	if ((_lockres)->l_type != OCFS2_LOCK_TYPE_DENTRY)			\
		mlog(ML_ERROR, "DLM error %d while calling %s on resource %s\n", \
		     _err, _func, _lockres->l_name);				\
	else									\
		mlog(ML_ERROR, "DLM error %d while calling %s on resource %.*s%08x\n", \
		     _err, _func, OCFS2_DENTRY_LOCK_INO_START - 1, (_lockres)->l_name, \
		     (unsigned int)ocfs2_get_dentry_lock_ino(_lockres));	\
} while (0)
static int ocfs2_downconvert_thread(void *arg);
static void ocfs2_downconvert_on_unlock(struct ocfs2_super *osb,
					struct ocfs2_lock_res *lockres);
static int ocfs2_inode_lock_update(struct inode *inode,
				   struct buffer_head **bh);
static void ocfs2_drop_osb_locks(struct ocfs2_super *osb);
static inline int ocfs2_highest_compat_lock_level(int level);
static unsigned int ocfs2_prepare_downconvert(struct ocfs2_lock_res *lockres,
					      int new_level);
static int ocfs2_downconvert_lock(struct ocfs2_super *osb,
				  struct ocfs2_lock_res *lockres,
				  int new_level,
				  int lvb,
				  unsigned int generation);
static int ocfs2_prepare_cancel_convert(struct ocfs2_super *osb,
					struct ocfs2_lock_res *lockres);
static int ocfs2_cancel_convert(struct ocfs2_super *osb,
				struct ocfs2_lock_res *lockres);


static void ocfs2_build_lock_name(enum ocfs2_lock_type type,
				  u64 blkno,
				  u32 generation,
				  char *name)
{
	int len;

	BUG_ON(type >= OCFS2_NUM_LOCK_TYPES);

	len = snprintf(name, OCFS2_LOCK_ID_MAX_LEN, "%c%s%016llx%08x",
		       ocfs2_lock_type_char(type), OCFS2_LOCK_ID_PAD,
		       (long long)blkno, generation);

	BUG_ON(len != (OCFS2_LOCK_ID_MAX_LEN - 1));

	mlog(0, "built lock resource with name: %s\n", name);
}

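/*
 * Quick sketch of the resulting name layout (illustrative only; the
 * exact pad string and per-type characters come from ocfs2_lockid.h):
 *
 *	<type char><OCFS2_LOCK_ID_PAD><16 hex digits of blkno><8 hex digits of generation>
 *
 * which always totals OCFS2_LOCK_ID_MAX_LEN - 1 characters plus the
 * terminating NUL, as the BUG_ON() above asserts.
 */
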
static DEFINE_SPINLOCK(ocfs2_dlm_tracking_lock);

static void ocfs2_add_lockres_tracking(struct ocfs2_lock_res *res,
				       struct ocfs2_dlm_debug *dlm_debug)
{
	mlog(0, "Add tracking for lockres %s\n", res->l_name);

	spin_lock(&ocfs2_dlm_tracking_lock);
	list_add(&res->l_debug_list, &dlm_debug->d_lockres_tracking);
	spin_unlock(&ocfs2_dlm_tracking_lock);
}

static void ocfs2_remove_lockres_tracking(struct ocfs2_lock_res *res)
{
	spin_lock(&ocfs2_dlm_tracking_lock);
	if (!list_empty(&res->l_debug_list))
		list_del_init(&res->l_debug_list);
	spin_unlock(&ocfs2_dlm_tracking_lock);
}

#ifdef CONFIG_OCFS2_FS_STATS
static void ocfs2_init_lock_stats(struct ocfs2_lock_res *res)
{
	res->l_lock_refresh = 0;
	memset(&res->l_lock_prmode, 0, sizeof(struct ocfs2_lock_stats));
	memset(&res->l_lock_exmode, 0, sizeof(struct ocfs2_lock_stats));
}

static void ocfs2_update_lock_stats(struct ocfs2_lock_res *res, int level,
				    struct ocfs2_mask_waiter *mw, int ret)
{
	u32 usec;
	ktime_t kt;
	struct ocfs2_lock_stats *stats;

	if (level == LKM_PRMODE)
		stats = &res->l_lock_prmode;
	else if (level == LKM_EXMODE)
		stats = &res->l_lock_exmode;
	else
		return;

	kt = ktime_sub(ktime_get(), mw->mw_lock_start);
	usec = ktime_to_us(kt);

	stats->ls_gets++;
	stats->ls_total += ktime_to_ns(kt);
	/* overflow */
	if (unlikely(stats->ls_gets == 0)) {
		stats->ls_gets++;
		stats->ls_total = ktime_to_ns(kt);
	}

	if (stats->ls_max < usec)
		stats->ls_max = usec;

	if (ret)
		stats->ls_fail++;

	stats->ls_last = ktime_to_us(ktime_get_real());
}

static inline void ocfs2_track_lock_refresh(struct ocfs2_lock_res *lockres)
{
	lockres->l_lock_refresh++;
}

static inline void ocfs2_init_start_time(struct ocfs2_mask_waiter *mw)
{
	mw->mw_lock_start = ktime_get();
}
#else
static inline void ocfs2_init_lock_stats(struct ocfs2_lock_res *res)
{
}
static inline void ocfs2_update_lock_stats(struct ocfs2_lock_res *res,
					   int level, struct ocfs2_mask_waiter *mw, int ret)
{
}
static inline void ocfs2_track_lock_refresh(struct ocfs2_lock_res *lockres)
{
}
static inline void ocfs2_init_start_time(struct ocfs2_mask_waiter *mw)
{
}
#endif

static void ocfs2_lock_res_init_common(struct ocfs2_super *osb,
				       struct ocfs2_lock_res *res,
				       enum ocfs2_lock_type type,
				       struct ocfs2_lock_res_ops *ops,
				       void *priv)
{
	res->l_type = type;
	res->l_ops = ops;
	res->l_priv = priv;

	res->l_level = DLM_LOCK_IV;
	res->l_requested = DLM_LOCK_IV;
	res->l_blocking = DLM_LOCK_IV;
	res->l_action = OCFS2_AST_INVALID;
	res->l_unlock_action = OCFS2_UNLOCK_INVALID;

	res->l_flags = OCFS2_LOCK_INITIALIZED;

	ocfs2_add_lockres_tracking(res, osb->osb_dlm_debug);

	ocfs2_init_lock_stats(res);
#ifdef CONFIG_DEBUG_LOCK_ALLOC
	if (type != OCFS2_LOCK_TYPE_OPEN)
		lockdep_init_map(&res->l_lockdep_map, ocfs2_lock_type_strings[type],
				 &lockdep_keys[type], 0);
	else
		res->l_lockdep_map.key = NULL;
#endif
}

void ocfs2_lock_res_init_once(struct ocfs2_lock_res *res)
{
	/* This also clears out the lock status block */
	memset(res, 0, sizeof(struct ocfs2_lock_res));
	spin_lock_init(&res->l_lock);
	init_waitqueue_head(&res->l_event);
	INIT_LIST_HEAD(&res->l_blocked_list);
	INIT_LIST_HEAD(&res->l_mask_waiters);
	INIT_LIST_HEAD(&res->l_holders);
}

void ocfs2_inode_lock_res_init(struct ocfs2_lock_res *res,
			       enum ocfs2_lock_type type,
			       unsigned int generation,
			       struct inode *inode)
{
	struct ocfs2_lock_res_ops *ops;

	switch(type) {
		case OCFS2_LOCK_TYPE_RW:
			ops = &ocfs2_inode_rw_lops;
			break;
		case OCFS2_LOCK_TYPE_META:
			ops = &ocfs2_inode_inode_lops;
			break;
		case OCFS2_LOCK_TYPE_OPEN:
			ops = &ocfs2_inode_open_lops;
			break;
		default:
			mlog_bug_on_msg(1, "type: %d\n", type);
			ops = NULL; /* thanks, gcc */
			break;
	};

	ocfs2_build_lock_name(type, OCFS2_I(inode)->ip_blkno,
			      generation, res->l_name);
	ocfs2_lock_res_init_common(OCFS2_SB(inode->i_sb), res, type, ops, inode);
}

static struct ocfs2_super *ocfs2_get_inode_osb(struct ocfs2_lock_res *lockres)
{
	struct inode *inode = ocfs2_lock_res_inode(lockres);

	return OCFS2_SB(inode->i_sb);
}

static struct ocfs2_super *ocfs2_get_qinfo_osb(struct ocfs2_lock_res *lockres)
{
	struct ocfs2_mem_dqinfo *info = lockres->l_priv;

	return OCFS2_SB(info->dqi_gi.dqi_sb);
}

static struct ocfs2_super *ocfs2_get_file_osb(struct ocfs2_lock_res *lockres)
{
	struct ocfs2_file_private *fp = lockres->l_priv;

	return OCFS2_SB(fp->fp_file->f_mapping->host->i_sb);
}

static __u64 ocfs2_get_dentry_lock_ino(struct ocfs2_lock_res *lockres)
{
	__be64 inode_blkno_be;

	memcpy(&inode_blkno_be, &lockres->l_name[OCFS2_DENTRY_LOCK_INO_START],
	       sizeof(__be64));

	return be64_to_cpu(inode_blkno_be);
}

static struct ocfs2_super *ocfs2_get_dentry_osb(struct ocfs2_lock_res *lockres)
{
	struct ocfs2_dentry_lock *dl = lockres->l_priv;

	return OCFS2_SB(dl->dl_inode->i_sb);
}

void ocfs2_dentry_lock_res_init(struct ocfs2_dentry_lock *dl,
				u64 parent, struct inode *inode)
{
	int len;
	u64 inode_blkno = OCFS2_I(inode)->ip_blkno;
	__be64 inode_blkno_be = cpu_to_be64(inode_blkno);
	struct ocfs2_lock_res *lockres = &dl->dl_lockres;

	ocfs2_lock_res_init_once(lockres);

	/*
	 * Unfortunately, the standard lock naming scheme won't work
	 * here because we have two 16 byte values to use. Instead,
	 * we'll stuff the inode number as a binary value. We still
	 * want error prints to show something without garbling the
	 * display, so drop a null byte in there before the inode
	 * number. A future version of OCFS2 will likely use all
	 * binary lock names. The stringified names have been a
	 * tremendous aid in debugging, but now that the debugfs
	 * interface exists, we can mangle things there if need be.
	 *
	 * NOTE: We also drop the standard "pad" value (the total lock
	 * name size stays the same though - the last part is all
	 * zeros due to the memset in ocfs2_lock_res_init_once()
	 */
	len = snprintf(lockres->l_name, OCFS2_DENTRY_LOCK_INO_START,
		       "%c%016llx",
		       ocfs2_lock_type_char(OCFS2_LOCK_TYPE_DENTRY),
		       (long long)parent);

	BUG_ON(len != (OCFS2_DENTRY_LOCK_INO_START - 1));

	memcpy(&lockres->l_name[OCFS2_DENTRY_LOCK_INO_START], &inode_blkno_be,
	       sizeof(__be64));

	ocfs2_lock_res_init_common(OCFS2_SB(inode->i_sb), lockres,
				   OCFS2_LOCK_TYPE_DENTRY, &ocfs2_dentry_lops,
				   dl);
}

static void ocfs2_super_lock_res_init(struct ocfs2_lock_res *res,
				      struct ocfs2_super *osb)
{
	/* Superblock lockres doesn't come from a slab so we call init
	 * once on it manually. */
	ocfs2_lock_res_init_once(res);
	ocfs2_build_lock_name(OCFS2_LOCK_TYPE_SUPER, OCFS2_SUPER_BLOCK_BLKNO,
			      0, res->l_name);
	ocfs2_lock_res_init_common(osb, res, OCFS2_LOCK_TYPE_SUPER,
				   &ocfs2_super_lops, osb);
}

static void ocfs2_rename_lock_res_init(struct ocfs2_lock_res *res,
				       struct ocfs2_super *osb)
{
	/* Rename lockres doesn't come from a slab so we call init
	 * once on it manually. */
	ocfs2_lock_res_init_once(res);
	ocfs2_build_lock_name(OCFS2_LOCK_TYPE_RENAME, 0, 0, res->l_name);
	ocfs2_lock_res_init_common(osb, res, OCFS2_LOCK_TYPE_RENAME,
				   &ocfs2_rename_lops, osb);
}

static void ocfs2_nfs_sync_lock_res_init(struct ocfs2_lock_res *res,
					 struct ocfs2_super *osb)
{
	/* nfs_sync lockres doesn't come from a slab so we call init
	 * once on it manually. */
	ocfs2_lock_res_init_once(res);
	ocfs2_build_lock_name(OCFS2_LOCK_TYPE_NFS_SYNC, 0, 0, res->l_name);
	ocfs2_lock_res_init_common(osb, res, OCFS2_LOCK_TYPE_NFS_SYNC,
				   &ocfs2_nfs_sync_lops, osb);
}

void ocfs2_trim_fs_lock_res_init(struct ocfs2_super *osb)
{
	struct ocfs2_lock_res *lockres = &osb->osb_trim_fs_lockres;

	/* Only one trimfs thread is allowed to work at the same time. */
	mutex_lock(&osb->obs_trim_fs_mutex);

	ocfs2_lock_res_init_once(lockres);
	ocfs2_build_lock_name(OCFS2_LOCK_TYPE_TRIM_FS, 0, 0, lockres->l_name);
	ocfs2_lock_res_init_common(osb, lockres, OCFS2_LOCK_TYPE_TRIM_FS,
				   &ocfs2_trim_fs_lops, osb);
}

void ocfs2_trim_fs_lock_res_uninit(struct ocfs2_super *osb)
{
	struct ocfs2_lock_res *lockres = &osb->osb_trim_fs_lockres;

	ocfs2_simple_drop_lockres(osb, lockres);
	ocfs2_lock_res_free(lockres);

	mutex_unlock(&osb->obs_trim_fs_mutex);
}

static void ocfs2_orphan_scan_lock_res_init(struct ocfs2_lock_res *res,
					    struct ocfs2_super *osb)
{
	ocfs2_lock_res_init_once(res);
	ocfs2_build_lock_name(OCFS2_LOCK_TYPE_ORPHAN_SCAN, 0, 0, res->l_name);
	ocfs2_lock_res_init_common(osb, res, OCFS2_LOCK_TYPE_ORPHAN_SCAN,
				   &ocfs2_orphan_scan_lops, osb);
}

void ocfs2_file_lock_res_init(struct ocfs2_lock_res *lockres,
			      struct ocfs2_file_private *fp)
{
	struct inode *inode = fp->fp_file->f_mapping->host;
	struct ocfs2_inode_info *oi = OCFS2_I(inode);

	ocfs2_lock_res_init_once(lockres);
	ocfs2_build_lock_name(OCFS2_LOCK_TYPE_FLOCK, oi->ip_blkno,
			      inode->i_generation, lockres->l_name);
	ocfs2_lock_res_init_common(OCFS2_SB(inode->i_sb), lockres,
				   OCFS2_LOCK_TYPE_FLOCK, &ocfs2_flock_lops,
				   fp);
	lockres->l_flags |= OCFS2_LOCK_NOCACHE;
}

void ocfs2_qinfo_lock_res_init(struct ocfs2_lock_res *lockres,
			       struct ocfs2_mem_dqinfo *info)
{
	ocfs2_lock_res_init_once(lockres);
	ocfs2_build_lock_name(OCFS2_LOCK_TYPE_QINFO, info->dqi_gi.dqi_type,
			      0, lockres->l_name);
	ocfs2_lock_res_init_common(OCFS2_SB(info->dqi_gi.dqi_sb), lockres,
				   OCFS2_LOCK_TYPE_QINFO, &ocfs2_qinfo_lops,
				   info);
}

void ocfs2_refcount_lock_res_init(struct ocfs2_lock_res *lockres,
				  struct ocfs2_super *osb, u64 ref_blkno,
				  unsigned int generation)
{
	ocfs2_lock_res_init_once(lockres);
	ocfs2_build_lock_name(OCFS2_LOCK_TYPE_REFCOUNT, ref_blkno,
			      generation, lockres->l_name);
	ocfs2_lock_res_init_common(osb, lockres, OCFS2_LOCK_TYPE_REFCOUNT,
				   &ocfs2_refcount_block_lops, osb);
}

void ocfs2_lock_res_free(struct ocfs2_lock_res *res)
{
	if (!(res->l_flags & OCFS2_LOCK_INITIALIZED))
		return;

	ocfs2_remove_lockres_tracking(res);

	mlog_bug_on_msg(!list_empty(&res->l_blocked_list),
			"Lockres %s is on the blocked list\n",
			res->l_name);
	mlog_bug_on_msg(!list_empty(&res->l_mask_waiters),
			"Lockres %s has mask waiters pending\n",
			res->l_name);
	mlog_bug_on_msg(spin_is_locked(&res->l_lock),
			"Lockres %s is locked\n",
			res->l_name);
	mlog_bug_on_msg(res->l_ro_holders,
			"Lockres %s has %u ro holders\n",
			res->l_name, res->l_ro_holders);
	mlog_bug_on_msg(res->l_ex_holders,
			"Lockres %s has %u ex holders\n",
			res->l_name, res->l_ex_holders);

	/* Need to clear out the lock status block for the dlm */
	memset(&res->l_lksb, 0, sizeof(res->l_lksb));

	res->l_flags = 0UL;
}

/*
 * Keep a list of processes who have interest in a lockres.
 * Note: this is now only used for checking recursive cluster locking.
 */
static inline void ocfs2_add_holder(struct ocfs2_lock_res *lockres,
				    struct ocfs2_lock_holder *oh)
{
	INIT_LIST_HEAD(&oh->oh_list);
	oh->oh_owner_pid = get_pid(task_pid(current));

	spin_lock(&lockres->l_lock);
	list_add_tail(&oh->oh_list, &lockres->l_holders);
	spin_unlock(&lockres->l_lock);
}

static struct ocfs2_lock_holder *
ocfs2_pid_holder(struct ocfs2_lock_res *lockres,
		 struct pid *pid)
{
	struct ocfs2_lock_holder *oh;

	spin_lock(&lockres->l_lock);
	list_for_each_entry(oh, &lockres->l_holders, oh_list) {
		if (oh->oh_owner_pid == pid) {
			spin_unlock(&lockres->l_lock);
			return oh;
		}
	}
	spin_unlock(&lockres->l_lock);
	return NULL;
}

static inline void ocfs2_remove_holder(struct ocfs2_lock_res *lockres,
				       struct ocfs2_lock_holder *oh)
{
	spin_lock(&lockres->l_lock);
	list_del(&oh->oh_list);
	spin_unlock(&lockres->l_lock);

	put_pid(oh->oh_owner_pid);
}


static inline void ocfs2_inc_holders(struct ocfs2_lock_res *lockres,
				     int level)
{
	BUG_ON(!lockres);

	switch(level) {
	case DLM_LOCK_EX:
		lockres->l_ex_holders++;
		break;
	case DLM_LOCK_PR:
		lockres->l_ro_holders++;
		break;
	default:
		BUG();
	}
}

static inline void ocfs2_dec_holders(struct ocfs2_lock_res *lockres,
				     int level)
{
	BUG_ON(!lockres);

	switch(level) {
	case DLM_LOCK_EX:
		BUG_ON(!lockres->l_ex_holders);
		lockres->l_ex_holders--;
		break;
	case DLM_LOCK_PR:
		BUG_ON(!lockres->l_ro_holders);
		lockres->l_ro_holders--;
		break;
	default:
		BUG();
	}
}

/* WARNING: This function lives in a world where the only three lock
 * levels are EX, PR, and NL. It *will* have to be adjusted when more
 * lock types are added. */
static inline int ocfs2_highest_compat_lock_level(int level)
{
	int new_level = DLM_LOCK_EX;

	if (level == DLM_LOCK_EX)
		new_level = DLM_LOCK_NL;
	else if (level == DLM_LOCK_PR)
		new_level = DLM_LOCK_PR;
	return new_level;
}

static void lockres_set_flags(struct ocfs2_lock_res *lockres,
			      unsigned long newflags)
{
	struct ocfs2_mask_waiter *mw, *tmp;

	assert_spin_locked(&lockres->l_lock);

	lockres->l_flags = newflags;

	list_for_each_entry_safe(mw, tmp, &lockres->l_mask_waiters, mw_item) {
		if ((lockres->l_flags & mw->mw_mask) != mw->mw_goal)
			continue;

		list_del_init(&mw->mw_item);
		mw->mw_status = 0;
		complete(&mw->mw_complete);
	}
}
static void lockres_or_flags(struct ocfs2_lock_res *lockres, unsigned long or)
{
	lockres_set_flags(lockres, lockres->l_flags | or);
}
static void lockres_clear_flags(struct ocfs2_lock_res *lockres,
				unsigned long clear)
{
	lockres_set_flags(lockres, lockres->l_flags & ~clear);
}

static inline void ocfs2_generic_handle_downconvert_action(struct ocfs2_lock_res *lockres)
{
	BUG_ON(!(lockres->l_flags & OCFS2_LOCK_BUSY));
	BUG_ON(!(lockres->l_flags & OCFS2_LOCK_ATTACHED));
	BUG_ON(!(lockres->l_flags & OCFS2_LOCK_BLOCKED));
	BUG_ON(lockres->l_blocking <= DLM_LOCK_NL);

	lockres->l_level = lockres->l_requested;
	if (lockres->l_level <=
	    ocfs2_highest_compat_lock_level(lockres->l_blocking)) {
		lockres->l_blocking = DLM_LOCK_NL;
		lockres_clear_flags(lockres, OCFS2_LOCK_BLOCKED);
	}
	lockres_clear_flags(lockres, OCFS2_LOCK_BUSY);
}

static inline void ocfs2_generic_handle_convert_action(struct ocfs2_lock_res *lockres)
{
	BUG_ON(!(lockres->l_flags & OCFS2_LOCK_BUSY));
	BUG_ON(!(lockres->l_flags & OCFS2_LOCK_ATTACHED));

	/* Convert from RO to EX doesn't really need anything as our
	 * information is already up to date. Convert from NL to
	 * *anything* however should mark ourselves as needing an
	 * update */
	if (lockres->l_level == DLM_LOCK_NL &&
	    lockres->l_ops->flags & LOCK_TYPE_REQUIRES_REFRESH)
		lockres_or_flags(lockres, OCFS2_LOCK_NEEDS_REFRESH);

	lockres->l_level = lockres->l_requested;

	/*
	 * We set the OCFS2_LOCK_UPCONVERT_FINISHING flag before clearing
	 * the OCFS2_LOCK_BUSY flag to prevent the dc thread from
	 * downconverting the lock before the upconvert has fully completed.
	 * Do not prevent the dc thread from downconverting if a NONBLOCK
	 * lock request has already returned.
	 */
	if (!(lockres->l_flags & OCFS2_LOCK_NONBLOCK_FINISHED))
		lockres_or_flags(lockres, OCFS2_LOCK_UPCONVERT_FINISHING);
	else
		lockres_clear_flags(lockres, OCFS2_LOCK_NONBLOCK_FINISHED);

	lockres_clear_flags(lockres, OCFS2_LOCK_BUSY);
}

static inline void ocfs2_generic_handle_attach_action(struct ocfs2_lock_res *lockres)
{
	BUG_ON((!(lockres->l_flags & OCFS2_LOCK_BUSY)));
	BUG_ON(lockres->l_flags & OCFS2_LOCK_ATTACHED);

	if (lockres->l_requested > DLM_LOCK_NL &&
	    !(lockres->l_flags & OCFS2_LOCK_LOCAL) &&
	    lockres->l_ops->flags & LOCK_TYPE_REQUIRES_REFRESH)
		lockres_or_flags(lockres, OCFS2_LOCK_NEEDS_REFRESH);

	lockres->l_level = lockres->l_requested;
	lockres_or_flags(lockres, OCFS2_LOCK_ATTACHED);
	lockres_clear_flags(lockres, OCFS2_LOCK_BUSY);
}

static int ocfs2_generic_handle_bast(struct ocfs2_lock_res *lockres,
				     int level)
{
	int needs_downconvert = 0;

	assert_spin_locked(&lockres->l_lock);

	if (level > lockres->l_blocking) {
		/* only schedule a downconvert if we haven't already scheduled
		 * one that goes low enough to satisfy the level we're
		 * blocking. this also catches the case where we get
		 * duplicate BASTs */
		if (ocfs2_highest_compat_lock_level(level) <
		    ocfs2_highest_compat_lock_level(lockres->l_blocking))
			needs_downconvert = 1;

		lockres->l_blocking = level;
	}

	mlog(ML_BASTS, "lockres %s, block %d, level %d, l_block %d, dwn %d\n",
	     lockres->l_name, level, lockres->l_level, lockres->l_blocking,
	     needs_downconvert);

	if (needs_downconvert)
		lockres_or_flags(lockres, OCFS2_LOCK_BLOCKED);
	mlog(0, "needs_downconvert = %d\n", needs_downconvert);
	return needs_downconvert;
}

/*
 * OCFS2_LOCK_PENDING and l_pending_gen.
 *
 * Why does OCFS2_LOCK_PENDING exist? To close a race between setting
 * OCFS2_LOCK_BUSY and calling ocfs2_dlm_lock(). See ocfs2_unblock_lock()
 * for more details on the race.
 *
 * OCFS2_LOCK_PENDING closes the race quite nicely. However, it introduces
 * a race on itself. In o2dlm, we can get the ast before ocfs2_dlm_lock()
 * returns. The ast clears OCFS2_LOCK_BUSY, and must therefore clear
 * OCFS2_LOCK_PENDING at the same time. When ocfs2_dlm_lock() returns,
 * the caller is going to try to clear PENDING again. If nothing else is
 * happening, __lockres_clear_pending() sees PENDING is unset and does
 * nothing.
 *
 * But what if another path (eg downconvert thread) has just started a
 * new locking action? The other path has re-set PENDING. Our path
 * cannot clear PENDING, because that will re-open the original race
 * window.
 *
 * [Example]
 *
 * ocfs2_meta_lock()
 *  ocfs2_cluster_lock()
 *   set BUSY
 *   set PENDING
 *   drop l_lock
 *   ocfs2_dlm_lock()
 *    ocfs2_locking_ast()		ocfs2_downconvert_thread()
 *     clear PENDING			 ocfs2_unblock_lock()
 *					  take_l_lock
 *					  !BUSY
 *					  ocfs2_prepare_downconvert()
 *					   set BUSY
 *					   set PENDING
 *					  drop l_lock
 *   take l_lock
 *   clear PENDING
 *   drop l_lock
 *			<window>
 *   ocfs2_dlm_lock()
 *
 * So as you can see, we now have a window where l_lock is not held,
 * PENDING is not set, and ocfs2_dlm_lock() has not been called.
 *
 * The core problem is that ocfs2_cluster_lock() has cleared the PENDING
 * set by ocfs2_prepare_downconvert(). That wasn't nice.
 *
 * To solve this we introduce l_pending_gen. A call to
 * lockres_clear_pending() will only do so when it is passed a generation
 * number that matches the lockres. lockres_set_pending() will return the
 * current generation number. When ocfs2_cluster_lock() goes to clear
 * PENDING, it passes the generation it got from set_pending(). In our
 * example above, the generation numbers will *not* match. Thus,
 * ocfs2_cluster_lock() will not clear the PENDING set by
 * ocfs2_prepare_downconvert().
 */

/* Unlocked version for ocfs2_locking_ast() */
static void __lockres_clear_pending(struct ocfs2_lock_res *lockres,
				    unsigned int generation,
				    struct ocfs2_super *osb)
{
	assert_spin_locked(&lockres->l_lock);

	/*
	 * The ast and locking functions can race us here. The winner
	 * will clear pending, the loser will not.
	 */
	if (!(lockres->l_flags & OCFS2_LOCK_PENDING) ||
	    (lockres->l_pending_gen != generation))
		return;

	lockres_clear_flags(lockres, OCFS2_LOCK_PENDING);
	lockres->l_pending_gen++;

	/*
	 * The downconvert thread may have skipped us because we
	 * were PENDING. Wake it up.
	 */
	if (lockres->l_flags & OCFS2_LOCK_BLOCKED)
		ocfs2_wake_downconvert_thread(osb);
}

/* Locked version for callers of ocfs2_dlm_lock() */
static void lockres_clear_pending(struct ocfs2_lock_res *lockres,
				  unsigned int generation,
				  struct ocfs2_super *osb)
{
	unsigned long flags;

	spin_lock_irqsave(&lockres->l_lock, flags);
	__lockres_clear_pending(lockres, generation, osb);
	spin_unlock_irqrestore(&lockres->l_lock, flags);
}

static unsigned int lockres_set_pending(struct ocfs2_lock_res *lockres)
{
	assert_spin_locked(&lockres->l_lock);
	BUG_ON(!(lockres->l_flags & OCFS2_LOCK_BUSY));

	lockres_or_flags(lockres, OCFS2_LOCK_PENDING);

	return lockres->l_pending_gen;
}

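/*
 * Sketch of the intended calling pattern for the pending machinery
 * (see ocfs2_lock_create() below for an actual caller):
 *
 *	spin_lock_irqsave(&lockres->l_lock, flags);
 *	lockres_or_flags(lockres, OCFS2_LOCK_BUSY);
 *	gen = lockres_set_pending(lockres);
 *	spin_unlock_irqrestore(&lockres->l_lock, flags);
 *
 *	ret = ocfs2_dlm_lock(osb->cconn, level, &lockres->l_lksb,
 *			     dlm_flags, lockres->l_name,
 *			     OCFS2_LOCK_ID_MAX_LEN - 1);
 *	lockres_clear_pending(lockres, gen, osb);
 *
 * If the AST has already fired and another request has re-set PENDING,
 * the generation numbers no longer match and clear_pending() becomes a
 * no-op, which is exactly the property described in the comment above.
 */
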
static void ocfs2_blocking_ast(struct ocfs2_dlm_lksb *lksb, int level)
{
	struct ocfs2_lock_res *lockres = ocfs2_lksb_to_lock_res(lksb);
	struct ocfs2_super *osb = ocfs2_get_lockres_osb(lockres);
	int needs_downconvert;
	unsigned long flags;

	BUG_ON(level <= DLM_LOCK_NL);

	mlog(ML_BASTS, "BAST fired for lockres %s, blocking %d, level %d, "
	     "type %s\n", lockres->l_name, level, lockres->l_level,
	     ocfs2_lock_type_string(lockres->l_type));

	/*
	 * We can skip the bast for locks which don't enable caching -
	 * they'll be dropped at the earliest possible time anyway.
	 */
	if (lockres->l_flags & OCFS2_LOCK_NOCACHE)
		return;

	spin_lock_irqsave(&lockres->l_lock, flags);
	needs_downconvert = ocfs2_generic_handle_bast(lockres, level);
	if (needs_downconvert)
		ocfs2_schedule_blocked_lock(osb, lockres);
	spin_unlock_irqrestore(&lockres->l_lock, flags);

	wake_up(&lockres->l_event);

	ocfs2_wake_downconvert_thread(osb);
}

static void ocfs2_locking_ast(struct ocfs2_dlm_lksb *lksb)
{
	struct ocfs2_lock_res *lockres = ocfs2_lksb_to_lock_res(lksb);
	struct ocfs2_super *osb = ocfs2_get_lockres_osb(lockres);
	unsigned long flags;
	int status;

	spin_lock_irqsave(&lockres->l_lock, flags);

	status = ocfs2_dlm_lock_status(&lockres->l_lksb);

	if (status == -EAGAIN) {
		lockres_clear_flags(lockres, OCFS2_LOCK_BUSY);
		goto out;
	}

	if (status) {
		mlog(ML_ERROR, "lockres %s: lksb status value of %d!\n",
		     lockres->l_name, status);
		spin_unlock_irqrestore(&lockres->l_lock, flags);
		return;
	}

	mlog(ML_BASTS, "AST fired for lockres %s, action %d, unlock %d, "
	     "level %d => %d\n", lockres->l_name, lockres->l_action,
	     lockres->l_unlock_action, lockres->l_level, lockres->l_requested);

	switch(lockres->l_action) {
	case OCFS2_AST_ATTACH:
		ocfs2_generic_handle_attach_action(lockres);
		lockres_clear_flags(lockres, OCFS2_LOCK_LOCAL);
		break;
	case OCFS2_AST_CONVERT:
		ocfs2_generic_handle_convert_action(lockres);
		break;
	case OCFS2_AST_DOWNCONVERT:
		ocfs2_generic_handle_downconvert_action(lockres);
		break;
	default:
		mlog(ML_ERROR, "lockres %s: AST fired with invalid action: %u, "
		     "flags 0x%lx, unlock: %u\n",
		     lockres->l_name, lockres->l_action, lockres->l_flags,
		     lockres->l_unlock_action);
		BUG();
	}
out:
	/* set it to something invalid so if we get called again we
	 * can catch it. */
	lockres->l_action = OCFS2_AST_INVALID;

	/* Did we try to cancel this lock? Clear that state */
	if (lockres->l_unlock_action == OCFS2_UNLOCK_CANCEL_CONVERT)
		lockres->l_unlock_action = OCFS2_UNLOCK_INVALID;

	/*
	 * We may have beaten the locking functions here. We certainly
	 * know that dlm_lock() has been called :-)
	 * Because we can't have two lock calls in flight at once, we
	 * can use lockres->l_pending_gen.
	 */
	__lockres_clear_pending(lockres, lockres->l_pending_gen, osb);

	wake_up(&lockres->l_event);
	spin_unlock_irqrestore(&lockres->l_lock, flags);
}

static void ocfs2_unlock_ast(struct ocfs2_dlm_lksb *lksb, int error)
{
	struct ocfs2_lock_res *lockres = ocfs2_lksb_to_lock_res(lksb);
	unsigned long flags;

	mlog(ML_BASTS, "UNLOCK AST fired for lockres %s, action = %d\n",
	     lockres->l_name, lockres->l_unlock_action);

	spin_lock_irqsave(&lockres->l_lock, flags);
	if (error) {
		mlog(ML_ERROR, "Dlm passes error %d for lock %s, "
		     "unlock_action %d\n", error, lockres->l_name,
		     lockres->l_unlock_action);
		spin_unlock_irqrestore(&lockres->l_lock, flags);
		return;
	}

	switch(lockres->l_unlock_action) {
	case OCFS2_UNLOCK_CANCEL_CONVERT:
		mlog(0, "Cancel convert success for %s\n", lockres->l_name);
		lockres->l_action = OCFS2_AST_INVALID;
		/* Downconvert thread may have requeued this lock, we
		 * need to wake it. */
		if (lockres->l_flags & OCFS2_LOCK_BLOCKED)
			ocfs2_wake_downconvert_thread(ocfs2_get_lockres_osb(lockres));
		break;
	case OCFS2_UNLOCK_DROP_LOCK:
		lockres->l_level = DLM_LOCK_IV;
		break;
	default:
		BUG();
	}

	lockres_clear_flags(lockres, OCFS2_LOCK_BUSY);
	lockres->l_unlock_action = OCFS2_UNLOCK_INVALID;
	wake_up(&lockres->l_event);
	spin_unlock_irqrestore(&lockres->l_lock, flags);
}

/*
 * This is the filesystem locking protocol. It provides the lock handling
 * hooks for the underlying DLM. It has a maximum version number.
 * The version number allows interoperability with systems running at
 * the same major number and an equal or smaller minor number.
 *
 * Whenever the filesystem does new things with locks (adds or removes a
 * lock, orders them differently, does different things underneath a lock),
 * the version must be changed. The protocol is negotiated when joining
 * the dlm domain. A node may join the domain if its major version is
 * identical to all other nodes and its minor version is greater than
 * or equal to all other nodes. When its minor version is greater than
 * the other nodes, it will run at the minor version specified by the
 * other nodes.
 *
 * If a locking change is made that will not be compatible with older
 * versions, the major number must be increased and the minor version set
 * to zero. If a change merely adds a behavior that can be disabled when
 * speaking to older versions, the minor version must be increased. If a
 * change adds a fully backwards compatible change (eg, LVB changes that
 * are just ignored by older versions), the version does not need to be
 * updated.
 */
static struct ocfs2_locking_protocol lproto = {
	.lp_max_version = {
		.pv_major = OCFS2_LOCKING_PROTOCOL_MAJOR,
		.pv_minor = OCFS2_LOCKING_PROTOCOL_MINOR,
	},
	.lp_lock_ast		= ocfs2_locking_ast,
	.lp_blocking_ast	= ocfs2_blocking_ast,
	.lp_unlock_ast		= ocfs2_unlock_ast,
};

void ocfs2_set_locking_protocol(void)
{
	ocfs2_stack_glue_set_max_proto_version(&lproto.lp_max_version);
}

static inline void ocfs2_recover_from_dlm_error(struct ocfs2_lock_res *lockres,
						int convert)
{
	unsigned long flags;

	spin_lock_irqsave(&lockres->l_lock, flags);
	lockres_clear_flags(lockres, OCFS2_LOCK_BUSY);
	lockres_clear_flags(lockres, OCFS2_LOCK_UPCONVERT_FINISHING);
	if (convert)
		lockres->l_action = OCFS2_AST_INVALID;
	else
		lockres->l_unlock_action = OCFS2_UNLOCK_INVALID;
	spin_unlock_irqrestore(&lockres->l_lock, flags);

	wake_up(&lockres->l_event);
}

/* Note: If we detect another process working on the lock (i.e.,
 * OCFS2_LOCK_BUSY), we'll bail out returning 0. It's up to the caller
 * to do the right thing in that case.
 */
static int ocfs2_lock_create(struct ocfs2_super *osb,
			     struct ocfs2_lock_res *lockres,
			     int level,
			     u32 dlm_flags)
{
	int ret = 0;
	unsigned long flags;
	unsigned int gen;

	mlog(0, "lock %s, level = %d, flags = %u\n", lockres->l_name, level,
	     dlm_flags);

	spin_lock_irqsave(&lockres->l_lock, flags);
	if ((lockres->l_flags & OCFS2_LOCK_ATTACHED) ||
	    (lockres->l_flags & OCFS2_LOCK_BUSY)) {
		spin_unlock_irqrestore(&lockres->l_lock, flags);
		goto bail;
	}

	lockres->l_action = OCFS2_AST_ATTACH;
	lockres->l_requested = level;
	lockres_or_flags(lockres, OCFS2_LOCK_BUSY);
	gen = lockres_set_pending(lockres);
	spin_unlock_irqrestore(&lockres->l_lock, flags);

	ret = ocfs2_dlm_lock(osb->cconn,
			     level,
			     &lockres->l_lksb,
			     dlm_flags,
			     lockres->l_name,
			     OCFS2_LOCK_ID_MAX_LEN - 1);
	lockres_clear_pending(lockres, gen, osb);
	if (ret) {
		ocfs2_log_dlm_error("ocfs2_dlm_lock", ret, lockres);
		ocfs2_recover_from_dlm_error(lockres, 1);
	}

	mlog(0, "lock %s, return from ocfs2_dlm_lock\n", lockres->l_name);

bail:
	return ret;
}

static inline int ocfs2_check_wait_flag(struct ocfs2_lock_res *lockres,
					int flag)
{
	unsigned long flags;
	int ret;

	spin_lock_irqsave(&lockres->l_lock, flags);
	ret = lockres->l_flags & flag;
	spin_unlock_irqrestore(&lockres->l_lock, flags);

	return ret;
}

static inline void ocfs2_wait_on_busy_lock(struct ocfs2_lock_res *lockres)

{
	wait_event(lockres->l_event,
		   !ocfs2_check_wait_flag(lockres, OCFS2_LOCK_BUSY));
}

static inline void ocfs2_wait_on_refreshing_lock(struct ocfs2_lock_res *lockres)

{
	wait_event(lockres->l_event,
		   !ocfs2_check_wait_flag(lockres, OCFS2_LOCK_REFRESHING));
}

/* predict what lock level we'll be dropping down to on behalf
 * of another node, and return true if the currently wanted
 * level will be compatible with it. */
static inline int ocfs2_may_continue_on_blocked_lock(struct ocfs2_lock_res *lockres,
						     int wanted)
{
	BUG_ON(!(lockres->l_flags & OCFS2_LOCK_BLOCKED));

	return wanted <= ocfs2_highest_compat_lock_level(lockres->l_blocking);
}

static void ocfs2_init_mask_waiter(struct ocfs2_mask_waiter *mw)
{
	INIT_LIST_HEAD(&mw->mw_item);
	init_completion(&mw->mw_complete);
	ocfs2_init_start_time(mw);
}

static int ocfs2_wait_for_mask(struct ocfs2_mask_waiter *mw)
{
	wait_for_completion(&mw->mw_complete);
	/* Re-arm the completion in case we want to wait on it again */
	reinit_completion(&mw->mw_complete);
	return mw->mw_status;
}

static void lockres_add_mask_waiter(struct ocfs2_lock_res *lockres,
				    struct ocfs2_mask_waiter *mw,
				    unsigned long mask,
				    unsigned long goal)
{
	BUG_ON(!list_empty(&mw->mw_item));

	assert_spin_locked(&lockres->l_lock);

	list_add_tail(&mw->mw_item, &lockres->l_mask_waiters);
	mw->mw_mask = mask;
	mw->mw_goal = goal;
}

/* returns 0 if the mw that was removed was already satisfied, -EBUSY
 * if the mask still hadn't reached its goal */
static int __lockres_remove_mask_waiter(struct ocfs2_lock_res *lockres,
					struct ocfs2_mask_waiter *mw)
{
	int ret = 0;

	assert_spin_locked(&lockres->l_lock);
	if (!list_empty(&mw->mw_item)) {
		if ((lockres->l_flags & mw->mw_mask) != mw->mw_goal)
			ret = -EBUSY;

		list_del_init(&mw->mw_item);
		init_completion(&mw->mw_complete);
	}

	return ret;
}

static int lockres_remove_mask_waiter(struct ocfs2_lock_res *lockres,
				      struct ocfs2_mask_waiter *mw)
{
	unsigned long flags;
	int ret = 0;

	spin_lock_irqsave(&lockres->l_lock, flags);
	ret = __lockres_remove_mask_waiter(lockres, mw);
	spin_unlock_irqrestore(&lockres->l_lock, flags);

	return ret;

}

static int ocfs2_wait_for_mask_interruptible(struct ocfs2_mask_waiter *mw,
1427 struct ocfs2_lock_res *lockres)
1428{
1429 int ret;
1430
1431 ret = wait_for_completion_interruptible(&mw->mw_complete);
1432 if (ret)
1433 lockres_remove_mask_waiter(lockres, mw);
1434 else
1435 ret = mw->mw_status;
1436 /* Re-arm the completion in case we want to wait on it again */
Wolfram Sang16735d02013-11-14 14:32:02 -08001437 reinit_completion(&mw->mw_complete);
Mark Fashehcf8e06f2007-12-20 16:43:10 -08001438 return ret;
1439}
1440
Jan Karacb257972009-06-04 15:26:50 +02001441static int __ocfs2_cluster_lock(struct ocfs2_super *osb,
1442 struct ocfs2_lock_res *lockres,
1443 int level,
1444 u32 lkm_flags,
1445 int arg_flags,
1446 int l_subclass,
1447 unsigned long caller_ip)
Mark Fashehccd979b2005-12-15 14:31:24 -08001448{
1449 struct ocfs2_mask_waiter mw;
Mark Fashehccd979b2005-12-15 14:31:24 -08001450 int wait, catch_signals = !(osb->s_mount_opt & OCFS2_MOUNT_NOINTR);
1451 int ret = 0; /* gcc doesn't realize wait = 1 guarantees ret is set */
1452 unsigned long flags;
Joel Beckerde551242008-02-01 14:45:08 -08001453 unsigned int gen;
David Teigland1693a5c2008-01-30 16:52:53 -08001454 int noqueue_attempted = 0;
Xue jiufeid1e782382014-12-10 15:41:59 -08001455 int dlm_locked = 0;
Tariq Saeedb1b1e152016-01-21 16:40:39 -08001456 int kick_dc = 0;
Mark Fashehccd979b2005-12-15 14:31:24 -08001457
alex chen2f2eca22015-04-14 15:43:49 -07001458 if (!(lockres->l_flags & OCFS2_LOCK_INITIALIZED)) {
1459 mlog_errno(-EINVAL);
1460 return -EINVAL;
1461 }
1462
Mark Fashehccd979b2005-12-15 14:31:24 -08001463 ocfs2_init_mask_waiter(&mw);
1464
Mark Fashehb80fc012006-09-12 22:08:14 -07001465 if (lockres->l_ops->flags & LOCK_TYPE_USES_LVB)
Joel Beckerbd3e7612008-02-01 12:14:57 -08001466 lkm_flags |= DLM_LKF_VALBLK;
Mark Fashehb80fc012006-09-12 22:08:14 -07001467
Mark Fashehccd979b2005-12-15 14:31:24 -08001468again:
1469 wait = 0;
1470
Sunil Mushrana1912822010-01-21 10:50:03 -08001471 spin_lock_irqsave(&lockres->l_lock, flags);
1472
Mark Fashehccd979b2005-12-15 14:31:24 -08001473 if (catch_signals && signal_pending(current)) {
1474 ret = -ERESTARTSYS;
Sunil Mushrana1912822010-01-21 10:50:03 -08001475 goto unlock;
Mark Fashehccd979b2005-12-15 14:31:24 -08001476 }
1477
Mark Fashehccd979b2005-12-15 14:31:24 -08001478 mlog_bug_on_msg(lockres->l_flags & OCFS2_LOCK_FREEING,
1479 "Cluster lock called on freeing lockres %s! flags "
1480 "0x%lx\n", lockres->l_name, lockres->l_flags);
1481
1482 /* We only compare against the currently granted level
1483 * here. If the lock is blocked waiting on a downconvert,
1484 * we'll get caught below. */
1485 if (lockres->l_flags & OCFS2_LOCK_BUSY &&
1486 level > lockres->l_level) {
1487 /* is someone sitting in dlm_lock? If so, wait on
1488 * them. */
1489 lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BUSY, 0);
1490 wait = 1;
1491 goto unlock;
1492 }
1493
Sunil Mushrana1912822010-01-21 10:50:03 -08001494 if (lockres->l_flags & OCFS2_LOCK_UPCONVERT_FINISHING) {
1495 /*
1496 * We've upconverted. If the lock now has a level we can
1497 * work with, we take it. If, however, the lock is not at the
1498 * required level, we go thru the full cycle. One way this could
1499 * happen is if a process requesting an upconvert to PR is
1500 * closely followed by another requesting upconvert to an EX.
1501 * If the process requesting EX lands here, we want it to
1502 * continue attempting to upconvert and let the process
1503 * requesting PR take the lock.
1504 * If multiple processes request upconvert to PR, the first one
1505 * here will take the lock. The others will have to go thru the
1506 * OCFS2_LOCK_BLOCKED check to ensure that there is no pending
1507 * downconvert request.
1508 */
1509 if (level <= lockres->l_level)
1510 goto update_holders;
1511 }
1512
Mark Fashehccd979b2005-12-15 14:31:24 -08001513 if (lockres->l_flags & OCFS2_LOCK_BLOCKED &&
1514 !ocfs2_may_continue_on_blocked_lock(lockres, level)) {
1515 /* is the lock currently blocked on behalf of
1516 * another node */
1517 lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BLOCKED, 0);
1518 wait = 1;
1519 goto unlock;
1520 }
1521
1522 if (level > lockres->l_level) {
David Teigland1693a5c2008-01-30 16:52:53 -08001523 if (noqueue_attempted > 0) {
1524 ret = -EAGAIN;
1525 goto unlock;
1526 }
1527 if (lkm_flags & DLM_LKF_NOQUEUE)
1528 noqueue_attempted = 1;
1529
Mark Fashehccd979b2005-12-15 14:31:24 -08001530 if (lockres->l_action != OCFS2_AST_INVALID)
1531 mlog(ML_ERROR, "lockres %s has action %u pending\n",
1532 lockres->l_name, lockres->l_action);
1533
Mark Fasheh019d1b22007-10-05 12:09:05 -07001534 if (!(lockres->l_flags & OCFS2_LOCK_ATTACHED)) {
1535 lockres->l_action = OCFS2_AST_ATTACH;
Joel Beckerbd3e7612008-02-01 12:14:57 -08001536 lkm_flags &= ~DLM_LKF_CONVERT;
Mark Fasheh019d1b22007-10-05 12:09:05 -07001537 } else {
1538 lockres->l_action = OCFS2_AST_CONVERT;
Joel Beckerbd3e7612008-02-01 12:14:57 -08001539 lkm_flags |= DLM_LKF_CONVERT;
Mark Fasheh019d1b22007-10-05 12:09:05 -07001540 }
1541
Mark Fashehccd979b2005-12-15 14:31:24 -08001542 lockres->l_requested = level;
1543 lockres_or_flags(lockres, OCFS2_LOCK_BUSY);
Joel Beckerde551242008-02-01 14:45:08 -08001544 gen = lockres_set_pending(lockres);
Mark Fashehccd979b2005-12-15 14:31:24 -08001545 spin_unlock_irqrestore(&lockres->l_lock, flags);
1546
Joel Beckerbd3e7612008-02-01 12:14:57 -08001547 BUG_ON(level == DLM_LOCK_IV);
1548 BUG_ON(level == DLM_LOCK_NL);
Mark Fashehccd979b2005-12-15 14:31:24 -08001549
Sunil Mushran9b915182010-02-26 19:42:44 -08001550 mlog(ML_BASTS, "lockres %s, convert from %d to %d\n",
Mark Fashehccd979b2005-12-15 14:31:24 -08001551 lockres->l_name, lockres->l_level, level);
1552
1553 /* call dlm_lock to upgrade lock now */
Joel Becker4670c462008-02-01 14:39:35 -08001554 ret = ocfs2_dlm_lock(osb->cconn,
Joel Becker7431cd72008-02-01 12:15:37 -08001555 level,
1556 &lockres->l_lksb,
1557 lkm_flags,
1558 lockres->l_name,
Joel Beckera796d282010-01-28 19:22:39 -08001559 OCFS2_LOCK_ID_MAX_LEN - 1);
Joel Beckerde551242008-02-01 14:45:08 -08001560 lockres_clear_pending(lockres, gen, osb);
Joel Becker7431cd72008-02-01 12:15:37 -08001561 if (ret) {
1562 if (!(lkm_flags & DLM_LKF_NOQUEUE) ||
1563 (ret != -EAGAIN)) {
Joel Becker24ef1812008-01-29 17:37:32 -08001564 ocfs2_log_dlm_error("ocfs2_dlm_lock",
Joel Becker7431cd72008-02-01 12:15:37 -08001565 ret, lockres);
Mark Fashehccd979b2005-12-15 14:31:24 -08001566 }
1567 ocfs2_recover_from_dlm_error(lockres, 1);
1568 goto out;
1569 }
Xue jiufeid1e782382014-12-10 15:41:59 -08001570 dlm_locked = 1;
Mark Fashehccd979b2005-12-15 14:31:24 -08001571
Coly Li73ac36e2009-01-07 18:09:16 -08001572 mlog(0, "lock %s, successful return from ocfs2_dlm_lock\n",
Mark Fashehccd979b2005-12-15 14:31:24 -08001573 lockres->l_name);
1574
1575 /* At this point we've gone inside the dlm and need to
1576 * complete our work regardless. */
1577 catch_signals = 0;
1578
1579 /* wait for busy to clear and carry on */
1580 goto again;
1581 }
1582
Sunil Mushrana1912822010-01-21 10:50:03 -08001583update_holders:
Mark Fashehccd979b2005-12-15 14:31:24 -08001584 /* Ok, if we get here then we're good to go. */
1585 ocfs2_inc_holders(lockres, level);
1586
1587 ret = 0;
1588unlock:
Sunil Mushrana1912822010-01-21 10:50:03 -08001589 lockres_clear_flags(lockres, OCFS2_LOCK_UPCONVERT_FINISHING);
1590
Tariq Saeedb1b1e152016-01-21 16:40:39 -08001591 /* ocfs2_unblock_lock requeues on seeing OCFS2_LOCK_UPCONVERT_FINISHING */
1592 kick_dc = (lockres->l_flags & OCFS2_LOCK_BLOCKED);
1593
Mark Fashehccd979b2005-12-15 14:31:24 -08001594 spin_unlock_irqrestore(&lockres->l_lock, flags);
Tariq Saeedb1b1e152016-01-21 16:40:39 -08001595 if (kick_dc)
1596 ocfs2_wake_downconvert_thread(osb);
Mark Fashehccd979b2005-12-15 14:31:24 -08001597out:
1598 /*
1599 * This helps work around a lock inversion between the page lock
1600 * and dlm locks. One path holds the page lock while calling aops,
1601 * which block acquiring dlm locks. The downconvert thread holds dlm
1602 * locks while acquiring page locks while downconverting data locks.
1603 * This block helps an aop path notice the inversion and back
1604 * off to unlock its page lock before trying the dlm lock again.
1605 */
1606 if (wait && arg_flags & OCFS2_LOCK_NONBLOCK &&
1607 mw.mw_mask & (OCFS2_LOCK_BUSY|OCFS2_LOCK_BLOCKED)) {
1608 wait = 0;
Xue jiufeid1e782382014-12-10 15:41:59 -08001609 spin_lock_irqsave(&lockres->l_lock, flags);
1610 if (__lockres_remove_mask_waiter(lockres, &mw)) {
1611 if (dlm_locked)
1612 lockres_or_flags(lockres,
1613 OCFS2_LOCK_NONBLOCK_FINISHED);
1614 spin_unlock_irqrestore(&lockres->l_lock, flags);
Mark Fashehccd979b2005-12-15 14:31:24 -08001615 ret = -EAGAIN;
Xue jiufeid1e782382014-12-10 15:41:59 -08001616 } else {
1617 spin_unlock_irqrestore(&lockres->l_lock, flags);
Mark Fashehccd979b2005-12-15 14:31:24 -08001618 goto again;
Xue jiufeid1e782382014-12-10 15:41:59 -08001619 }
Mark Fashehccd979b2005-12-15 14:31:24 -08001620 }
1621 if (wait) {
1622 ret = ocfs2_wait_for_mask(&mw);
1623 if (ret == 0)
1624 goto again;
1625 mlog_errno(ret);
1626 }
Sunil Mushran8ddb7b02008-05-13 13:45:15 -07001627 ocfs2_update_lock_stats(lockres, level, &mw, ret);
Mark Fashehccd979b2005-12-15 14:31:24 -08001628
Jan Karacb257972009-06-04 15:26:50 +02001629#ifdef CONFIG_DEBUG_LOCK_ALLOC
1630 if (!ret && lockres->l_lockdep_map.key != NULL) {
1631 if (level == DLM_LOCK_PR)
1632 rwsem_acquire_read(&lockres->l_lockdep_map, l_subclass,
1633 !!(arg_flags & OCFS2_META_LOCK_NOQUEUE),
1634 caller_ip);
1635 else
1636 rwsem_acquire(&lockres->l_lockdep_map, l_subclass,
1637 !!(arg_flags & OCFS2_META_LOCK_NOQUEUE),
1638 caller_ip);
1639 }
1640#endif
Mark Fashehccd979b2005-12-15 14:31:24 -08001641 return ret;
1642}
1643
Jan Karacb257972009-06-04 15:26:50 +02001644static inline int ocfs2_cluster_lock(struct ocfs2_super *osb,
1645 struct ocfs2_lock_res *lockres,
1646 int level,
1647 u32 lkm_flags,
1648 int arg_flags)
1649{
1650 return __ocfs2_cluster_lock(osb, lockres, level, lkm_flags, arg_flags,
1651 0, _RET_IP_);
1652}
1653
1654
1655static void __ocfs2_cluster_unlock(struct ocfs2_super *osb,
1656 struct ocfs2_lock_res *lockres,
1657 int level,
1658 unsigned long caller_ip)
Mark Fashehccd979b2005-12-15 14:31:24 -08001659{
1660 unsigned long flags;
1661
Mark Fashehccd979b2005-12-15 14:31:24 -08001662 spin_lock_irqsave(&lockres->l_lock, flags);
1663 ocfs2_dec_holders(lockres, level);
Mark Fasheh34d024f2007-09-24 15:56:19 -07001664 ocfs2_downconvert_on_unlock(osb, lockres);
Mark Fashehccd979b2005-12-15 14:31:24 -08001665 spin_unlock_irqrestore(&lockres->l_lock, flags);
Jan Karacb257972009-06-04 15:26:50 +02001666#ifdef CONFIG_DEBUG_LOCK_ALLOC
1667 if (lockres->l_lockdep_map.key != NULL)
1668 rwsem_release(&lockres->l_lockdep_map, 1, caller_ip);
1669#endif
Mark Fashehccd979b2005-12-15 14:31:24 -08001670}
1671
Adrian Bunkda661162006-11-20 03:24:28 +01001672static int ocfs2_create_new_lock(struct ocfs2_super *osb,
1673 struct ocfs2_lock_res *lockres,
1674 int ex,
1675 int local)
Mark Fashehccd979b2005-12-15 14:31:24 -08001676{
Joel Beckerbd3e7612008-02-01 12:14:57 -08001677 int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR;
Mark Fashehccd979b2005-12-15 14:31:24 -08001678 unsigned long flags;
Joel Beckerbd3e7612008-02-01 12:14:57 -08001679 u32 lkm_flags = local ? DLM_LKF_LOCAL : 0;
Mark Fashehccd979b2005-12-15 14:31:24 -08001680
1681 spin_lock_irqsave(&lockres->l_lock, flags);
1682 BUG_ON(lockres->l_flags & OCFS2_LOCK_ATTACHED);
1683 lockres_or_flags(lockres, OCFS2_LOCK_LOCAL);
1684 spin_unlock_irqrestore(&lockres->l_lock, flags);
1685
Mark Fasheh24c19ef2006-09-22 17:28:19 -07001686 return ocfs2_lock_create(osb, lockres, level, lkm_flags);
Mark Fashehccd979b2005-12-15 14:31:24 -08001687}
1688
1689/* Grants us an EX lock on the data and metadata resources, skipping
1690 * the normal cluster directory lookup. Use this ONLY on newly created
1691 * inodes which other nodes can't possibly see, and which haven't been
1692 * hashed in the inode hash yet. This can give us a good performance
1693 * increase as it'll skip the network broadcast normally associated
1694 * with creating a new lock resource. */
1695int ocfs2_create_new_inode_locks(struct inode *inode)
1696{
1697 int ret;
Mark Fashehd680efe2006-09-08 14:14:34 -07001698 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
Mark Fashehccd979b2005-12-15 14:31:24 -08001699
Mark Fashehccd979b2005-12-15 14:31:24 -08001700 BUG_ON(!ocfs2_inode_is_new(inode));
1701
Mark Fashehb06970532006-03-03 10:24:33 -08001702 mlog(0, "Inode %llu\n", (unsigned long long)OCFS2_I(inode)->ip_blkno);
Mark Fashehccd979b2005-12-15 14:31:24 -08001703
1704 /* Note that we don't increment any of the holder counts, nor
1705 * do we add anything to a journal handle. Since this is
1706 * supposed to be a new inode which the cluster doesn't know
1707 * about yet, there is no need to. As far as the LVB handling
1708 * is concerned, this is basically like acquiring an EX lock
1709 * on a resource which has an invalid one -- we'll set it
1710 * valid when we release the EX. */
1711
Mark Fasheh24c19ef2006-09-22 17:28:19 -07001712 ret = ocfs2_create_new_lock(osb, &OCFS2_I(inode)->ip_rw_lockres, 1, 1);
Mark Fashehccd979b2005-12-15 14:31:24 -08001713 if (ret) {
1714 mlog_errno(ret);
1715 goto bail;
1716 }
1717
Mark Fasheh24c19ef2006-09-22 17:28:19 -07001718 /*
Joel Beckerbd3e7612008-02-01 12:14:57 -08001719 * We don't want to use DLM_LKF_LOCAL on metadata locks as they
Mark Fasheh24c19ef2006-09-22 17:28:19 -07001720 * don't use a generation in their lock names.
1721 */
Mark Fashehe63aecb62007-10-18 15:30:42 -07001722 ret = ocfs2_create_new_lock(osb, &OCFS2_I(inode)->ip_inode_lockres, 1, 0);
Mark Fashehccd979b2005-12-15 14:31:24 -08001723 if (ret) {
1724 mlog_errno(ret);
1725 goto bail;
1726 }
1727
Tiger Yang50008632007-03-20 16:01:38 -07001728 ret = ocfs2_create_new_lock(osb, &OCFS2_I(inode)->ip_open_lockres, 0, 0);
Joseph Qia8f24f1b2016-07-26 15:21:35 -07001729 if (ret)
Tiger Yang50008632007-03-20 16:01:38 -07001730 mlog_errno(ret);
Tiger Yang50008632007-03-20 16:01:38 -07001731
Mark Fashehccd979b2005-12-15 14:31:24 -08001732bail:
Mark Fashehccd979b2005-12-15 14:31:24 -08001733 return ret;
1734}
1735
1736int ocfs2_rw_lock(struct inode *inode, int write)
1737{
1738 int status, level;
1739 struct ocfs2_lock_res *lockres;
Sunil Mushranc271c5c2006-12-05 17:56:35 -08001740 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
Mark Fashehccd979b2005-12-15 14:31:24 -08001741
Mark Fashehb06970532006-03-03 10:24:33 -08001742 mlog(0, "inode %llu take %s RW lock\n",
1743 (unsigned long long)OCFS2_I(inode)->ip_blkno,
Mark Fashehccd979b2005-12-15 14:31:24 -08001744 write ? "EXMODE" : "PRMODE");
1745
Tao Mac1e8d352011-03-07 16:43:21 +08001746 if (ocfs2_mount_local(osb))
Sunil Mushranc271c5c2006-12-05 17:56:35 -08001747 return 0;
1748
Mark Fashehccd979b2005-12-15 14:31:24 -08001749 lockres = &OCFS2_I(inode)->ip_rw_lockres;
1750
Joel Beckerbd3e7612008-02-01 12:14:57 -08001751 level = write ? DLM_LOCK_EX : DLM_LOCK_PR;
Mark Fashehccd979b2005-12-15 14:31:24 -08001752
piaojun1119d3c2018-04-05 16:18:33 -07001753 status = ocfs2_cluster_lock(osb, lockres, level, 0, 0);
Mark Fashehccd979b2005-12-15 14:31:24 -08001754 if (status < 0)
1755 mlog_errno(status);
1756
Mark Fashehccd979b2005-12-15 14:31:24 -08001757 return status;
1758}
1759
Gang He06e7f132018-01-31 16:15:17 -08001760int ocfs2_try_rw_lock(struct inode *inode, int write)
1761{
1762 int status, level;
1763 struct ocfs2_lock_res *lockres;
1764 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
1765
1766 mlog(0, "inode %llu try to take %s RW lock\n",
1767 (unsigned long long)OCFS2_I(inode)->ip_blkno,
1768 write ? "EXMODE" : "PRMODE");
1769
1770 if (ocfs2_mount_local(osb))
1771 return 0;
1772
1773 lockres = &OCFS2_I(inode)->ip_rw_lockres;
1774
1775 level = write ? DLM_LOCK_EX : DLM_LOCK_PR;
1776
1777 status = ocfs2_cluster_lock(osb, lockres, level, DLM_LKF_NOQUEUE, 0);
1778 return status;
1779}
1780
Mark Fashehccd979b2005-12-15 14:31:24 -08001781void ocfs2_rw_unlock(struct inode *inode, int write)
1782{
Joel Beckerbd3e7612008-02-01 12:14:57 -08001783 int level = write ? DLM_LOCK_EX : DLM_LOCK_PR;
Mark Fashehccd979b2005-12-15 14:31:24 -08001784 struct ocfs2_lock_res *lockres = &OCFS2_I(inode)->ip_rw_lockres;
Sunil Mushranc271c5c2006-12-05 17:56:35 -08001785 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
Mark Fashehccd979b2005-12-15 14:31:24 -08001786
Mark Fashehb06970532006-03-03 10:24:33 -08001787 mlog(0, "inode %llu drop %s RW lock\n",
1788 (unsigned long long)OCFS2_I(inode)->ip_blkno,
Mark Fashehccd979b2005-12-15 14:31:24 -08001789 write ? "EXMODE" : "PRMODE");
1790
Sunil Mushranc271c5c2006-12-05 17:56:35 -08001791 if (!ocfs2_mount_local(osb))
piaojun1119d3c2018-04-05 16:18:33 -07001792 ocfs2_cluster_unlock(osb, lockres, level);
Mark Fashehccd979b2005-12-15 14:31:24 -08001793}
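
/*
 * Illustrative sketch of a caller (hypothetical, compiled-out function;
 * only ocfs2_rw_lock()/ocfs2_rw_unlock() above are real interfaces):
 */
#if 0
static int example_rw_path(struct inode *inode, int write)
{
	int status = ocfs2_rw_lock(inode, write);	/* EX if writing, PR if reading */

	if (status < 0)
		return status;

	/* ... perform the I/O while the cluster-wide RW lock is held ... */

	ocfs2_rw_unlock(inode, write);
	return 0;
}
#endif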
1794
Tiger Yang50008632007-03-20 16:01:38 -07001795/*
1796 * ocfs2_open_lock always gets a PR mode lock.
1797 */
1798int ocfs2_open_lock(struct inode *inode)
1799{
1800 int status = 0;
1801 struct ocfs2_lock_res *lockres;
1802 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
1803
Tiger Yang50008632007-03-20 16:01:38 -07001804 mlog(0, "inode %llu take PRMODE open lock\n",
1805 (unsigned long long)OCFS2_I(inode)->ip_blkno);
1806
Tiger Yang03efed82011-05-28 00:34:19 +08001807 if (ocfs2_is_hard_readonly(osb) || ocfs2_mount_local(osb))
Tiger Yang50008632007-03-20 16:01:38 -07001808 goto out;
1809
1810 lockres = &OCFS2_I(inode)->ip_open_lockres;
1811
piaojun1119d3c2018-04-05 16:18:33 -07001812 status = ocfs2_cluster_lock(osb, lockres, DLM_LOCK_PR, 0, 0);
Tiger Yang50008632007-03-20 16:01:38 -07001813 if (status < 0)
1814 mlog_errno(status);
1815
1816out:
Tiger Yang50008632007-03-20 16:01:38 -07001817 return status;
1818}
1819
1820int ocfs2_try_open_lock(struct inode *inode, int write)
1821{
1822 int status = 0, level;
1823 struct ocfs2_lock_res *lockres;
1824 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
1825
Tiger Yang50008632007-03-20 16:01:38 -07001826 mlog(0, "inode %llu try to take %s open lock\n",
1827 (unsigned long long)OCFS2_I(inode)->ip_blkno,
1828 write ? "EXMODE" : "PRMODE");
1829
Tiger Yang03efed82011-05-28 00:34:19 +08001830 if (ocfs2_is_hard_readonly(osb)) {
1831 if (write)
1832 status = -EROFS;
1833 goto out;
1834 }
1835
Tiger Yang50008632007-03-20 16:01:38 -07001836 if (ocfs2_mount_local(osb))
1837 goto out;
1838
1839 lockres = &OCFS2_I(inode)->ip_open_lockres;
1840
Joel Beckerbd3e7612008-02-01 12:14:57 -08001841 level = write ? DLM_LOCK_EX : DLM_LOCK_PR;
Tiger Yang50008632007-03-20 16:01:38 -07001842
1843 /*
1844 * The file system may already be holding a PRMODE/EXMODE open lock.
Joel Beckerbd3e7612008-02-01 12:14:57 -08001845 * Since we pass DLM_LKF_NOQUEUE, the request won't block waiting on
Tiger Yang50008632007-03-20 16:01:38 -07001846 * other nodes and the -EAGAIN will indicate to the caller that
1847 * this inode is still in use.
1848 */
piaojun1119d3c2018-04-05 16:18:33 -07001849 status = ocfs2_cluster_lock(osb, lockres, level, DLM_LKF_NOQUEUE, 0);
Tiger Yang50008632007-03-20 16:01:38 -07001850
1851out:
Tiger Yang50008632007-03-20 16:01:38 -07001852 return status;
1853}
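
/*
 * Illustrative sketch (hypothetical, compiled-out helper): the -EAGAIN
 * result from the NOQUEUE request above is how callers learn that
 * another node still has this inode open.
 */
#if 0
static int example_inode_in_use_elsewhere(struct inode *inode)
{
	int status = ocfs2_try_open_lock(inode, 1);	/* try for an EX open lock */

	if (status == -EAGAIN)
		return 1;	/* some other node still holds the open lock */
	if (status < 0)
		return status;	/* real error */
	return 0;		/* nobody else has it open */
}
#endif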
1854
1855/*
1856 * ocfs2_open_unlock unlocks PR and EX mode open locks.
1857 */
1858void ocfs2_open_unlock(struct inode *inode)
1859{
1860 struct ocfs2_lock_res *lockres = &OCFS2_I(inode)->ip_open_lockres;
1861 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
1862
Tiger Yang50008632007-03-20 16:01:38 -07001863 mlog(0, "inode %llu drop open lock\n",
1864 (unsigned long long)OCFS2_I(inode)->ip_blkno);
1865
1866 if (ocfs2_mount_local(osb))
1867 goto out;
1868
1869 if (lockres->l_ro_holders)
piaojun1119d3c2018-04-05 16:18:33 -07001870 ocfs2_cluster_unlock(osb, lockres, DLM_LOCK_PR);
Tiger Yang50008632007-03-20 16:01:38 -07001871 if (lockres->l_ex_holders)
piaojun1119d3c2018-04-05 16:18:33 -07001872 ocfs2_cluster_unlock(osb, lockres, DLM_LOCK_EX);
Tiger Yang50008632007-03-20 16:01:38 -07001873
1874out:
Tao Mac1e8d352011-03-07 16:43:21 +08001875 return;
Tiger Yang50008632007-03-20 16:01:38 -07001876}
1877
Mark Fashehcf8e06f2007-12-20 16:43:10 -08001878static int ocfs2_flock_handle_signal(struct ocfs2_lock_res *lockres,
1879 int level)
1880{
1881 int ret;
1882 struct ocfs2_super *osb = ocfs2_get_lockres_osb(lockres);
1883 unsigned long flags;
1884 struct ocfs2_mask_waiter mw;
1885
1886 ocfs2_init_mask_waiter(&mw);
1887
1888retry_cancel:
1889 spin_lock_irqsave(&lockres->l_lock, flags);
1890 if (lockres->l_flags & OCFS2_LOCK_BUSY) {
1891 ret = ocfs2_prepare_cancel_convert(osb, lockres);
1892 if (ret) {
1893 spin_unlock_irqrestore(&lockres->l_lock, flags);
1894 ret = ocfs2_cancel_convert(osb, lockres);
1895 if (ret < 0) {
1896 mlog_errno(ret);
1897 goto out;
1898 }
1899 goto retry_cancel;
1900 }
1901 lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BUSY, 0);
1902 spin_unlock_irqrestore(&lockres->l_lock, flags);
1903
1904 ocfs2_wait_for_mask(&mw);
1905 goto retry_cancel;
1906 }
1907
1908 ret = -ERESTARTSYS;
1909 /*
1910 * We may still have gotten the lock, in which case there's no
1911 * point to restarting the syscall.
1912 */
1913 if (lockres->l_level == level)
1914 ret = 0;
1915
1916 mlog(0, "Cancel returning %d. flags: 0x%lx, level: %d, act: %d\n", ret,
1917 lockres->l_flags, lockres->l_level, lockres->l_action);
1918
1919 spin_unlock_irqrestore(&lockres->l_lock, flags);
1920
1921out:
1922 return ret;
1923}
1924
1925/*
1926 * ocfs2_file_lock() and ocfs2_file_unlock() map to a single pair of
1927 * flock() calls. The locking approach this requires is sufficiently
1928 * different from all other cluster lock types that we implement a
Daniel Mack3ad2f3fb2010-02-03 08:01:28 +08001929 * separate path to the "low-level" dlm calls. In particular:
Mark Fashehcf8e06f2007-12-20 16:43:10 -08001930 *
1931 * - No optimization of lock levels is done - we take at exactly
1932 * what's been requested.
1933 *
1934 * - No lock caching is employed. We immediately downconvert to
1935 * no-lock at unlock time. This also means flock locks never go on
1936 * the blocking list.
1937 *
1938 * - Since userspace can trivially deadlock itself with flock, we make
1939 * sure to allow cancellation of a misbehaving application's flock()
1940 * request.
1941 *
1942 * - Access to any flock lockres doesn't require concurrency, so we
1943 * can simplify the code by requiring the caller to guarantee
1944 * serialization of dlmglue flock calls.
1945 */
1946int ocfs2_file_lock(struct file *file, int ex, int trylock)
1947{
Mark Fashehe988cf12008-07-10 09:25:39 -07001948 int ret, level = ex ? DLM_LOCK_EX : DLM_LOCK_PR;
1949 unsigned int lkm_flags = trylock ? DLM_LKF_NOQUEUE : 0;
Mark Fashehcf8e06f2007-12-20 16:43:10 -08001950 unsigned long flags;
1951 struct ocfs2_file_private *fp = file->private_data;
1952 struct ocfs2_lock_res *lockres = &fp->fp_flock;
1953 struct ocfs2_super *osb = OCFS2_SB(file->f_mapping->host->i_sb);
1954 struct ocfs2_mask_waiter mw;
1955
1956 ocfs2_init_mask_waiter(&mw);
1957
1958 if ((lockres->l_flags & OCFS2_LOCK_BUSY) ||
Joel Beckerbd3e7612008-02-01 12:14:57 -08001959 (lockres->l_level > DLM_LOCK_NL)) {
Mark Fashehcf8e06f2007-12-20 16:43:10 -08001960 mlog(ML_ERROR,
1961 "File lock \"%s\" has busy or locked state: flags: 0x%lx, "
1962 "level: %u\n", lockres->l_name, lockres->l_flags,
1963 lockres->l_level);
1964 return -EINVAL;
1965 }
1966
1967 spin_lock_irqsave(&lockres->l_lock, flags);
1968 if (!(lockres->l_flags & OCFS2_LOCK_ATTACHED)) {
1969 lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BUSY, 0);
1970 spin_unlock_irqrestore(&lockres->l_lock, flags);
1971
1972 /*
1973 * Get the lock at NLMODE to start - that way we
1974 * can cancel the upconvert request if need be.
1975 */
Mark Fashehe988cf12008-07-10 09:25:39 -07001976 ret = ocfs2_lock_create(osb, lockres, DLM_LOCK_NL, 0);
Mark Fashehcf8e06f2007-12-20 16:43:10 -08001977 if (ret < 0) {
1978 mlog_errno(ret);
1979 goto out;
1980 }
1981
1982 ret = ocfs2_wait_for_mask(&mw);
1983 if (ret) {
1984 mlog_errno(ret);
1985 goto out;
1986 }
1987 spin_lock_irqsave(&lockres->l_lock, flags);
1988 }
1989
1990 lockres->l_action = OCFS2_AST_CONVERT;
Mark Fashehe988cf12008-07-10 09:25:39 -07001991 lkm_flags |= DLM_LKF_CONVERT;
Mark Fashehcf8e06f2007-12-20 16:43:10 -08001992 lockres->l_requested = level;
1993 lockres_or_flags(lockres, OCFS2_LOCK_BUSY);
1994
1995 lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BUSY, 0);
1996 spin_unlock_irqrestore(&lockres->l_lock, flags);
1997
Joel Becker4670c462008-02-01 14:39:35 -08001998 ret = ocfs2_dlm_lock(osb->cconn, level, &lockres->l_lksb, lkm_flags,
Joel Beckera796d282010-01-28 19:22:39 -08001999 lockres->l_name, OCFS2_LOCK_ID_MAX_LEN - 1);
Joel Becker7431cd72008-02-01 12:15:37 -08002000 if (ret) {
2001 if (!trylock || (ret != -EAGAIN)) {
Joel Becker24ef1812008-01-29 17:37:32 -08002002 ocfs2_log_dlm_error("ocfs2_dlm_lock", ret, lockres);
Mark Fashehcf8e06f2007-12-20 16:43:10 -08002003 ret = -EINVAL;
2004 }
2005
2006 ocfs2_recover_from_dlm_error(lockres, 1);
2007 lockres_remove_mask_waiter(lockres, &mw);
2008 goto out;
2009 }
2010
2011 ret = ocfs2_wait_for_mask_interruptible(&mw, lockres);
2012 if (ret == -ERESTARTSYS) {
2013 /*
2014 * Userspace can deadlock itself with
2015 * flock(). Current behavior locally is to allow the
2016 * deadlock, but abort the system call if a signal is
2017 * received. We follow this example, otherwise a
2018 * poorly written program could sit in the kernel until
2019 * reboot.
2020 *
2021 * Handling this is a bit more complicated for Ocfs2
2022 * though. We can't exit this function with an
2023 * outstanding lock request, so a cancel convert is
2024 * required. We intentionally overwrite 'ret' - if the
2025 * cancel fails and the lock was granted, it's easier
André Goddard Rosaaf901ca2009-11-14 13:09:05 -02002026 * to just bubble success back up to the user.
Mark Fashehcf8e06f2007-12-20 16:43:10 -08002027 */
2028 ret = ocfs2_flock_handle_signal(lockres, level);
David Teigland1693a5c2008-01-30 16:52:53 -08002029 } else if (!ret && (level > lockres->l_level)) {
2030 /* Trylock failed asynchronously */
2031 BUG_ON(!trylock);
2032 ret = -EAGAIN;
Mark Fashehcf8e06f2007-12-20 16:43:10 -08002033 }
2034
2035out:
2036
2037 mlog(0, "Lock: \"%s\" ex: %d, trylock: %d, returns: %d\n",
2038 lockres->l_name, ex, trylock, ret);
2039 return ret;
2040}
2041
2042void ocfs2_file_unlock(struct file *file)
2043{
2044 int ret;
Joel Beckerde551242008-02-01 14:45:08 -08002045 unsigned int gen;
Mark Fashehcf8e06f2007-12-20 16:43:10 -08002046 unsigned long flags;
2047 struct ocfs2_file_private *fp = file->private_data;
2048 struct ocfs2_lock_res *lockres = &fp->fp_flock;
2049 struct ocfs2_super *osb = OCFS2_SB(file->f_mapping->host->i_sb);
2050 struct ocfs2_mask_waiter mw;
2051
2052 ocfs2_init_mask_waiter(&mw);
2053
2054 if (!(lockres->l_flags & OCFS2_LOCK_ATTACHED))
2055 return;
2056
Mark Fashehe988cf12008-07-10 09:25:39 -07002057 if (lockres->l_level == DLM_LOCK_NL)
Mark Fashehcf8e06f2007-12-20 16:43:10 -08002058 return;
2059
2060 mlog(0, "Unlock: \"%s\" flags: 0x%lx, level: %d, act: %d\n",
2061 lockres->l_name, lockres->l_flags, lockres->l_level,
2062 lockres->l_action);
2063
2064 spin_lock_irqsave(&lockres->l_lock, flags);
2065 /*
2066 * Fake a blocking ast for the downconvert code.
2067 */
2068 lockres_or_flags(lockres, OCFS2_LOCK_BLOCKED);
Joel Beckerbd3e7612008-02-01 12:14:57 -08002069 lockres->l_blocking = DLM_LOCK_EX;
Mark Fashehcf8e06f2007-12-20 16:43:10 -08002070
Mark Fashehe988cf12008-07-10 09:25:39 -07002071 gen = ocfs2_prepare_downconvert(lockres, DLM_LOCK_NL);
Mark Fashehcf8e06f2007-12-20 16:43:10 -08002072 lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BUSY, 0);
2073 spin_unlock_irqrestore(&lockres->l_lock, flags);
2074
Mark Fashehe988cf12008-07-10 09:25:39 -07002075 ret = ocfs2_downconvert_lock(osb, lockres, DLM_LOCK_NL, 0, gen);
Mark Fashehcf8e06f2007-12-20 16:43:10 -08002076 if (ret) {
2077 mlog_errno(ret);
2078 return;
2079 }
2080
2081 ret = ocfs2_wait_for_mask(&mw);
2082 if (ret)
2083 mlog_errno(ret);
2084}
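
/*
 * Illustrative sketch of how the flock() pair above might be driven
 * (hypothetical, compiled-out wrapper; only ocfs2_file_lock()/
 * ocfs2_file_unlock() are real, and the caller must serialize dlmglue
 * flock calls itself):
 */
#if 0
static int example_flock(struct file *file, int ex, int trylock)
{
	int ret = ocfs2_file_lock(file, ex, trylock);

	if (ret < 0)
		return ret;	/* -EAGAIN means a trylock lost the race */

	/* ... hold the flock until the application unlocks or closes ... */

	ocfs2_file_unlock(file);
	return 0;
}
#endif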
2085
Mark Fasheh34d024f2007-09-24 15:56:19 -07002086static void ocfs2_downconvert_on_unlock(struct ocfs2_super *osb,
2087 struct ocfs2_lock_res *lockres)
Mark Fashehccd979b2005-12-15 14:31:24 -08002088{
2089 int kick = 0;
2090
Mark Fashehccd979b2005-12-15 14:31:24 -08002091 /* If we know that another node is waiting on our lock, kick
Mark Fasheh34d024f2007-09-24 15:56:19 -07002092 * the downconvert thread pre-emptively when we reach a release
Mark Fashehccd979b2005-12-15 14:31:24 -08002093 * condition. */
2094 if (lockres->l_flags & OCFS2_LOCK_BLOCKED) {
2095 switch(lockres->l_blocking) {
Joel Beckerbd3e7612008-02-01 12:14:57 -08002096 case DLM_LOCK_EX:
Mark Fashehccd979b2005-12-15 14:31:24 -08002097 if (!lockres->l_ex_holders && !lockres->l_ro_holders)
2098 kick = 1;
2099 break;
Joel Beckerbd3e7612008-02-01 12:14:57 -08002100 case DLM_LOCK_PR:
Mark Fashehccd979b2005-12-15 14:31:24 -08002101 if (!lockres->l_ex_holders)
2102 kick = 1;
2103 break;
2104 default:
2105 BUG();
2106 }
2107 }
2108
2109 if (kick)
Mark Fasheh34d024f2007-09-24 15:56:19 -07002110 ocfs2_wake_downconvert_thread(osb);
Mark Fashehccd979b2005-12-15 14:31:24 -08002111}
2112
Mark Fashehccd979b2005-12-15 14:31:24 -08002113#define OCFS2_SEC_BITS 34
2114#define OCFS2_SEC_SHIFT (64 - 34)
2115#define OCFS2_NSEC_MASK ((1ULL << OCFS2_SEC_SHIFT) - 1)
2116
2117/* LVB only has room for 64 bits of time here so we pack it for
2118 * now. */
Arnd Bergmann3a3d1e52018-11-02 15:48:23 -07002119static u64 ocfs2_pack_timespec(struct timespec64 *spec)
Mark Fashehccd979b2005-12-15 14:31:24 -08002120{
2121 u64 res;
Arnd Bergmann3a3d1e52018-11-02 15:48:23 -07002122 u64 sec = clamp_t(time64_t, spec->tv_sec, 0, 0x3ffffffffull);
Mark Fashehccd979b2005-12-15 14:31:24 -08002123 u32 nsec = spec->tv_nsec;
2124
2125 res = (sec << OCFS2_SEC_SHIFT) | (nsec & OCFS2_NSEC_MASK);
2126
2127 return res;
2128}
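
/*
 * Layout of the packed value built above: the clamped seconds occupy the
 * top 34 bits and the nanoseconds the low 30 bits,
 *
 *	packed = (tv_sec << 30) | tv_nsec
 *
 * which fits because tv_nsec < 10^9 < 2^30.
 */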
2129
2130/* Call this with the lockres locked. I am reasonably sure we don't
2131 * need ip_lock in this function as anyone who would be changing those
Mark Fashehe63aecb62007-10-18 15:30:42 -07002132 * values is supposed to be blocked in ocfs2_inode_lock right now. */
Mark Fashehccd979b2005-12-15 14:31:24 -08002133static void __ocfs2_stuff_meta_lvb(struct inode *inode)
2134{
2135 struct ocfs2_inode_info *oi = OCFS2_I(inode);
Mark Fashehe63aecb62007-10-18 15:30:42 -07002136 struct ocfs2_lock_res *lockres = &oi->ip_inode_lockres;
Mark Fashehccd979b2005-12-15 14:31:24 -08002137 struct ocfs2_meta_lvb *lvb;
2138
Mark Fasheha641dc22008-12-24 16:03:48 -08002139 lvb = ocfs2_dlm_lvb(&lockres->l_lksb);
Mark Fashehccd979b2005-12-15 14:31:24 -08002140
Mark Fasheh24c19ef2006-09-22 17:28:19 -07002141 /*
2142 * Invalidate the LVB of a deleted inode - this way other
2143 * nodes are forced to go to disk and discover the new inode
2144 * status.
2145 */
2146 if (oi->ip_flags & OCFS2_INODE_DELETED) {
2147 lvb->lvb_version = 0;
2148 goto out;
2149 }
2150
Mark Fasheh4d3b83f2006-09-12 15:22:18 -07002151 lvb->lvb_version = OCFS2_LVB_VERSION;
Mark Fashehccd979b2005-12-15 14:31:24 -08002152 lvb->lvb_isize = cpu_to_be64(i_size_read(inode));
2153 lvb->lvb_iclusters = cpu_to_be32(oi->ip_clusters);
Eric W. Biederman03ab30f2013-01-31 16:59:23 -08002154 lvb->lvb_iuid = cpu_to_be32(i_uid_read(inode));
2155 lvb->lvb_igid = cpu_to_be32(i_gid_read(inode));
Mark Fashehccd979b2005-12-15 14:31:24 -08002156 lvb->lvb_imode = cpu_to_be16(inode->i_mode);
2157 lvb->lvb_inlink = cpu_to_be16(inode->i_nlink);
2158 lvb->lvb_iatime_packed =
Arnd Bergmann3a3d1e52018-11-02 15:48:23 -07002159 cpu_to_be64(ocfs2_pack_timespec(&inode->i_atime));
Mark Fashehccd979b2005-12-15 14:31:24 -08002160 lvb->lvb_ictime_packed =
Arnd Bergmann3a3d1e52018-11-02 15:48:23 -07002161 cpu_to_be64(ocfs2_pack_timespec(&inode->i_ctime));
Mark Fashehccd979b2005-12-15 14:31:24 -08002162 lvb->lvb_imtime_packed =
Arnd Bergmann3a3d1e52018-11-02 15:48:23 -07002163 cpu_to_be64(ocfs2_pack_timespec(&inode->i_mtime));
Herbert Poetzlca4d1472006-07-03 17:27:12 -07002164 lvb->lvb_iattr = cpu_to_be32(oi->ip_attr);
Mark Fasheh15b1e362007-09-07 13:58:15 -07002165 lvb->lvb_idynfeatures = cpu_to_be16(oi->ip_dyn_features);
Mark Fashehf9e2d822006-09-12 15:35:49 -07002166 lvb->lvb_igeneration = cpu_to_be32(inode->i_generation);
Mark Fashehccd979b2005-12-15 14:31:24 -08002167
Mark Fasheh24c19ef2006-09-22 17:28:19 -07002168out:
Mark Fashehccd979b2005-12-15 14:31:24 -08002169 mlog_meta_lvb(0, lockres);
Mark Fashehccd979b2005-12-15 14:31:24 -08002170}
2171
Arnd Bergmann3a3d1e52018-11-02 15:48:23 -07002172static void ocfs2_unpack_timespec(struct timespec64 *spec,
Mark Fashehccd979b2005-12-15 14:31:24 -08002173 u64 packed_time)
2174{
2175 spec->tv_sec = packed_time >> OCFS2_SEC_SHIFT;
2176 spec->tv_nsec = packed_time & OCFS2_NSEC_MASK;
2177}
2178
2179static void ocfs2_refresh_inode_from_lvb(struct inode *inode)
2180{
2181 struct ocfs2_inode_info *oi = OCFS2_I(inode);
Mark Fashehe63aecb62007-10-18 15:30:42 -07002182 struct ocfs2_lock_res *lockres = &oi->ip_inode_lockres;
Mark Fashehccd979b2005-12-15 14:31:24 -08002183 struct ocfs2_meta_lvb *lvb;
2184
Mark Fashehccd979b2005-12-15 14:31:24 -08002185 mlog_meta_lvb(0, lockres);
2186
Mark Fasheha641dc22008-12-24 16:03:48 -08002187 lvb = ocfs2_dlm_lvb(&lockres->l_lksb);
Mark Fashehccd979b2005-12-15 14:31:24 -08002188
2189 /* We're safe here without the lockres lock... */
2190 spin_lock(&oi->ip_lock);
2191 oi->ip_clusters = be32_to_cpu(lvb->lvb_iclusters);
2192 i_size_write(inode, be64_to_cpu(lvb->lvb_isize));
2193
Herbert Poetzlca4d1472006-07-03 17:27:12 -07002194 oi->ip_attr = be32_to_cpu(lvb->lvb_iattr);
Mark Fasheh15b1e362007-09-07 13:58:15 -07002195 oi->ip_dyn_features = be16_to_cpu(lvb->lvb_idynfeatures);
Herbert Poetzlca4d1472006-07-03 17:27:12 -07002196 ocfs2_set_inode_flags(inode);
2197
Mark Fashehccd979b2005-12-15 14:31:24 -08002198 /* fast-symlinks are a special case */
2199 if (S_ISLNK(inode->i_mode) && !oi->ip_clusters)
2200 inode->i_blocks = 0;
2201 else
Mark Fasheh8110b072007-03-22 16:53:23 -07002202 inode->i_blocks = ocfs2_inode_sector_count(inode);
Mark Fashehccd979b2005-12-15 14:31:24 -08002203
Eric W. Biederman03ab30f2013-01-31 16:59:23 -08002204 i_uid_write(inode, be32_to_cpu(lvb->lvb_iuid));
2205 i_gid_write(inode, be32_to_cpu(lvb->lvb_igid));
Mark Fashehccd979b2005-12-15 14:31:24 -08002206 inode->i_mode = be16_to_cpu(lvb->lvb_imode);
Miklos Szeredibfe86842011-10-28 14:13:29 +02002207 set_nlink(inode, be16_to_cpu(lvb->lvb_inlink));
Arnd Bergmann3a3d1e52018-11-02 15:48:23 -07002208 ocfs2_unpack_timespec(&inode->i_atime,
Mark Fashehccd979b2005-12-15 14:31:24 -08002209 be64_to_cpu(lvb->lvb_iatime_packed));
Arnd Bergmann3a3d1e52018-11-02 15:48:23 -07002210 ocfs2_unpack_timespec(&inode->i_mtime,
Mark Fashehccd979b2005-12-15 14:31:24 -08002211 be64_to_cpu(lvb->lvb_imtime_packed));
Arnd Bergmann3a3d1e52018-11-02 15:48:23 -07002212 ocfs2_unpack_timespec(&inode->i_ctime,
Mark Fashehccd979b2005-12-15 14:31:24 -08002213 be64_to_cpu(lvb->lvb_ictime_packed));
2214 spin_unlock(&oi->ip_lock);
Mark Fashehccd979b2005-12-15 14:31:24 -08002215}
2216
Mark Fashehf9e2d822006-09-12 15:35:49 -07002217static inline int ocfs2_meta_lvb_is_trustable(struct inode *inode,
2218 struct ocfs2_lock_res *lockres)
Mark Fashehccd979b2005-12-15 14:31:24 -08002219{
Mark Fasheha641dc22008-12-24 16:03:48 -08002220 struct ocfs2_meta_lvb *lvb = ocfs2_dlm_lvb(&lockres->l_lksb);
Mark Fashehccd979b2005-12-15 14:31:24 -08002221
Joel Becker1c520df2009-06-19 15:14:13 -07002222 if (ocfs2_dlm_lvb_valid(&lockres->l_lksb)
2223 && lvb->lvb_version == OCFS2_LVB_VERSION
Mark Fashehf9e2d822006-09-12 15:35:49 -07002224 && be32_to_cpu(lvb->lvb_igeneration) == inode->i_generation)
Mark Fashehccd979b2005-12-15 14:31:24 -08002225 return 1;
2226 return 0;
2227}
2228
2229/* Determine whether a lock resource needs to be refreshed, and
2230 * arbitrate who gets to refresh it.
2231 *
2232 * 0 means no refresh needed.
2233 *
2234 * > 0 means you need to refresh this and you MUST call
2235 * ocfs2_complete_lock_res_refresh afterwards. */
2236static int ocfs2_should_refresh_lock_res(struct ocfs2_lock_res *lockres)
2237{
2238 unsigned long flags;
2239 int status = 0;
2240
Mark Fashehccd979b2005-12-15 14:31:24 -08002241refresh_check:
2242 spin_lock_irqsave(&lockres->l_lock, flags);
2243 if (!(lockres->l_flags & OCFS2_LOCK_NEEDS_REFRESH)) {
2244 spin_unlock_irqrestore(&lockres->l_lock, flags);
2245 goto bail;
2246 }
2247
2248 if (lockres->l_flags & OCFS2_LOCK_REFRESHING) {
2249 spin_unlock_irqrestore(&lockres->l_lock, flags);
2250
2251 ocfs2_wait_on_refreshing_lock(lockres);
2252 goto refresh_check;
2253 }
2254
2255 /* Ok, I'll be the one to refresh this lock. */
2256 lockres_or_flags(lockres, OCFS2_LOCK_REFRESHING);
2257 spin_unlock_irqrestore(&lockres->l_lock, flags);
2258
2259 status = 1;
2260bail:
Tao Mac1e8d352011-03-07 16:43:21 +08002261 mlog(0, "status %d\n", status);
Mark Fashehccd979b2005-12-15 14:31:24 -08002262 return status;
2263}
2264
2265/* If status is non-zero, I'll mark it as not being in refresh
2266 * anymore, but I won't clear the needs refresh flag. */
2267static inline void ocfs2_complete_lock_res_refresh(struct ocfs2_lock_res *lockres,
2268 int status)
2269{
2270 unsigned long flags;
Mark Fashehccd979b2005-12-15 14:31:24 -08002271
2272 spin_lock_irqsave(&lockres->l_lock, flags);
2273 lockres_clear_flags(lockres, OCFS2_LOCK_REFRESHING);
2274 if (!status)
2275 lockres_clear_flags(lockres, OCFS2_LOCK_NEEDS_REFRESH);
2276 spin_unlock_irqrestore(&lockres->l_lock, flags);
2277
2278 wake_up(&lockres->l_event);
Mark Fashehccd979b2005-12-15 14:31:24 -08002279}
2280
2281/* may or may not return a bh if it went to disk. */
Mark Fashehe63aecb62007-10-18 15:30:42 -07002282static int ocfs2_inode_lock_update(struct inode *inode,
Mark Fashehccd979b2005-12-15 14:31:24 -08002283 struct buffer_head **bh)
2284{
2285 int status = 0;
2286 struct ocfs2_inode_info *oi = OCFS2_I(inode);
Mark Fashehe63aecb62007-10-18 15:30:42 -07002287 struct ocfs2_lock_res *lockres = &oi->ip_inode_lockres;
Mark Fashehccd979b2005-12-15 14:31:24 -08002288 struct ocfs2_dinode *fe;
Sunil Mushranc271c5c2006-12-05 17:56:35 -08002289 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
Mark Fashehccd979b2005-12-15 14:31:24 -08002290
Mark Fashehbe9e9862007-04-18 15:22:08 -07002291 if (ocfs2_mount_local(osb))
2292 goto bail;
2293
Mark Fashehccd979b2005-12-15 14:31:24 -08002294 spin_lock(&oi->ip_lock);
2295 if (oi->ip_flags & OCFS2_INODE_DELETED) {
Mark Fashehb06970532006-03-03 10:24:33 -08002296 mlog(0, "Orphaned inode %llu was deleted while we "
Mark Fashehccd979b2005-12-15 14:31:24 -08002297 "were waiting on a lock. ip_flags = 0x%x\n",
Mark Fashehb06970532006-03-03 10:24:33 -08002298 (unsigned long long)oi->ip_blkno, oi->ip_flags);
Mark Fashehccd979b2005-12-15 14:31:24 -08002299 spin_unlock(&oi->ip_lock);
2300 status = -ENOENT;
2301 goto bail;
2302 }
2303 spin_unlock(&oi->ip_lock);
2304
Mark Fashehbe9e9862007-04-18 15:22:08 -07002305 if (!ocfs2_should_refresh_lock_res(lockres))
2306 goto bail;
Mark Fashehccd979b2005-12-15 14:31:24 -08002307
2308 /* This will discard any caching information we might have had
2309 * for the inode metadata. */
Joel Becker8cb471e2009-02-10 20:00:41 -08002310 ocfs2_metadata_cache_purge(INODE_CACHE(inode));
Mark Fashehccd979b2005-12-15 14:31:24 -08002311
Mark Fasheh83418972007-04-23 18:53:12 -07002312 ocfs2_extent_map_trunc(inode, 0);
2313
Mark Fashehbe9e9862007-04-18 15:22:08 -07002314 if (ocfs2_meta_lvb_is_trustable(inode, lockres)) {
Mark Fashehb06970532006-03-03 10:24:33 -08002315 mlog(0, "Trusting LVB on inode %llu\n",
2316 (unsigned long long)oi->ip_blkno);
Mark Fashehccd979b2005-12-15 14:31:24 -08002317 ocfs2_refresh_inode_from_lvb(inode);
2318 } else {
2319 /* Boo, we have to go to disk. */
2320 /* read bh, cast, ocfs2_refresh_inode */
Joel Beckerb657c952008-11-13 14:49:11 -08002321 status = ocfs2_read_inode_block(inode, bh);
Mark Fashehccd979b2005-12-15 14:31:24 -08002322 if (status < 0) {
2323 mlog_errno(status);
2324 goto bail_refresh;
2325 }
2326 fe = (struct ocfs2_dinode *) (*bh)->b_data;
2327
2328 /* This is a good chance to make sure we're not
Joel Beckerb657c952008-11-13 14:49:11 -08002329 * locking an invalid object. ocfs2_read_inode_block()
2330 * already checked that the inode block is sane.
Mark Fashehccd979b2005-12-15 14:31:24 -08002331 *
2332 * We bug on a stale inode here because we checked
2333 * above whether it was wiped from disk. The wiping
2334 * node provides a guarantee that we receive that
2335 * message and can mark the inode before dropping any
2336 * locks associated with it. */
Mark Fashehccd979b2005-12-15 14:31:24 -08002337 mlog_bug_on_msg(inode->i_generation !=
2338 le32_to_cpu(fe->i_generation),
Mark Fashehb06970532006-03-03 10:24:33 -08002339 "Invalid dinode %llu disk generation: %u "
Mark Fashehccd979b2005-12-15 14:31:24 -08002340 "inode->i_generation: %u\n",
Mark Fashehb06970532006-03-03 10:24:33 -08002341 (unsigned long long)oi->ip_blkno,
2342 le32_to_cpu(fe->i_generation),
Mark Fashehccd979b2005-12-15 14:31:24 -08002343 inode->i_generation);
2344 mlog_bug_on_msg(le64_to_cpu(fe->i_dtime) ||
2345 !(fe->i_flags & cpu_to_le32(OCFS2_VALID_FL)),
Mark Fashehb06970532006-03-03 10:24:33 -08002346 "Stale dinode %llu dtime: %llu flags: 0x%x\n",
2347 (unsigned long long)oi->ip_blkno,
2348 (unsigned long long)le64_to_cpu(fe->i_dtime),
Mark Fashehccd979b2005-12-15 14:31:24 -08002349 le32_to_cpu(fe->i_flags));
2350
2351 ocfs2_refresh_inode(inode, fe);
Sunil Mushran8ddb7b02008-05-13 13:45:15 -07002352 ocfs2_track_lock_refresh(lockres);
Mark Fashehccd979b2005-12-15 14:31:24 -08002353 }
2354
2355 status = 0;
2356bail_refresh:
Mark Fashehbe9e9862007-04-18 15:22:08 -07002357 ocfs2_complete_lock_res_refresh(lockres, status);
Mark Fashehccd979b2005-12-15 14:31:24 -08002358bail:
Mark Fashehccd979b2005-12-15 14:31:24 -08002359 return status;
2360}
2361
2362static int ocfs2_assign_bh(struct inode *inode,
2363 struct buffer_head **ret_bh,
2364 struct buffer_head *passed_bh)
2365{
2366 int status;
2367
2368 if (passed_bh) {
2369 /* Ok, the update went to disk for us, use the
2370 * returned bh. */
2371 *ret_bh = passed_bh;
2372 get_bh(*ret_bh);
2373
2374 return 0;
2375 }
2376
Joel Beckerb657c952008-11-13 14:49:11 -08002377 status = ocfs2_read_inode_block(inode, ret_bh);
Mark Fashehccd979b2005-12-15 14:31:24 -08002378 if (status < 0)
2379 mlog_errno(status);
2380
2381 return status;
2382}
2383
2384/*
2385 * Returns < 0 on error if the callback will never be called; otherwise
2386 * the result of the lock will be communicated via the callback.
2387 */
Jan Karacb257972009-06-04 15:26:50 +02002388int ocfs2_inode_lock_full_nested(struct inode *inode,
2389 struct buffer_head **ret_bh,
2390 int ex,
2391 int arg_flags,
2392 int subclass)
Mark Fashehccd979b2005-12-15 14:31:24 -08002393{
Joel Beckerbd3e7612008-02-01 12:14:57 -08002394 int status, level, acquired;
2395 u32 dlm_flags;
Sunil Mushranc271c5c2006-12-05 17:56:35 -08002396 struct ocfs2_lock_res *lockres = NULL;
Mark Fashehccd979b2005-12-15 14:31:24 -08002397 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
2398 struct buffer_head *local_bh = NULL;
2399
Mark Fashehb06970532006-03-03 10:24:33 -08002400 mlog(0, "inode %llu, take %s META lock\n",
2401 (unsigned long long)OCFS2_I(inode)->ip_blkno,
Mark Fashehccd979b2005-12-15 14:31:24 -08002402 ex ? "EXMODE" : "PRMODE");
2403
2404 status = 0;
2405 acquired = 0;
2406 /* We'll allow faking a readonly metadata lock for
2407 * rodevices. */
2408 if (ocfs2_is_hard_readonly(osb)) {
2409 if (ex)
2410 status = -EROFS;
Tiger Yang03efed82011-05-28 00:34:19 +08002411 goto getbh;
Mark Fashehccd979b2005-12-15 14:31:24 -08002412 }
2413
Eric Ren439a36b2017-02-22 15:40:41 -08002414 if ((arg_flags & OCFS2_META_LOCK_GETBH) ||
2415 ocfs2_mount_local(osb))
2416 goto update;
Sunil Mushranc271c5c2006-12-05 17:56:35 -08002417
Mark Fashehccd979b2005-12-15 14:31:24 -08002418 if (!(arg_flags & OCFS2_META_LOCK_RECOVERY))
Joel Becker553abd02008-02-01 12:03:57 -08002419 ocfs2_wait_for_recovery(osb);
Mark Fashehccd979b2005-12-15 14:31:24 -08002420
Mark Fashehe63aecb62007-10-18 15:30:42 -07002421 lockres = &OCFS2_I(inode)->ip_inode_lockres;
Joel Beckerbd3e7612008-02-01 12:14:57 -08002422 level = ex ? DLM_LOCK_EX : DLM_LOCK_PR;
Mark Fashehccd979b2005-12-15 14:31:24 -08002423 dlm_flags = 0;
2424 if (arg_flags & OCFS2_META_LOCK_NOQUEUE)
Joel Beckerbd3e7612008-02-01 12:14:57 -08002425 dlm_flags |= DLM_LKF_NOQUEUE;
Mark Fashehccd979b2005-12-15 14:31:24 -08002426
Jan Karacb257972009-06-04 15:26:50 +02002427 status = __ocfs2_cluster_lock(osb, lockres, level, dlm_flags,
2428 arg_flags, subclass, _RET_IP_);
Mark Fashehccd979b2005-12-15 14:31:24 -08002429 if (status < 0) {
Zach Brown41003a72013-05-07 16:18:25 -07002430 if (status != -EAGAIN)
Mark Fashehccd979b2005-12-15 14:31:24 -08002431 mlog_errno(status);
2432 goto bail;
2433 }
2434
2435 /* Notify the error cleanup path to drop the cluster lock. */
2436 acquired = 1;
2437
2438 /* We wait twice because a node may have died while we were in
2439 * the lower dlm layers. The second time though, we've
2440 * committed to owning this lock so we don't allow signals to
2441 * abort the operation. */
2442 if (!(arg_flags & OCFS2_META_LOCK_RECOVERY))
Joel Becker553abd02008-02-01 12:03:57 -08002443 ocfs2_wait_for_recovery(osb);
Mark Fashehccd979b2005-12-15 14:31:24 -08002444
Eric Ren439a36b2017-02-22 15:40:41 -08002445update:
Mark Fasheh24c19ef2006-09-22 17:28:19 -07002446 /*
2447 * We only see this flag if we're being called from
2448 * ocfs2_read_locked_inode(). It means we're locking an inode
2449 * which hasn't been populated yet, so clear the refresh flag
2450 * and let the caller handle it.
2451 */
2452 if (inode->i_state & I_NEW) {
2453 status = 0;
Sunil Mushranc271c5c2006-12-05 17:56:35 -08002454 if (lockres)
2455 ocfs2_complete_lock_res_refresh(lockres, 0);
Mark Fasheh24c19ef2006-09-22 17:28:19 -07002456 goto bail;
2457 }
2458
Mark Fashehccd979b2005-12-15 14:31:24 -08002459 /* This is fun. The caller may want a bh back, or it may
Mark Fashehe63aecb62007-10-18 15:30:42 -07002460 * not. ocfs2_inode_lock_update definitely wants one in, but
Mark Fashehccd979b2005-12-15 14:31:24 -08002461 * may or may not read one, depending on what's in the
2462 * LVB. The result of all of this is that we've *only* gone to
2463 * disk if we have to, so the complexity is worthwhile. */
Mark Fashehe63aecb62007-10-18 15:30:42 -07002464 status = ocfs2_inode_lock_update(inode, &local_bh);
Mark Fashehccd979b2005-12-15 14:31:24 -08002465 if (status < 0) {
2466 if (status != -ENOENT)
2467 mlog_errno(status);
2468 goto bail;
2469 }
Tiger Yang03efed82011-05-28 00:34:19 +08002470getbh:
Mark Fashehccd979b2005-12-15 14:31:24 -08002471 if (ret_bh) {
2472 status = ocfs2_assign_bh(inode, ret_bh, local_bh);
2473 if (status < 0) {
2474 mlog_errno(status);
2475 goto bail;
2476 }
2477 }
2478
Mark Fashehccd979b2005-12-15 14:31:24 -08002479bail:
2480 if (status < 0) {
2481 if (ret_bh && (*ret_bh)) {
2482 brelse(*ret_bh);
2483 *ret_bh = NULL;
2484 }
2485 if (acquired)
Mark Fashehe63aecb62007-10-18 15:30:42 -07002486 ocfs2_inode_unlock(inode, ex);
Mark Fashehccd979b2005-12-15 14:31:24 -08002487 }
2488
2489 if (local_bh)
2490 brelse(local_bh);
2491
Mark Fashehccd979b2005-12-15 14:31:24 -08002492 return status;
2493}
2494
2495/*
Mark Fasheh34d024f2007-09-24 15:56:19 -07002496 * This is working around a lock inversion between tasks acquiring DLM
2497 * locks while holding a page lock and the downconvert thread which
2498 * blocks dlm lock acquisition while acquiring page locks.
Mark Fashehccd979b2005-12-15 14:31:24 -08002499 *
2500 * ** These _with_page variants are only intended to be called from aop
2501 * methods that hold page locks and return a very specific *positive* error
2502 * code that aop methods pass up to the VFS -- test for errors with != 0. **
2503 *
Mark Fasheh34d024f2007-09-24 15:56:19 -07002504 * The DLM is called such that it returns -EAGAIN if it would have
2505 * blocked waiting for the downconvert thread. In that case we unlock
2506 * our page so the downconvert thread can make progress. Once we've
2507 * done this we have to return AOP_TRUNCATED_PAGE so the aop method
2508 * that called us can bubble that back up into the VFS who will then
2509 * immediately retry the aop call.
Mark Fashehccd979b2005-12-15 14:31:24 -08002510 */
Mark Fashehe63aecb62007-10-18 15:30:42 -07002511int ocfs2_inode_lock_with_page(struct inode *inode,
Mark Fashehccd979b2005-12-15 14:31:24 -08002512 struct buffer_head **ret_bh,
2513 int ex,
2514 struct page *page)
2515{
2516 int ret;
2517
Mark Fashehe63aecb62007-10-18 15:30:42 -07002518 ret = ocfs2_inode_lock_full(inode, ret_bh, ex, OCFS2_LOCK_NONBLOCK);
Mark Fashehccd979b2005-12-15 14:31:24 -08002519 if (ret == -EAGAIN) {
2520 unlock_page(page);
Gang Heff26cc12018-01-31 16:14:48 -08002521 /*
2522 * If we can't get inode lock immediately, we should not return
2523 * directly here, since this will lead to a softlockup problem.
2524 * The method is to get a blocking lock and immediately unlock
2525 * before returning, this can avoid CPU resource waste due to
2526 * lots of retries, and benefits fairness in getting lock.
2527 */
2528 if (ocfs2_inode_lock(inode, ret_bh, ex) == 0)
2529 ocfs2_inode_unlock(inode, ex);
Mark Fashehccd979b2005-12-15 14:31:24 -08002530 ret = AOP_TRUNCATED_PAGE;
2531 }
2532
2533 return ret;
2534}
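
/*
 * Illustrative sketch of the aop-side pattern described above
 * (hypothetical, compiled-out helper; AOP_TRUNCATED_PAGE tells the VFS
 * to retry the whole aop call after the page lock has been dropped):
 */
#if 0
static int example_aop_helper(struct inode *inode, struct page *page)
{
	struct buffer_head *bh = NULL;
	int ret = ocfs2_inode_lock_with_page(inode, &bh, 0, page);

	if (ret != 0)
		return ret;	/* may be AOP_TRUNCATED_PAGE - VFS retries */

	/* ... fill the (still locked) page under the PR inode lock ... */

	ocfs2_inode_unlock(inode, 0);
	brelse(bh);
	return 0;
}
#endif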
2535
Mark Fashehe63aecb62007-10-18 15:30:42 -07002536int ocfs2_inode_lock_atime(struct inode *inode,
Tiger Yang7f1a37e2006-11-15 15:48:42 +08002537 struct vfsmount *vfsmnt,
Gang Hec4c24162018-01-31 16:15:25 -08002538 int *level, int wait)
Tiger Yang7f1a37e2006-11-15 15:48:42 +08002539{
2540 int ret;
2541
Gang Hec4c24162018-01-31 16:15:25 -08002542 if (wait)
2543 ret = ocfs2_inode_lock(inode, NULL, 0);
2544 else
2545 ret = ocfs2_try_inode_lock(inode, NULL, 0);
2546
Tiger Yang7f1a37e2006-11-15 15:48:42 +08002547 if (ret < 0) {
Gang Hec4c24162018-01-31 16:15:25 -08002548 if (ret != -EAGAIN)
2549 mlog_errno(ret);
Tiger Yang7f1a37e2006-11-15 15:48:42 +08002550 return ret;
2551 }
2552
2553 /*
2554 * If we should update atime, we will get an EX lock,
2555 * otherwise we just get a PR lock.
2556 */
2557 if (ocfs2_should_update_atime(inode, vfsmnt)) {
2558 struct buffer_head *bh = NULL;
2559
Mark Fashehe63aecb62007-10-18 15:30:42 -07002560 ocfs2_inode_unlock(inode, 0);
Gang Hec4c24162018-01-31 16:15:25 -08002561 if (wait)
2562 ret = ocfs2_inode_lock(inode, &bh, 1);
2563 else
2564 ret = ocfs2_try_inode_lock(inode, &bh, 1);
2565
Tiger Yang7f1a37e2006-11-15 15:48:42 +08002566 if (ret < 0) {
Gang Hec4c24162018-01-31 16:15:25 -08002567 if (ret != -EAGAIN)
2568 mlog_errno(ret);
Tiger Yang7f1a37e2006-11-15 15:48:42 +08002569 return ret;
2570 }
2571 *level = 1;
2572 if (ocfs2_should_update_atime(inode, vfsmnt))
2573 ocfs2_update_inode_atime(inode, bh);
2574 if (bh)
2575 brelse(bh);
2576 } else
2577 *level = 0;
2578
Tiger Yang7f1a37e2006-11-15 15:48:42 +08002579 return ret;
2580}
2581
Mark Fashehe63aecb62007-10-18 15:30:42 -07002582void ocfs2_inode_unlock(struct inode *inode,
Mark Fashehccd979b2005-12-15 14:31:24 -08002583 int ex)
2584{
Joel Beckerbd3e7612008-02-01 12:14:57 -08002585 int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR;
Mark Fashehe63aecb62007-10-18 15:30:42 -07002586 struct ocfs2_lock_res *lockres = &OCFS2_I(inode)->ip_inode_lockres;
Sunil Mushranc271c5c2006-12-05 17:56:35 -08002587 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
Mark Fashehccd979b2005-12-15 14:31:24 -08002588
Mark Fashehb06970532006-03-03 10:24:33 -08002589 mlog(0, "inode %llu drop %s META lock\n",
2590 (unsigned long long)OCFS2_I(inode)->ip_blkno,
Mark Fashehccd979b2005-12-15 14:31:24 -08002591 ex ? "EXMODE" : "PRMODE");
2592
piaojun1119d3c2018-04-05 16:18:33 -07002593 if (!ocfs2_is_hard_readonly(osb) &&
Sunil Mushranc271c5c2006-12-05 17:56:35 -08002594 !ocfs2_mount_local(osb))
piaojun1119d3c2018-04-05 16:18:33 -07002595 ocfs2_cluster_unlock(osb, lockres, level);
Mark Fashehccd979b2005-12-15 14:31:24 -08002596}
2597
Eric Ren439a36b2017-02-22 15:40:41 -08002598/*
2599 * These _tracker variants are introduced to deal with the recursive cluster
2600 * locking issue. The idea is to keep track of a lock holder on the stack of
2601 * the current process. If there's a lock holder on the stack, we know the
2602 * task context is already protected by cluster locking. Currently, they're
2603 * used in some VFS entry routines.
2604 *
2605 * return < 0 on error, return == 0 if there's no lock holder on the stack
2606 * before this call, return == 1 if this call is a recursive lock attempt.
Larry Chen133b81f2018-06-07 17:04:43 -07002607 * return == -EINVAL if this lock attempt would cause an upgrade, which is forbidden.
2608 *
2609 * When taking lock levels into account, we face several different situations.
2610 *
2611 * 1. no lock is held
2612 * In this case, just lock the inode as requested and return 0
2613 *
2614 * 2. We are holding a lock
2615 * For this situation, things diverge into several cases
2616 *
2617 * wanted holding what to do
2618 * ex ex see 2.1 below
2619 * ex pr see 2.2 below
2620 * pr ex see 2.1 below
2621 * pr pr see 2.1 below
2622 *
2623 * 2.1 The lock level that is being held is compatible
2624 * with the wanted level, so no lock action will be taken.
2625 *
2626 * 2.2 Otherwise, an upgrade is needed, but it is forbidden.
2627 *
 2628 * The reason why an upgrade within a process is forbidden is that
 2629 * a lock upgrade may cause deadlock. The following illustrates
 2630 * how it happens.
2631 *
2632 * thread on node1 thread on node2
2633 * ocfs2_inode_lock_tracker(ex=0)
2634 *
2635 * <====== ocfs2_inode_lock_tracker(ex=1)
2636 *
2637 * ocfs2_inode_lock_tracker(ex=1)
Eric Ren439a36b2017-02-22 15:40:41 -08002638 */
2639int ocfs2_inode_lock_tracker(struct inode *inode,
2640 struct buffer_head **ret_bh,
2641 int ex,
2642 struct ocfs2_lock_holder *oh)
2643{
Larry Chen133b81f2018-06-07 17:04:43 -07002644 int status = 0;
Eric Ren439a36b2017-02-22 15:40:41 -08002645 struct ocfs2_lock_res *lockres;
Larry Chen133b81f2018-06-07 17:04:43 -07002646 struct ocfs2_lock_holder *tmp_oh;
2647 struct pid *pid = task_pid(current);
2648
Eric Ren439a36b2017-02-22 15:40:41 -08002649
2650 lockres = &OCFS2_I(inode)->ip_inode_lockres;
Larry Chen133b81f2018-06-07 17:04:43 -07002651 tmp_oh = ocfs2_pid_holder(lockres, pid);
Eric Ren439a36b2017-02-22 15:40:41 -08002652
Larry Chen133b81f2018-06-07 17:04:43 -07002653 if (!tmp_oh) {
2654 /*
 2655 * This corresponds to case 1.
 2656 * We have not taken any lock before.
2657 */
2658 status = ocfs2_inode_lock_full(inode, ret_bh, ex, 0);
2659 if (status < 0) {
2660 if (status != -ENOENT)
2661 mlog_errno(status);
2662 return status;
2663 }
2664
2665 oh->oh_ex = ex;
2666 ocfs2_add_holder(lockres, oh);
2667 return 0;
2668 }
2669
2670 if (unlikely(ex && !tmp_oh->oh_ex)) {
2671 /*
 2672 * case 2.2: an upgrade may cause deadlock, forbid it.
2673 */
2674 mlog(ML_ERROR, "Recursive locking is not permitted to "
2675 "upgrade to EX level from PR level.\n");
2676 dump_stack();
2677 return -EINVAL;
2678 }
2679
2680 /*
 2681 * case 2.1: the OCFS2_META_LOCK_GETBH flag makes ocfs2_inode_lock_full()
 2682 * ignore the lock level and just update the buffer head.
2683 */
2684 if (ret_bh) {
2685 status = ocfs2_inode_lock_full(inode, ret_bh, ex,
2686 OCFS2_META_LOCK_GETBH);
Eric Ren439a36b2017-02-22 15:40:41 -08002687 if (status < 0) {
2688 if (status != -ENOENT)
2689 mlog_errno(status);
2690 return status;
2691 }
2692 }
Larry Chen133b81f2018-06-07 17:04:43 -07002693 return tmp_oh ? 1 : 0;
Eric Ren439a36b2017-02-22 15:40:41 -08002694}
2695
2696void ocfs2_inode_unlock_tracker(struct inode *inode,
2697 int ex,
2698 struct ocfs2_lock_holder *oh,
2699 int had_lock)
2700{
2701 struct ocfs2_lock_res *lockres;
2702
2703 lockres = &OCFS2_I(inode)->ip_inode_lockres;
Eric Ren8818efa2017-06-23 15:08:55 -07002704 /* had_lock means that the current process already took the cluster
Larry Chen133b81f2018-06-07 17:04:43 -07002705 * lock previously.
2706 * If had_lock is 1, we have nothing to do here.
2707 * If had_lock is 0, we will release the lock.
Eric Ren8818efa2017-06-23 15:08:55 -07002708 */
Eric Ren439a36b2017-02-22 15:40:41 -08002709 if (!had_lock) {
Larry Chen133b81f2018-06-07 17:04:43 -07002710 ocfs2_inode_unlock(inode, oh->oh_ex);
Eric Ren439a36b2017-02-22 15:40:41 -08002711 ocfs2_remove_holder(lockres, oh);
Eric Ren439a36b2017-02-22 15:40:41 -08002712 }
2713}
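/*
 * A minimal usage sketch of the tracker pair above, assuming a VFS entry
 * point that only needs a PR lock; the surrounding caller and its error
 * handling are hypothetical, not a real call site:
 *
 *	struct ocfs2_lock_holder oh;
 *	int had_lock;
 *
 *	had_lock = ocfs2_inode_lock_tracker(inode, NULL, 0, &oh);
 *	if (had_lock < 0)
 *		return had_lock;
 *	... work protected by the PR cluster lock ...
 *	ocfs2_inode_unlock_tracker(inode, 0, &oh, had_lock);
 */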
2714
Sunil Mushrandf152c22009-06-22 11:40:07 -07002715int ocfs2_orphan_scan_lock(struct ocfs2_super *osb, u32 *seqno)
Srinivas Eeda83273932009-06-03 17:02:55 -07002716{
2717 struct ocfs2_lock_res *lockres;
2718 struct ocfs2_orphan_scan_lvb *lvb;
Srinivas Eeda83273932009-06-03 17:02:55 -07002719 int status = 0;
2720
Sunil Mushrandf152c22009-06-22 11:40:07 -07002721 if (ocfs2_is_hard_readonly(osb))
2722 return -EROFS;
2723
2724 if (ocfs2_mount_local(osb))
2725 return 0;
2726
Srinivas Eeda83273932009-06-03 17:02:55 -07002727 lockres = &osb->osb_orphan_scan.os_lockres;
Sunil Mushrandf152c22009-06-22 11:40:07 -07002728 status = ocfs2_cluster_lock(osb, lockres, DLM_LOCK_EX, 0, 0);
Srinivas Eeda83273932009-06-03 17:02:55 -07002729 if (status < 0)
2730 return status;
2731
2732 lvb = ocfs2_dlm_lvb(&lockres->l_lksb);
Joel Becker1c520df2009-06-19 15:14:13 -07002733 if (ocfs2_dlm_lvb_valid(&lockres->l_lksb) &&
2734 lvb->lvb_version == OCFS2_ORPHAN_LVB_VERSION)
Srinivas Eeda83273932009-06-03 17:02:55 -07002735 *seqno = be32_to_cpu(lvb->lvb_os_seqno);
Sunil Mushran32119492009-06-19 16:53:18 -07002736 else
2737 *seqno = osb->osb_orphan_scan.os_seqno + 1;
2738
Srinivas Eeda83273932009-06-03 17:02:55 -07002739 return status;
2740}
2741
Sunil Mushrandf152c22009-06-22 11:40:07 -07002742void ocfs2_orphan_scan_unlock(struct ocfs2_super *osb, u32 seqno)
Srinivas Eeda83273932009-06-03 17:02:55 -07002743{
2744 struct ocfs2_lock_res *lockres;
2745 struct ocfs2_orphan_scan_lvb *lvb;
Srinivas Eeda83273932009-06-03 17:02:55 -07002746
Sunil Mushrandf152c22009-06-22 11:40:07 -07002747 if (!ocfs2_is_hard_readonly(osb) && !ocfs2_mount_local(osb)) {
2748 lockres = &osb->osb_orphan_scan.os_lockres;
2749 lvb = ocfs2_dlm_lvb(&lockres->l_lksb);
2750 lvb->lvb_version = OCFS2_ORPHAN_LVB_VERSION;
2751 lvb->lvb_os_seqno = cpu_to_be32(seqno);
2752 ocfs2_cluster_unlock(osb, lockres, DLM_LOCK_EX);
2753 }
Srinivas Eeda83273932009-06-03 17:02:55 -07002754}
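/*
 * A sketch of how the orphan scan sequence number travels in the LVB,
 * assuming a periodic scanner; the caller and the last_seen_seqno
 * variable below are hypothetical:
 *
 *	u32 seqno = 0;
 *
 *	if (ocfs2_orphan_scan_lock(osb, &seqno) < 0)
 *		return;
 *	if (seqno != last_seen_seqno) {
 *		(another node scanned since we last looked; remember its seqno)
 *		last_seen_seqno = seqno;
 *	} else {
 *		(... scan the orphan directories ...)
 *		seqno++;
 *	}
 *	ocfs2_orphan_scan_unlock(osb, seqno);
 */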
2755
Mark Fashehccd979b2005-12-15 14:31:24 -08002756int ocfs2_super_lock(struct ocfs2_super *osb,
2757 int ex)
2758{
Sunil Mushranc271c5c2006-12-05 17:56:35 -08002759 int status = 0;
Joel Beckerbd3e7612008-02-01 12:14:57 -08002760 int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR;
Mark Fashehccd979b2005-12-15 14:31:24 -08002761 struct ocfs2_lock_res *lockres = &osb->osb_super_lockres;
Mark Fashehccd979b2005-12-15 14:31:24 -08002762
Mark Fashehccd979b2005-12-15 14:31:24 -08002763 if (ocfs2_is_hard_readonly(osb))
2764 return -EROFS;
2765
Sunil Mushranc271c5c2006-12-05 17:56:35 -08002766 if (ocfs2_mount_local(osb))
2767 goto bail;
2768
Mark Fashehccd979b2005-12-15 14:31:24 -08002769 status = ocfs2_cluster_lock(osb, lockres, level, 0, 0);
2770 if (status < 0) {
2771 mlog_errno(status);
2772 goto bail;
2773 }
2774
2775 /* The super block lock path is really in the best position to
2776 * know when resources covered by the lock need to be
2777 * refreshed, so we do it here. Of course, making sense of
2778 * everything is up to the caller :) */
2779 status = ocfs2_should_refresh_lock_res(lockres);
Mark Fashehccd979b2005-12-15 14:31:24 -08002780 if (status) {
Mark Fasheh8e8a4602008-02-01 11:59:09 -08002781 status = ocfs2_refresh_slot_info(osb);
Mark Fashehccd979b2005-12-15 14:31:24 -08002782
2783 ocfs2_complete_lock_res_refresh(lockres, status);
2784
Junxiao Bi3278bb72013-02-21 16:42:45 -08002785 if (status < 0) {
2786 ocfs2_cluster_unlock(osb, lockres, level);
Mark Fashehccd979b2005-12-15 14:31:24 -08002787 mlog_errno(status);
Junxiao Bi3278bb72013-02-21 16:42:45 -08002788 }
Sunil Mushran8ddb7b02008-05-13 13:45:15 -07002789 ocfs2_track_lock_refresh(lockres);
Mark Fashehccd979b2005-12-15 14:31:24 -08002790 }
2791bail:
Mark Fashehccd979b2005-12-15 14:31:24 -08002792 return status;
2793}
2794
2795void ocfs2_super_unlock(struct ocfs2_super *osb,
2796 int ex)
2797{
Joel Beckerbd3e7612008-02-01 12:14:57 -08002798 int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR;
Mark Fashehccd979b2005-12-15 14:31:24 -08002799 struct ocfs2_lock_res *lockres = &osb->osb_super_lockres;
2800
Sunil Mushranc271c5c2006-12-05 17:56:35 -08002801 if (!ocfs2_mount_local(osb))
2802 ocfs2_cluster_unlock(osb, lockres, level);
Mark Fashehccd979b2005-12-15 14:31:24 -08002803}
2804
2805int ocfs2_rename_lock(struct ocfs2_super *osb)
2806{
2807 int status;
2808 struct ocfs2_lock_res *lockres = &osb->osb_rename_lockres;
2809
2810 if (ocfs2_is_hard_readonly(osb))
2811 return -EROFS;
2812
Sunil Mushranc271c5c2006-12-05 17:56:35 -08002813 if (ocfs2_mount_local(osb))
2814 return 0;
2815
Joel Beckerbd3e7612008-02-01 12:14:57 -08002816 status = ocfs2_cluster_lock(osb, lockres, DLM_LOCK_EX, 0, 0);
Mark Fashehccd979b2005-12-15 14:31:24 -08002817 if (status < 0)
2818 mlog_errno(status);
2819
2820 return status;
2821}
2822
2823void ocfs2_rename_unlock(struct ocfs2_super *osb)
2824{
2825 struct ocfs2_lock_res *lockres = &osb->osb_rename_lockres;
2826
Sunil Mushranc271c5c2006-12-05 17:56:35 -08002827 if (!ocfs2_mount_local(osb))
Joel Beckerbd3e7612008-02-01 12:14:57 -08002828 ocfs2_cluster_unlock(osb, lockres, DLM_LOCK_EX);
Mark Fashehccd979b2005-12-15 14:31:24 -08002829}
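/*
 * The rename lock is a single cluster-wide EX lock; a hypothetical caller
 * would bracket, e.g., a rename that may move an entry between directories:
 *
 *	status = ocfs2_rename_lock(osb);
 *	if (status < 0)
 *		return status;
 *	... perform the rename while holding the cluster-wide lock ...
 *	ocfs2_rename_unlock(osb);
 */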
2830
wengang wang6ca497a2009-03-06 21:29:10 +08002831int ocfs2_nfs_sync_lock(struct ocfs2_super *osb, int ex)
2832{
2833 int status;
2834 struct ocfs2_lock_res *lockres = &osb->osb_nfs_sync_lockres;
2835
2836 if (ocfs2_is_hard_readonly(osb))
2837 return -EROFS;
2838
2839 if (ocfs2_mount_local(osb))
2840 return 0;
2841
2842 status = ocfs2_cluster_lock(osb, lockres, ex ? LKM_EXMODE : LKM_PRMODE,
2843 0, 0);
2844 if (status < 0)
2845 mlog(ML_ERROR, "lock on nfs sync lock failed %d\n", status);
2846
2847 return status;
2848}
2849
2850void ocfs2_nfs_sync_unlock(struct ocfs2_super *osb, int ex)
2851{
2852 struct ocfs2_lock_res *lockres = &osb->osb_nfs_sync_lockres;
2853
2854 if (!ocfs2_mount_local(osb))
2855 ocfs2_cluster_unlock(osb, lockres,
2856 ex ? LKM_EXMODE : LKM_PRMODE);
2857}
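/*
 * A minimal sketch for the nfs_sync lock pair above; whether a given path
 * takes it shared or exclusive is up to the caller (the example below is
 * hypothetical and simply mirrors the ex flag through both calls):
 *
 *	status = ocfs2_nfs_sync_lock(osb, ex);
 *	if (status < 0)
 *		return status;
 *	... resolve or invalidate an NFS file handle ...
 *	ocfs2_nfs_sync_unlock(osb, ex);
 */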
2858
Gang He4882abe2018-01-31 16:15:10 -08002859int ocfs2_trim_fs_lock(struct ocfs2_super *osb,
2860 struct ocfs2_trim_fs_info *info, int trylock)
2861{
2862 int status;
2863 struct ocfs2_trim_fs_lvb *lvb;
2864 struct ocfs2_lock_res *lockres = &osb->osb_trim_fs_lockres;
2865
2866 if (info)
2867 info->tf_valid = 0;
2868
2869 if (ocfs2_is_hard_readonly(osb))
2870 return -EROFS;
2871
2872 if (ocfs2_mount_local(osb))
2873 return 0;
2874
2875 status = ocfs2_cluster_lock(osb, lockres, DLM_LOCK_EX,
2876 trylock ? DLM_LKF_NOQUEUE : 0, 0);
2877 if (status < 0) {
2878 if (status != -EAGAIN)
2879 mlog_errno(status);
2880 return status;
2881 }
2882
2883 if (info) {
2884 lvb = ocfs2_dlm_lvb(&lockres->l_lksb);
2885 if (ocfs2_dlm_lvb_valid(&lockres->l_lksb) &&
2886 lvb->lvb_version == OCFS2_TRIMFS_LVB_VERSION) {
2887 info->tf_valid = 1;
2888 info->tf_success = lvb->lvb_success;
2889 info->tf_nodenum = be32_to_cpu(lvb->lvb_nodenum);
2890 info->tf_start = be64_to_cpu(lvb->lvb_start);
2891 info->tf_len = be64_to_cpu(lvb->lvb_len);
2892 info->tf_minlen = be64_to_cpu(lvb->lvb_minlen);
2893 info->tf_trimlen = be64_to_cpu(lvb->lvb_trimlen);
2894 }
2895 }
2896
2897 return status;
2898}
2899
2900void ocfs2_trim_fs_unlock(struct ocfs2_super *osb,
2901 struct ocfs2_trim_fs_info *info)
2902{
2903 struct ocfs2_trim_fs_lvb *lvb;
2904 struct ocfs2_lock_res *lockres = &osb->osb_trim_fs_lockres;
2905
2906 if (ocfs2_mount_local(osb))
2907 return;
2908
2909 if (info) {
2910 lvb = ocfs2_dlm_lvb(&lockres->l_lksb);
2911 lvb->lvb_version = OCFS2_TRIMFS_LVB_VERSION;
2912 lvb->lvb_success = info->tf_success;
2913 lvb->lvb_nodenum = cpu_to_be32(info->tf_nodenum);
2914 lvb->lvb_start = cpu_to_be64(info->tf_start);
2915 lvb->lvb_len = cpu_to_be64(info->tf_len);
2916 lvb->lvb_minlen = cpu_to_be64(info->tf_minlen);
2917 lvb->lvb_trimlen = cpu_to_be64(info->tf_trimlen);
2918 }
2919
2920 ocfs2_cluster_unlock(osb, lockres, DLM_LOCK_EX);
2921}
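/*
 * A sketch of the intended trim_fs handshake (hypothetical caller): the
 * LVB lets one node reuse the result of a trim another node already ran.
 *
 *	struct ocfs2_trim_fs_info info;
 *
 *	status = ocfs2_trim_fs_lock(osb, NULL, 1);
 *	if (status == -EAGAIN) {
 *		(another node holds the lock; block until it finishes)
 *		status = ocfs2_trim_fs_lock(osb, &info, 0);
 *		if (!status && info.tf_valid && info.tf_success)
 *			(reuse info.tf_trimlen instead of trimming again)
 *	}
 *	(otherwise perform the trim and record the result in info)
 *	ocfs2_trim_fs_unlock(osb, &info);
 */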
2922
Mark Fashehd680efe2006-09-08 14:14:34 -07002923int ocfs2_dentry_lock(struct dentry *dentry, int ex)
2924{
2925 int ret;
Joel Beckerbd3e7612008-02-01 12:14:57 -08002926 int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR;
Mark Fashehd680efe2006-09-08 14:14:34 -07002927 struct ocfs2_dentry_lock *dl = dentry->d_fsdata;
2928 struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb);
2929
2930 BUG_ON(!dl);
2931
Tiger Yang03efed82011-05-28 00:34:19 +08002932 if (ocfs2_is_hard_readonly(osb)) {
2933 if (ex)
2934 return -EROFS;
2935 return 0;
2936 }
Mark Fashehd680efe2006-09-08 14:14:34 -07002937
Sunil Mushranc271c5c2006-12-05 17:56:35 -08002938 if (ocfs2_mount_local(osb))
2939 return 0;
2940
Mark Fashehd680efe2006-09-08 14:14:34 -07002941 ret = ocfs2_cluster_lock(osb, &dl->dl_lockres, level, 0, 0);
2942 if (ret < 0)
2943 mlog_errno(ret);
2944
2945 return ret;
2946}
2947
2948void ocfs2_dentry_unlock(struct dentry *dentry, int ex)
2949{
Joel Beckerbd3e7612008-02-01 12:14:57 -08002950 int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR;
Mark Fashehd680efe2006-09-08 14:14:34 -07002951 struct ocfs2_dentry_lock *dl = dentry->d_fsdata;
2952 struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb);
2953
Tiger Yang03efed82011-05-28 00:34:19 +08002954 if (!ocfs2_is_hard_readonly(osb) && !ocfs2_mount_local(osb))
Sunil Mushranc271c5c2006-12-05 17:56:35 -08002955 ocfs2_cluster_unlock(osb, &dl->dl_lockres, level);
Mark Fashehd680efe2006-09-08 14:14:34 -07002956}
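/*
 * A minimal usage sketch (hypothetical caller) for the dentry lock pair
 * above; lock and unlock must use the same level:
 *
 *	ret = ocfs2_dentry_lock(dentry, 0);
 *	if (ret < 0)
 *		return ret;
 *	... inspect the dentry under the PR cluster lock ...
 *	ocfs2_dentry_unlock(dentry, 0);
 */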
2957
Mark Fashehccd979b2005-12-15 14:31:24 -08002958/* Reference counting of the dlm debug structure. We want this because
 2959 * open references on the debug inodes can live on after an unmount, so
2960 * we can't rely on the ocfs2_super to always exist. */
2961static void ocfs2_dlm_debug_free(struct kref *kref)
2962{
2963 struct ocfs2_dlm_debug *dlm_debug;
2964
2965 dlm_debug = container_of(kref, struct ocfs2_dlm_debug, d_refcnt);
2966
2967 kfree(dlm_debug);
2968}
2969
2970void ocfs2_put_dlm_debug(struct ocfs2_dlm_debug *dlm_debug)
2971{
2972 if (dlm_debug)
2973 kref_put(&dlm_debug->d_refcnt, ocfs2_dlm_debug_free);
2974}
2975
2976static void ocfs2_get_dlm_debug(struct ocfs2_dlm_debug *debug)
2977{
2978 kref_get(&debug->d_refcnt);
2979}
2980
2981struct ocfs2_dlm_debug *ocfs2_new_dlm_debug(void)
2982{
2983 struct ocfs2_dlm_debug *dlm_debug;
2984
2985 dlm_debug = kmalloc(sizeof(struct ocfs2_dlm_debug), GFP_KERNEL);
2986 if (!dlm_debug) {
2987 mlog_errno(-ENOMEM);
2988 goto out;
2989 }
2990
2991 kref_init(&dlm_debug->d_refcnt);
2992 INIT_LIST_HEAD(&dlm_debug->d_lockres_tracking);
2993 dlm_debug->d_locking_state = NULL;
Gang He80567732019-07-11 20:53:05 -07002994 dlm_debug->d_locking_filter = NULL;
2995 dlm_debug->d_filter_secs = 0;
Mark Fashehccd979b2005-12-15 14:31:24 -08002996out:
2997 return dlm_debug;
2998}
2999
3000/* Access to this is arbitrated for us via seq_file->sem. */
3001struct ocfs2_dlm_seq_priv {
3002 struct ocfs2_dlm_debug *p_dlm_debug;
3003 struct ocfs2_lock_res p_iter_res;
3004 struct ocfs2_lock_res p_tmp_res;
3005};
3006
3007static struct ocfs2_lock_res *ocfs2_dlm_next_res(struct ocfs2_lock_res *start,
3008 struct ocfs2_dlm_seq_priv *priv)
3009{
3010 struct ocfs2_lock_res *iter, *ret = NULL;
3011 struct ocfs2_dlm_debug *dlm_debug = priv->p_dlm_debug;
3012
3013 assert_spin_locked(&ocfs2_dlm_tracking_lock);
3014
3015 list_for_each_entry(iter, &start->l_debug_list, l_debug_list) {
3016 /* discover the head of the list */
3017 if (&iter->l_debug_list == &dlm_debug->d_lockres_tracking) {
3018 mlog(0, "End of list found, %p\n", ret);
3019 break;
3020 }
3021
3022 /* We track our "dummy" iteration lockres' by a NULL
3023 * l_ops field. */
3024 if (iter->l_ops != NULL) {
3025 ret = iter;
3026 break;
3027 }
3028 }
3029
3030 return ret;
3031}
3032
3033static void *ocfs2_dlm_seq_start(struct seq_file *m, loff_t *pos)
3034{
3035 struct ocfs2_dlm_seq_priv *priv = m->private;
3036 struct ocfs2_lock_res *iter;
3037
3038 spin_lock(&ocfs2_dlm_tracking_lock);
3039 iter = ocfs2_dlm_next_res(&priv->p_iter_res, priv);
3040 if (iter) {
3041 /* Since lockres' have the lifetime of their container
3042 * (which can be inodes, ocfs2_supers, etc) we want to
3043 * copy this out to a temporary lockres while still
3044 * under the spinlock. Obviously after this we can't
3045 * trust any pointers on the copy returned, but that's
3046 * ok as the information we want isn't typically held
3047 * in them. */
3048 priv->p_tmp_res = *iter;
3049 iter = &priv->p_tmp_res;
3050 }
3051 spin_unlock(&ocfs2_dlm_tracking_lock);
3052
3053 return iter;
3054}
3055
3056static void ocfs2_dlm_seq_stop(struct seq_file *m, void *v)
3057{
3058}
3059
3060static void *ocfs2_dlm_seq_next(struct seq_file *m, void *v, loff_t *pos)
3061{
3062 struct ocfs2_dlm_seq_priv *priv = m->private;
3063 struct ocfs2_lock_res *iter = v;
3064 struct ocfs2_lock_res *dummy = &priv->p_iter_res;
3065
3066 spin_lock(&ocfs2_dlm_tracking_lock);
3067 iter = ocfs2_dlm_next_res(iter, priv);
3068 list_del_init(&dummy->l_debug_list);
3069 if (iter) {
3070 list_add(&dummy->l_debug_list, &iter->l_debug_list);
3071 priv->p_tmp_res = *iter;
3072 iter = &priv->p_tmp_res;
3073 }
3074 spin_unlock(&ocfs2_dlm_tracking_lock);
3075
3076 return iter;
3077}
3078
Sunil Mushran5bc970e2010-12-28 23:26:03 -08003079/*
3080 * Version is used by debugfs.ocfs2 to determine the format being used
3081 *
3082 * New in version 2
3083 * - Lock stats printed
3084 * New in version 3
3085 * - Max time in lock stats is in usecs (instead of nsecs)
Gang He8a7f5f42019-07-11 20:53:02 -07003086 * New in version 4
3087 * - Add last pr/ex unlock times in usecs
Sunil Mushran5bc970e2010-12-28 23:26:03 -08003088 */
Gang He8a7f5f42019-07-11 20:53:02 -07003089#define OCFS2_DLM_DEBUG_STR_VERSION 4
Mark Fashehccd979b2005-12-15 14:31:24 -08003090static int ocfs2_dlm_seq_show(struct seq_file *m, void *v)
3091{
3092 int i;
3093 char *lvb;
3094 struct ocfs2_lock_res *lockres = v;
Gang He80567732019-07-11 20:53:05 -07003095#ifdef CONFIG_OCFS2_FS_STATS
3096 u64 now, last;
3097 struct ocfs2_dlm_debug *dlm_debug =
3098 ((struct ocfs2_dlm_seq_priv *)m->private)->p_dlm_debug;
3099#endif
Mark Fashehccd979b2005-12-15 14:31:24 -08003100
3101 if (!lockres)
3102 return -EINVAL;
3103
Gang He80567732019-07-11 20:53:05 -07003104#ifdef CONFIG_OCFS2_FS_STATS
3105 if (dlm_debug->d_filter_secs) {
3106 now = ktime_to_us(ktime_get_real());
3107 if (lockres->l_lock_prmode.ls_last >
3108 lockres->l_lock_exmode.ls_last)
3109 last = lockres->l_lock_prmode.ls_last;
3110 else
3111 last = lockres->l_lock_exmode.ls_last;
3112 /*
 3113 * Use the d_filter_secs field to filter the lock resources dump;
 3114 * the default d_filter_secs value (0) filters nothing, otherwise
 3115 * only lock resources active within the last N seconds are
 3116 * dumped.
3117 */
3118 if (div_u64(now - last, 1000000) > dlm_debug->d_filter_secs)
3119 return 0;
3120 }
3121#endif
3122
Mark Fashehd680efe2006-09-08 14:14:34 -07003123 seq_printf(m, "0x%x\t", OCFS2_DLM_DEBUG_STR_VERSION);
3124
3125 if (lockres->l_type == OCFS2_LOCK_TYPE_DENTRY)
3126 seq_printf(m, "%.*s%08x\t", OCFS2_DENTRY_LOCK_INO_START - 1,
3127 lockres->l_name,
3128 (unsigned int)ocfs2_get_dentry_lock_ino(lockres));
3129 else
3130 seq_printf(m, "%.*s\t", OCFS2_LOCK_ID_MAX_LEN, lockres->l_name);
3131
3132 seq_printf(m, "%d\t"
Mark Fashehccd979b2005-12-15 14:31:24 -08003133 "0x%lx\t"
3134 "0x%x\t"
3135 "0x%x\t"
3136 "%u\t"
3137 "%u\t"
3138 "%d\t"
3139 "%d\t",
Mark Fashehccd979b2005-12-15 14:31:24 -08003140 lockres->l_level,
3141 lockres->l_flags,
3142 lockres->l_action,
3143 lockres->l_unlock_action,
3144 lockres->l_ro_holders,
3145 lockres->l_ex_holders,
3146 lockres->l_requested,
3147 lockres->l_blocking);
3148
3149 /* Dump the raw LVB */
Joel Becker8f2c9c12008-02-01 12:16:57 -08003150 lvb = ocfs2_dlm_lvb(&lockres->l_lksb);
Mark Fashehccd979b2005-12-15 14:31:24 -08003151 for(i = 0; i < DLM_LVB_LEN; i++)
3152 seq_printf(m, "0x%x\t", lvb[i]);
3153
Sunil Mushran8ddb7b02008-05-13 13:45:15 -07003154#ifdef CONFIG_OCFS2_FS_STATS
Sunil Mushran5bc970e2010-12-28 23:26:03 -08003155# define lock_num_prmode(_l) ((_l)->l_lock_prmode.ls_gets)
3156# define lock_num_exmode(_l) ((_l)->l_lock_exmode.ls_gets)
3157# define lock_num_prmode_failed(_l) ((_l)->l_lock_prmode.ls_fail)
3158# define lock_num_exmode_failed(_l) ((_l)->l_lock_exmode.ls_fail)
3159# define lock_total_prmode(_l) ((_l)->l_lock_prmode.ls_total)
3160# define lock_total_exmode(_l) ((_l)->l_lock_exmode.ls_total)
3161# define lock_max_prmode(_l) ((_l)->l_lock_prmode.ls_max)
3162# define lock_max_exmode(_l) ((_l)->l_lock_exmode.ls_max)
3163# define lock_refresh(_l) ((_l)->l_lock_refresh)
Gang He8a7f5f42019-07-11 20:53:02 -07003164# define lock_last_prmode(_l) ((_l)->l_lock_prmode.ls_last)
3165# define lock_last_exmode(_l) ((_l)->l_lock_exmode.ls_last)
Sunil Mushran8ddb7b02008-05-13 13:45:15 -07003166#else
Sunil Mushran5bc970e2010-12-28 23:26:03 -08003167# define lock_num_prmode(_l) (0)
3168# define lock_num_exmode(_l) (0)
Sunil Mushran8ddb7b02008-05-13 13:45:15 -07003169# define lock_num_prmode_failed(_l) (0)
3170# define lock_num_exmode_failed(_l) (0)
Randy Dunlapdd25e552008-05-28 14:41:00 -07003171# define lock_total_prmode(_l) (0ULL)
3172# define lock_total_exmode(_l) (0ULL)
Sunil Mushran8ddb7b02008-05-13 13:45:15 -07003173# define lock_max_prmode(_l) (0)
3174# define lock_max_exmode(_l) (0)
3175# define lock_refresh(_l) (0)
Gang He8a7f5f42019-07-11 20:53:02 -07003176# define lock_last_prmode(_l) (0ULL)
3177# define lock_last_exmode(_l) (0ULL)
Sunil Mushran8ddb7b02008-05-13 13:45:15 -07003178#endif
3179 /* The following seq_print was added in version 2 of this output */
Sunil Mushran5bc970e2010-12-28 23:26:03 -08003180 seq_printf(m, "%u\t"
3181 "%u\t"
Sunil Mushran8ddb7b02008-05-13 13:45:15 -07003182 "%u\t"
3183 "%u\t"
3184 "%llu\t"
3185 "%llu\t"
3186 "%u\t"
3187 "%u\t"
Gang He8a7f5f42019-07-11 20:53:02 -07003188 "%u\t"
3189 "%llu\t"
3190 "%llu\t",
Sunil Mushran8ddb7b02008-05-13 13:45:15 -07003191 lock_num_prmode(lockres),
3192 lock_num_exmode(lockres),
3193 lock_num_prmode_failed(lockres),
3194 lock_num_exmode_failed(lockres),
3195 lock_total_prmode(lockres),
3196 lock_total_exmode(lockres),
3197 lock_max_prmode(lockres),
3198 lock_max_exmode(lockres),
Gang He8a7f5f42019-07-11 20:53:02 -07003199 lock_refresh(lockres),
3200 lock_last_prmode(lockres),
3201 lock_last_exmode(lockres));
Sunil Mushran8ddb7b02008-05-13 13:45:15 -07003202
Mark Fashehccd979b2005-12-15 14:31:24 -08003203 /* End the line */
3204 seq_printf(m, "\n");
3205 return 0;
3206}
3207
Jan Engelhardt90d99772008-01-22 20:52:20 +01003208static const struct seq_operations ocfs2_dlm_seq_ops = {
Mark Fashehccd979b2005-12-15 14:31:24 -08003209 .start = ocfs2_dlm_seq_start,
3210 .stop = ocfs2_dlm_seq_stop,
3211 .next = ocfs2_dlm_seq_next,
3212 .show = ocfs2_dlm_seq_show,
3213};
3214
3215static int ocfs2_dlm_debug_release(struct inode *inode, struct file *file)
3216{
Joe Perches33fa1d92010-07-12 13:50:19 -07003217 struct seq_file *seq = file->private_data;
Mark Fashehccd979b2005-12-15 14:31:24 -08003218 struct ocfs2_dlm_seq_priv *priv = seq->private;
3219 struct ocfs2_lock_res *res = &priv->p_iter_res;
3220
3221 ocfs2_remove_lockres_tracking(res);
3222 ocfs2_put_dlm_debug(priv->p_dlm_debug);
3223 return seq_release_private(inode, file);
3224}
3225
3226static int ocfs2_dlm_debug_open(struct inode *inode, struct file *file)
3227{
Mark Fashehccd979b2005-12-15 14:31:24 -08003228 struct ocfs2_dlm_seq_priv *priv;
Mark Fashehccd979b2005-12-15 14:31:24 -08003229 struct ocfs2_super *osb;
3230
Rob Jones1848cb552014-10-09 15:25:09 -07003231 priv = __seq_open_private(file, &ocfs2_dlm_seq_ops, sizeof(*priv));
Mark Fashehccd979b2005-12-15 14:31:24 -08003232 if (!priv) {
Rob Jones1848cb552014-10-09 15:25:09 -07003233 mlog_errno(-ENOMEM);
3234 return -ENOMEM;
Mark Fashehccd979b2005-12-15 14:31:24 -08003235 }
Rob Jones1848cb552014-10-09 15:25:09 -07003236
Theodore Ts'o8e18e292006-09-27 01:50:46 -07003237 osb = inode->i_private;
Mark Fashehccd979b2005-12-15 14:31:24 -08003238 ocfs2_get_dlm_debug(osb->osb_dlm_debug);
3239 priv->p_dlm_debug = osb->osb_dlm_debug;
3240 INIT_LIST_HEAD(&priv->p_iter_res.l_debug_list);
3241
Mark Fashehccd979b2005-12-15 14:31:24 -08003242 ocfs2_add_lockres_tracking(&priv->p_iter_res,
3243 priv->p_dlm_debug);
3244
Rob Jones1848cb552014-10-09 15:25:09 -07003245 return 0;
Mark Fashehccd979b2005-12-15 14:31:24 -08003246}
3247
Arjan van de Ven4b6f5d22006-03-28 01:56:42 -08003248static const struct file_operations ocfs2_dlm_debug_fops = {
Mark Fashehccd979b2005-12-15 14:31:24 -08003249 .open = ocfs2_dlm_debug_open,
3250 .release = ocfs2_dlm_debug_release,
3251 .read = seq_read,
3252 .llseek = seq_lseek,
3253};
3254
3255static int ocfs2_dlm_init_debug(struct ocfs2_super *osb)
3256{
3257 int ret = 0;
3258 struct ocfs2_dlm_debug *dlm_debug = osb->osb_dlm_debug;
3259
3260 dlm_debug->d_locking_state = debugfs_create_file("locking_state",
3261 S_IFREG|S_IRUSR,
3262 osb->osb_debug_root,
3263 osb,
3264 &ocfs2_dlm_debug_fops);
Linus Torvalds8f443e22015-04-21 09:17:28 -07003265 if (!dlm_debug->d_locking_state) {
Mark Fashehccd979b2005-12-15 14:31:24 -08003266 ret = -EINVAL;
3267 mlog(ML_ERROR,
3268 "Unable to create locking state debugfs file.\n");
3269 goto out;
3270 }
3271
Gang He80567732019-07-11 20:53:05 -07003272 dlm_debug->d_locking_filter = debugfs_create_u32("locking_filter",
3273 0600,
3274 osb->osb_debug_root,
3275 &dlm_debug->d_filter_secs);
3276 if (!dlm_debug->d_locking_filter) {
3277 ret = -EINVAL;
3278 mlog(ML_ERROR,
3279 "Unable to create locking filter debugfs file.\n");
3280 goto out;
3281 }
3282
Mark Fashehccd979b2005-12-15 14:31:24 -08003283 ocfs2_get_dlm_debug(dlm_debug);
3284out:
3285 return ret;
3286}
3287
3288static void ocfs2_dlm_shutdown_debug(struct ocfs2_super *osb)
3289{
3290 struct ocfs2_dlm_debug *dlm_debug = osb->osb_dlm_debug;
3291
3292 if (dlm_debug) {
3293 debugfs_remove(dlm_debug->d_locking_state);
Gang He80567732019-07-11 20:53:05 -07003294 debugfs_remove(dlm_debug->d_locking_filter);
Mark Fashehccd979b2005-12-15 14:31:24 -08003295 ocfs2_put_dlm_debug(dlm_debug);
3296 }
3297}
3298
3299int ocfs2_dlm_init(struct ocfs2_super *osb)
3300{
Sunil Mushranc271c5c2006-12-05 17:56:35 -08003301 int status = 0;
Joel Becker4670c462008-02-01 14:39:35 -08003302 struct ocfs2_cluster_connection *conn = NULL;
Mark Fashehccd979b2005-12-15 14:31:24 -08003303
Mark Fasheh0abd6d12008-01-29 16:59:56 -08003304 if (ocfs2_mount_local(osb)) {
3305 osb->node_num = 0;
Sunil Mushranc271c5c2006-12-05 17:56:35 -08003306 goto local;
Mark Fasheh0abd6d12008-01-29 16:59:56 -08003307 }
Sunil Mushranc271c5c2006-12-05 17:56:35 -08003308
Mark Fashehccd979b2005-12-15 14:31:24 -08003309 status = ocfs2_dlm_init_debug(osb);
3310 if (status < 0) {
3311 mlog_errno(status);
3312 goto bail;
3313 }
3314
Mark Fasheh34d024f2007-09-24 15:56:19 -07003315 /* launch downconvert thread */
Joseph Qi5afc44e2015-11-05 18:44:13 -08003316 osb->dc_task = kthread_run(ocfs2_downconvert_thread, osb, "ocfs2dc-%s",
3317 osb->uuid_str);
Mark Fasheh34d024f2007-09-24 15:56:19 -07003318 if (IS_ERR(osb->dc_task)) {
3319 status = PTR_ERR(osb->dc_task);
3320 osb->dc_task = NULL;
Mark Fashehccd979b2005-12-15 14:31:24 -08003321 mlog_errno(status);
3322 goto bail;
3323 }
3324
Mark Fashehccd979b2005-12-15 14:31:24 -08003325 /* for now, uuid == domain */
Joel Becker9c6c8772008-02-01 15:17:30 -08003326 status = ocfs2_cluster_connect(osb->osb_cluster_stack,
Goldwyn Rodriguesc74a3bd2014-01-21 15:48:21 -08003327 osb->osb_cluster_name,
3328 strlen(osb->osb_cluster_name),
Joel Becker9c6c8772008-02-01 15:17:30 -08003329 osb->uuid_str,
Joel Becker4670c462008-02-01 14:39:35 -08003330 strlen(osb->uuid_str),
Joel Becker553b5eb2010-01-29 17:19:06 -08003331 &lproto, ocfs2_do_node_down, osb,
Joel Becker4670c462008-02-01 14:39:35 -08003332 &conn);
3333 if (status) {
Mark Fashehccd979b2005-12-15 14:31:24 -08003334 mlog_errno(status);
3335 goto bail;
3336 }
3337
Goldwyn Rodrigues3e834152014-01-21 15:48:24 -08003338 status = ocfs2_cluster_this_node(conn, &osb->node_num);
Mark Fasheh0abd6d12008-01-29 16:59:56 -08003339 if (status < 0) {
3340 mlog_errno(status);
3341 mlog(ML_ERROR,
3342 "could not find this host's node number\n");
Joel Becker286eaa92008-02-01 15:03:57 -08003343 ocfs2_cluster_disconnect(conn, 0);
Mark Fasheh0abd6d12008-01-29 16:59:56 -08003344 goto bail;
3345 }
3346
Sunil Mushranc271c5c2006-12-05 17:56:35 -08003347local:
Mark Fashehccd979b2005-12-15 14:31:24 -08003348 ocfs2_super_lock_res_init(&osb->osb_super_lockres, osb);
3349 ocfs2_rename_lock_res_init(&osb->osb_rename_lockres, osb);
wengang wang6ca497a2009-03-06 21:29:10 +08003350 ocfs2_nfs_sync_lock_res_init(&osb->osb_nfs_sync_lockres, osb);
Srinivas Eeda83273932009-06-03 17:02:55 -07003351 ocfs2_orphan_scan_lock_res_init(&osb->osb_orphan_scan.os_lockres, osb);
Mark Fashehccd979b2005-12-15 14:31:24 -08003352
Joel Becker4670c462008-02-01 14:39:35 -08003353 osb->cconn = conn;
Mark Fashehccd979b2005-12-15 14:31:24 -08003354bail:
3355 if (status < 0) {
3356 ocfs2_dlm_shutdown_debug(osb);
Mark Fasheh34d024f2007-09-24 15:56:19 -07003357 if (osb->dc_task)
3358 kthread_stop(osb->dc_task);
Mark Fashehccd979b2005-12-15 14:31:24 -08003359 }
3360
Mark Fashehccd979b2005-12-15 14:31:24 -08003361 return status;
3362}
3363
Joel Becker286eaa92008-02-01 15:03:57 -08003364void ocfs2_dlm_shutdown(struct ocfs2_super *osb,
3365 int hangup_pending)
Mark Fashehccd979b2005-12-15 14:31:24 -08003366{
Mark Fashehccd979b2005-12-15 14:31:24 -08003367 ocfs2_drop_osb_locks(osb);
3368
Joel Becker4670c462008-02-01 14:39:35 -08003369 /*
3370 * Now that we have dropped all locks and ocfs2_dismount_volume()
3371 * has disabled recovery, the DLM won't be talking to us. It's
3372 * safe to tear things down before disconnecting the cluster.
3373 */
3374
Mark Fasheh34d024f2007-09-24 15:56:19 -07003375 if (osb->dc_task) {
3376 kthread_stop(osb->dc_task);
3377 osb->dc_task = NULL;
Mark Fashehccd979b2005-12-15 14:31:24 -08003378 }
3379
3380 ocfs2_lock_res_free(&osb->osb_super_lockres);
3381 ocfs2_lock_res_free(&osb->osb_rename_lockres);
wengang wang6ca497a2009-03-06 21:29:10 +08003382 ocfs2_lock_res_free(&osb->osb_nfs_sync_lockres);
Srinivas Eeda83273932009-06-03 17:02:55 -07003383 ocfs2_lock_res_free(&osb->osb_orphan_scan.os_lockres);
Mark Fashehccd979b2005-12-15 14:31:24 -08003384
Joel Becker286eaa92008-02-01 15:03:57 -08003385 ocfs2_cluster_disconnect(osb->cconn, hangup_pending);
Joel Becker4670c462008-02-01 14:39:35 -08003386 osb->cconn = NULL;
Mark Fashehccd979b2005-12-15 14:31:24 -08003387
3388 ocfs2_dlm_shutdown_debug(osb);
Mark Fashehccd979b2005-12-15 14:31:24 -08003389}
3390
Mark Fashehccd979b2005-12-15 14:31:24 -08003391static int ocfs2_drop_lock(struct ocfs2_super *osb,
Mark Fasheh0d5dc6c2006-09-14 14:44:51 -07003392 struct ocfs2_lock_res *lockres)
Mark Fashehccd979b2005-12-15 14:31:24 -08003393{
Joel Becker7431cd72008-02-01 12:15:37 -08003394 int ret;
Mark Fashehccd979b2005-12-15 14:31:24 -08003395 unsigned long flags;
Joel Beckerbd3e7612008-02-01 12:14:57 -08003396 u32 lkm_flags = 0;
Mark Fashehccd979b2005-12-15 14:31:24 -08003397
3398 /* We didn't get anywhere near actually using this lockres. */
3399 if (!(lockres->l_flags & OCFS2_LOCK_INITIALIZED))
3400 goto out;
3401
Mark Fashehb80fc012006-09-12 22:08:14 -07003402 if (lockres->l_ops->flags & LOCK_TYPE_USES_LVB)
Joel Beckerbd3e7612008-02-01 12:14:57 -08003403 lkm_flags |= DLM_LKF_VALBLK;
Mark Fashehb80fc012006-09-12 22:08:14 -07003404
Mark Fashehccd979b2005-12-15 14:31:24 -08003405 spin_lock_irqsave(&lockres->l_lock, flags);
3406
3407 mlog_bug_on_msg(!(lockres->l_flags & OCFS2_LOCK_FREEING),
3408 "lockres %s, flags 0x%lx\n",
3409 lockres->l_name, lockres->l_flags);
3410
3411 while (lockres->l_flags & OCFS2_LOCK_BUSY) {
3412 mlog(0, "waiting on busy lock \"%s\": flags = %lx, action = "
3413 "%u, unlock_action = %u\n",
3414 lockres->l_name, lockres->l_flags, lockres->l_action,
3415 lockres->l_unlock_action);
3416
3417 spin_unlock_irqrestore(&lockres->l_lock, flags);
3418
3419 /* XXX: Today we just wait on any busy
3420 * locks... Perhaps we need to cancel converts in the
3421 * future? */
3422 ocfs2_wait_on_busy_lock(lockres);
3423
3424 spin_lock_irqsave(&lockres->l_lock, flags);
3425 }
3426
Mark Fasheh0d5dc6c2006-09-14 14:44:51 -07003427 if (lockres->l_ops->flags & LOCK_TYPE_USES_LVB) {
3428 if (lockres->l_flags & OCFS2_LOCK_ATTACHED &&
Joel Beckerbd3e7612008-02-01 12:14:57 -08003429 lockres->l_level == DLM_LOCK_EX &&
Mark Fasheh0d5dc6c2006-09-14 14:44:51 -07003430 !(lockres->l_flags & OCFS2_LOCK_NEEDS_REFRESH))
3431 lockres->l_ops->set_lvb(lockres);
3432 }
Mark Fashehccd979b2005-12-15 14:31:24 -08003433
3434 if (lockres->l_flags & OCFS2_LOCK_BUSY)
3435 mlog(ML_ERROR, "destroying busy lock: \"%s\"\n",
3436 lockres->l_name);
3437 if (lockres->l_flags & OCFS2_LOCK_BLOCKED)
3438 mlog(0, "destroying blocked lock: \"%s\"\n", lockres->l_name);
3439
3440 if (!(lockres->l_flags & OCFS2_LOCK_ATTACHED)) {
3441 spin_unlock_irqrestore(&lockres->l_lock, flags);
3442 goto out;
3443 }
3444
3445 lockres_clear_flags(lockres, OCFS2_LOCK_ATTACHED);
3446
3447 /* make sure we never get here while waiting for an ast to
3448 * fire. */
3449 BUG_ON(lockres->l_action != OCFS2_AST_INVALID);
3450
3451 /* is this necessary? */
3452 lockres_or_flags(lockres, OCFS2_LOCK_BUSY);
3453 lockres->l_unlock_action = OCFS2_UNLOCK_DROP_LOCK;
3454 spin_unlock_irqrestore(&lockres->l_lock, flags);
3455
3456 mlog(0, "lock %s\n", lockres->l_name);
3457
Joel Beckera796d282010-01-28 19:22:39 -08003458 ret = ocfs2_dlm_unlock(osb->cconn, &lockres->l_lksb, lkm_flags);
Joel Becker7431cd72008-02-01 12:15:37 -08003459 if (ret) {
3460 ocfs2_log_dlm_error("ocfs2_dlm_unlock", ret, lockres);
Mark Fashehccd979b2005-12-15 14:31:24 -08003461 mlog(ML_ERROR, "lockres flags: %lu\n", lockres->l_flags);
Joel Beckercf0acdc2008-01-29 16:59:55 -08003462 ocfs2_dlm_dump_lksb(&lockres->l_lksb);
Mark Fashehccd979b2005-12-15 14:31:24 -08003463 BUG();
3464 }
Coly Li73ac36e2009-01-07 18:09:16 -08003465 mlog(0, "lock %s, successful return from ocfs2_dlm_unlock\n",
Mark Fashehccd979b2005-12-15 14:31:24 -08003466 lockres->l_name);
3467
3468 ocfs2_wait_on_busy_lock(lockres);
3469out:
Mark Fashehccd979b2005-12-15 14:31:24 -08003470 return 0;
3471}
3472
Jan Kara84d86f82014-04-03 14:46:57 -07003473static void ocfs2_process_blocked_lock(struct ocfs2_super *osb,
3474 struct ocfs2_lock_res *lockres);
3475
Mark Fashehccd979b2005-12-15 14:31:24 -08003476/* Mark the lockres as being dropped. It will no longer be
3477 * queued if blocking, but we still may have to wait on it
Mark Fasheh34d024f2007-09-24 15:56:19 -07003478 * being dequeued from the downconvert thread before we can consider
Sunil Mushran2bd63212010-01-25 16:57:38 -08003479 * it safe to drop.
Mark Fashehccd979b2005-12-15 14:31:24 -08003480 *
3481 * You can *not* attempt to call cluster_lock on this lockres anymore. */
Jan Kara84d86f82014-04-03 14:46:57 -07003482void ocfs2_mark_lockres_freeing(struct ocfs2_super *osb,
3483 struct ocfs2_lock_res *lockres)
Mark Fashehccd979b2005-12-15 14:31:24 -08003484{
3485 int status;
3486 struct ocfs2_mask_waiter mw;
Jan Kara84d86f82014-04-03 14:46:57 -07003487 unsigned long flags, flags2;
Mark Fashehccd979b2005-12-15 14:31:24 -08003488
3489 ocfs2_init_mask_waiter(&mw);
3490
3491 spin_lock_irqsave(&lockres->l_lock, flags);
3492 lockres->l_flags |= OCFS2_LOCK_FREEING;
Jan Kara84d86f82014-04-03 14:46:57 -07003493 if (lockres->l_flags & OCFS2_LOCK_QUEUED && current == osb->dc_task) {
3494 /*
3495 * We know the downconvert is queued but not in progress
3496 * because we are the downconvert thread and processing
3497 * different lock. So we can just remove the lock from the
3498 * queue. This is not only an optimization but also a way
3499 * to avoid the following deadlock:
3500 * ocfs2_dentry_post_unlock()
3501 * ocfs2_dentry_lock_put()
3502 * ocfs2_drop_dentry_lock()
3503 * iput()
3504 * ocfs2_evict_inode()
3505 * ocfs2_clear_inode()
3506 * ocfs2_mark_lockres_freeing()
3507 * ... blocks waiting for OCFS2_LOCK_QUEUED
3508 * since we are the downconvert thread which
3509 * should clear the flag.
3510 */
3511 spin_unlock_irqrestore(&lockres->l_lock, flags);
3512 spin_lock_irqsave(&osb->dc_task_lock, flags2);
3513 list_del_init(&lockres->l_blocked_list);
3514 osb->blocked_lock_count--;
3515 spin_unlock_irqrestore(&osb->dc_task_lock, flags2);
3516 /*
3517 * Warn if we recurse into another post_unlock call. Strictly
3518 * speaking it isn't a problem but we need to be careful if
3519 * that happens (stack overflow, deadlocks, ...) so warn if
3520 * ocfs2 grows a path for which this can happen.
3521 */
3522 WARN_ON_ONCE(lockres->l_ops->post_unlock);
3523 /* Since the lock is freeing we don't do much in the fn below */
3524 ocfs2_process_blocked_lock(osb, lockres);
3525 return;
3526 }
Mark Fashehccd979b2005-12-15 14:31:24 -08003527 while (lockres->l_flags & OCFS2_LOCK_QUEUED) {
3528 lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_QUEUED, 0);
3529 spin_unlock_irqrestore(&lockres->l_lock, flags);
3530
3531 mlog(0, "Waiting on lockres %s\n", lockres->l_name);
3532
3533 status = ocfs2_wait_for_mask(&mw);
3534 if (status)
3535 mlog_errno(status);
3536
3537 spin_lock_irqsave(&lockres->l_lock, flags);
3538 }
3539 spin_unlock_irqrestore(&lockres->l_lock, flags);
3540}
3541
Mark Fashehd680efe2006-09-08 14:14:34 -07003542void ocfs2_simple_drop_lockres(struct ocfs2_super *osb,
3543 struct ocfs2_lock_res *lockres)
3544{
3545 int ret;
3546
Jan Kara84d86f82014-04-03 14:46:57 -07003547 ocfs2_mark_lockres_freeing(osb, lockres);
Mark Fasheh0d5dc6c2006-09-14 14:44:51 -07003548 ret = ocfs2_drop_lock(osb, lockres);
Mark Fashehd680efe2006-09-08 14:14:34 -07003549 if (ret)
3550 mlog_errno(ret);
3551}
3552
Mark Fashehccd979b2005-12-15 14:31:24 -08003553static void ocfs2_drop_osb_locks(struct ocfs2_super *osb)
3554{
Mark Fashehd680efe2006-09-08 14:14:34 -07003555 ocfs2_simple_drop_lockres(osb, &osb->osb_super_lockres);
3556 ocfs2_simple_drop_lockres(osb, &osb->osb_rename_lockres);
wengang wang6ca497a2009-03-06 21:29:10 +08003557 ocfs2_simple_drop_lockres(osb, &osb->osb_nfs_sync_lockres);
Srinivas Eeda83273932009-06-03 17:02:55 -07003558 ocfs2_simple_drop_lockres(osb, &osb->osb_orphan_scan.os_lockres);
Mark Fashehccd979b2005-12-15 14:31:24 -08003559}
3560
Mark Fashehccd979b2005-12-15 14:31:24 -08003561int ocfs2_drop_inode_locks(struct inode *inode)
3562{
3563 int status, err;
Mark Fashehccd979b2005-12-15 14:31:24 -08003564
Mark Fashehccd979b2005-12-15 14:31:24 -08003565 /* No need to call ocfs2_mark_lockres_freeing here -
3566 * ocfs2_clear_inode has done it for us. */
3567
3568 err = ocfs2_drop_lock(OCFS2_SB(inode->i_sb),
Tiger Yang50008632007-03-20 16:01:38 -07003569 &OCFS2_I(inode)->ip_open_lockres);
Mark Fashehccd979b2005-12-15 14:31:24 -08003570 if (err < 0)
3571 mlog_errno(err);
3572
3573 status = err;
3574
3575 err = ocfs2_drop_lock(OCFS2_SB(inode->i_sb),
Mark Fashehe63aecb62007-10-18 15:30:42 -07003576 &OCFS2_I(inode)->ip_inode_lockres);
Mark Fashehccd979b2005-12-15 14:31:24 -08003577 if (err < 0)
3578 mlog_errno(err);
3579 if (err < 0 && !status)
3580 status = err;
3581
3582 err = ocfs2_drop_lock(OCFS2_SB(inode->i_sb),
Mark Fasheh0d5dc6c2006-09-14 14:44:51 -07003583 &OCFS2_I(inode)->ip_rw_lockres);
Mark Fashehccd979b2005-12-15 14:31:24 -08003584 if (err < 0)
3585 mlog_errno(err);
3586 if (err < 0 && !status)
3587 status = err;
3588
Mark Fashehccd979b2005-12-15 14:31:24 -08003589 return status;
3590}
3591
Joel Beckerde551242008-02-01 14:45:08 -08003592static unsigned int ocfs2_prepare_downconvert(struct ocfs2_lock_res *lockres,
3593 int new_level)
Mark Fashehccd979b2005-12-15 14:31:24 -08003594{
3595 assert_spin_locked(&lockres->l_lock);
3596
Joel Beckerbd3e7612008-02-01 12:14:57 -08003597 BUG_ON(lockres->l_blocking <= DLM_LOCK_NL);
Mark Fashehccd979b2005-12-15 14:31:24 -08003598
3599 if (lockres->l_level <= new_level) {
Sunil Mushran9b915182010-02-26 19:42:44 -08003600 mlog(ML_ERROR, "lockres %s, lvl %d <= %d, blcklst %d, mask %d, "
3601 "type %d, flags 0x%lx, hold %d %d, act %d %d, req %d, "
3602 "block %d, pgen %d\n", lockres->l_name, lockres->l_level,
3603 new_level, list_empty(&lockres->l_blocked_list),
3604 list_empty(&lockres->l_mask_waiters), lockres->l_type,
3605 lockres->l_flags, lockres->l_ro_holders,
3606 lockres->l_ex_holders, lockres->l_action,
3607 lockres->l_unlock_action, lockres->l_requested,
3608 lockres->l_blocking, lockres->l_pending_gen);
Mark Fashehccd979b2005-12-15 14:31:24 -08003609 BUG();
3610 }
3611
Sunil Mushran9b915182010-02-26 19:42:44 -08003612 mlog(ML_BASTS, "lockres %s, level %d => %d, blocking %d\n",
3613 lockres->l_name, lockres->l_level, new_level, lockres->l_blocking);
Mark Fashehccd979b2005-12-15 14:31:24 -08003614
3615 lockres->l_action = OCFS2_AST_DOWNCONVERT;
3616 lockres->l_requested = new_level;
3617 lockres_or_flags(lockres, OCFS2_LOCK_BUSY);
Joel Beckerde551242008-02-01 14:45:08 -08003618 return lockres_set_pending(lockres);
Mark Fashehccd979b2005-12-15 14:31:24 -08003619}
3620
3621static int ocfs2_downconvert_lock(struct ocfs2_super *osb,
3622 struct ocfs2_lock_res *lockres,
3623 int new_level,
Joel Beckerde551242008-02-01 14:45:08 -08003624 int lvb,
3625 unsigned int generation)
Mark Fashehccd979b2005-12-15 14:31:24 -08003626{
Joel Beckerbd3e7612008-02-01 12:14:57 -08003627 int ret;
3628 u32 dlm_flags = DLM_LKF_CONVERT;
Mark Fashehccd979b2005-12-15 14:31:24 -08003629
Sunil Mushran9b915182010-02-26 19:42:44 -08003630 mlog(ML_BASTS, "lockres %s, level %d => %d\n", lockres->l_name,
3631 lockres->l_level, new_level);
3632
Eric Rene7ee2c082017-01-10 16:57:33 -08003633 /*
3634 * On DLM_LKF_VALBLK, fsdlm behaves differently with o2cb. It always
3635 * expects DLM_LKF_VALBLK being set if the LKB has LVB, so that
3636 * we can recover correctly from node failure. Otherwise, we may get
Ingo Molnar793057e2018-02-28 09:39:48 +01003637 * invalid LVB in LKB, but without DLM_SBF_VALNOTVALID being set.
Eric Rene7ee2c082017-01-10 16:57:33 -08003638 */
Gang Hea6346442018-11-02 15:48:03 -07003639 if (ocfs2_userspace_stack(osb) &&
Eric Rene7ee2c082017-01-10 16:57:33 -08003640 lockres->l_ops->flags & LOCK_TYPE_USES_LVB)
3641 lvb = 1;
3642
Mark Fashehccd979b2005-12-15 14:31:24 -08003643 if (lvb)
Joel Beckerbd3e7612008-02-01 12:14:57 -08003644 dlm_flags |= DLM_LKF_VALBLK;
Mark Fashehccd979b2005-12-15 14:31:24 -08003645
Joel Becker4670c462008-02-01 14:39:35 -08003646 ret = ocfs2_dlm_lock(osb->cconn,
Joel Becker7431cd72008-02-01 12:15:37 -08003647 new_level,
3648 &lockres->l_lksb,
3649 dlm_flags,
3650 lockres->l_name,
Joel Beckera796d282010-01-28 19:22:39 -08003651 OCFS2_LOCK_ID_MAX_LEN - 1);
Joel Beckerde551242008-02-01 14:45:08 -08003652 lockres_clear_pending(lockres, generation, osb);
Joel Becker7431cd72008-02-01 12:15:37 -08003653 if (ret) {
3654 ocfs2_log_dlm_error("ocfs2_dlm_lock", ret, lockres);
Mark Fashehccd979b2005-12-15 14:31:24 -08003655 ocfs2_recover_from_dlm_error(lockres, 1);
3656 goto bail;
3657 }
3658
3659 ret = 0;
3660bail:
Mark Fashehccd979b2005-12-15 14:31:24 -08003661 return ret;
3662}
3663
Joel Becker24ef1812008-01-29 17:37:32 -08003664/* returns 1 when the caller should unlock and call ocfs2_dlm_unlock */
Mark Fashehccd979b2005-12-15 14:31:24 -08003665static int ocfs2_prepare_cancel_convert(struct ocfs2_super *osb,
3666 struct ocfs2_lock_res *lockres)
3667{
3668 assert_spin_locked(&lockres->l_lock);
3669
Mark Fashehccd979b2005-12-15 14:31:24 -08003670 if (lockres->l_unlock_action == OCFS2_UNLOCK_CANCEL_CONVERT) {
3671 /* If we're already trying to cancel a lock conversion
3672 * then just drop the spinlock and allow the caller to
3673 * requeue this lock. */
Sunil Mushran9b915182010-02-26 19:42:44 -08003674 mlog(ML_BASTS, "lockres %s, skip convert\n", lockres->l_name);
Mark Fashehccd979b2005-12-15 14:31:24 -08003675 return 0;
3676 }
3677
3678 /* were we in a convert when we got the bast fire? */
3679 BUG_ON(lockres->l_action != OCFS2_AST_CONVERT &&
3680 lockres->l_action != OCFS2_AST_DOWNCONVERT);
3681 /* set things up for the unlockast to know to just
3682 * clear out the ast_action and unset busy, etc. */
3683 lockres->l_unlock_action = OCFS2_UNLOCK_CANCEL_CONVERT;
3684
3685 mlog_bug_on_msg(!(lockres->l_flags & OCFS2_LOCK_BUSY),
3686 "lock %s, invalid flags: 0x%lx\n",
3687 lockres->l_name, lockres->l_flags);
3688
Sunil Mushran9b915182010-02-26 19:42:44 -08003689 mlog(ML_BASTS, "lockres %s\n", lockres->l_name);
3690
Mark Fashehccd979b2005-12-15 14:31:24 -08003691 return 1;
3692}
3693
3694static int ocfs2_cancel_convert(struct ocfs2_super *osb,
3695 struct ocfs2_lock_res *lockres)
3696{
3697 int ret;
Mark Fashehccd979b2005-12-15 14:31:24 -08003698
Joel Becker4670c462008-02-01 14:39:35 -08003699 ret = ocfs2_dlm_unlock(osb->cconn, &lockres->l_lksb,
Joel Beckera796d282010-01-28 19:22:39 -08003700 DLM_LKF_CANCEL);
Joel Becker7431cd72008-02-01 12:15:37 -08003701 if (ret) {
3702 ocfs2_log_dlm_error("ocfs2_dlm_unlock", ret, lockres);
Mark Fashehccd979b2005-12-15 14:31:24 -08003703 ocfs2_recover_from_dlm_error(lockres, 0);
3704 }
3705
Sunil Mushran9b915182010-02-26 19:42:44 -08003706 mlog(ML_BASTS, "lockres %s\n", lockres->l_name);
Mark Fashehccd979b2005-12-15 14:31:24 -08003707
Mark Fashehccd979b2005-12-15 14:31:24 -08003708 return ret;
3709}
3710
Mark Fashehb5e500e2006-09-13 22:01:16 -07003711static int ocfs2_unblock_lock(struct ocfs2_super *osb,
3712 struct ocfs2_lock_res *lockres,
3713 struct ocfs2_unblock_ctl *ctl)
Mark Fashehccd979b2005-12-15 14:31:24 -08003714{
3715 unsigned long flags;
3716 int blocking;
3717 int new_level;
Sunil Mushran079b8052010-02-03 10:16:54 -08003718 int level;
Mark Fashehccd979b2005-12-15 14:31:24 -08003719 int ret = 0;
Mark Fasheh5ef0d4e2006-09-13 21:21:52 -07003720 int set_lvb = 0;
Joel Beckerde551242008-02-01 14:45:08 -08003721 unsigned int gen;
Mark Fashehccd979b2005-12-15 14:31:24 -08003722
Mark Fashehccd979b2005-12-15 14:31:24 -08003723 spin_lock_irqsave(&lockres->l_lock, flags);
3724
Mark Fashehccd979b2005-12-15 14:31:24 -08003725recheck:
Sunil Mushrandb0f6ce2010-02-01 16:55:50 -08003726 /*
3727 * Is it still blocking? If not, we have no more work to do.
3728 */
3729 if (!(lockres->l_flags & OCFS2_LOCK_BLOCKED)) {
3730 BUG_ON(lockres->l_blocking != DLM_LOCK_NL);
3731 spin_unlock_irqrestore(&lockres->l_lock, flags);
3732 ret = 0;
3733 goto leave;
3734 }
3735
Mark Fashehccd979b2005-12-15 14:31:24 -08003736 if (lockres->l_flags & OCFS2_LOCK_BUSY) {
Joel Beckerde551242008-02-01 14:45:08 -08003737 /* XXX
3738 * This is a *big* race. The OCFS2_LOCK_PENDING flag
3739 * exists entirely for one reason - another thread has set
3740 * OCFS2_LOCK_BUSY, but has *NOT* yet called dlm_lock().
3741 *
3742 * If we do ocfs2_cancel_convert() before the other thread
3743 * calls dlm_lock(), our cancel will do nothing. We will
3744 * get no ast, and we will have no way of knowing the
3745 * cancel failed. Meanwhile, the other thread will call
3746 * into dlm_lock() and wait...forever.
3747 *
3748 * Why forever? Because another node has asked for the
3749 * lock first; that's why we're here in unblock_lock().
3750 *
3751 * The solution is OCFS2_LOCK_PENDING. When PENDING is
3752 * set, we just requeue the unblock. Only when the other
3753 * thread has called dlm_lock() and cleared PENDING will
3754 * we then cancel their request.
3755 *
 3756 * All callers of dlm_lock() must set OCFS2_LOCK_PENDING
 3757 * at the same time they set OCFS2_LOCK_BUSY. They must
 3758 * clear OCFS2_LOCK_PENDING after dlm_lock() returns.
3759 */
Sunil Mushran9b915182010-02-26 19:42:44 -08003760 if (lockres->l_flags & OCFS2_LOCK_PENDING) {
3761 mlog(ML_BASTS, "lockres %s, ReQ: Pending\n",
3762 lockres->l_name);
Joel Beckerde551242008-02-01 14:45:08 -08003763 goto leave_requeue;
Sunil Mushran9b915182010-02-26 19:42:44 -08003764 }
Joel Beckerde551242008-02-01 14:45:08 -08003765
Mark Fashehd680efe2006-09-08 14:14:34 -07003766 ctl->requeue = 1;
Mark Fashehccd979b2005-12-15 14:31:24 -08003767 ret = ocfs2_prepare_cancel_convert(osb, lockres);
3768 spin_unlock_irqrestore(&lockres->l_lock, flags);
3769 if (ret) {
3770 ret = ocfs2_cancel_convert(osb, lockres);
3771 if (ret < 0)
3772 mlog_errno(ret);
3773 }
3774 goto leave;
3775 }
3776
Sunil Mushrana1912822010-01-21 10:50:03 -08003777 /*
3778 * This prevents livelocks. OCFS2_LOCK_UPCONVERT_FINISHING flag is
3779 * set when the ast is received for an upconvert just before the
3780 * OCFS2_LOCK_BUSY flag is cleared. Now if the fs received a bast
3781 * on the heels of the ast, we want to delay the downconvert just
3782 * enough to allow the up requestor to do its task. Because this
3783 * lock is in the blocked queue, the lock will be downconverted
3784 * as soon as the requestor is done with the lock.
3785 */
3786 if (lockres->l_flags & OCFS2_LOCK_UPCONVERT_FINISHING)
3787 goto leave_requeue;
3788
Sunil Mushran0d741252010-01-29 09:44:11 -08003789 /*
3790 * How can we block and yet be at NL? We were trying to upconvert
3791 * from NL and got canceled. The code comes back here, and now
3792 * we notice and clear BLOCKING.
3793 */
3794 if (lockres->l_level == DLM_LOCK_NL) {
3795 BUG_ON(lockres->l_ex_holders || lockres->l_ro_holders);
Sunil Mushran9b915182010-02-26 19:42:44 -08003796 mlog(ML_BASTS, "lockres %s, Aborting dc\n", lockres->l_name);
Sunil Mushran0d741252010-01-29 09:44:11 -08003797 lockres->l_blocking = DLM_LOCK_NL;
3798 lockres_clear_flags(lockres, OCFS2_LOCK_BLOCKED);
3799 spin_unlock_irqrestore(&lockres->l_lock, flags);
3800 goto leave;
3801 }
3802
Mark Fashehccd979b2005-12-15 14:31:24 -08003803 /* if we're blocking an exclusive and we have *any* holders,
3804 * then requeue. */
Joel Beckerbd3e7612008-02-01 12:14:57 -08003805 if ((lockres->l_blocking == DLM_LOCK_EX)
Sunil Mushran9b915182010-02-26 19:42:44 -08003806 && (lockres->l_ex_holders || lockres->l_ro_holders)) {
3807 mlog(ML_BASTS, "lockres %s, ReQ: EX/PR Holders %u,%u\n",
3808 lockres->l_name, lockres->l_ex_holders,
3809 lockres->l_ro_holders);
Mark Fashehf7fbfdd2006-09-13 21:02:29 -07003810 goto leave_requeue;
Sunil Mushran9b915182010-02-26 19:42:44 -08003811 }
Mark Fashehccd979b2005-12-15 14:31:24 -08003812
3813 /* If it's a PR we're blocking, then only
3814 * requeue if we've got any EX holders */
Joel Beckerbd3e7612008-02-01 12:14:57 -08003815 if (lockres->l_blocking == DLM_LOCK_PR &&
Sunil Mushran9b915182010-02-26 19:42:44 -08003816 lockres->l_ex_holders) {
3817 mlog(ML_BASTS, "lockres %s, ReQ: EX Holders %u\n",
3818 lockres->l_name, lockres->l_ex_holders);
Mark Fashehf7fbfdd2006-09-13 21:02:29 -07003819 goto leave_requeue;
Sunil Mushran9b915182010-02-26 19:42:44 -08003820 }
Mark Fashehf7fbfdd2006-09-13 21:02:29 -07003821
3822 /*
3823 * Can we get a lock in this state if the holder counts are
3824 * zero? The meta data unblock code used to check this.
3825 */
3826 if ((lockres->l_ops->flags & LOCK_TYPE_REQUIRES_REFRESH)
Sunil Mushran9b915182010-02-26 19:42:44 -08003827 && (lockres->l_flags & OCFS2_LOCK_REFRESHING)) {
3828 mlog(ML_BASTS, "lockres %s, ReQ: Lock Refreshing\n",
3829 lockres->l_name);
Mark Fashehf7fbfdd2006-09-13 21:02:29 -07003830 goto leave_requeue;
Sunil Mushran9b915182010-02-26 19:42:44 -08003831 }
Mark Fashehccd979b2005-12-15 14:31:24 -08003832
Mark Fasheh16d5b952006-09-13 21:10:12 -07003833 new_level = ocfs2_highest_compat_lock_level(lockres->l_blocking);
3834
3835 if (lockres->l_ops->check_downconvert
Sunil Mushran9b915182010-02-26 19:42:44 -08003836 && !lockres->l_ops->check_downconvert(lockres, new_level)) {
3837 mlog(ML_BASTS, "lockres %s, ReQ: Checkpointing\n",
3838 lockres->l_name);
Mark Fasheh16d5b952006-09-13 21:10:12 -07003839 goto leave_requeue;
Sunil Mushran9b915182010-02-26 19:42:44 -08003840 }
Mark Fasheh16d5b952006-09-13 21:10:12 -07003841
Mark Fashehccd979b2005-12-15 14:31:24 -08003842 /* If we get here, then we know that there are no more
3843 * incompatible holders (and anyone asking for an incompatible
3844 * lock is blocked). We can now downconvert the lock */
Mark Fashehcc567d82006-09-13 21:52:21 -07003845 if (!lockres->l_ops->downconvert_worker)
Mark Fashehccd979b2005-12-15 14:31:24 -08003846 goto downconvert;
3847
3848 /* Some lockres types want to do a bit of work before
3849 * downconverting a lock. Allow that here. The worker function
3850 * may sleep, so we save off a copy of what we're blocking as
3851 * it may change while we're not holding the spin lock. */
3852 blocking = lockres->l_blocking;
Sunil Mushran079b8052010-02-03 10:16:54 -08003853 level = lockres->l_level;
Mark Fashehccd979b2005-12-15 14:31:24 -08003854 spin_unlock_irqrestore(&lockres->l_lock, flags);
3855
Mark Fashehcc567d82006-09-13 21:52:21 -07003856 ctl->unblock_action = lockres->l_ops->downconvert_worker(lockres, blocking);
Mark Fashehd680efe2006-09-08 14:14:34 -07003857
Sunil Mushran9b915182010-02-26 19:42:44 -08003858 if (ctl->unblock_action == UNBLOCK_STOP_POST) {
3859 mlog(ML_BASTS, "lockres %s, UNBLOCK_STOP_POST\n",
3860 lockres->l_name);
Mark Fashehd680efe2006-09-08 14:14:34 -07003861 goto leave;
Sunil Mushran9b915182010-02-26 19:42:44 -08003862 }
Mark Fashehccd979b2005-12-15 14:31:24 -08003863
3864 spin_lock_irqsave(&lockres->l_lock, flags);
Sunil Mushran079b8052010-02-03 10:16:54 -08003865 if ((blocking != lockres->l_blocking) || (level != lockres->l_level)) {
Mark Fashehccd979b2005-12-15 14:31:24 -08003866 /* If this changed underneath us, then we can't drop
3867 * it just yet. */
Sunil Mushran9b915182010-02-26 19:42:44 -08003868 mlog(ML_BASTS, "lockres %s, block=%d:%d, level=%d:%d, "
3869 "Recheck\n", lockres->l_name, blocking,
3870 lockres->l_blocking, level, lockres->l_level);
Mark Fashehccd979b2005-12-15 14:31:24 -08003871 goto recheck;
3872 }
3873
3874downconvert:
Mark Fashehd680efe2006-09-08 14:14:34 -07003875 ctl->requeue = 0;
Mark Fashehccd979b2005-12-15 14:31:24 -08003876
Mark Fasheh5ef0d4e2006-09-13 21:21:52 -07003877 if (lockres->l_ops->flags & LOCK_TYPE_USES_LVB) {
Joel Beckerbd3e7612008-02-01 12:14:57 -08003878 if (lockres->l_level == DLM_LOCK_EX)
Mark Fasheh5ef0d4e2006-09-13 21:21:52 -07003879 set_lvb = 1;
3880
3881 /*
3882 * We only set the lvb if the lock has been fully
3883 * refreshed - otherwise we risk setting stale
3884 * data. Otherwise, there's no need to actually clear
 3885 * out the lvb here as its value is still valid.
3886 */
3887 if (set_lvb && !(lockres->l_flags & OCFS2_LOCK_NEEDS_REFRESH))
3888 lockres->l_ops->set_lvb(lockres);
3889 }
3890
Joel Beckerde551242008-02-01 14:45:08 -08003891 gen = ocfs2_prepare_downconvert(lockres, new_level);
	spin_unlock_irqrestore(&lockres->l_lock, flags);
	ret = ocfs2_downconvert_lock(osb, lockres, new_level, set_lvb,
				     gen);

leave:
	if (ret)
		mlog_errno(ret);
	return ret;

leave_requeue:
	spin_unlock_irqrestore(&lockres->l_lock, flags);
	ctl->requeue = 1;

	return 0;
}

static int ocfs2_data_convert_worker(struct ocfs2_lock_res *lockres,
				     int blocking)
{
	struct inode *inode;
	struct address_space *mapping;
	struct ocfs2_inode_info *oi;

	inode = ocfs2_lock_res_inode(lockres);
	mapping = inode->i_mapping;

	if (S_ISDIR(inode->i_mode)) {
		oi = OCFS2_I(inode);
		oi->ip_dir_lock_gen++;
		mlog(0, "generation: %u\n", oi->ip_dir_lock_gen);
		goto out;
	}

	if (!S_ISREG(inode->i_mode))
		goto out;

	/*
	 * We need this before the filemap_fdatawrite() so that it can
	 * transfer the dirty bit from the PTE to the
	 * page. Unfortunately this means that even for EX->PR
	 * downconverts, we'll lose our mappings and have to build
	 * them up again.
	 */
	unmap_mapping_range(mapping, 0, 0, 0);

	if (filemap_fdatawrite(mapping)) {
		mlog(ML_ERROR, "Could not sync inode %llu for downconvert!",
		     (unsigned long long)OCFS2_I(inode)->ip_blkno);
	}
	sync_mapping_buffers(mapping);
	if (blocking == DLM_LOCK_EX) {
		truncate_inode_pages(mapping, 0);
	} else {
		/* We only need to wait on the I/O if we're not also
		 * truncating pages because truncate_inode_pages waits
		 * for us above. We don't truncate pages if we're
		 * blocking anything < EXMODE because we want to keep
		 * them around in that case. */
		filemap_fdatawait(mapping);
	}

	forget_all_cached_acls(inode);

out:
	return UNBLOCK_CONTINUE;
}

static int ocfs2_ci_checkpointed(struct ocfs2_caching_info *ci,
				 struct ocfs2_lock_res *lockres,
				 int new_level)
{
	int checkpointed = ocfs2_ci_fully_checkpointed(ci);

	BUG_ON(new_level != DLM_LOCK_NL && new_level != DLM_LOCK_PR);
	BUG_ON(lockres->l_level != DLM_LOCK_EX && !checkpointed);

	if (checkpointed)
		return 1;

	ocfs2_start_checkpoint(OCFS2_SB(ocfs2_metadata_cache_get_super(ci)));
	return 0;
}

static int ocfs2_check_meta_downconvert(struct ocfs2_lock_res *lockres,
					int new_level)
{
	struct inode *inode = ocfs2_lock_res_inode(lockres);

	return ocfs2_ci_checkpointed(INODE_CACHE(inode), lockres, new_level);
}

static void ocfs2_set_meta_lvb(struct ocfs2_lock_res *lockres)
{
	struct inode *inode = ocfs2_lock_res_inode(lockres);

	__ocfs2_stuff_meta_lvb(inode);
}

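/*
 * Illustrative sketch of how the callbacks above are wired up.  A lock
 * type exposes them through its lockres ops table declared earlier in this
 * file; the struct and field names in this (non-compiled) initializer are
 * assumptions based on the callback roles seen here, not a definitive
 * listing.
 */
#if 0
static struct ocfs2_lock_res_ops example_inode_lops = {
	.get_osb		= ocfs2_get_inode_osb,
	.check_downconvert	= ocfs2_check_meta_downconvert,
	.set_lvb		= ocfs2_set_meta_lvb,
	.downconvert_worker	= ocfs2_data_convert_worker,
};
#endif
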
/*
 * Does the final reference drop on our dentry lock. Right now this
 * happens in the downconvert thread, but we could choose to simplify the
 * dlmglue API and push these off to the ocfs2_wq in the future.
 */
static void ocfs2_dentry_post_unlock(struct ocfs2_super *osb,
				     struct ocfs2_lock_res *lockres)
{
	struct ocfs2_dentry_lock *dl = ocfs2_lock_res_dl(lockres);
	ocfs2_dentry_lock_put(osb, dl);
}

/*
 * d_delete() matching dentries before the lock downconvert.
 *
 * At this point, any process waiting to destroy the
 * dentry_lock due to the last ref count is stopped by the
 * OCFS2_LOCK_QUEUED flag.
 *
 * We have two potential problems:
 *
 * 1) If we do the last reference drop on our dentry_lock (via dput)
 *    we'll wind up in ocfs2_release_dentry_lock(), waiting on
 *    the downconvert to finish. Instead we take an elevated
 *    reference and push the drop until after we've completed our
 *    unblock processing.
 *
 * 2) There might be another process with a final reference,
 *    waiting on us to finish processing. If this is the case, we
 *    detect it and exit out - there are no more dentries anyway.
 */
static int ocfs2_dentry_convert_worker(struct ocfs2_lock_res *lockres,
				       int blocking)
{
	struct ocfs2_dentry_lock *dl = ocfs2_lock_res_dl(lockres);
	struct ocfs2_inode_info *oi = OCFS2_I(dl->dl_inode);
	struct dentry *dentry;
	unsigned long flags;
	int extra_ref = 0;

	/*
	 * This node is blocking another node from getting a read
	 * lock. This happens when we've renamed within a
	 * directory. We've forced the other nodes to d_delete(), but
	 * we never actually dropped our lock because it's still
	 * valid. The downconvert code will retain a PR for this node,
	 * so there's no further work to do.
	 */
	if (blocking == DLM_LOCK_PR)
		return UNBLOCK_CONTINUE;

	/*
	 * Mark this inode as potentially orphaned. The code in
	 * ocfs2_delete_inode() will figure out whether it actually
	 * needs to be freed or not.
	 */
	spin_lock(&oi->ip_lock);
	oi->ip_flags |= OCFS2_INODE_MAYBE_ORPHANED;
	spin_unlock(&oi->ip_lock);

	/*
	 * Yuck. We need to make sure, however, that the check of
	 * OCFS2_LOCK_FREEING and the extra reference are atomic with
	 * respect to a reference decrement or the setting of that
	 * flag.
	 */
	spin_lock_irqsave(&lockres->l_lock, flags);
	spin_lock(&dentry_attach_lock);
	if (!(lockres->l_flags & OCFS2_LOCK_FREEING)
	    && dl->dl_count) {
		dl->dl_count++;
		extra_ref = 1;
	}
	spin_unlock(&dentry_attach_lock);
	spin_unlock_irqrestore(&lockres->l_lock, flags);

	mlog(0, "extra_ref = %d\n", extra_ref);

	/*
	 * We have a process waiting on us in ocfs2_dentry_iput(),
	 * which means we can't have any more outstanding
	 * aliases. There's no need to do any more work.
	 */
	if (!extra_ref)
		return UNBLOCK_CONTINUE;

	spin_lock(&dentry_attach_lock);
	while (1) {
		dentry = ocfs2_find_local_alias(dl->dl_inode,
						dl->dl_parent_blkno, 1);
		if (!dentry)
			break;
		spin_unlock(&dentry_attach_lock);

		if (S_ISDIR(dl->dl_inode->i_mode))
			shrink_dcache_parent(dentry);

		mlog(0, "d_delete(%pd);\n", dentry);

		/*
		 * The following dcache calls may do an
		 * iput(). Normally we don't want that from the
		 * downconverting thread, but in this case it's ok
		 * because the requesting node already has an
		 * exclusive lock on the inode, so it can't be queued
		 * for a downconvert.
		 */
		d_delete(dentry);
		dput(dentry);

		spin_lock(&dentry_attach_lock);
	}
	spin_unlock(&dentry_attach_lock);

	/*
	 * If we are the last holder of this dentry lock, there is no
	 * reason to downconvert so skip straight to the unlock.
	 */
	if (dl->dl_count == 1)
		return UNBLOCK_STOP_POST;

	return UNBLOCK_CONTINUE_POST;
}

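/*
 * Note on the return values above: UNBLOCK_CONTINUE_POST and
 * UNBLOCK_STOP_POST both cause ocfs2_process_blocked_lock() below to call
 * the lock type's ->post_unlock() handler, which for dentry locks is
 * ocfs2_dentry_post_unlock().  That handler performs the
 * ocfs2_dentry_lock_put() which balances the extra dl_count reference
 * taken at the top of this worker.
 */
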
static int ocfs2_check_refcount_downconvert(struct ocfs2_lock_res *lockres,
					    int new_level)
{
	struct ocfs2_refcount_tree *tree =
				ocfs2_lock_res_refcount_tree(lockres);

	return ocfs2_ci_checkpointed(&tree->rf_ci, lockres, new_level);
}

static int ocfs2_refcount_convert_worker(struct ocfs2_lock_res *lockres,
					 int blocking)
{
	struct ocfs2_refcount_tree *tree =
				ocfs2_lock_res_refcount_tree(lockres);

	ocfs2_metadata_cache_purge(&tree->rf_ci);

	return UNBLOCK_CONTINUE;
}

static void ocfs2_set_qinfo_lvb(struct ocfs2_lock_res *lockres)
{
	struct ocfs2_qinfo_lvb *lvb;
	struct ocfs2_mem_dqinfo *oinfo = ocfs2_lock_res_qinfo(lockres);
	struct mem_dqinfo *info = sb_dqinfo(oinfo->dqi_gi.dqi_sb,
					    oinfo->dqi_gi.dqi_type);

	lvb = ocfs2_dlm_lvb(&lockres->l_lksb);
	lvb->lvb_version = OCFS2_QINFO_LVB_VERSION;
	lvb->lvb_bgrace = cpu_to_be32(info->dqi_bgrace);
	lvb->lvb_igrace = cpu_to_be32(info->dqi_igrace);
	lvb->lvb_syncms = cpu_to_be32(oinfo->dqi_syncms);
	lvb->lvb_blocks = cpu_to_be32(oinfo->dqi_gi.dqi_blocks);
	lvb->lvb_free_blk = cpu_to_be32(oinfo->dqi_gi.dqi_free_blk);
	lvb->lvb_free_entry = cpu_to_be32(oinfo->dqi_gi.dqi_free_entry);
}

void ocfs2_qinfo_unlock(struct ocfs2_mem_dqinfo *oinfo, int ex)
{
	struct ocfs2_lock_res *lockres = &oinfo->dqi_gqlock;
	struct ocfs2_super *osb = OCFS2_SB(oinfo->dqi_gi.dqi_sb);
	int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR;

	if (!ocfs2_is_hard_readonly(osb) && !ocfs2_mount_local(osb))
		ocfs2_cluster_unlock(osb, lockres, level);
}

static int ocfs2_refresh_qinfo(struct ocfs2_mem_dqinfo *oinfo)
{
	struct mem_dqinfo *info = sb_dqinfo(oinfo->dqi_gi.dqi_sb,
					    oinfo->dqi_gi.dqi_type);
	struct ocfs2_lock_res *lockres = &oinfo->dqi_gqlock;
	struct ocfs2_qinfo_lvb *lvb = ocfs2_dlm_lvb(&lockres->l_lksb);
	struct buffer_head *bh = NULL;
	struct ocfs2_global_disk_dqinfo *gdinfo;
	int status = 0;

	if (ocfs2_dlm_lvb_valid(&lockres->l_lksb) &&
	    lvb->lvb_version == OCFS2_QINFO_LVB_VERSION) {
		info->dqi_bgrace = be32_to_cpu(lvb->lvb_bgrace);
		info->dqi_igrace = be32_to_cpu(lvb->lvb_igrace);
		oinfo->dqi_syncms = be32_to_cpu(lvb->lvb_syncms);
		oinfo->dqi_gi.dqi_blocks = be32_to_cpu(lvb->lvb_blocks);
		oinfo->dqi_gi.dqi_free_blk = be32_to_cpu(lvb->lvb_free_blk);
		oinfo->dqi_gi.dqi_free_entry =
					be32_to_cpu(lvb->lvb_free_entry);
	} else {
		status = ocfs2_read_quota_phys_block(oinfo->dqi_gqinode,
						     oinfo->dqi_giblk, &bh);
		if (status) {
			mlog_errno(status);
			goto bail;
		}
		gdinfo = (struct ocfs2_global_disk_dqinfo *)
					(bh->b_data + OCFS2_GLOBAL_INFO_OFF);
		info->dqi_bgrace = le32_to_cpu(gdinfo->dqi_bgrace);
		info->dqi_igrace = le32_to_cpu(gdinfo->dqi_igrace);
		oinfo->dqi_syncms = le32_to_cpu(gdinfo->dqi_syncms);
		oinfo->dqi_gi.dqi_blocks = le32_to_cpu(gdinfo->dqi_blocks);
		oinfo->dqi_gi.dqi_free_blk = le32_to_cpu(gdinfo->dqi_free_blk);
		oinfo->dqi_gi.dqi_free_entry =
					le32_to_cpu(gdinfo->dqi_free_entry);
		brelse(bh);
		ocfs2_track_lock_refresh(lockres);
	}

bail:
	return status;
}

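/*
 * Note on the two paths above: ocfs2_set_qinfo_lvb() publishes the
 * in-memory global quota info through the lock value block, and
 * ocfs2_refresh_qinfo() consumes that LVB when it is valid and carries the
 * expected version.  Only when the LVB cannot be trusted do we fall back
 * to rereading the global quota info block from disk via
 * ocfs2_read_quota_phys_block().
 */
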
/* Lock quota info; this function expects at least a shared lock on the
 * quota file so that we can safely refresh the quota info from disk. */
int ocfs2_qinfo_lock(struct ocfs2_mem_dqinfo *oinfo, int ex)
{
	struct ocfs2_lock_res *lockres = &oinfo->dqi_gqlock;
	struct ocfs2_super *osb = OCFS2_SB(oinfo->dqi_gi.dqi_sb);
	int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR;
	int status = 0;

	/* On RO devices, locking really isn't needed... */
	if (ocfs2_is_hard_readonly(osb)) {
		if (ex)
			status = -EROFS;
		goto bail;
	}
	if (ocfs2_mount_local(osb))
		goto bail;

	status = ocfs2_cluster_lock(osb, lockres, level, 0, 0);
	if (status < 0) {
		mlog_errno(status);
		goto bail;
	}
	if (!ocfs2_should_refresh_lock_res(lockres))
		goto bail;
	/* OK, we have the lock but we need to refresh the quota info */
	status = ocfs2_refresh_qinfo(oinfo);
	if (status)
		ocfs2_qinfo_unlock(oinfo, ex);
	ocfs2_complete_lock_res_refresh(lockres, status);
bail:
	return status;
}

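/*
 * Minimal usage sketch of the pair above: callers bracket access to the
 * global quota info with ocfs2_qinfo_lock()/ocfs2_qinfo_unlock(), passing
 * ex = 1 for exclusive access.  The wrapper name below is hypothetical and
 * only illustrates the expected call pattern.
 */
#if 0
static int example_qinfo_modify(struct ocfs2_mem_dqinfo *oinfo)
{
	int status;

	status = ocfs2_qinfo_lock(oinfo, 1);	/* DLM_LOCK_EX */
	if (status < 0) {
		mlog_errno(status);
		return status;
	}

	/* ... update the in-memory global quota info here ... */

	ocfs2_qinfo_unlock(oinfo, 1);
	return 0;
}
#endif
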
int ocfs2_refcount_lock(struct ocfs2_refcount_tree *ref_tree, int ex)
{
	int status;
	int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR;
	struct ocfs2_lock_res *lockres = &ref_tree->rf_lockres;
	struct ocfs2_super *osb = lockres->l_priv;

	if (ocfs2_is_hard_readonly(osb))
		return -EROFS;

	if (ocfs2_mount_local(osb))
		return 0;

	status = ocfs2_cluster_lock(osb, lockres, level, 0, 0);
	if (status < 0)
		mlog_errno(status);

	return status;
}

void ocfs2_refcount_unlock(struct ocfs2_refcount_tree *ref_tree, int ex)
{
	int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR;
	struct ocfs2_lock_res *lockres = &ref_tree->rf_lockres;
	struct ocfs2_super *osb = lockres->l_priv;

	if (!ocfs2_mount_local(osb))
		ocfs2_cluster_unlock(osb, lockres, level);
}

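/*
 * Unlike the quota helpers above, the refcount lock/unlock pair involves
 * no LVB-based refresh step here; instead ocfs2_refcount_convert_worker()
 * purges the cached refcount tree metadata when another node forces a
 * downconvert.
 */
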
static void ocfs2_process_blocked_lock(struct ocfs2_super *osb,
				       struct ocfs2_lock_res *lockres)
{
	int status;
	struct ocfs2_unblock_ctl ctl = {0, 0,};
	unsigned long flags;

	/* Our reference to the lockres in this function can be
	 * considered valid until we remove the OCFS2_LOCK_QUEUED
	 * flag. */

	BUG_ON(!lockres);
	BUG_ON(!lockres->l_ops);

	mlog(ML_BASTS, "lockres %s blocked\n", lockres->l_name);

	/* Detect whether a lock has been marked as going away while
	 * the downconvert thread was processing other things. A lock can
	 * still be marked with OCFS2_LOCK_FREEING after this check,
	 * but short-circuiting here still saves us some work. */
	spin_lock_irqsave(&lockres->l_lock, flags);
	if (lockres->l_flags & OCFS2_LOCK_FREEING)
		goto unqueue;
	spin_unlock_irqrestore(&lockres->l_lock, flags);

	status = ocfs2_unblock_lock(osb, lockres, &ctl);
	if (status < 0)
		mlog_errno(status);

	spin_lock_irqsave(&lockres->l_lock, flags);
unqueue:
	if (lockres->l_flags & OCFS2_LOCK_FREEING || !ctl.requeue) {
		lockres_clear_flags(lockres, OCFS2_LOCK_QUEUED);
	} else
		ocfs2_schedule_blocked_lock(osb, lockres);

	mlog(ML_BASTS, "lockres %s, requeue = %s.\n", lockres->l_name,
	     ctl.requeue ? "yes" : "no");
	spin_unlock_irqrestore(&lockres->l_lock, flags);

	if (ctl.unblock_action != UNBLOCK_CONTINUE
	    && lockres->l_ops->post_unlock)
		lockres->l_ops->post_unlock(osb, lockres);
}

static void ocfs2_schedule_blocked_lock(struct ocfs2_super *osb,
					struct ocfs2_lock_res *lockres)
{
	unsigned long flags;

	assert_spin_locked(&lockres->l_lock);

	if (lockres->l_flags & OCFS2_LOCK_FREEING) {
		/* Do not schedule a lock for downconvert when it's on
		 * the way to destruction - any nodes wanting access
		 * to the resource will get it soon. */
		mlog(ML_BASTS, "lockres %s won't be scheduled: flags 0x%lx\n",
		     lockres->l_name, lockres->l_flags);
		return;
	}

	lockres_or_flags(lockres, OCFS2_LOCK_QUEUED);

	spin_lock_irqsave(&osb->dc_task_lock, flags);
	if (list_empty(&lockres->l_blocked_list)) {
		list_add_tail(&lockres->l_blocked_list,
			      &osb->blocked_lock_list);
		osb->blocked_lock_count++;
	}
	spin_unlock_irqrestore(&osb->dc_task_lock, flags);
}

static void ocfs2_downconvert_thread_do_work(struct ocfs2_super *osb)
{
	unsigned long processed;
	unsigned long flags;
	struct ocfs2_lock_res *lockres;

	spin_lock_irqsave(&osb->dc_task_lock, flags);
	/* grab this early so we know to try again if a state change and
	 * wake-up happen part-way through our work */
	osb->dc_work_sequence = osb->dc_wake_sequence;

	processed = osb->blocked_lock_count;
	/*
	 * blocked lock processing in this loop might call iput which can
	 * remove items off osb->blocked_lock_list. Downconvert up to
	 * 'processed' locks, but stop short if some were removed via
	 * ocfs2_mark_lockres_freeing() while we were downconverting.
	 */
	while (processed && !list_empty(&osb->blocked_lock_list)) {
		lockres = list_entry(osb->blocked_lock_list.next,
				     struct ocfs2_lock_res, l_blocked_list);
		list_del_init(&lockres->l_blocked_list);
		osb->blocked_lock_count--;
		spin_unlock_irqrestore(&osb->dc_task_lock, flags);

		BUG_ON(!processed);
		processed--;

		ocfs2_process_blocked_lock(osb, lockres);

		spin_lock_irqsave(&osb->dc_task_lock, flags);
	}
	spin_unlock_irqrestore(&osb->dc_task_lock, flags);
}

static int ocfs2_downconvert_thread_lists_empty(struct ocfs2_super *osb)
{
	int empty = 0;
	unsigned long flags;

	spin_lock_irqsave(&osb->dc_task_lock, flags);
	if (list_empty(&osb->blocked_lock_list))
		empty = 1;

	spin_unlock_irqrestore(&osb->dc_task_lock, flags);
	return empty;
}

static int ocfs2_downconvert_thread_should_wake(struct ocfs2_super *osb)
{
	int should_wake = 0;
	unsigned long flags;

	spin_lock_irqsave(&osb->dc_task_lock, flags);
	if (osb->dc_work_sequence != osb->dc_wake_sequence)
		should_wake = 1;
	spin_unlock_irqrestore(&osb->dc_task_lock, flags);

	return should_wake;
}

static int ocfs2_downconvert_thread(void *arg)
{
	int status = 0;
	struct ocfs2_super *osb = arg;

	/* only quit once we've been asked to stop and there is no more
	 * work available */
	while (!(kthread_should_stop() &&
		ocfs2_downconvert_thread_lists_empty(osb))) {

		wait_event_interruptible(osb->dc_event,
					 ocfs2_downconvert_thread_should_wake(osb) ||
					 kthread_should_stop());

		mlog(0, "downconvert_thread: awoken\n");

		ocfs2_downconvert_thread_do_work(osb);
	}

	osb->dc_task = NULL;
	return status;
}

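/*
 * Minimal sketch of how the thread above is driven.  The mount and umount
 * paths live elsewhere in this file; the helper names, the thread name
 * string and the exact error handling below are assumptions made only to
 * show the kthread_run()/kthread_stop() pairing around
 * ocfs2_downconvert_thread().
 */
#if 0
static int example_start_downconvert_thread(struct ocfs2_super *osb)
{
	struct task_struct *task;

	task = kthread_run(ocfs2_downconvert_thread, osb, "ocfs2dc");
	if (IS_ERR(task))
		return PTR_ERR(task);

	osb->dc_task = task;
	return 0;
}

static void example_stop_downconvert_thread(struct ocfs2_super *osb)
{
	if (osb->dc_task)
		kthread_stop(osb->dc_task);
}
#endif
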
void ocfs2_wake_downconvert_thread(struct ocfs2_super *osb)
{
	unsigned long flags;

	spin_lock_irqsave(&osb->dc_task_lock, flags);
	/* make sure the downconvert thread gets a swipe at whatever
	 * changes the caller may have made to the blocked lock state */
	osb->dc_wake_sequence++;
	spin_unlock_irqrestore(&osb->dc_task_lock, flags);
	wake_up(&osb->dc_event);
}