blob: 5260660b655a006135c057076f7cdb9265a29fe4 [file] [log] [blame]
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (C) 2008 Oracle. All rights reserved.
*/
#include <linux/sched.h>
#include <linux/pagemap.h>
#include <linux/spinlock.h>
#include <linux/page-flags.h>
#include <asm/bug.h>
#include "misc.h"
#include "ctree.h"
#include "extent_io.h"
#include "locking.h"
/*
* Extent buffer locking
* =====================
*
* We use a rw_semaphore for tree locking, and the semantics are exactly the
* same:
*
* - reader/writer exclusion
* - writer/writer exclusion
* - reader/reader sharing
* - try-lock semantics for readers and writers
*
* Additionally we need one level nesting recursion, see below. The rwsem
* implementation does opportunistic spinning which reduces number of times the
* locking task needs to sleep.
*
*
* Lock recursion
* --------------
*
* A write operation on a tree might indirectly start a look up on the same
* tree. This can happen when btrfs_cow_block locks the tree and needs to
* lookup free extents.
*
* btrfs_cow_block
* ..
* alloc_tree_block_no_bg_flush
* btrfs_alloc_tree_block
* btrfs_reserve_extent
* ..
* load_free_space_cache
* ..
* btrfs_lookup_file_extent
* btrfs_search_slot
*
*/
/*
* __btrfs_tree_read_lock - lock extent buffer for read
* @eb: the eb to be locked
* @nest: the nesting level to be used for lockdep
* @recurse: if this lock is able to be recursed
*
* This takes the read lock on the extent buffer, using the specified nesting
* level for lockdep purposes.
*
* If you specify recurse = true, then we will allow this to be taken if we
* currently own the lock already. This should only be used in specific
* usecases, and the subsequent unlock will not change the state of the lock.
*/
void __btrfs_tree_read_lock(struct extent_buffer *eb, enum btrfs_lock_nesting nest,
bool recurse)
{
u64 start_ns = 0;
if (trace_btrfs_tree_read_lock_enabled())
start_ns = ktime_get_ns();
if (unlikely(recurse)) {
/* First see if we can grab the lock outright */
if (down_read_trylock(&eb->lock))
goto out;
/*
* Ok still doesn't necessarily mean we are already holding the
* lock, check the owner.
*/
if (eb->lock_owner != current->pid) {
down_read_nested(&eb->lock, nest);
goto out;
}
/*
* Ok we have actually recursed, but we should only be recursing
* once, so blow up if we're already recursed, otherwise set
* ->lock_recursed and carry on.
*/
BUG_ON(eb->lock_recursed);
eb->lock_recursed = true;
goto out;
}
down_read_nested(&eb->lock, nest);
out:
eb->lock_owner = current->pid;
trace_btrfs_tree_read_lock(eb, start_ns);
}
void btrfs_tree_read_lock(struct extent_buffer *eb)
{
__btrfs_tree_read_lock(eb, BTRFS_NESTING_NORMAL, false);
}
/*
* Try-lock for read.
*
* Retrun 1 if the rwlock has been taken, 0 otherwise
*/
int btrfs_try_tree_read_lock(struct extent_buffer *eb)
{
if (down_read_trylock(&eb->lock)) {
eb->lock_owner = current->pid;
trace_btrfs_try_tree_read_lock(eb);
return 1;
}
return 0;
}
/*
* Try-lock for write.
*
* Retrun 1 if the rwlock has been taken, 0 otherwise
*/
int btrfs_try_tree_write_lock(struct extent_buffer *eb)
{
if (down_write_trylock(&eb->lock)) {
eb->lock_owner = current->pid;
trace_btrfs_try_tree_write_lock(eb);
return 1;
}
return 0;
}
/*
* Release read lock. If the read lock was recursed then the lock stays in the
* original state that it was before it was recursively locked.
*/
void btrfs_tree_read_unlock(struct extent_buffer *eb)
{
trace_btrfs_tree_read_unlock(eb);
/*
* if we're nested, we have the write lock. No new locking
* is needed as long as we are the lock owner.
* The write unlock will do a barrier for us, and the lock_recursed
* field only matters to the lock owner.
*/
if (eb->lock_recursed && current->pid == eb->lock_owner) {
eb->lock_recursed = false;
return;
}
eb->lock_owner = 0;
up_read(&eb->lock);
}
/*
* __btrfs_tree_lock - lock eb for write
* @eb: the eb to lock
* @nest: the nesting to use for the lock
*
* Returns with the eb->lock write locked.
*/
void __btrfs_tree_lock(struct extent_buffer *eb, enum btrfs_lock_nesting nest)
__acquires(&eb->lock)
{
u64 start_ns = 0;
if (trace_btrfs_tree_lock_enabled())
start_ns = ktime_get_ns();
down_write_nested(&eb->lock, nest);
eb->lock_owner = current->pid;
trace_btrfs_tree_lock(eb, start_ns);
}
void btrfs_tree_lock(struct extent_buffer *eb)
{
__btrfs_tree_lock(eb, BTRFS_NESTING_NORMAL);
}
/*
* Release the write lock.
*/
void btrfs_tree_unlock(struct extent_buffer *eb)
{
trace_btrfs_tree_unlock(eb);
eb->lock_owner = 0;
up_write(&eb->lock);
}
/*
* This releases any locks held in the path starting at level and going all the
* way up to the root.
*
* btrfs_search_slot will keep the lock held on higher nodes in a few corner
* cases, such as COW of the block at slot zero in the node. This ignores
* those rules, and it should only be called when there are no more updates to
* be done higher up in the tree.
*/
void btrfs_unlock_up_safe(struct btrfs_path *path, int level)
{
int i;
if (path->keep_locks)
return;
for (i = level; i < BTRFS_MAX_LEVEL; i++) {
if (!path->nodes[i])
continue;
if (!path->locks[i])
continue;
btrfs_tree_unlock_rw(path->nodes[i], path->locks[i]);
path->locks[i] = 0;
}
}
/*
* Loop around taking references on and locking the root node of the tree until
* we end up with a lock on the root node.
*
* Return: root extent buffer with write lock held
*/
struct extent_buffer *btrfs_lock_root_node(struct btrfs_root *root)
{
struct extent_buffer *eb;
while (1) {
eb = btrfs_root_node(root);
btrfs_tree_lock(eb);
if (eb == root->node)
break;
btrfs_tree_unlock(eb);
free_extent_buffer(eb);
}
return eb;
}
/*
* Loop around taking references on and locking the root node of the tree until
* we end up with a lock on the root node.
*
* Return: root extent buffer with read lock held
*/
struct extent_buffer *__btrfs_read_lock_root_node(struct btrfs_root *root,
bool recurse)
{
struct extent_buffer *eb;
while (1) {
eb = btrfs_root_node(root);
__btrfs_tree_read_lock(eb, BTRFS_NESTING_NORMAL, recurse);
if (eb == root->node)
break;
btrfs_tree_read_unlock(eb);
free_extent_buffer(eb);
}
return eb;
}
/*
* DREW locks
* ==========
*
* DREW stands for double-reader-writer-exclusion lock. It's used in situation
* where you want to provide A-B exclusion but not AA or BB.
*
* Currently implementation gives more priority to reader. If a reader and a
* writer both race to acquire their respective sides of the lock the writer
* would yield its lock as soon as it detects a concurrent reader. Additionally
* if there are pending readers no new writers would be allowed to come in and
* acquire the lock.
*/
int btrfs_drew_lock_init(struct btrfs_drew_lock *lock)
{
int ret;
ret = percpu_counter_init(&lock->writers, 0, GFP_KERNEL);
if (ret)
return ret;
atomic_set(&lock->readers, 0);
init_waitqueue_head(&lock->pending_readers);
init_waitqueue_head(&lock->pending_writers);
return 0;
}
void btrfs_drew_lock_destroy(struct btrfs_drew_lock *lock)
{
percpu_counter_destroy(&lock->writers);
}
/* Return true if acquisition is successful, false otherwise */
bool btrfs_drew_try_write_lock(struct btrfs_drew_lock *lock)
{
if (atomic_read(&lock->readers))
return false;
percpu_counter_inc(&lock->writers);
/* Ensure writers count is updated before we check for pending readers */
smp_mb();
if (atomic_read(&lock->readers)) {
btrfs_drew_write_unlock(lock);
return false;
}
return true;
}
void btrfs_drew_write_lock(struct btrfs_drew_lock *lock)
{
while (true) {
if (btrfs_drew_try_write_lock(lock))
return;
wait_event(lock->pending_writers, !atomic_read(&lock->readers));
}
}
void btrfs_drew_write_unlock(struct btrfs_drew_lock *lock)
{
percpu_counter_dec(&lock->writers);
cond_wake_up(&lock->pending_readers);
}
void btrfs_drew_read_lock(struct btrfs_drew_lock *lock)
{
atomic_inc(&lock->readers);
/*
* Ensure the pending reader count is perceieved BEFORE this reader
* goes to sleep in case of active writers. This guarantees new writers
* won't be allowed and that the current reader will be woken up when
* the last active writer finishes its jobs.
*/
smp_mb__after_atomic();
wait_event(lock->pending_readers,
percpu_counter_sum(&lock->writers) == 0);
}
void btrfs_drew_read_unlock(struct btrfs_drew_lock *lock)
{
/*
* atomic_dec_and_test implies a full barrier, so woken up writers
* are guaranteed to see the decrement
*/
if (atomic_dec_and_test(&lock->readers))
wake_up(&lock->pending_writers);
}