change sb_writers to use percpu_rw_semaphore
We can remove everything from struct sb_writers except frozen
and add the array of percpu_rw_semaphore's instead.
This patch doesn't remove sb_writers->wait_unfrozen yet, we keep
it for get_super_thawed(). We will probably remove it later.
This change tries to address the following problems:
- Firstly, __sb_start_write() looks simply buggy. It does
__sb_end_write() if it sees ->frozen, but if it migrates
to another CPU before percpu_counter_dec(), sb_wait_write()
can wrongly succeed if there is another task which holds
the same "semaphore": sb_wait_write() can miss the result
of the previous percpu_counter_inc() but see the result
of this percpu_counter_dec().
- As Dave Hansen reports, it is suboptimal. The trivial
microbenchmark that writes to a tmpfs file in a loop runs
12% faster if we change this code to rely on RCU and kill
the memory barriers.
- This code doesn't look simple. It would be better to rely
on the generic locking code.
According to Dave, this change adds the same performance
improvement.
Note: with this change both freeze_super() and thaw_super() will do
synchronize_sched_expedited() 3 times. This is just ugly. But:
- This will be "fixed" by the rcu_sync changes we are going
to merge. After that freeze_super()->percpu_down_write()
will use synchronize_sched(), and thaw_super() won't use
synchronize() at all.
This doesn't need any changes in fs/super.c.
- Once we merge rcu_sync changes, we can also change super.c
so that all wb_write->rw_sem's will share the single ->rss
in struct sb_writes, then freeze_super() will need only one
synchronize_sched().
Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Reviewed-by: Jan Kara <jack@suse.com>
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 4bed789..ce356f6 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1,7 +1,6 @@
#ifndef _LINUX_FS_H
#define _LINUX_FS_H
-
#include <linux/linkage.h>
#include <linux/wait.h>
#include <linux/kdev_t.h>
@@ -31,6 +30,7 @@
#include <linux/percpu-rwsem.h>
#include <linux/blk_types.h>
#include <linux/workqueue.h>
+#include <linux/percpu-rwsem.h>
#include <asm/byteorder.h>
#include <uapi/linux/fs.h>
@@ -1275,16 +1275,9 @@
#define SB_FREEZE_LEVELS (SB_FREEZE_COMPLETE - 1)
struct sb_writers {
- /* Counters for counting writers at each level */
- struct percpu_counter counter[SB_FREEZE_LEVELS];
- wait_queue_head_t wait; /* queue for waiting for
- writers / faults to finish */
- int frozen; /* Is sb frozen? */
- wait_queue_head_t wait_unfrozen; /* queue for waiting for
- sb to be thawed */
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
- struct lockdep_map lock_map[SB_FREEZE_LEVELS];
-#endif
+ int frozen; /* Is sb frozen? */
+ wait_queue_head_t wait_unfrozen; /* for get_super_thawed() */
+ struct percpu_rw_semaphore rw_sem[SB_FREEZE_LEVELS];
};
struct super_block {
@@ -1393,9 +1386,9 @@
int __sb_start_write(struct super_block *sb, int level, bool wait);
#define __sb_writers_acquired(sb, lev) \
- rwsem_acquire_read(&(sb)->s_writers.lock_map[(lev)-1], 0, 1, _THIS_IP_)
+ percpu_rwsem_acquire(&(sb)->s_writers.rw_sem[(lev)-1], 1, _THIS_IP_)
#define __sb_writers_release(sb, lev) \
- rwsem_release(&(sb)->s_writers.lock_map[(lev)-1], 1, _THIS_IP_)
+ percpu_rwsem_release(&(sb)->s_writers.rw_sem[(lev)-1], 1, _THIS_IP_)
/**
* sb_end_write - drop write access to a superblock