change sb_writers to use percpu_rw_semaphore

We can remove everything from struct sb_writers except frozen
and add the array of percpu_rw_semaphore's instead.

This patch doesn't remove sb_writers->wait_unfrozen yet, we keep
it for get_super_thawed(). We will probably remove it later.

This change tries to address the following problems:

	- Firstly, __sb_start_write() looks simply buggy. It does
	  __sb_end_write() if it sees ->frozen, but if it migrates
	  to another CPU before percpu_counter_dec(), sb_wait_write()
	  can wrongly succeed if there is another task which holds
	  the same "semaphore": sb_wait_write() can miss the result
	  of the previous percpu_counter_inc() but see the result
	  of this percpu_counter_dec().

	- As Dave Hansen reports, it is suboptimal. The trivial
	  microbenchmark that writes to a tmpfs file in a loop runs
	  12% faster if we change this code to rely on RCU and kill
	  the memory barriers.

	- This code doesn't look simple. It would be better to rely
	  on the generic locking code.

	  According to Dave, this change adds the same performance
	  improvement.

Note: with this change both freeze_super() and thaw_super() will do
synchronize_sched_expedited() 3 times. This is just ugly. But:

	- This will be "fixed" by the rcu_sync changes we are going
	  to merge. After that freeze_super()->percpu_down_write()
	  will use synchronize_sched(), and thaw_super() won't use
	  synchronize() at all.

	  This doesn't need any changes in fs/super.c.

	- Once we merge rcu_sync changes, we can also change super.c
	  so that all wb_write->rw_sem's will share the single ->rss
	  in struct sb_writes, then freeze_super() will need only one
	  synchronize_sched().

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Reviewed-by: Jan Kara <jack@suse.com>
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 4bed789..ce356f6 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1,7 +1,6 @@
 #ifndef _LINUX_FS_H
 #define _LINUX_FS_H
 
-
 #include <linux/linkage.h>
 #include <linux/wait.h>
 #include <linux/kdev_t.h>
@@ -31,6 +30,7 @@
 #include <linux/percpu-rwsem.h>
 #include <linux/blk_types.h>
 #include <linux/workqueue.h>
+#include <linux/percpu-rwsem.h>
 
 #include <asm/byteorder.h>
 #include <uapi/linux/fs.h>
@@ -1275,16 +1275,9 @@
 #define SB_FREEZE_LEVELS (SB_FREEZE_COMPLETE - 1)
 
 struct sb_writers {
-	/* Counters for counting writers at each level */
-	struct percpu_counter	counter[SB_FREEZE_LEVELS];
-	wait_queue_head_t	wait;		/* queue for waiting for
-						   writers / faults to finish */
-	int			frozen;		/* Is sb frozen? */
-	wait_queue_head_t	wait_unfrozen;	/* queue for waiting for
-						   sb to be thawed */
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
-	struct lockdep_map	lock_map[SB_FREEZE_LEVELS];
-#endif
+	int				frozen;		/* Is sb frozen? */
+	wait_queue_head_t		wait_unfrozen;	/* for get_super_thawed() */
+	struct percpu_rw_semaphore	rw_sem[SB_FREEZE_LEVELS];
 };
 
 struct super_block {
@@ -1393,9 +1386,9 @@
 int __sb_start_write(struct super_block *sb, int level, bool wait);
 
 #define __sb_writers_acquired(sb, lev)	\
-	rwsem_acquire_read(&(sb)->s_writers.lock_map[(lev)-1], 0, 1, _THIS_IP_)
+	percpu_rwsem_acquire(&(sb)->s_writers.rw_sem[(lev)-1], 1, _THIS_IP_)
 #define __sb_writers_release(sb, lev)	\
-	rwsem_release(&(sb)->s_writers.lock_map[(lev)-1], 1, _THIS_IP_)
+	percpu_rwsem_release(&(sb)->s_writers.rw_sem[(lev)-1], 1, _THIS_IP_)
 
 /**
  * sb_end_write - drop write access to a superblock