percpu-rwsem: use synchronize_sched_expedited

Use synchronize_sched_expedited() instead of synchronize_sched()
to improve mount speed.

synchronize_sched_expedited() forces the RCU-sched grace period to
complete quickly instead of waiting out a regular grace period, which
can take tens of milliseconds.  Since the write side of the per-CPU
rwsem waits for such a grace period and is taken on the mount path,
this makes mount noticeably cheaper.

This patch improves mount time from 0.500s to 0.013s for Jeff's
test case.

Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Reported-and-tested-by: Jeff Chua <jeff.chua.linux@gmail.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
diff --git a/include/linux/percpu-rwsem.h b/include/linux/percpu-rwsem.h
index 250a4ac..bd1e860 100644
--- a/include/linux/percpu-rwsem.h
+++ b/include/linux/percpu-rwsem.h
@@ -13,7 +13,7 @@
 };
 
 #define light_mb()	barrier()
-#define heavy_mb()	synchronize_sched()
+#define heavy_mb()	synchronize_sched_expedited()
 
 static inline void percpu_down_read(struct percpu_rw_semaphore *p)
 {
@@ -51,7 +51,7 @@
 {
 	mutex_lock(&p->mtx);
 	p->locked = true;
-	synchronize_sched(); /* make sure that all readers exit the rcu_read_lock_sched region */
+	synchronize_sched_expedited(); /* make sure that all readers exit the rcu_read_lock_sched region */
 	while (__percpu_count(p->counters))
 		msleep(1);
 	heavy_mb(); /* C, between read of p->counter and write to data, paired with B */
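
For context, here is a minimal usage sketch (not part of this patch) of the
percpu_rw_semaphore API whose write side is changed above. The structure and
function names (example_dev, example_read_blocksize, example_set_blocksize)
are hypothetical; only the percpu_* calls come from the header being patched.
The sketch shows where the grace-period cost sits: readers stay on a cheap
per-CPU fast path, while the writer pays for the grace period inside
percpu_down_write(), which is exactly the wait this patch shortens.

	#include <linux/percpu-rwsem.h>
	#include <linux/errno.h>

	struct example_dev {				/* hypothetical structure */
		struct percpu_rw_semaphore sem;
		unsigned int block_size;
	};

	static int example_dev_init(struct example_dev *dev)
	{
		dev->block_size = 512;
		/* allocates the per-CPU reader counters */
		return percpu_init_rwsem(&dev->sem);
	}

	/* Hot path: may run concurrently on many CPUs, must stay cheap. */
	static unsigned int example_read_blocksize(struct example_dev *dev)
	{
		unsigned int size;

		percpu_down_read(&dev->sem);	/* rcu_read_lock_sched + this_cpu_inc */
		size = dev->block_size;
		percpu_up_read(&dev->sem);
		return size;
	}

	/* Cold path (e.g. called during mount): takes the write side. */
	static void example_set_blocksize(struct example_dev *dev, unsigned int size)
	{
		/*
		 * percpu_down_write() sets ->locked and then waits for an
		 * RCU-sched grace period so no reader can still be inside the
		 * rcu_read_lock_sched() fast path.  With this patch that wait
		 * uses synchronize_sched_expedited() rather than a full normal
		 * grace period.
		 */
		percpu_down_write(&dev->sem);
		dev->block_size = size;
		percpu_up_write(&dev->sem);
	}

The read side is unaffected by this patch; only callers of the write side
(percpu_down_write/percpu_up_write) see the speedup.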