raid10: improve random reads performance

RAID10 random read performance is lower than expected due to excessive spinlock
utilisation which is required mostly for rebuild/resync. Simplify allow_barrier
as it's in IO path and encounters a lot of unnecessary congestion.

As lower_barrier just takes a lock in order to decrement a counter, convert
counter (nr_pending) into atomic variable and remove the spin lock. There is
also a congestion for wake_up (it uses lock internally) so call it only when
it's really needed. As wake_up is not called constantly anymore, ensure process
waiting to raise a barrier is notified when there are no more waiting IOs.

Signed-off-by: Tomasz Majchrzak <tomasz.majchrzak@intel.com>
Signed-off-by: Shaohua Li <shli@fb.com>
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index f7f3c8a..cb1d887 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -905,7 +905,7 @@
 
 	/* Now wait for all pending IO to complete */
 	wait_event_lock_irq(conf->wait_barrier,
-			    !conf->nr_pending && conf->barrier < RESYNC_DEPTH,
+			    !atomic_read(&conf->nr_pending) && conf->barrier < RESYNC_DEPTH,
 			    conf->resync_lock);
 
 	spin_unlock_irq(&conf->resync_lock);
@@ -936,23 +936,23 @@
 		 */
 		wait_event_lock_irq(conf->wait_barrier,
 				    !conf->barrier ||
-				    (conf->nr_pending &&
+				    (atomic_read(&conf->nr_pending) &&
 				     current->bio_list &&
 				     !bio_list_empty(current->bio_list)),
 				    conf->resync_lock);
 		conf->nr_waiting--;
+		if (!conf->nr_waiting)
+			wake_up(&conf->wait_barrier);
 	}
-	conf->nr_pending++;
+	atomic_inc(&conf->nr_pending);
 	spin_unlock_irq(&conf->resync_lock);
 }
 
 static void allow_barrier(struct r10conf *conf)
 {
-	unsigned long flags;
-	spin_lock_irqsave(&conf->resync_lock, flags);
-	conf->nr_pending--;
-	spin_unlock_irqrestore(&conf->resync_lock, flags);
-	wake_up(&conf->wait_barrier);
+	if ((atomic_dec_and_test(&conf->nr_pending)) ||
+			(conf->array_freeze_pending))
+		wake_up(&conf->wait_barrier);
 }
 
 static void freeze_array(struct r10conf *conf, int extra)
@@ -970,13 +970,15 @@
 	 * we continue.
 	 */
 	spin_lock_irq(&conf->resync_lock);
+	conf->array_freeze_pending++;
 	conf->barrier++;
 	conf->nr_waiting++;
 	wait_event_lock_irq_cmd(conf->wait_barrier,
-				conf->nr_pending == conf->nr_queued+extra,
+				atomic_read(&conf->nr_pending) == conf->nr_queued+extra,
 				conf->resync_lock,
 				flush_pending_writes(conf));
 
+	conf->array_freeze_pending--;
 	spin_unlock_irq(&conf->resync_lock);
 }
 
@@ -3542,6 +3544,7 @@
 
 	spin_lock_init(&conf->resync_lock);
 	init_waitqueue_head(&conf->wait_barrier);
+	atomic_set(&conf->nr_pending, 0);
 
 	conf->thread = md_register_thread(raid10d, mddev, "raid10");
 	if (!conf->thread)