srcu: Move to state-based grace-period sequencing

The current SRCU grace-period processing might never reach the last
portion of srcu_advance_batches().  This is OK given the current
implementation, as the first portion, up to the try_check_zero()
following the srcu_flip(), is sufficient to drive grace periods forward.
However, it has the unfortunate side-effect of making it impossible to
determine when a given grace period has ended, and it will be necessary
to efficiently track the ends of grace periods in order to handle
per-CPU SRCU callback lists efficiently.

This commit therefore adds states to the SRCU grace-period processing,
so that the end of a given SRCU grace period is marked by the transition
to the SRCU_STATE_DONE state.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
diff --git a/include/linux/srcu.h b/include/linux/srcu.h
index a598cf3..f149a68 100644
--- a/include/linux/srcu.h
+++ b/include/linux/srcu.h
@@ -48,7 +48,7 @@ struct srcu_struct {
 	unsigned long completed;
 	struct srcu_array __percpu *per_cpu_ref;
 	spinlock_t queue_lock; /* protect ->batch_queue, ->running */
-	bool running;
+	int srcu_state;
 	/* callbacks just queued */
 	struct rcu_batch batch_queue;
 	/* callbacks try to do the first check_zero */
@@ -62,6 +62,12 @@ struct srcu_struct {
 #endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
 };
 
+/* Values for the ->srcu_state field. */
+#define SRCU_STATE_IDLE		0
+#define SRCU_STATE_SCAN1	1
+#define SRCU_STATE_SCAN2	2
+#define SRCU_STATE_DONE		3
+
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
 
 int __init_srcu_struct(struct srcu_struct *sp, const char *name,
@@ -89,7 +95,7 @@ void process_srcu(struct work_struct *work);
 		.completed = -300,					\
 		.per_cpu_ref = &name##_srcu_array,			\
 		.queue_lock = __SPIN_LOCK_UNLOCKED(name.queue_lock),	\
-		.running = false,					\
+		.srcu_state = SRCU_STATE_IDLE,				\
 		.batch_queue = RCU_BATCH_INIT(name.batch_queue),	\
 		.batch_check0 = RCU_BATCH_INIT(name.batch_check0),	\
 		.batch_check1 = RCU_BATCH_INIT(name.batch_check1),	\