rcu: Make rcu_nocb_wait_gp() check if GP already requested

This commit makes rcu_nocb_wait_gp() check rdp->gp_seq_needed to see
if the current CPU already knows about the needed grace period having
already been requested.  If so, it avoids acquiring the corresponding
leaf rcu_node structure's ->lock, thus decreasing contention.  This
optimization is intended for cases where either multiple leader rcuo
kthreads are running on the same CPU or these kthreads are running on
a non-offloaded (e.g., housekeeping) CPU.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
[ paulmck: Move lock release past "if" as suggested by Joel Fernandes. ]
[ paulmck: Fix caching of furthest-future requested grace period. ]
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 1ede516..4826598 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -1618,6 +1618,11 @@ static bool rcu_start_this_gp(struct rcu_node *rnp, struct rcu_data *rdp,
 	trace_rcu_grace_period(rsp->name, READ_ONCE(rsp->gpnum), TPS("newreq"));
 	ret = true;  /* Caller must wake GP kthread. */
 unlock_out:
+	/* Push furthest requested GP to leaf node and rcu_data structure. */
+	if (ULONG_CMP_LT(c, rnp_root->gp_seq_needed)) {
+		rnp->gp_seq_needed = rnp_root->gp_seq_needed;
+		rdp->gp_seq_needed = rnp_root->gp_seq_needed;
+	}
 	if (rnp != rnp_root)
 		raw_spin_unlock_rcu_node(rnp_root);
 	return ret;