rcu: Make rcu_nocb_wait_gp() check if GP already requested

This commit makes rcu_nocb_wait_gp() check rdp->gp_seq_needed to see
if the current CPU already knows about the needed grace period having
already been requested.  If so, it avoids acquiring the corresponding
leaf rcu_node structure's ->lock, thus decreasing contention.  This
optimization is intended for cases where either multiple leader rcuo
kthreads are running on the same CPU or these kthreads are running on
a non-offloaded (e.g., housekeeping) CPU.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
[ paulmck: Move lock release past "if" as suggested by Joel Fernandes. ]
[ paulmck: Fix caching of furthest-future requested grace period. ]
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 1ede516..4826598 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -1618,6 +1618,11 @@ static bool rcu_start_this_gp(struct rcu_node *rnp, struct rcu_data *rdp,
 	trace_rcu_grace_period(rsp->name, READ_ONCE(rsp->gpnum), TPS("newreq"));
 	ret = true;  /* Caller must wake GP kthread. */
 unlock_out:
+	/* Push furthest requested GP to leaf node and rcu_data structure. */
+	if (ULONG_CMP_LT(c, rnp_root->gp_seq_needed)) {
+		rnp->gp_seq_needed = rnp_root->gp_seq_needed;
+		rdp->gp_seq_needed = rnp_root->gp_seq_needed;
+	}
 	if (rnp != rnp_root)
 		raw_spin_unlock_rcu_node(rnp_root);
 	return ret;
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index f4a88e3..ca73931 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -2104,12 +2104,17 @@ static void rcu_nocb_wait_gp(struct rcu_data *rdp)
 	bool needwake;
 	struct rcu_node *rnp = rdp->mynode;
 
-	raw_spin_lock_irqsave_rcu_node(rnp, flags);
+	local_irq_save(flags);
 	c = rcu_seq_snap(&rdp->rsp->gp_seq);
-	needwake = rcu_start_this_gp(rnp, rdp, c);
-	raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
-	if (needwake)
-		rcu_gp_kthread_wake(rdp->rsp);
+	if (!rdp->gpwrap && ULONG_CMP_GE(rdp->gp_seq_needed, c)) {
+		local_irq_restore(flags);
+	} else {
+		raw_spin_lock_rcu_node(rnp); /* irqs already disabled. */
+		needwake = rcu_start_this_gp(rnp, rdp, c);
+		raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
+		if (needwake)
+			rcu_gp_kthread_wake(rdp->rsp);
+	}
 
 	/*
 	 * Wait for the grace period.  Do so interruptibly to avoid messing