rcu: Move from ->need_future_gp[] to ->gp_seq_needed
One problem with the ->need_future_gp[] array is that the grace-period
assignment of each element changes as the grace periods complete.
This means that it is necessary to hold a lock when checking this
array to learn if a given grace period has already been requested.
This increase lock contention, which is the opposite of helpful.
This commit therefore replaces the ->need_future_gp[] with a single
->gp_seq_needed value and keeps it updated in the rcu_data structure.
This will enable reliable lockless checking of whether or not a given
grace period has already been requested.
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 4a528a0..1ede516 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -1560,7 +1560,7 @@ static void trace_rcu_this_gp(struct rcu_node *rnp, struct rcu_data *rdp,
/*
* Start the specified grace period, as needed to handle newly arrived
* callbacks. The required future grace periods are recorded in each
- * rcu_node structure's ->need_future_gp[] field. Returns true if there
+ * rcu_node structure's ->gp_seq_needed field. Returns true if there
* is reason to awaken the grace-period kthread.
*
* The caller must hold the specified rcu_node structure's ->lock, which
@@ -1589,14 +1589,14 @@ static bool rcu_start_this_gp(struct rcu_node *rnp, struct rcu_data *rdp,
for (rnp_root = rnp; 1; rnp_root = rnp_root->parent) {
if (rnp_root != rnp)
raw_spin_lock_rcu_node(rnp_root);
- if (need_future_gp_element(rnp_root, c) ||
+ if (ULONG_CMP_GE(rnp_root->gp_seq_needed, c) ||
rcu_seq_done(&rnp_root->gp_seq, c) ||
(rnp != rnp_root &&
rcu_seq_state(rcu_seq_current(&rnp_root->gp_seq)))) {
trace_rcu_this_gp(rnp_root, rdp, c, TPS("Prestarted"));
goto unlock_out;
}
- need_future_gp_element(rnp_root, c) = true;
+ rnp_root->gp_seq_needed = c;
if (rnp_root != rnp && rnp_root->parent != NULL)
raw_spin_unlock_rcu_node(rnp_root);
if (!rnp_root->parent)
@@ -1633,8 +1633,9 @@ static bool rcu_future_gp_cleanup(struct rcu_state *rsp, struct rcu_node *rnp)
bool needmore;
struct rcu_data *rdp = this_cpu_ptr(rsp->rda);
- need_future_gp_element(rnp, c) = false;
- needmore = need_any_future_gp(rnp);
+ needmore = ULONG_CMP_LT(rnp->gp_seq, rnp->gp_seq_needed);
+ if (!needmore)
+ rnp->gp_seq_needed = rnp->gp_seq; /* Avoid counter wrap. */
trace_rcu_this_gp(rnp, rdp, c,
needmore ? TPS("CleanupMore") : TPS("Cleanup"));
return needmore;
@@ -2046,7 +2047,7 @@ static void rcu_gp_cleanup(struct rcu_state *rsp)
rsp->gp_state = RCU_GP_IDLE;
/* Check for GP requests since above loop. */
rdp = this_cpu_ptr(rsp->rda);
- if (need_any_future_gp(rnp)) {
+ if (ULONG_CMP_LT(rnp->gp_seq, rnp->gp_seq_needed)) {
trace_rcu_this_gp(rnp, rdp, rsp->completed - 1,
TPS("CleanupMore"));
needgp = true;
@@ -2700,8 +2701,8 @@ rcu_check_gp_start_stall(struct rcu_state *rsp, struct rcu_node *rnp,
struct rcu_node *rnp_root = rcu_get_root(rsp);
static atomic_t warned = ATOMIC_INIT(0);
- if (!IS_ENABLED(CONFIG_PROVE_RCU) ||
- rcu_gp_in_progress(rsp) || !need_any_future_gp(rcu_get_root(rsp)))
+ if (!IS_ENABLED(CONFIG_PROVE_RCU) || rcu_gp_in_progress(rsp) ||
+ ULONG_CMP_GE(rnp_root->gp_seq, rnp_root->gp_seq_needed))
return;
j = jiffies; /* Expensive access, and in common case don't get here. */
if (time_before(j, READ_ONCE(rsp->gp_req_activity) + HZ) ||
@@ -2711,7 +2712,8 @@ rcu_check_gp_start_stall(struct rcu_state *rsp, struct rcu_node *rnp,
raw_spin_lock_irqsave_rcu_node(rnp, flags);
j = jiffies;
- if (rcu_gp_in_progress(rsp) || !need_any_future_gp(rcu_get_root(rsp)) ||
+ if (rcu_gp_in_progress(rsp) ||
+ ULONG_CMP_GE(rnp_root->gp_seq, rnp_root->gp_seq_needed) ||
time_before(j, READ_ONCE(rsp->gp_req_activity) + HZ) ||
time_before(j, READ_ONCE(rsp->gp_activity) + HZ) ||
atomic_read(&warned)) {
@@ -2723,7 +2725,8 @@ rcu_check_gp_start_stall(struct rcu_state *rsp, struct rcu_node *rnp,
if (rnp_root != rnp)
raw_spin_lock_rcu_node(rnp_root); /* irqs already disabled. */
j = jiffies;
- if (rcu_gp_in_progress(rsp) || !need_any_future_gp(rcu_get_root(rsp)) ||
+ if (rcu_gp_in_progress(rsp) ||
+ ULONG_CMP_GE(rnp_root->gp_seq, rnp_root->gp_seq_needed) ||
time_before(j, rsp->gp_req_activity + HZ) ||
time_before(j, rsp->gp_activity + HZ) ||
atomic_xchg(&warned, 1)) {
@@ -2731,12 +2734,9 @@ rcu_check_gp_start_stall(struct rcu_state *rsp, struct rcu_node *rnp,
raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
return;
}
- pr_alert("%s: g%lu %d%d%d%d gar:%lu ga:%lu f%#x %s->state:%#lx\n",
- __func__, READ_ONCE(rsp->gpnum),
- need_future_gp_element(rcu_get_root(rsp), 0),
- need_future_gp_element(rcu_get_root(rsp), 1),
- need_future_gp_element(rcu_get_root(rsp), 2),
- need_future_gp_element(rcu_get_root(rsp), 3),
+ pr_alert("%s: g%ld->%ld gar:%lu ga:%lu f%#x %s->state:%#lx\n",
+ __func__, (long)READ_ONCE(rsp->gp_seq),
+ (long)READ_ONCE(rnp_root->gp_seq_needed),
j - rsp->gp_req_activity, j - rsp->gp_activity,
rsp->gp_flags, rsp->name,
rsp->gp_kthread ? rsp->gp_kthread->state : 0x1ffffL);
@@ -3527,6 +3527,7 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp)
rdp->gpnum = rnp->completed; /* Make CPU later note any new GP. */
rdp->completed = rnp->completed;
rdp->gp_seq = rnp->gp_seq;
+ rdp->gp_seq_needed = rnp->gp_seq;
rdp->cpu_no_qs.b.norm = true;
rdp->rcu_qs_ctr_snap = per_cpu(rcu_dynticks.rcu_qs_ctr, cpu);
rdp->core_needs_qs = false;
@@ -3907,6 +3908,7 @@ static void __init rcu_init_one(struct rcu_state *rsp)
rnp->gpnum = rsp->gpnum;
rnp->completed = rsp->completed;
rnp->gp_seq = rsp->gp_seq;
+ rnp->gp_seq_needed = rsp->gp_seq;
rnp->completedqs = rsp->gp_seq;
rnp->qsmask = 0;
rnp->qsmaskinit = 0;