rcu: Make TASKS_RCU handle nohz_full= CPUs
Currently TASKS_RCU would ignore a CPU running a task in nohz_full=
usermode execution. There would be neither a context switch nor a
scheduling-clock interrupt to tell TASKS_RCU that the task in question
had passed through a quiescent state. The grace period would therefore
extend indefinitely. This commit therefore makes RCU's dyntick-idle
subsystem record the task_struct structure of the task that is running
in dyntick-idle mode on each CPU. The TASKS_RCU grace period can
then access this information and record a quiescent state on
behalf of any CPU running in dyntick-idle usermode.
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index dffd925..03b2748 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -121,7 +121,8 @@
#define INIT_TASK_RCU_TASKS(tsk) \
.rcu_tasks_holdout = false, \
.rcu_tasks_holdout_list = \
- LIST_HEAD_INIT(tsk.rcu_tasks_holdout_list),
+ LIST_HEAD_INIT(tsk.rcu_tasks_holdout_list), \
+ .rcu_tasks_idle_cpu = -1,
#else
#define INIT_TASK_RCU_TASKS(tsk)
#endif
diff --git a/include/linux/sched.h b/include/linux/sched.h
index eaacac4..ec8b347 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1274,6 +1274,7 @@
unsigned long rcu_tasks_nvcsw;
bool rcu_tasks_holdout;
struct list_head rcu_tasks_holdout_list;
+ int rcu_tasks_idle_cpu;
#endif /* #ifdef CONFIG_TASKS_RCU */
#if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT)
@@ -2020,6 +2021,7 @@
#ifdef CONFIG_TASKS_RCU
p->rcu_tasks_holdout = false;
INIT_LIST_HEAD(&p->rcu_tasks_holdout_list);
+ p->rcu_tasks_idle_cpu = -1;
#endif /* #ifdef CONFIG_TASKS_RCU */
}
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index e23dad0..c880f53 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -526,6 +526,7 @@
atomic_inc(&rdtp->dynticks);
smp_mb__after_atomic(); /* Force ordering with next sojourn. */
WARN_ON_ONCE(atomic_read(&rdtp->dynticks) & 0x1);
+ rcu_dynticks_task_enter();
/*
* It is illegal to enter an extended quiescent state while
@@ -642,6 +643,7 @@
static void rcu_eqs_exit_common(struct rcu_dynticks *rdtp, long long oldval,
int user)
{
+ rcu_dynticks_task_exit();
smp_mb__before_atomic(); /* Force ordering w/previous sojourn. */
atomic_inc(&rdtp->dynticks);
/* CPUs seeing atomic_inc() must see later RCU read-side crit sects */
diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
index 6a86eb7..3a92000 100644
--- a/kernel/rcu/tree.h
+++ b/kernel/rcu/tree.h
@@ -605,6 +605,8 @@
static void rcu_bind_gp_kthread(void);
static void rcu_sysidle_init_percpu_data(struct rcu_dynticks *rdtp);
static bool rcu_nohz_full_cpu(struct rcu_state *rsp);
+static void rcu_dynticks_task_enter(void);
+static void rcu_dynticks_task_exit(void);
#endif /* #ifndef RCU_TREE_NONCORE */
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index 7672586..e466b40 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -3036,3 +3036,19 @@
housekeeping_affine(current);
#endif /* #else #ifdef CONFIG_NO_HZ_FULL_SYSIDLE */
}
+
+/* Record the current task on dyntick-idle entry. */
+static void rcu_dynticks_task_enter(void)
+{
+#if defined(CONFIG_TASKS_RCU) && defined(CONFIG_NO_HZ_FULL)
+ ACCESS_ONCE(current->rcu_tasks_idle_cpu) = smp_processor_id();
+#endif /* #if defined(CONFIG_TASKS_RCU) && defined(CONFIG_NO_HZ_FULL) */
+}
+
+/* Record no current task on dyntick-idle exit. */
+static void rcu_dynticks_task_exit(void)
+{
+#if defined(CONFIG_TASKS_RCU) && defined(CONFIG_NO_HZ_FULL)
+ ACCESS_ONCE(current->rcu_tasks_idle_cpu) = -1;
+#endif /* #if defined(CONFIG_TASKS_RCU) && defined(CONFIG_NO_HZ_FULL) */
+}
diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c
index e1d7174..2658de4 100644
--- a/kernel/rcu/update.c
+++ b/kernel/rcu/update.c
@@ -463,7 +463,9 @@
{
if (!ACCESS_ONCE(t->rcu_tasks_holdout) ||
t->rcu_tasks_nvcsw != ACCESS_ONCE(t->nvcsw) ||
- !ACCESS_ONCE(t->on_rq)) {
+ !ACCESS_ONCE(t->on_rq) ||
+ (IS_ENABLED(CONFIG_NO_HZ_FULL) &&
+ !is_idle_task(t) && t->rcu_tasks_idle_cpu >= 0)) {
ACCESS_ONCE(t->rcu_tasks_holdout) = false;
list_del_rcu(&t->rcu_tasks_holdout_list);
put_task_struct(t);