Merge branch 'workqueue/for-5.16-fixes' into workqueue/for-5.17

for-5.16-fixes contains fixes for two subtle race conditions which were
introduced by scheduler-side code cleanups. The branch didn't get pushed
out, so merge it into for-5.17.
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index b583141..33f1106 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -864,8 +864,17 @@ void wq_worker_running(struct task_struct *task)
 
 	if (!worker->sleeping)
 		return;
+
+	/*
+	 * If preempted by unbind_workers() between the WORKER_NOT_RUNNING check
+	 * and the nr_running increment below, we may ruin the nr_running reset
+	 * and leave with an unexpected pool->nr_running == 1 on the newly unbound
+	 * pool. Protect against such a race.
+	 */
+	preempt_disable();
 	if (!(worker->flags & WORKER_NOT_RUNNING))
 		atomic_inc(&worker->pool->nr_running);
+	preempt_enable();
 	worker->sleeping = 0;
 }
 
@@ -899,6 +908,16 @@ void wq_worker_sleeping(struct task_struct *task)
 	raw_spin_lock_irq(&pool->lock);
 
 	/*
+	 * Recheck in case unbind_workers() preempted us. We don't
+	 * want to decrement nr_running after the worker is unbound
+	 * and nr_running has been reset.
+	 */
+	if (worker->flags & WORKER_NOT_RUNNING) {
+		raw_spin_unlock_irq(&pool->lock);
+		return;
+	}
+
+	/*
 	 * The counterpart of the following dec_and_test, implied mb,
 	 * worklist not empty test sequence is in insert_work().
 	 * Please read comment there.
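
The two hunks above close the same window from both sides: wq_worker_sleeping()
now rechecks WORKER_NOT_RUNNING under pool->lock before it decrements
nr_running, while wq_worker_running() keeps preemption disabled across its
check-then-increment so unbind_workers() cannot slip in between. As a rough
illustration of the recheck-under-lock half only, here is a userspace analogue
built on pthreads and C11 atomics; it is not kernel code, and every name in it
(fake_pool, fake_worker, FAKE_NOT_RUNNING, ...) is made up:

  #include <pthread.h>
  #include <stdatomic.h>
  #include <stdio.h>

  #define FAKE_NOT_RUNNING	0x1

  struct fake_pool {
  	pthread_mutex_t lock;
  	atomic_int nr_running;
  };

  struct fake_worker {
  	unsigned int flags;
  	struct fake_pool *pool;
  };

  /* Analogue of wq_worker_sleeping(): decrement only while still bound. */
  static void fake_worker_sleeping(struct fake_worker *worker)
  {
  	struct fake_pool *pool = worker->pool;

  	pthread_mutex_lock(&pool->lock);
  	/*
  	 * Recheck under the lock: the unbind path may have set
  	 * NOT_RUNNING and reset nr_running in the meantime.
  	 */
  	if (worker->flags & FAKE_NOT_RUNNING) {
  		pthread_mutex_unlock(&pool->lock);
  		return;
  	}
  	atomic_fetch_sub(&pool->nr_running, 1);
  	pthread_mutex_unlock(&pool->lock);
  }

  /*
   * Analogue of unbind_workers(): flag the worker and reset the counter
   * under the same lock, so the recheck above cannot miss it.
   */
  static void fake_unbind_worker(struct fake_worker *worker)
  {
  	struct fake_pool *pool = worker->pool;

  	pthread_mutex_lock(&pool->lock);
  	worker->flags |= FAKE_NOT_RUNNING;
  	atomic_store(&pool->nr_running, 0);
  	pthread_mutex_unlock(&pool->lock);
  }

  int main(void)
  {
  	struct fake_pool pool = {
  		.lock = PTHREAD_MUTEX_INITIALIZER,
  		.nr_running = 1,
  	};
  	struct fake_worker worker = { .flags = 0, .pool = &pool };

  	fake_unbind_worker(&worker);
  	fake_worker_sleeping(&worker);	/* bails out, no decrement after reset */
  	printf("nr_running = %d\n", atomic_load(&pool.nr_running));
  	return 0;
  }
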
@@ -1526,7 +1545,8 @@ static void __queue_work(int cpu, struct workqueue_struct *wq,
  * @work: work to queue
  *
  * We queue the work to a specific CPU, the caller must ensure it
- * can't go away.
+ * can't go away.  If the caller fails to ensure that, the work
+ * will be executed on a randomly chosen CPU.
  *
  * Return: %false if @work was already on a queue, %true otherwise.
  */
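
The comment change in the last hunk is only a documentation fix, but it implies
a usage pattern on the caller's side: if the work really must run on the
requested CPU, the caller has to keep that CPU from being unplugged itself.
The sketch below is not part of this patch; it shows one common way to do that
from a kernel context, holding the CPU hotplug read lock across queueing and
flushing. The names my_work, my_work_fn and my_queue_on are invented, and
system_wq is used only as a convenient target workqueue.

  #include <linux/cpu.h>
  #include <linux/cpumask.h>
  #include <linux/printk.h>
  #include <linux/workqueue.h>

  static void my_work_fn(struct work_struct *work)
  {
  	pr_info("my_work ran\n");
  }

  static DECLARE_WORK(my_work, my_work_fn);

  /* Queue my_work on @cpu and wait for it, keeping @cpu online meanwhile. */
  static bool my_queue_on(int cpu)
  {
  	bool queued = false;

  	cpus_read_lock();		/* block CPU hot-unplug */
  	if (cpu_online(cpu)) {
  		queued = queue_work_on(cpu, system_wq, &my_work);
  		flush_work(&my_work);	/* @cpu stays online until the work ran */
  	}
  	cpus_read_unlock();

  	return queued;
  }

Flushing while still holding cpus_read_lock() is what guarantees the placement
here; a caller whose work function may itself block on CPU hotplug would need a
different scheme.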