workqueue: fix ordered workqueues in NUMA setups An ordered workqueue implements execution ordering by using single pool_workqueue with max_active == 1. On a given pool_workqueue, work items are processed in FIFO order and limiting max_active to 1 enforces the queued work items to be processed one by one. Unfortunately, 4c16bd327c ("workqueue: implement NUMA affinity for unbound workqueues") accidentally broke this guarantee by applying NUMA affinity to ordered workqueues too. On NUMA setups, an ordered workqueue would end up with separate pool_workqueues for different nodes. Each pool_workqueue still limits max_active to 1 but multiple work items may be executed concurrently and out of order depending on which node they are queued to. Fix it by using dedicated ordered_wq_attrs[] when creating ordered workqueues. The new attrs match the unbound ones except that no_numa is always set thus forcing all NUMA nodes to share the default pool_workqueue. While at it, add sanity check in workqueue creation path which verifies that an ordered workqueues has only the default pool_workqueue. Signed-off-by: Tejun Heo <tj@kernel.org> Reported-by: Libin <huawei.libin@huawei.com> Cc: stable@vger.kernel.org Cc: Lai Jiangshan <laijs@cn.fujitsu.com>

commit: 8a2b75384444488fc4f2cbb9f0921b6a0794838f [log] [tgz]
author: Tejun Heo <tj@kernel.org> Thu Sep 05 12:30:04 2013 -0400
committer: Tejun Heo <tj@kernel.org> Fri Nov 22 18:14:02 2013 -0500
tree: c4b39daad8de264be08beec77621048e9b9fb9ed
parent: 91151228065354a050fd0d190aefdd662a0580aa [diff] [blame]
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index f894242..bbb5e98 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c

@@ -305,6 +305,9 @@
 /* I: attributes used when instantiating standard unbound pools on demand */
 static struct workqueue_attrs *unbound_std_wq_attrs[NR_STD_WORKER_POOLS];
 
+/* I: attributes used when instantiating ordered pools on demand */
+static struct workqueue_attrs *ordered_wq_attrs[NR_STD_WORKER_POOLS];
+
 struct workqueue_struct *system_wq __read_mostly;
 EXPORT_SYMBOL(system_wq);
 struct workqueue_struct *system_highpri_wq __read_mostly;
@@ -4107,7 +4110,7 @@
 static int alloc_and_link_pwqs(struct workqueue_struct *wq)
 {
 	bool highpri = wq->flags & WQ_HIGHPRI;
-	int cpu;
+	int cpu, ret;
 
 	if (!(wq->flags & WQ_UNBOUND)) {
 		wq->cpu_pwqs = alloc_percpu(struct pool_workqueue);
@@ -4127,6 +4130,13 @@
 			mutex_unlock(&wq->mutex);
 		}
 		return 0;
+	} else if (wq->flags & __WQ_ORDERED) {
+		ret = apply_workqueue_attrs(wq, ordered_wq_attrs[highpri]);
+		/* there should only be single pwq for ordering guarantee */
+		WARN(!ret && (wq->pwqs.next != &wq->dfl_pwq->pwqs_node ||
+			      wq->pwqs.prev != &wq->dfl_pwq->pwqs_node),
+		     "ordering guarantee broken for workqueue %s\n", wq->name);
+		return ret;
 	} else {
 		return apply_workqueue_attrs(wq, unbound_std_wq_attrs[highpri]);
 	}
@@ -5052,13 +5062,23 @@
 		}
 	}
 
-	/* create default unbound wq attrs */
+	/* create default unbound and ordered wq attrs */
 	for (i = 0; i < NR_STD_WORKER_POOLS; i++) {
 		struct workqueue_attrs *attrs;
 
 		BUG_ON(!(attrs = alloc_workqueue_attrs(GFP_KERNEL)));
 		attrs->nice = std_nice[i];
 		unbound_std_wq_attrs[i] = attrs;
+
+		/*
+		 * An ordered wq should have only one pwq as ordering is
+		 * guaranteed by max_active which is enforced by pwqs.
+		 * Turn off NUMA so that dfl_pwq is used for all nodes.
+		 */
+		BUG_ON(!(attrs = alloc_workqueue_attrs(GFP_KERNEL)));
+		attrs->nice = std_nice[i];
+		attrs->no_numa = true;
+		ordered_wq_attrs[i] = attrs;
 	}
 
 	system_wq = alloc_workqueue("events", 0, 0);
commit	8a2b75384444488fc4f2cbb9f0921b6a0794838f	[log] [tgz]
author	Tejun Heo <tj@kernel.org>	Thu Sep 05 12:30:04 2013 -0400
committer	Tejun Heo <tj@kernel.org>	Fri Nov 22 18:14:02 2013 -0500
tree	c4b39daad8de264be08beec77621048e9b9fb9ed
parent	91151228065354a050fd0d190aefdd662a0580aa [diff] [blame]