workqueue: unify local CPU queueing handling

Queueing functions have been using different methods to determine the
local CPU.

* queue_work() superflously uses get/put_cpu() to acquire and hold the
  local CPU across queue_work_on().

* delayed_work_timer_fn() uses smp_processor_id().

* queue_delayed_work() calls queue_delayed_work_on() with -1 @cpu
  which is interpreted as the local CPU.

* flush_delayed_work[_sync]() were using raw_smp_processor_id().

* __queue_work() interprets %WORK_CPU_UNBOUND as local CPU if the
  target workqueue is bound one but nobody uses this.

This patch converts all functions to uniformly use %WORK_CPU_UNBOUND
to indicate local CPU and use the local binding feature of
__queue_work().  unlikely() is dropped from %WORK_CPU_UNBOUND handling
in __queue_work().

Signed-off-by: Tejun Heo <tj@kernel.org>
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 5539238..ce60bb5 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -1003,7 +1003,7 @@
 	if (!(wq->flags & WQ_UNBOUND)) {
 		struct global_cwq *last_gcwq;
 
-		if (unlikely(cpu == WORK_CPU_UNBOUND))
+		if (cpu == WORK_CPU_UNBOUND)
 			cpu = raw_smp_processor_id();
 
 		/*
@@ -1103,12 +1103,7 @@
  */
 bool queue_work(struct workqueue_struct *wq, struct work_struct *work)
 {
-	bool ret;
-
-	ret = queue_work_on(get_cpu(), wq, work);
-	put_cpu();
-
-	return ret;
+	return queue_work_on(WORK_CPU_UNBOUND, wq, work);
 }
 EXPORT_SYMBOL_GPL(queue_work);
 
@@ -1118,7 +1113,7 @@
 	struct cpu_workqueue_struct *cwq = get_work_cwq(&dwork->work);
 
 	local_irq_disable();
-	__queue_work(smp_processor_id(), cwq->wq, &dwork->work);
+	__queue_work(WORK_CPU_UNBOUND, cwq->wq, &dwork->work);
 	local_irq_enable();
 }
 EXPORT_SYMBOL_GPL(delayed_work_timer_fn);
@@ -1172,7 +1167,7 @@
 
 		timer->expires = jiffies + delay;
 
-		if (unlikely(cpu >= 0))
+		if (unlikely(cpu != WORK_CPU_UNBOUND))
 			add_timer_on(timer, cpu);
 		else
 			add_timer(timer);
@@ -1198,7 +1193,7 @@
 	if (delay == 0)
 		return queue_work(wq, &dwork->work);
 
-	return queue_delayed_work_on(-1, wq, dwork, delay);
+	return queue_delayed_work_on(WORK_CPU_UNBOUND, wq, dwork, delay);
 }
 EXPORT_SYMBOL_GPL(queue_delayed_work);
 
@@ -2868,7 +2863,7 @@
 {
 	local_irq_disable();
 	if (del_timer_sync(&dwork->timer))
-		__queue_work(raw_smp_processor_id(),
+		__queue_work(WORK_CPU_UNBOUND,
 			     get_work_cwq(&dwork->work)->wq, &dwork->work);
 	local_irq_enable();
 	return flush_work(&dwork->work);
@@ -2891,7 +2886,7 @@
 {
 	local_irq_disable();
 	if (del_timer_sync(&dwork->timer))
-		__queue_work(raw_smp_processor_id(),
+		__queue_work(WORK_CPU_UNBOUND,
 			     get_work_cwq(&dwork->work)->wq, &dwork->work);
 	local_irq_enable();
 	return flush_work_sync(&dwork->work);