[SCSI] scsi_dh: Add a single threaded workqueue for initializing paths

Before this patch set (SCSI hardware handlers), initialization of a
path was done asynchronously. Doing that requires a workqueue in each
device/hardware handler module and leads to unneccessary complication
in the device handler code, making it difficult to read the code and
follow the state diagram.

Moving that workqueue to this level makes the device handler code simpler.
Hence, the workqueue is moved to dm level.

A new workqueue is added instead of adding it to the existing workqueue
(kmpathd) for the following reasons:
	1. Device activation has to happen faster, stacking them along
	   with the other workqueue might lead to unnecessary delay
	   in the activation of the path.
	2. The effect could be felt the other way too. i.e the current
	   events that are handled by the existing workqueue might get
	   a delayed response.

Signed-off-by: Chandra Seetharaman <sekharan@us.ibm.com>
Acked-by: Alasdair G Kergon <agk@redhat.com>
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>
diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c
index e54ff37..9b16788 100644
--- a/drivers/md/dm-mpath.c
+++ b/drivers/md/dm-mpath.c
@@ -63,6 +63,7 @@
 	spinlock_t lock;
 
 	const char *hw_handler_name;
+	struct work_struct activate_path;
 	unsigned nr_priority_groups;
 	struct list_head priority_groups;
 	unsigned pg_init_required;	/* pg_init needs calling? */
@@ -107,10 +108,10 @@
 
 static struct kmem_cache *_mpio_cache;
 
-static struct workqueue_struct *kmultipathd;
+static struct workqueue_struct *kmultipathd, *kmpath_handlerd;
 static void process_queued_ios(struct work_struct *work);
 static void trigger_event(struct work_struct *work);
-static void pg_init_done(struct dm_path *, int);
+static void activate_path(struct work_struct *work);
 
 
 /*-----------------------------------------------
@@ -180,6 +181,7 @@
 		m->queue_io = 1;
 		INIT_WORK(&m->process_queued_ios, process_queued_ios);
 		INIT_WORK(&m->trigger_event, trigger_event);
+		INIT_WORK(&m->activate_path, activate_path);
 		m->mpio_pool = mempool_create_slab_pool(MIN_IOS, _mpio_cache);
 		if (!m->mpio_pool) {
 			kfree(m);
@@ -432,11 +434,8 @@
 out:
 	spin_unlock_irqrestore(&m->lock, flags);
 
-	if (init_required) {
-		struct dm_path *path = &pgpath->path;
-		int ret = scsi_dh_activate(bdev_get_queue(path->dev->bdev));
-		pg_init_done(path, ret);
-	}
+	if (init_required)
+		queue_work(kmpath_handlerd, &m->activate_path);
 
 	if (!must_queue)
 		dispatch_queued_ios(m);
@@ -791,6 +790,7 @@
 {
 	struct multipath *m = (struct multipath *) ti->private;
 
+	flush_workqueue(kmpath_handlerd);
 	flush_workqueue(kmultipathd);
 	free_multipath(m);
 }
@@ -1108,6 +1108,17 @@
 	spin_unlock_irqrestore(&m->lock, flags);
 }
 
+static void activate_path(struct work_struct *work)
+{
+	int ret;
+	struct multipath *m =
+		container_of(work, struct multipath, activate_path);
+	struct dm_path *path = &m->current_pgpath->path;
+
+	ret = scsi_dh_activate(bdev_get_queue(path->dev->bdev));
+	pg_init_done(path, ret);
+}
+
 /*
  * end_io handling
  */
@@ -1451,6 +1462,21 @@
 		return -ENOMEM;
 	}
 
+	/*
+	 * A separate workqueue is used to handle the device handlers
+	 * to avoid overloading existing workqueue. Overloading the
+	 * old workqueue would also create a bottleneck in the
+	 * path of the storage hardware device activation.
+	 */
+	kmpath_handlerd = create_singlethread_workqueue("kmpath_handlerd");
+	if (!kmpath_handlerd) {
+		DMERR("failed to create workqueue kmpath_handlerd");
+		destroy_workqueue(kmultipathd);
+		dm_unregister_target(&multipath_target);
+		kmem_cache_destroy(_mpio_cache);
+		return -ENOMEM;
+	}
+
 	DMINFO("version %u.%u.%u loaded",
 	       multipath_target.version[0], multipath_target.version[1],
 	       multipath_target.version[2]);
@@ -1462,6 +1488,7 @@
 {
 	int r;
 
+	destroy_workqueue(kmpath_handlerd);
 	destroy_workqueue(kmultipathd);
 
 	r = dm_unregister_target(&multipath_target);