[SCSI] libata, libsas: introduce sched_eh and end_eh port ops

When managing shost->host_eh_scheduled libata assumes that there is a
1:1 shost-to-ata_port relationship.  libsas creates a 1:N relationship
so it needs to manage host_eh_scheduled cumulatively at the host level.
The sched_eh and end_eh port port ops allow libsas to track when domain
devices enter/leave the "eh-pending" state under ha->lock (previously
named ha->state_lock, but it is no longer just a lock for ha->state
changes).

Since host_eh_scheduled indicates eh without backing commands pinning
the device it can be deallocated at any time.  Move the taking of the
domain_device reference under the port_lock to guarantee that the
ata_port stays around for the duration of eh.

Reviewed-by: Jacek Danecki <jacek.danecki@intel.com>
Acked-by: Jeff Garzik <jgarzik@redhat.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Signed-off-by: James Bottomley <JBottomley@Parallels.com>
diff --git a/drivers/scsi/libsas/sas_ata.c b/drivers/scsi/libsas/sas_ata.c
index d109cc3..b035acf 100644
--- a/drivers/scsi/libsas/sas_ata.c
+++ b/drivers/scsi/libsas/sas_ata.c
@@ -523,6 +523,31 @@
 		i->dft->lldd_ata_set_dmamode(dev);
 }
 
+static void sas_ata_sched_eh(struct ata_port *ap)
+{
+	struct domain_device *dev = ap->private_data;
+	struct sas_ha_struct *ha = dev->port->ha;
+	unsigned long flags;
+
+	spin_lock_irqsave(&ha->lock, flags);
+	if (!test_and_set_bit(SAS_DEV_EH_PENDING, &dev->state))
+		ha->eh_active++;
+	ata_std_sched_eh(ap);
+	spin_unlock_irqrestore(&ha->lock, flags);
+}
+
+void sas_ata_end_eh(struct ata_port *ap)
+{
+	struct domain_device *dev = ap->private_data;
+	struct sas_ha_struct *ha = dev->port->ha;
+	unsigned long flags;
+
+	spin_lock_irqsave(&ha->lock, flags);
+	if (test_and_clear_bit(SAS_DEV_EH_PENDING, &dev->state))
+		ha->eh_active--;
+	spin_unlock_irqrestore(&ha->lock, flags);
+}
+
 static struct ata_port_operations sas_sata_ops = {
 	.prereset		= ata_std_prereset,
 	.hardreset		= sas_ata_hard_reset,
@@ -536,6 +561,8 @@
 	.port_start		= ata_sas_port_start,
 	.port_stop		= ata_sas_port_stop,
 	.set_dmamode		= sas_ata_set_dmamode,
+	.sched_eh		= sas_ata_sched_eh,
+	.end_eh			= sas_ata_end_eh,
 };
 
 static struct ata_port_info sata_port_info = {
@@ -708,10 +735,6 @@
 	struct ata_port *ap = dev->sata_dev.ap;
 	struct sas_ha_struct *ha = dev->port->ha;
 
-	/* hold a reference over eh since we may be racing with final
-	 * remove once all commands are completed
-	 */
-	kref_get(&dev->kref);
 	sas_ata_printk(KERN_DEBUG, dev, "dev error handler\n");
 	ata_scsi_port_error_handler(ha->core.shost, ap);
 	sas_put_device(dev);
@@ -742,6 +765,13 @@
 		list_for_each_entry(dev, &port->dev_list, dev_list_node) {
 			if (!dev_is_sata(dev))
 				continue;
+
+			/* hold a reference over eh since we may be
+			 * racing with final remove once all commands
+			 * are completed
+			 */
+			kref_get(&dev->kref);
+
 			async_schedule_domain(async_sas_ata_eh, dev, &async);
 		}
 		spin_unlock(&port->dev_list_lock);
diff --git a/drivers/scsi/libsas/sas_discover.c b/drivers/scsi/libsas/sas_discover.c
index 629a086..ff497ac 100644
--- a/drivers/scsi/libsas/sas_discover.c
+++ b/drivers/scsi/libsas/sas_discover.c
@@ -294,6 +294,8 @@
 
 	spin_lock_irq(&port->dev_list_lock);
 	list_del_init(&dev->dev_list_node);
+	if (dev_is_sata(dev))
+		sas_ata_end_eh(dev->sata_dev.ap);
 	spin_unlock_irq(&port->dev_list_lock);
 
 	sas_put_device(dev);
@@ -488,9 +490,9 @@
 	if (!test_and_set_bit(event, pending)) {
 		unsigned long flags;
 
-		spin_lock_irqsave(&ha->state_lock, flags);
+		spin_lock_irqsave(&ha->lock, flags);
 		sas_chain_work(ha, sw);
-		spin_unlock_irqrestore(&ha->state_lock, flags);
+		spin_unlock_irqrestore(&ha->lock, flags);
 	}
 }
 
diff --git a/drivers/scsi/libsas/sas_event.c b/drivers/scsi/libsas/sas_event.c
index 4e4292d..789c4d8 100644
--- a/drivers/scsi/libsas/sas_event.c
+++ b/drivers/scsi/libsas/sas_event.c
@@ -47,9 +47,9 @@
 	if (!test_and_set_bit(event, pending)) {
 		unsigned long flags;
 
-		spin_lock_irqsave(&ha->state_lock, flags);
+		spin_lock_irqsave(&ha->lock, flags);
 		sas_queue_work(ha, work);
-		spin_unlock_irqrestore(&ha->state_lock, flags);
+		spin_unlock_irqrestore(&ha->lock, flags);
 	}
 }
 
@@ -61,18 +61,18 @@
 
 	set_bit(SAS_HA_DRAINING, &ha->state);
 	/* flush submitters */
-	spin_lock_irq(&ha->state_lock);
-	spin_unlock_irq(&ha->state_lock);
+	spin_lock_irq(&ha->lock);
+	spin_unlock_irq(&ha->lock);
 
 	drain_workqueue(wq);
 
-	spin_lock_irq(&ha->state_lock);
+	spin_lock_irq(&ha->lock);
 	clear_bit(SAS_HA_DRAINING, &ha->state);
 	list_for_each_entry_safe(sw, _sw, &ha->defer_q, drain_node) {
 		list_del_init(&sw->drain_node);
 		sas_queue_work(ha, sw);
 	}
-	spin_unlock_irq(&ha->state_lock);
+	spin_unlock_irq(&ha->lock);
 }
 
 int sas_drain_work(struct sas_ha_struct *ha)
diff --git a/drivers/scsi/libsas/sas_init.c b/drivers/scsi/libsas/sas_init.c
index 10cb5ae3..6909fef 100644
--- a/drivers/scsi/libsas/sas_init.c
+++ b/drivers/scsi/libsas/sas_init.c
@@ -114,7 +114,7 @@
 		sas_ha->lldd_queue_size = 128; /* Sanity */
 
 	set_bit(SAS_HA_REGISTERED, &sas_ha->state);
-	spin_lock_init(&sas_ha->state_lock);
+	spin_lock_init(&sas_ha->lock);
 	mutex_init(&sas_ha->drain_mutex);
 	INIT_LIST_HEAD(&sas_ha->defer_q);
 
@@ -163,9 +163,9 @@
 	 * events to be queued, and flush any in-progress drainers
 	 */
 	mutex_lock(&sas_ha->drain_mutex);
-	spin_lock_irq(&sas_ha->state_lock);
+	spin_lock_irq(&sas_ha->lock);
 	clear_bit(SAS_HA_REGISTERED, &sas_ha->state);
-	spin_unlock_irq(&sas_ha->state_lock);
+	spin_unlock_irq(&sas_ha->lock);
 	__sas_drain_work(sas_ha);
 	mutex_unlock(&sas_ha->drain_mutex);
 
@@ -411,9 +411,9 @@
 	d->reset_result = 0;
 	d->hard_reset = hard_reset;
 
-	spin_lock_irq(&ha->state_lock);
+	spin_lock_irq(&ha->lock);
 	sas_queue_work(ha, &d->reset_work);
-	spin_unlock_irq(&ha->state_lock);
+	spin_unlock_irq(&ha->lock);
 
 	rc = sas_drain_work(ha);
 	if (rc == 0)
@@ -438,9 +438,9 @@
 	d->enable_result = 0;
 	d->enable = enable;
 
-	spin_lock_irq(&ha->state_lock);
+	spin_lock_irq(&ha->lock);
 	sas_queue_work(ha, &d->enable_work);
-	spin_unlock_irq(&ha->state_lock);
+	spin_unlock_irq(&ha->lock);
 
 	rc = sas_drain_work(ha);
 	if (rc == 0)
diff --git a/drivers/scsi/libsas/sas_scsi_host.c b/drivers/scsi/libsas/sas_scsi_host.c
index f0b9b7b..a09da44 100644
--- a/drivers/scsi/libsas/sas_scsi_host.c
+++ b/drivers/scsi/libsas/sas_scsi_host.c
@@ -667,16 +667,20 @@
 	goto out;
 }
 
+
 void sas_scsi_recover_host(struct Scsi_Host *shost)
 {
 	struct sas_ha_struct *ha = SHOST_TO_SAS_HA(shost);
-	unsigned long flags;
 	LIST_HEAD(eh_work_q);
+	int tries = 0;
+	bool retry;
 
-	spin_lock_irqsave(shost->host_lock, flags);
+retry:
+	tries++;
+	retry = true;
+	spin_lock_irq(shost->host_lock);
 	list_splice_init(&shost->eh_cmd_q, &eh_work_q);
-	shost->host_eh_scheduled = 0;
-	spin_unlock_irqrestore(shost->host_lock, flags);
+	spin_unlock_irq(shost->host_lock);
 
 	SAS_DPRINTK("Enter %s busy: %d failed: %d\n",
 		    __func__, shost->host_busy, shost->host_failed);
@@ -710,8 +714,19 @@
 
 	scsi_eh_flush_done_q(&ha->eh_done_q);
 
-	SAS_DPRINTK("--- Exit %s: busy: %d failed: %d\n",
-		    __func__, shost->host_busy, shost->host_failed);
+	/* check if any new eh work was scheduled during the last run */
+	spin_lock_irq(&ha->lock);
+	if (ha->eh_active == 0) {
+		shost->host_eh_scheduled = 0;
+		retry = false;
+	}
+	spin_unlock_irq(&ha->lock);
+
+	if (retry)
+		goto retry;
+
+	SAS_DPRINTK("--- Exit %s: busy: %d failed: %d tries: %d\n",
+		    __func__, shost->host_busy, shost->host_failed, tries);
 }
 
 enum blk_eh_timer_return sas_scsi_timed_out(struct scsi_cmnd *cmd)