isci: atomic device lookup and reference counting We have unsafe references to remote devices that are notified to disappear at lldd_dev_gone. In order to clean this up, we need a single canonical source for device lookups and stable references once a lookup succeeds. Towards that end, guarantee that domain_device.lldd_dev is NULL as soon as we start the process of stopping a device. Any code path that wants to safely look up a remote device must do so through task->dev->lldd_dev (isci_lookup_device()). For in-flight references outside of scic_lock, we need reference counting to ensure that the device is not recycled before we are done with it. Simplify device back references to just scic_sds_request.target_device, which is now the only permissible internal reference that is maintained relative to the reference count. There were two occasions where we wanted new I/Os to be treated as SAS_TASK_UNDELIVERED but where the domain_dev->lldd_dev link was still intact. Introduce a 'gone' flag to prevent I/O while waiting for libsas to take action on the port down event. One 'core' leftover is that we currently call scic_remote_device_destruct() from isci_remote_device_deconstruct(), which is called when the 'core' says the device is stopped. It would be more natural for the final put to trigger isci_remote_device_deconstruct(), but this implementation is deferred as it requires other changes. Signed-off-by: Dan Williams <dan.j.williams@intel.com>

commit: 209fae14fabfd48525e5630bebbbd4ca15090c60 [log] [tgz]
author: Dan Williams <dan.j.williams@intel.com> Mon Jun 13 17:39:44 2011 -0700
committer: Dan Williams <dan.j.williams@intel.com> Sun Jul 03 04:04:51 2011 -0700
tree: b251b9b394b3493cc15242ea31002abcb4e9bb59
parent: 360b03ed178a4fe3971b0a098d8feeb53333481b [diff] [blame]
diff --git a/drivers/scsi/isci/request.c b/drivers/scsi/isci/request.c
index f0813d0..fd6314a 100644
--- a/drivers/scsi/isci/request.c
+++ b/drivers/scsi/isci/request.c

@@ -2313,7 +2313,7 @@
  * none.
  */
 static void isci_request_handle_controller_specific_errors(
-	struct isci_remote_device *isci_device,
+	struct isci_remote_device *idev,
 	struct isci_request *request,
 	struct sas_task *task,
 	enum service_response *response_ptr,
@@ -2353,8 +2353,7 @@
 			 * that we ignore the quiesce state, since we are
 			 * concerned about the actual device state.
 			 */
-			if ((isci_device->status == isci_stopping) ||
-			    (isci_device->status == isci_stopped))
+			if (!idev)
 				*status_ptr = SAS_DEVICE_UNKNOWN;
 			else
 				*status_ptr = SAS_ABORTED_TASK;
@@ -2367,8 +2366,7 @@
 			/* Task in the target is not done. */
 			*response_ptr = SAS_TASK_UNDELIVERED;
 
-			if ((isci_device->status == isci_stopping) ||
-			    (isci_device->status == isci_stopped))
+			if (!idev)
 				*status_ptr = SAS_DEVICE_UNKNOWN;
 			else
 				*status_ptr = SAM_STAT_TASK_ABORTED;
@@ -2399,8 +2397,7 @@
 		 * that we ignore the quiesce state, since we are
 		 * concerned about the actual device state.
 		 */
-		if ((isci_device->status == isci_stopping) ||
-		    (isci_device->status == isci_stopped))
+		if (!idev)
 			*status_ptr = SAS_DEVICE_UNKNOWN;
 		else
 			*status_ptr = SAS_ABORTED_TASK;
@@ -2629,7 +2626,7 @@
 	struct ssp_response_iu *resp_iu;
 	void *resp_buf;
 	unsigned long task_flags;
-	struct isci_remote_device *isci_device   = request->isci_device;
+	struct isci_remote_device *idev = isci_lookup_device(task->dev);
 	enum service_response response       = SAS_TASK_UNDELIVERED;
 	enum exec_status status         = SAS_ABORTED_TASK;
 	enum isci_request_status request_status;
@@ -2672,9 +2669,7 @@
 		 * that we ignore the quiesce state, since we are
 		 * concerned about the actual device state.
 		 */
-		if ((isci_device->status == isci_stopping)
-		    || (isci_device->status == isci_stopped)
-		    )
+		if (!idev)
 			status = SAS_DEVICE_UNKNOWN;
 		else
 			status = SAS_ABORTED_TASK;
@@ -2697,8 +2692,7 @@
 		request->complete_in_target = true;
 		response = SAS_TASK_UNDELIVERED;
 
-		if ((isci_device->status == isci_stopping) ||
-		    (isci_device->status == isci_stopped))
+		if (!idev)
 			/* The device has been /is being stopped. Note that
 			 * we ignore the quiesce state, since we are
 			 * concerned about the actual device state.
@@ -2728,8 +2722,7 @@
 		 * that we ignore the quiesce state, since we are
 		 * concerned about the actual device state.
 		 */
-		if ((isci_device->status == isci_stopping) ||
-		    (isci_device->status == isci_stopped))
+		if (!idev)
 			status = SAS_DEVICE_UNKNOWN;
 		else
 			status = SAS_ABORTED_TASK;
@@ -2861,8 +2854,7 @@
 			 * that we ignore the quiesce state, since we are
 			 * concerned about the actual device state.
 			 */
-			if ((isci_device->status == isci_stopping) ||
-			    (isci_device->status == isci_stopped))
+			if (!idev)
 				status = SAS_DEVICE_UNKNOWN;
 			else
 				status = SAS_ABORTED_TASK;
@@ -2873,7 +2865,7 @@
 		case SCI_FAILURE_CONTROLLER_SPECIFIC_IO_ERR:
 
 			isci_request_handle_controller_specific_errors(
-				isci_device, request, task, &response, &status,
+				idev, request, task, &response, &status,
 				&complete_to_host);
 
 			break;
@@ -2902,8 +2894,7 @@
 
 			/* Fail the I/O so it can be retried. */
 			response = SAS_TASK_UNDELIVERED;
-			if ((isci_device->status == isci_stopping) ||
-			    (isci_device->status == isci_stopped))
+			if (!idev)
 				status = SAS_DEVICE_UNKNOWN;
 			else
 				status = SAS_ABORTED_TASK;
@@ -2926,8 +2917,7 @@
 			 * that we ignore the quiesce state, since we are
 			 * concerned about the actual device state.
 			 */
-			if ((isci_device->status == isci_stopping) ||
-			    (isci_device->status == isci_stopped))
+			if (!idev)
 				status = SAS_DEVICE_UNKNOWN;
 			else
 				status = SAS_ABORTED_TASK;
@@ -2953,8 +2943,10 @@
 
 	/* complete the io request to the core. */
 	scic_controller_complete_io(&isci_host->sci,
-				    &isci_device->sci,
+				    request->sci.target_device,
 				    &request->sci);
+	isci_put_device(idev);
+
 	/* set terminated handle so it cannot be completed or
 	 * terminated again, and to cause any calls into abort
 	 * task to recognize the already completed case.
@@ -3511,7 +3503,6 @@
 }
 
 static struct isci_request *isci_request_alloc_core(struct isci_host *ihost,
-						    struct isci_remote_device *idev,
 						    gfp_t gfp_flags)
 {
 	dma_addr_t handle;
@@ -3528,7 +3519,6 @@
 	spin_lock_init(&ireq->state_lock);
 	ireq->request_daddr = handle;
 	ireq->isci_host = ihost;
-	ireq->isci_device = idev;
 	ireq->io_request_completion = NULL;
 	ireq->terminated = false;
 
@@ -3546,12 +3536,11 @@
 
 static struct isci_request *isci_request_alloc_io(struct isci_host *ihost,
 						  struct sas_task *task,
-						  struct isci_remote_device *idev,
 						  gfp_t gfp_flags)
 {
 	struct isci_request *ireq;
 
-	ireq = isci_request_alloc_core(ihost, idev, gfp_flags);
+	ireq = isci_request_alloc_core(ihost, gfp_flags);
 	if (ireq) {
 		ireq->ttype_ptr.io_task_ptr = task;
 		ireq->ttype = io_task;
@@ -3562,12 +3551,11 @@
 
 struct isci_request *isci_request_alloc_tmf(struct isci_host *ihost,
 					    struct isci_tmf *isci_tmf,
-					    struct isci_remote_device *idev,
 					    gfp_t gfp_flags)
 {
 	struct isci_request *ireq;
 
-	ireq = isci_request_alloc_core(ihost, idev, gfp_flags);
+	ireq = isci_request_alloc_core(ihost, gfp_flags);
 	if (ireq) {
 		ireq->ttype_ptr.tmf_task_ptr = isci_tmf;
 		ireq->ttype = tmf_task;
@@ -3575,21 +3563,16 @@
 	return ireq;
 }
 
-int isci_request_execute(struct isci_host *ihost, struct sas_task *task,
-			 gfp_t gfp_flags)
+int isci_request_execute(struct isci_host *ihost, struct isci_remote_device *idev,
+			 struct sas_task *task, gfp_t gfp_flags)
 {
 	enum sci_status status = SCI_FAILURE_UNSUPPORTED_PROTOCOL;
-	struct scic_sds_remote_device *sci_dev;
-	struct isci_remote_device *idev;
 	struct isci_request *ireq;
 	unsigned long flags;
 	int ret = 0;
 
-	idev = task->dev->lldd_dev;
-	sci_dev = &idev->sci;
-
 	/* do common allocation and init of request object. */
-	ireq = isci_request_alloc_io(ihost, task, idev, gfp_flags);
+	ireq = isci_request_alloc_io(ihost, task, gfp_flags);
 	if (!ireq)
 		goto out;
 
@@ -3605,8 +3588,7 @@
 	spin_lock_irqsave(&ihost->scic_lock, flags);
 
 	/* send the request, let the core assign the IO TAG.	*/
-	status = scic_controller_start_io(&ihost->sci, sci_dev,
-					  &ireq->sci,
+	status = scic_controller_start_io(&ihost->sci, &idev->sci, &ireq->sci,
 					  SCI_CONTROLLER_INVALID_IO_TAG);
 	if (status != SCI_SUCCESS &&
 	    status != SCI_FAILURE_REMOTE_DEVICE_RESET_REQUIRED) {
commit	209fae14fabfd48525e5630bebbbd4ca15090c60	[log] [tgz]
author	Dan Williams <dan.j.williams@intel.com>	Mon Jun 13 17:39:44 2011 -0700
committer	Dan Williams <dan.j.williams@intel.com>	Sun Jul 03 04:04:51 2011 -0700
tree	b251b9b394b3493cc15242ea31002abcb4e9bb59
parent	360b03ed178a4fe3971b0a098d8feeb53333481b [diff] [blame]