NVMe: Fix potential corruption during shutdown The driver has to end unreturned commands at some point even if the controller has not provided a completion. The driver tried to be safe by deleting IO queues prior to ending all unreturned commands. That should cause the controller to internally abort inflight commands, but IO queue deletion request does not have to be successful, so all bets are off. We still have to make progress, so to be extra safe, this patch doesn't clear a queue to release the dma mapping for a command until after the pci device has been disabled. This patch removes the special handling during device initialization so controller recovery can be done all the time. This is possible since initialization is not inlined with pci probe anymore. Reported-by: Nilish Choudhury <nilesh.choudhury@oracle.com> Signed-off-by: Keith Busch <keith.busch@intel.com>

commit: 07836e659c81ec6b0d683dfbf7958339a22a7b69 [log] [tgz]
author: Keith Busch <keith.busch@intel.com> Thu Feb 19 10:34:48 2015 -0700
committer: Keith Busch <keith.busch@intel.com> Thu Feb 19 16:15:37 2015 -0700
tree: 6c043eb7b855825c6cb304f19cb0103bc4303cc2
parent: 2e1d8448196ba85cd78a18723413a3c92aabe0f3 [diff] [blame]
diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c
index a57685f..0f38820 100644
--- a/drivers/block/nvme-core.c
+++ b/drivers/block/nvme-core.c

@@ -1274,29 +1274,18 @@
 	struct nvme_cmd_info *cmd = blk_mq_rq_to_pdu(req);
 	struct nvme_queue *nvmeq = cmd->nvmeq;
 
+	dev_warn(nvmeq->q_dmadev, "Timeout I/O %d QID %d\n", req->tag,
+							nvmeq->qid);
+	spin_lock_irq(&nvmeq->q_lock);
+	nvme_abort_req(req);
+	spin_unlock_irq(&nvmeq->q_lock);
+
 	/*
 	 * The aborted req will be completed on receiving the abort req.
 	 * We enable the timer again. If hit twice, it'll cause a device reset,
 	 * as the device then is in a faulty state.
 	 */
-	int ret = BLK_EH_RESET_TIMER;
-
-	dev_warn(nvmeq->q_dmadev, "Timeout I/O %d QID %d\n", req->tag,
-							nvmeq->qid);
-
-	spin_lock_irq(&nvmeq->q_lock);
-	if (!nvmeq->dev->initialized) {
-		/*
-		 * Force cancelled command frees the request, which requires we
-		 * return BLK_EH_NOT_HANDLED.
-		 */
-		nvme_cancel_queue_ios(nvmeq->hctx, req, nvmeq, reserved);
-		ret = BLK_EH_NOT_HANDLED;
-	} else
-		nvme_abort_req(req);
-	spin_unlock_irq(&nvmeq->q_lock);
-
-	return ret;
+	return BLK_EH_RESET_TIMER;
 }
 
 static void nvme_free_queue(struct nvme_queue *nvmeq)
@@ -1349,7 +1338,6 @@
 	struct blk_mq_hw_ctx *hctx = nvmeq->hctx;
 
 	spin_lock_irq(&nvmeq->q_lock);
-	nvme_process_cq(nvmeq);
 	if (hctx && hctx->tags)
 		blk_mq_tag_busy_iter(hctx, nvme_cancel_queue_ios, nvmeq);
 	spin_unlock_irq(&nvmeq->q_lock);
@@ -1372,7 +1360,10 @@
 	}
 	if (!qid && dev->admin_q)
 		blk_mq_freeze_queue_start(dev->admin_q);
-	nvme_clear_queue(nvmeq);
+
+	spin_lock_irq(&nvmeq->q_lock);
+	nvme_process_cq(nvmeq);
+	spin_unlock_irq(&nvmeq->q_lock);
 }
 
 static struct nvme_queue *nvme_alloc_queue(struct nvme_dev *dev, int qid,
@@ -2121,8 +2112,7 @@
 		spin_lock(&dev_list_lock);
 		list_for_each_entry_safe(dev, next, &dev_list, node) {
 			int i;
-			if (readl(&dev->bar->csts) & NVME_CSTS_CFS &&
-							dev->initialized) {
+			if (readl(&dev->bar->csts) & NVME_CSTS_CFS) {
 				if (work_busy(&dev->reset_work))
 					continue;
 				list_del_init(&dev->node);
@@ -2525,8 +2515,6 @@
 static void nvme_del_queue_end(struct nvme_queue *nvmeq)
 {
 	struct nvme_delq_ctx *dq = nvmeq->cmdinfo.ctx;
-
-	nvme_clear_queue(nvmeq);
 	nvme_put_dq(dq);
 }
 
@@ -2669,7 +2657,6 @@
 	int i;
 	u32 csts = -1;
 
-	dev->initialized = 0;
 	nvme_dev_list_remove(dev);
 
 	if (dev->bar) {
@@ -2680,7 +2667,6 @@
 		for (i = dev->queue_count - 1; i >= 0; i--) {
 			struct nvme_queue *nvmeq = dev->queues[i];
 			nvme_suspend_queue(nvmeq);
-			nvme_clear_queue(nvmeq);
 		}
 	} else {
 		nvme_disable_io_queues(dev);
@@ -2688,6 +2674,9 @@
 		nvme_disable_queue(dev, 0);
 	}
 	nvme_dev_unmap(dev);
+
+	for (i = dev->queue_count - 1; i >= 0; i--)
+		nvme_clear_queue(dev->queues[i]);
 }
 
 static void nvme_dev_remove(struct nvme_dev *dev)
@@ -2955,7 +2944,6 @@
 		nvme_unfreeze_queues(dev);
 		nvme_set_irq_hints(dev);
 	}
-	dev->initialized = 1;
 	return 0;
 }
 
@@ -3063,11 +3051,12 @@
 		goto reset;
 
 	nvme_set_irq_hints(dev);
-	dev->initialized = 1;
 	return;
  reset:
-	dev->reset_workfn = nvme_reset_failed_dev;
-	queue_work(nvme_workq, &dev->reset_work);
+	if (!work_busy(&dev->reset_work)) {
+		dev->reset_workfn = nvme_reset_failed_dev;
+		queue_work(nvme_workq, &dev->reset_work);
+	}
 }
 
 static void nvme_reset_notify(struct pci_dev *pdev, bool prepare)
commit	07836e659c81ec6b0d683dfbf7958339a22a7b69	[log] [tgz]
author	Keith Busch <keith.busch@intel.com>	Thu Feb 19 10:34:48 2015 -0700
committer	Keith Busch <keith.busch@intel.com>	Thu Feb 19 16:15:37 2015 -0700
tree	6c043eb7b855825c6cb304f19cb0103bc4303cc2
parent	2e1d8448196ba85cd78a18723413a3c92aabe0f3 [diff] [blame]