Merge branch 'nvmf-4.8-rc' of git://git.infradead.org/nvme-fabrics into for-linus
Sagi writes:
Mostly stability fixes for nvmet and rdma:
- fix uninitialized rdma_cm private data (from Roland).
- rdma device removal handling (host and target).
- fix controller disconnect during active mounts.
- fix namespaces lost after fabric reconnects.
- remove redundant calls to namespace removal (rdma, loop).
- actually send controller shutdown when disconnecting.
- reconnect fixes (namespace rescan and AEN requeue).
- nvmet controller serial number inconsistency fix.
diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index 3e3ce2b..8d2875b 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -12,13 +12,11 @@
* more details.
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-#include <linux/delay.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/err.h>
#include <linux/string.h>
-#include <linux/jiffies.h>
#include <linux/atomic.h>
#include <linux/blk-mq.h>
#include <linux/types.h>
@@ -26,7 +24,6 @@
#include <linux/mutex.h>
#include <linux/scatterlist.h>
#include <linux/nvme.h>
-#include <linux/t10-pi.h>
#include <asm/unaligned.h>
#include <rdma/ib_verbs.h>
@@ -169,7 +166,6 @@
static int nvme_rdma_cm_handler(struct rdma_cm_id *cm_id,
struct rdma_cm_event *event);
static void nvme_rdma_recv_done(struct ib_cq *cq, struct ib_wc *wc);
-static int __nvme_rdma_del_ctrl(struct nvme_rdma_ctrl *ctrl);
/* XXX: really should move to a generic header sooner or later.. */
static inline void put_unaligned_le24(u32 val, u8 *p)
@@ -687,11 +683,6 @@
list_del(&ctrl->list);
mutex_unlock(&nvme_rdma_ctrl_mutex);
- if (ctrl->ctrl.tagset) {
- blk_cleanup_queue(ctrl->ctrl.connect_q);
- blk_mq_free_tag_set(&ctrl->tag_set);
- nvme_rdma_dev_put(ctrl->device);
- }
kfree(ctrl->queues);
nvmf_free_options(nctrl->opts);
free_ctrl:
@@ -748,8 +739,11 @@
changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE);
WARN_ON_ONCE(!changed);
- if (ctrl->queue_count > 1)
+ if (ctrl->queue_count > 1) {
nvme_start_queues(&ctrl->ctrl);
+ nvme_queue_scan(&ctrl->ctrl);
+ nvme_queue_async_events(&ctrl->ctrl);
+ }
dev_info(ctrl->ctrl.device, "Successfully reconnected\n");
@@ -1269,7 +1263,7 @@
{
struct nvme_rdma_ctrl *ctrl = queue->ctrl;
struct rdma_conn_param param = { };
- struct nvme_rdma_cm_req priv;
+ struct nvme_rdma_cm_req priv = { };
int ret;
param.qp_num = queue->qp->qp_num;
@@ -1318,37 +1312,39 @@
* that caught the event. Since we hold the callout until the controller
* deletion is completed, we'll deadlock if the controller deletion will
* call rdma_destroy_id on this queue's cm_id. Thus, we claim ownership
- * of destroying this queue before-hand, destroy the queue resources
- * after the controller deletion completed with the exception of destroying
- * the cm_id implicitely by returning a non-zero rc to the callout.
+ * of destroying this queue before-hand, destroy the queue resources,
+ * then queue the controller deletion which won't destroy this queue and
+ * we destroy the cm_id implicitly by returning a non-zero rc to the callout.
*/
static int nvme_rdma_device_unplug(struct nvme_rdma_queue *queue)
{
struct nvme_rdma_ctrl *ctrl = queue->ctrl;
- int ret, ctrl_deleted = 0;
+ int ret = 0; /* stays 0 (keep the cm_id) unless we free the queue below */
- /* First disable the queue so ctrl delete won't free it */
- if (!test_and_clear_bit(NVME_RDMA_Q_CONNECTED, &queue->flags))
- goto out;
+ /* Own the controller deletion */
+ if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_DELETING))
+ return 0;
- /* delete the controller */
- ret = __nvme_rdma_del_ctrl(ctrl);
- if (!ret) {
- dev_warn(ctrl->ctrl.device,
- "Got rdma device removal event, deleting ctrl\n");
- flush_work(&ctrl->delete_work);
+ dev_warn(ctrl->ctrl.device,
+ "Got rdma device removal event, deleting ctrl\n");
+
+ /* Get rid of reconnect work if it's running */
+ cancel_delayed_work_sync(&ctrl->reconnect_work);
+
+ /* Disable the queue so ctrl delete won't free it */
+ if (test_and_clear_bit(NVME_RDMA_Q_CONNECTED, &queue->flags)) {
+ /* Free this queue ourselves */
+ nvme_rdma_stop_queue(queue);
+ nvme_rdma_destroy_queue_ib(queue);
/* Return non-zero so the cm_id will destroy implicitly */
- ctrl_deleted = 1;
-
- /* Free this queue ourselves */
- rdma_disconnect(queue->cm_id);
- ib_drain_qp(queue->qp);
- nvme_rdma_destroy_queue_ib(queue);
+ ret = 1;
}
-out:
- return ctrl_deleted;
+ /* Queue controller deletion */
+ queue_work(nvme_rdma_wq, &ctrl->delete_work);
+ flush_work(&ctrl->delete_work);
+ return ret;
}
static int nvme_rdma_cm_handler(struct rdma_cm_id *cm_id,
@@ -1648,7 +1644,7 @@
nvme_rdma_free_io_queues(ctrl);
}
- if (ctrl->ctrl.state == NVME_CTRL_LIVE)
+ if (test_bit(NVME_RDMA_Q_CONNECTED, &ctrl->queues[0].flags))
nvme_shutdown_ctrl(&ctrl->ctrl);
blk_mq_stop_hw_queues(ctrl->ctrl.admin_q);
@@ -1657,15 +1653,27 @@
nvme_rdma_destroy_admin_queue(ctrl);
}
+static void __nvme_rdma_remove_ctrl(struct nvme_rdma_ctrl *ctrl, bool shutdown)
+{
+ nvme_uninit_ctrl(&ctrl->ctrl);
+ if (shutdown) /* transport shutdown is optional; the caller decides */
+ nvme_rdma_shutdown_ctrl(ctrl);
+
+ if (ctrl->ctrl.tagset) { /* tagset present: free connect_q and the tag set, drop the device ref */
+ blk_cleanup_queue(ctrl->ctrl.connect_q);
+ blk_mq_free_tag_set(&ctrl->tag_set);
+ nvme_rdma_dev_put(ctrl->device);
+ }
+
+ nvme_put_ctrl(&ctrl->ctrl);
+}
+
static void nvme_rdma_del_ctrl_work(struct work_struct *work)
{
struct nvme_rdma_ctrl *ctrl = container_of(work,
struct nvme_rdma_ctrl, delete_work);
- nvme_remove_namespaces(&ctrl->ctrl);
- nvme_rdma_shutdown_ctrl(ctrl);
- nvme_uninit_ctrl(&ctrl->ctrl);
- nvme_put_ctrl(&ctrl->ctrl);
+ __nvme_rdma_remove_ctrl(ctrl, true); /* true: also run nvme_rdma_shutdown_ctrl() */
}
static int __nvme_rdma_del_ctrl(struct nvme_rdma_ctrl *ctrl)
@@ -1698,9 +1706,7 @@
struct nvme_rdma_ctrl *ctrl = container_of(work,
struct nvme_rdma_ctrl, delete_work);
- nvme_remove_namespaces(&ctrl->ctrl);
- nvme_uninit_ctrl(&ctrl->ctrl);
- nvme_put_ctrl(&ctrl->ctrl);
+ __nvme_rdma_remove_ctrl(ctrl, false);
}
static void nvme_rdma_reset_ctrl_work(struct work_struct *work)
@@ -1739,6 +1745,7 @@
if (ctrl->queue_count > 1) {
nvme_start_queues(&ctrl->ctrl);
nvme_queue_scan(&ctrl->ctrl);
+ nvme_queue_async_events(&ctrl->ctrl);
}
return;
diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c
index 2fac17a..47c564b 100644
--- a/drivers/nvme/target/admin-cmd.c
+++ b/drivers/nvme/target/admin-cmd.c
@@ -13,7 +13,6 @@
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/module.h>
-#include <linux/random.h>
#include <generated/utsrelease.h>
#include "nvmet.h"
@@ -83,7 +82,6 @@
{
struct nvmet_ctrl *ctrl = req->sq->ctrl;
struct nvme_id_ctrl *id;
- u64 serial;
u16 status = 0;
id = kzalloc(sizeof(*id), GFP_KERNEL);
@@ -96,10 +94,8 @@
id->vid = 0;
id->ssvid = 0;
- /* generate a random serial number as our controllers are ephemeral: */
- get_random_bytes(&serial, sizeof(serial));
memset(id->sn, ' ', sizeof(id->sn));
- snprintf(id->sn, sizeof(id->sn), "%llx", serial);
+ snprintf(id->sn, sizeof(id->sn), "%llx", ctrl->serial);
memset(id->mn, ' ', sizeof(id->mn));
strncpy((char *)id->mn, "Linux", sizeof(id->mn));
diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c
index 8a891ca..6559d5a 100644
--- a/drivers/nvme/target/core.c
+++ b/drivers/nvme/target/core.c
@@ -13,6 +13,7 @@
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/module.h>
+#include <linux/random.h>
#include "nvmet.h"
static struct nvmet_fabrics_ops *nvmet_transports[NVMF_TRTYPE_MAX];
@@ -728,6 +729,9 @@
memcpy(ctrl->subsysnqn, subsysnqn, NVMF_NQN_SIZE);
memcpy(ctrl->hostnqn, hostnqn, NVMF_NQN_SIZE);
+ /* generate a random serial number as our controllers are ephemeral: */
+ get_random_bytes(&ctrl->serial, sizeof(ctrl->serial));
+
kref_init(&ctrl->ref);
ctrl->subsys = subsys;
diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c
index 94e7829..7affd40 100644
--- a/drivers/nvme/target/loop.c
+++ b/drivers/nvme/target/loop.c
@@ -414,9 +414,8 @@
struct nvme_loop_ctrl *ctrl = container_of(work,
struct nvme_loop_ctrl, delete_work);
- nvme_remove_namespaces(&ctrl->ctrl);
- nvme_loop_shutdown_ctrl(ctrl);
nvme_uninit_ctrl(&ctrl->ctrl);
+ nvme_loop_shutdown_ctrl(ctrl);
nvme_put_ctrl(&ctrl->ctrl);
}
@@ -501,7 +500,6 @@
nvme_loop_destroy_admin_queue(ctrl);
out_disable:
dev_warn(ctrl->ctrl.device, "Removing after reset failure\n");
- nvme_remove_namespaces(&ctrl->ctrl);
nvme_uninit_ctrl(&ctrl->ctrl);
nvme_put_ctrl(&ctrl->ctrl);
}
diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h
index 57dd6d8..76b6eed 100644
--- a/drivers/nvme/target/nvmet.h
+++ b/drivers/nvme/target/nvmet.h
@@ -113,6 +113,7 @@
struct mutex lock;
u64 cap;
+ u64 serial;
u32 cc;
u32 csts;
diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c
index e06d504..b4d6485 100644
--- a/drivers/nvme/target/rdma.c
+++ b/drivers/nvme/target/rdma.c
@@ -77,6 +77,7 @@
NVMET_RDMA_Q_CONNECTING,
NVMET_RDMA_Q_LIVE,
NVMET_RDMA_Q_DISCONNECTING,
+ NVMET_RDMA_IN_DEVICE_REMOVAL,
};
struct nvmet_rdma_queue {
@@ -615,15 +616,10 @@
if (!len)
return 0;
- /* use the already allocated data buffer if possible */
- if (len <= NVMET_RDMA_INLINE_DATA_SIZE && rsp->queue->host_qid) {
- nvmet_rdma_use_inline_sg(rsp, len, 0);
- } else {
- status = nvmet_rdma_alloc_sgl(&rsp->req.sg, &rsp->req.sg_cnt,
- len);
- if (status)
- return status;
- }
+ status = nvmet_rdma_alloc_sgl(&rsp->req.sg, &rsp->req.sg_cnt,
+ len);
+ if (status)
+ return status;
ret = rdma_rw_ctx_init(&rsp->rw, cm_id->qp, cm_id->port_num,
rsp->req.sg, rsp->req.sg_cnt, 0, addr, key,
@@ -984,7 +980,10 @@
struct nvmet_rdma_device *dev = queue->dev;
nvmet_rdma_free_queue(queue);
- rdma_destroy_id(cm_id);
+
+ if (queue->state != NVMET_RDMA_IN_DEVICE_REMOVAL)
+ rdma_destroy_id(cm_id);
+
kref_put(&dev->ref, nvmet_rdma_free_dev);
}
@@ -1233,8 +1232,9 @@
switch (queue->state) {
case NVMET_RDMA_Q_CONNECTING:
case NVMET_RDMA_Q_LIVE:
- disconnect = true;
queue->state = NVMET_RDMA_Q_DISCONNECTING;
+ case NVMET_RDMA_IN_DEVICE_REMOVAL:
+ disconnect = true;
break;
case NVMET_RDMA_Q_DISCONNECTING:
break;
@@ -1272,6 +1272,62 @@
schedule_work(&queue->release_work);
}
+/**
+ * nvmet_rdma_device_removal() - Handle RDMA device removal
+ * @cm_id: rdma_cm id; for a listener cm_id its context is the nvmet port
+ * @queue: nvmet rdma queue (cm id qp_context)
+ *
+ * DEVICE_REMOVAL event notifies us that the RDMA device is about
+ * to unplug so we should take care of destroying our RDMA resources.
+ * This event will be generated for each allocated cm_id.
+ *
+ * Note that this event can be generated on a normal queue cm_id
+ * and/or a device bound listener cm_id (where in this case
+ * queue will be null).
+ *
+ * We claim ownership of destroying the cm_id. For queues we move
+ * the queue state to NVMET_RDMA_IN_DEVICE_REMOVAL and for port
+ * we nullify the priv to prevent double cm_id destruction and destroy
+ * the cm_id implicitly by returning a non-zero rc to the callout.
+ */
+static int nvmet_rdma_device_removal(struct rdma_cm_id *cm_id,
+ struct nvmet_rdma_queue *queue)
+{
+ unsigned long flags;
+
+ if (!queue) {
+ struct nvmet_port *port = cm_id->context;
+
+ /*
+ * This is a listener cm_id. Make sure that
+ * future remove_port won't invoke a double
+ * cm_id destroy. Use atomic xchg to make sure
+ * we don't compete with remove_port.
+ */
+ if (xchg(&port->priv, NULL) != cm_id)
+ return 0;
+ } else {
+ /*
+ * This is a queue cm_id. Make sure that
+ * release queue will not destroy the cm_id
+ * and schedule all ctrl queues removal (only
+ * if the queue is not disconnecting already).
+ */
+ spin_lock_irqsave(&queue->state_lock, flags);
+ if (queue->state != NVMET_RDMA_Q_DISCONNECTING)
+ queue->state = NVMET_RDMA_IN_DEVICE_REMOVAL;
+ spin_unlock_irqrestore(&queue->state_lock, flags);
+ nvmet_rdma_queue_disconnect(queue);
+ flush_scheduled_work();
+ }
+
+ /*
+ * We need to return 1 so that the core will destroy
+ * its own ID. What a great API design..
+ */
+ return 1;
+}
+
static int nvmet_rdma_cm_handler(struct rdma_cm_id *cm_id,
struct rdma_cm_event *event)
{
@@ -1294,20 +1350,11 @@
break;
case RDMA_CM_EVENT_ADDR_CHANGE:
case RDMA_CM_EVENT_DISCONNECTED:
- case RDMA_CM_EVENT_DEVICE_REMOVAL:
case RDMA_CM_EVENT_TIMEWAIT_EXIT:
- /*
- * We can get the device removal callback even for a
- * CM ID that we aren't actually using. In that case
- * the context pointer is NULL, so we shouldn't try
- * to disconnect a non-existing queue. But we also
- * need to return 1 so that the core will destroy
- * it's own ID. What a great API design..
- */
- if (queue)
- nvmet_rdma_queue_disconnect(queue);
- else
- ret = 1;
+ nvmet_rdma_queue_disconnect(queue);
+ break;
+ case RDMA_CM_EVENT_DEVICE_REMOVAL:
+ ret = nvmet_rdma_device_removal(cm_id, queue);
break;
case RDMA_CM_EVENT_REJECTED:
case RDMA_CM_EVENT_UNREACHABLE:
@@ -1396,9 +1443,10 @@
static void nvmet_rdma_remove_port(struct nvmet_port *port)
{
- struct rdma_cm_id *cm_id = port->priv;
+ struct rdma_cm_id *cm_id = xchg(&port->priv, NULL); /* take the cm_id and clear port->priv in one step */
- rdma_destroy_id(cm_id);
+ if (cm_id) /* NULL when the device removal handler already claimed it */
+ rdma_destroy_id(cm_id);
}
static struct nvmet_fabrics_ops nvmet_rdma_ops = {