RDMA/nes: Fix crash in nes_accept()
While running IMP_EXT's window test, we saw a crash in nes_accept().
Here is the sequence of what happened:
(1) In MVAPICH2, connect request is received for port #0.
FIX: Add a nes_connect() check to make sure local or remote tcp port
is not 0.
(2) Remote node's (passive) TCP stack sends a reset when it gets a
connect request because of port = 0. Active side set the connect
error to IW_CM_EVENT_STATUS_REJECTED when it received the RST from
remote node.
FIX: The corect error code is -ECONNRESET.
(3) Wrong error code of IW_CM_EVENT_STATUS_REJECTED causes the core to
destroy its listener ports. Here there are connections that may
have sent an MPA request up and waiting for accept or reject. But
the listener and its cm_nodes have been freed already causing the
crash noticed.
FIX: The cm_node is freed only if its state is not
NES_CM_STATE_MPAREQ_RCVD. If cm_node's state is
NES_CM_STATE_MPAREQ_RCVD then its new state is set to
NES_CM_STATE_LISTENER_DESTROYED and it is not freed. When
nes_accept() or nes_reject() is received, its state is checked
for NES_CM_STATE_LISTENER_DESTROYED and in this case the cm_node
is freed and error is returned.
Signed-off-by: Faisal Latif <faisal.latif@intel.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
diff --git a/drivers/infiniband/hw/nes/nes_cm.c b/drivers/infiniband/hw/nes/nes_cm.c
index 08fcd25..ec04786 100644
--- a/drivers/infiniband/hw/nes/nes_cm.c
+++ b/drivers/infiniband/hw/nes/nes_cm.c
@@ -978,6 +978,7 @@
reset_entry);
{
struct nes_cm_node *loopback = cm_node->loopbackpartner;
+ enum nes_cm_node_state old_state;
if (NES_CM_STATE_FIN_WAIT1 <= cm_node->state) {
rem_ref_cm_node(cm_node->cm_core, cm_node);
} else {
@@ -989,11 +990,12 @@
NES_CM_STATE_CLOSED;
WARN_ON(1);
} else {
- cm_node->state =
- NES_CM_STATE_CLOSED;
- rem_ref_cm_node(
- cm_node->cm_core,
- cm_node);
+ old_state = cm_node->state;
+ cm_node->state = NES_CM_STATE_LISTENER_DESTROYED;
+ if (old_state != NES_CM_STATE_MPAREQ_RCVD)
+ rem_ref_cm_node(
+ cm_node->cm_core,
+ cm_node);
}
} else {
struct nes_cm_event event;
@@ -1009,6 +1011,7 @@
loopback->loc_port;
event.cm_info.cm_id = loopback->cm_id;
cm_event_connect_error(&event);
+ cm_node->state = NES_CM_STATE_LISTENER_DESTROYED;
loopback->state = NES_CM_STATE_CLOSED;
event.cm_node = cm_node;
@@ -2131,30 +2134,39 @@
cm_node->state = NES_CM_STATE_CLOSED;
rem_ref_cm_node(cm_core, cm_node);
} else {
- ret = send_mpa_reject(cm_node);
- if (ret) {
- cm_node->state = NES_CM_STATE_CLOSED;
- err = send_reset(cm_node, NULL);
- if (err)
- WARN_ON(1);
- } else
- cm_id->add_ref(cm_id);
+ if (cm_node->state == NES_CM_STATE_LISTENER_DESTROYED) {
+ rem_ref_cm_node(cm_core, cm_node);
+ } else {
+ ret = send_mpa_reject(cm_node);
+ if (ret) {
+ cm_node->state = NES_CM_STATE_CLOSED;
+ err = send_reset(cm_node, NULL);
+ if (err)
+ WARN_ON(1);
+ } else
+ cm_id->add_ref(cm_id);
+ }
}
} else {
cm_node->cm_id = NULL;
- event.cm_node = loopback;
- event.cm_info.rem_addr = loopback->rem_addr;
- event.cm_info.loc_addr = loopback->loc_addr;
- event.cm_info.rem_port = loopback->rem_port;
- event.cm_info.loc_port = loopback->loc_port;
- event.cm_info.cm_id = loopback->cm_id;
- cm_event_mpa_reject(&event);
- rem_ref_cm_node(cm_core, cm_node);
- loopback->state = NES_CM_STATE_CLOSING;
+ if (cm_node->state == NES_CM_STATE_LISTENER_DESTROYED) {
+ rem_ref_cm_node(cm_core, cm_node);
+ rem_ref_cm_node(cm_core, loopback);
+ } else {
+ event.cm_node = loopback;
+ event.cm_info.rem_addr = loopback->rem_addr;
+ event.cm_info.loc_addr = loopback->loc_addr;
+ event.cm_info.rem_port = loopback->rem_port;
+ event.cm_info.loc_port = loopback->loc_port;
+ event.cm_info.cm_id = loopback->cm_id;
+ cm_event_mpa_reject(&event);
+ rem_ref_cm_node(cm_core, cm_node);
+ loopback->state = NES_CM_STATE_CLOSING;
- cm_id = loopback->cm_id;
- rem_ref_cm_node(cm_core, loopback);
- cm_id->rem_ref(cm_id);
+ cm_id = loopback->cm_id;
+ rem_ref_cm_node(cm_core, loopback);
+ cm_id->rem_ref(cm_id);
+ }
}
return ret;
@@ -2198,6 +2210,7 @@
case NES_CM_STATE_UNKNOWN:
case NES_CM_STATE_INITED:
case NES_CM_STATE_CLOSED:
+ case NES_CM_STATE_LISTENER_DESTROYED:
ret = rem_ref_cm_node(cm_core, cm_node);
break;
case NES_CM_STATE_TSA:
@@ -2716,8 +2729,6 @@
struct nes_pd *nespd;
u64 tagged_offset;
-
-
ibqp = nes_get_qp(cm_id->device, conn_param->qpn);
if (!ibqp)
return -EINVAL;
@@ -2733,6 +2744,13 @@
"%s\n", cm_node, nesvnic, nesvnic->netdev,
nesvnic->netdev->name);
+ if (NES_CM_STATE_LISTENER_DESTROYED == cm_node->state) {
+ if (cm_node->loopbackpartner)
+ rem_ref_cm_node(cm_node->cm_core, cm_node->loopbackpartner);
+ rem_ref_cm_node(cm_node->cm_core, cm_node);
+ return -EINVAL;
+ }
+
/* associate the node with the QP */
nesqp->cm_node = (void *)cm_node;
cm_node->nesqp = nesqp;
@@ -3003,6 +3021,9 @@
if (!nesdev)
return -EINVAL;
+ if (!(cm_id->local_addr.sin_port) || !(cm_id->remote_addr.sin_port))
+ return -EINVAL;
+
nes_debug(NES_DBG_CM, "QP%u, current IP = 0x%08X, Destination IP = "
"0x%08X:0x%04X, local = 0x%08X:0x%04X.\n", nesqp->hwqp.qp_id,
ntohl(nesvnic->local_ipaddr),
@@ -3375,7 +3396,7 @@
nesqp->cm_id = NULL;
cm_id->provider_data = NULL;
cm_event.event = IW_CM_EVENT_CONNECT_REPLY;
- cm_event.status = IW_CM_EVENT_STATUS_REJECTED;
+ cm_event.status = -ECONNRESET;
cm_event.provider_data = cm_id->provider_data;
cm_event.local_addr = cm_id->local_addr;
cm_event.remote_addr = cm_id->remote_addr;
diff --git a/drivers/infiniband/hw/nes/nes_cm.h b/drivers/infiniband/hw/nes/nes_cm.h
index 911846a..d9825fd 100644
--- a/drivers/infiniband/hw/nes/nes_cm.h
+++ b/drivers/infiniband/hw/nes/nes_cm.h
@@ -200,6 +200,7 @@
NES_CM_STATE_TIME_WAIT,
NES_CM_STATE_LAST_ACK,
NES_CM_STATE_CLOSING,
+ NES_CM_STATE_LISTENER_DESTROYED,
NES_CM_STATE_CLOSED
};