Merge tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/dledford/rdma

Pull rdma fixes from Doug Ledford:
 "First round of -rc fixes for 4.10 kernel:

   - a series of qedr fixes
   - a series of rxe fixes
   - one i40iw fix
   - one cma fix
   - one cxgb4 fix"

* tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/dledford/rdma:
  IB/rxe: Don't check for null ptr in send()
  IB/rxe: Drop future atomic/read packets rather than retrying
  IB/rxe: Use BTH_PSN_MASK when ACKing duplicate sends
  qedr: Always notify the verb consumer of flushed CQEs
  qedr: clear the vendor error field in the work completion
  qedr: post_send/recv according to QP state
  qedr: ignore inline flag in read verbs
  qedr: modify QP state to error when destroying it
  qedr: return correct value on modify qp
  qedr: return error if destroy CQ failed
  qedr: configure the number of CQEs on CQ creation
  i40iw: Set 128B as the only supported RQ WQE size
  IB/cma: Fix a race condition in iboe_addr_get_sgid()
  IB/rxe: Fix a memory leak in rxe_qp_cleanup()
  iw_cxgb4: set correct FetchBurstMax for QPs
diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c
index b7ac97b..cda5542 100644
--- a/drivers/infiniband/hw/cxgb4/qp.c
+++ b/drivers/infiniband/hw/cxgb4/qp.c
@@ -321,7 +321,8 @@ static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq,
 		FW_RI_RES_WR_DCAEN_V(0) |
 		FW_RI_RES_WR_DCACPU_V(0) |
 		FW_RI_RES_WR_FBMIN_V(2) |
-		FW_RI_RES_WR_FBMAX_V(2) |
+		(t4_sq_onchip(&wq->sq) ? FW_RI_RES_WR_FBMAX_V(2) :
+					 FW_RI_RES_WR_FBMAX_V(3)) |
 		FW_RI_RES_WR_CIDXFTHRESHO_V(0) |
 		FW_RI_RES_WR_CIDXFTHRESH_V(0) |
 		FW_RI_RES_WR_EQSIZE_V(eqsize));
@@ -345,7 +346,7 @@ static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq,
 		FW_RI_RES_WR_DCAEN_V(0) |
 		FW_RI_RES_WR_DCACPU_V(0) |
 		FW_RI_RES_WR_FBMIN_V(2) |
-		FW_RI_RES_WR_FBMAX_V(2) |
+		FW_RI_RES_WR_FBMAX_V(3) |
 		FW_RI_RES_WR_CIDXFTHRESHO_V(0) |
 		FW_RI_RES_WR_CIDXFTHRESH_V(0) |
 		FW_RI_RES_WR_EQSIZE_V(eqsize));
diff --git a/drivers/infiniband/hw/i40iw/i40iw_ctrl.c b/drivers/infiniband/hw/i40iw/i40iw_ctrl.c
index 392f783..98923a8 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_ctrl.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_ctrl.c
@@ -358,13 +358,16 @@ void i40iw_qp_add_qos(struct i40iw_sc_qp *qp)
  * @dev: sc device struct
  * @pd: sc pd ptr
  * @pd_id: pd_id for allocated pd
+ * @abi_ver: ABI version from user context, -1 if not valid
  */
 static void i40iw_sc_pd_init(struct i40iw_sc_dev *dev,
 			     struct i40iw_sc_pd *pd,
-			     u16 pd_id)
+			     u16 pd_id,
+			     int abi_ver)
 {
 	pd->size = sizeof(*pd);
 	pd->pd_id = pd_id;
+	pd->abi_ver = abi_ver;
 	pd->dev = dev;
 }
 
@@ -2252,6 +2255,7 @@ static enum i40iw_status_code i40iw_sc_qp_init(struct i40iw_sc_qp *qp,
 					      offset);
 
 	info->qp_uk_init_info.wqe_alloc_reg = wqe_alloc_reg;
+	info->qp_uk_init_info.abi_ver = qp->pd->abi_ver;
 	ret_code = i40iw_qp_uk_init(&qp->qp_uk, &info->qp_uk_init_info);
 	if (ret_code)
 		return ret_code;
@@ -2270,10 +2274,21 @@ static enum i40iw_status_code i40iw_sc_qp_init(struct i40iw_sc_qp *qp,
 						    false);
 	i40iw_debug(qp->dev, I40IW_DEBUG_WQE, "%s: hw_sq_size[%04d] sq_ring.size[%04d]\n",
 		    __func__, qp->hw_sq_size, qp->qp_uk.sq_ring.size);
-	ret_code = i40iw_fragcnt_to_wqesize_rq(qp->qp_uk.max_rq_frag_cnt,
-					       &wqe_size);
-	if (ret_code)
-		return ret_code;
+
+	switch (qp->pd->abi_ver) {
+	case 4:
+		ret_code = i40iw_fragcnt_to_wqesize_rq(qp->qp_uk.max_rq_frag_cnt,
+						       &wqe_size);
+		if (ret_code)
+			return ret_code;
+		break;
+	case 5: /* fallthrough until next ABI version */
+	default:
+		if (qp->qp_uk.max_rq_frag_cnt > I40IW_MAX_WQ_FRAGMENT_COUNT)
+			return I40IW_ERR_INVALID_FRAG_COUNT;
+		wqe_size = I40IW_MAX_WQE_SIZE_RQ;
+		break;
+	}
 	qp->hw_rq_size = i40iw_get_encoded_wqe_size(qp->qp_uk.rq_size *
 				(wqe_size / I40IW_QP_WQE_MIN_SIZE), false);
 	i40iw_debug(qp->dev, I40IW_DEBUG_WQE,
diff --git a/drivers/infiniband/hw/i40iw/i40iw_puda.c b/drivers/infiniband/hw/i40iw/i40iw_puda.c
index 449ba8c..db41ab4 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_puda.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_puda.c
@@ -930,7 +930,7 @@ enum i40iw_status_code i40iw_puda_create_rsrc(struct i40iw_sc_vsi *vsi,
 	INIT_LIST_HEAD(&rsrc->txpend);
 
 	rsrc->tx_wqe_avail_cnt = info->sq_size - 1;
-	dev->iw_pd_ops->pd_init(dev, &rsrc->sc_pd, info->pd_id);
+	dev->iw_pd_ops->pd_init(dev, &rsrc->sc_pd, info->pd_id, -1);
 	rsrc->qp_id = info->qp_id;
 	rsrc->cq_id = info->cq_id;
 	rsrc->sq_size = info->sq_size;
diff --git a/drivers/infiniband/hw/i40iw/i40iw_type.h b/drivers/infiniband/hw/i40iw/i40iw_type.h
index f3f8e9c..7b76259 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_type.h
+++ b/drivers/infiniband/hw/i40iw/i40iw_type.h
@@ -280,6 +280,7 @@ struct i40iw_sc_pd {
 	u32 size;
 	struct i40iw_sc_dev *dev;
 	u16 pd_id;
+	int abi_ver;
 };
 
 struct i40iw_cqp_quanta {
@@ -852,6 +853,7 @@ struct i40iw_qp_init_info {
 	u64 host_ctx_pa;
 	u64 q2_pa;
 	u64 shadow_area_pa;
+	int abi_ver;
 	u8 sq_tph_val;
 	u8 rq_tph_val;
 	u8 type;
@@ -1051,7 +1053,7 @@ struct i40iw_aeq_ops {
 };
 
 struct i40iw_pd_ops {
-	void (*pd_init)(struct i40iw_sc_dev *, struct i40iw_sc_pd *, u16);
+	void (*pd_init)(struct i40iw_sc_dev *, struct i40iw_sc_pd *, u16, int);
 };
 
 struct i40iw_priv_qp_ops {
diff --git a/drivers/infiniband/hw/i40iw/i40iw_ucontext.h b/drivers/infiniband/hw/i40iw/i40iw_ucontext.h
index 12acd68..57d3f1d 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_ucontext.h
+++ b/drivers/infiniband/hw/i40iw/i40iw_ucontext.h
@@ -39,8 +39,8 @@
 
 #include <linux/types.h>
 
-#define I40IW_ABI_USERSPACE_VER 4
-#define I40IW_ABI_KERNEL_VER    4
+#define I40IW_ABI_VER 5
+
 struct i40iw_alloc_ucontext_req {
 	__u32 reserved32;
 	__u8 userspace_ver;
diff --git a/drivers/infiniband/hw/i40iw/i40iw_uk.c b/drivers/infiniband/hw/i40iw/i40iw_uk.c
index 4376cd6..2800f79 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_uk.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_uk.c
@@ -966,10 +966,6 @@ enum i40iw_status_code i40iw_qp_uk_init(struct i40iw_qp_uk *qp,
 	if (ret_code)
 		return ret_code;
 
-	ret_code = i40iw_get_wqe_shift(info->rq_size, info->max_rq_frag_cnt, 0, &rqshift);
-	if (ret_code)
-		return ret_code;
-
 	qp->sq_base = info->sq;
 	qp->rq_base = info->rq;
 	qp->shadow_area = info->shadow_area;
@@ -998,8 +994,19 @@ enum i40iw_status_code i40iw_qp_uk_init(struct i40iw_qp_uk *qp,
 	if (!qp->use_srq) {
 		qp->rq_size = info->rq_size;
 		qp->max_rq_frag_cnt = info->max_rq_frag_cnt;
-		qp->rq_wqe_size = rqshift;
 		I40IW_RING_INIT(qp->rq_ring, qp->rq_size);
+		switch (info->abi_ver) {
+		case 4:
+			ret_code = i40iw_get_wqe_shift(info->rq_size, info->max_rq_frag_cnt, 0, &rqshift);
+			if (ret_code)
+				return ret_code;
+			break;
+		case 5: /* fallthrough until next ABI version */
+		default:
+			rqshift = I40IW_MAX_RQ_WQE_SHIFT;
+			break;
+		}
+		qp->rq_wqe_size = rqshift;
 		qp->rq_wqe_size_multiplier = 4 << rqshift;
 	}
 	qp->ops = iw_qp_uk_ops;
diff --git a/drivers/infiniband/hw/i40iw/i40iw_user.h b/drivers/infiniband/hw/i40iw/i40iw_user.h
index 80d9f46..84be6f1 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_user.h
+++ b/drivers/infiniband/hw/i40iw/i40iw_user.h
@@ -76,6 +76,7 @@ enum i40iw_device_capabilities_const {
 	I40IW_MAX_ORD_SIZE =			127,
 	I40IW_MAX_WQ_ENTRIES =			2048,
 	I40IW_Q2_BUFFER_SIZE =			(248 + 100),
+	I40IW_MAX_WQE_SIZE_RQ =			128,
 	I40IW_QP_CTX_SIZE =			248,
 	I40IW_MAX_PDS = 			32768
 };
@@ -97,6 +98,7 @@ enum i40iw_device_capabilities_const {
 #define i40iw_address_list u64 *
 
 #define	I40IW_MAX_MR_SIZE	0x10000000000L
+#define	I40IW_MAX_RQ_WQE_SHIFT	2
 
 struct i40iw_qp_uk;
 struct i40iw_cq_uk;
@@ -405,7 +407,7 @@ struct i40iw_qp_uk_init_info {
 	u32 max_sq_frag_cnt;
 	u32 max_rq_frag_cnt;
 	u32 max_inline_data;
-
+	int abi_ver;
 };
 
 struct i40iw_cq_uk_init_info {
diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.c b/drivers/infiniband/hw/i40iw/i40iw_verbs.c
index 7368a50..29e97df 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_verbs.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.c
@@ -145,9 +145,8 @@ static struct ib_ucontext *i40iw_alloc_ucontext(struct ib_device *ibdev,
 	if (ib_copy_from_udata(&req, udata, sizeof(req)))
 		return ERR_PTR(-EINVAL);
 
-	if (req.userspace_ver != I40IW_ABI_USERSPACE_VER) {
-		i40iw_pr_err("Invalid userspace driver version detected. Detected version %d, should be %d\n",
-			     req.userspace_ver, I40IW_ABI_USERSPACE_VER);
+	if (req.userspace_ver < 4 || req.userspace_ver > I40IW_ABI_VER) {
+		i40iw_pr_err("Unsupported provider library version %u.\n", req.userspace_ver);
 		return ERR_PTR(-EINVAL);
 	}
 
@@ -155,13 +154,14 @@ static struct ib_ucontext *i40iw_alloc_ucontext(struct ib_device *ibdev,
 	uresp.max_qps = iwdev->max_qp;
 	uresp.max_pds = iwdev->max_pd;
 	uresp.wq_size = iwdev->max_qp_wr * 2;
-	uresp.kernel_ver = I40IW_ABI_KERNEL_VER;
+	uresp.kernel_ver = req.userspace_ver;
 
 	ucontext = kzalloc(sizeof(*ucontext), GFP_KERNEL);
 	if (!ucontext)
 		return ERR_PTR(-ENOMEM);
 
 	ucontext->iwdev = iwdev;
+	ucontext->abi_ver = req.userspace_ver;
 
 	if (ib_copy_to_udata(udata, &uresp, sizeof(uresp))) {
 		kfree(ucontext);
@@ -333,6 +333,7 @@ static struct ib_pd *i40iw_alloc_pd(struct ib_device *ibdev,
 	struct i40iw_sc_dev *dev = &iwdev->sc_dev;
 	struct i40iw_alloc_pd_resp uresp;
 	struct i40iw_sc_pd *sc_pd;
+	struct i40iw_ucontext *ucontext;
 	u32 pd_id = 0;
 	int err;
 
@@ -353,15 +354,18 @@ static struct ib_pd *i40iw_alloc_pd(struct ib_device *ibdev,
 	}
 
 	sc_pd = &iwpd->sc_pd;
-	dev->iw_pd_ops->pd_init(dev, sc_pd, pd_id);
 
 	if (context) {
+		ucontext = to_ucontext(context);
+		dev->iw_pd_ops->pd_init(dev, sc_pd, pd_id, ucontext->abi_ver);
 		memset(&uresp, 0, sizeof(uresp));
 		uresp.pd_id = pd_id;
 		if (ib_copy_to_udata(udata, &uresp, sizeof(uresp))) {
 			err = -EFAULT;
 			goto error;
 		}
+	} else {
+		dev->iw_pd_ops->pd_init(dev, sc_pd, pd_id, -1);
 	}
 
 	i40iw_add_pdusecount(iwpd);
@@ -518,7 +522,7 @@ static int i40iw_setup_kmode_qp(struct i40iw_device *iwdev,
 	struct i40iw_dma_mem *mem = &iwqp->kqp.dma_mem;
 	u32 sqdepth, rqdepth;
 	u32 sq_size, rq_size;
-	u8 sqshift, rqshift;
+	u8 sqshift;
 	u32 size;
 	enum i40iw_status_code status;
 	struct i40iw_qp_uk_init_info *ukinfo = &info->qp_uk_init_info;
@@ -527,14 +531,11 @@ static int i40iw_setup_kmode_qp(struct i40iw_device *iwdev,
 	rq_size = i40iw_qp_roundup(ukinfo->rq_size + 1);
 
 	status = i40iw_get_wqe_shift(sq_size, ukinfo->max_sq_frag_cnt, ukinfo->max_inline_data, &sqshift);
-	if (!status)
-		status = i40iw_get_wqe_shift(rq_size, ukinfo->max_rq_frag_cnt, 0, &rqshift);
-
 	if (status)
 		return -ENOMEM;
 
 	sqdepth = sq_size << sqshift;
-	rqdepth = rq_size << rqshift;
+	rqdepth = rq_size << I40IW_MAX_RQ_WQE_SHIFT;
 
 	size = sqdepth * sizeof(struct i40iw_sq_uk_wr_trk_info) + (rqdepth << 3);
 	iwqp->kqp.wrid_mem = kzalloc(size, GFP_KERNEL);
diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.h b/drivers/infiniband/hw/i40iw/i40iw_verbs.h
index 6549c93..07c3fec 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_verbs.h
+++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.h
@@ -42,6 +42,7 @@ struct i40iw_ucontext {
 	spinlock_t cq_reg_mem_list_lock; /* memory list for cq's */
 	struct list_head qp_reg_mem_list;
 	spinlock_t qp_reg_mem_list_lock; /* memory list for qp's */
+	int abi_ver;
 };
 
 struct i40iw_pd {
diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c
index 302fb05..57c8de2 100644
--- a/drivers/infiniband/hw/qedr/verbs.c
+++ b/drivers/infiniband/hw/qedr/verbs.c
@@ -890,6 +890,8 @@ struct ib_cq *qedr_create_cq(struct ib_device *ibdev,
 
 		pbl_ptr = cq->q.pbl_tbl->pa;
 		page_cnt = cq->q.pbl_info.num_pbes;
+
+		cq->ibcq.cqe = chain_entries;
 	} else {
 		cq->cq_type = QEDR_CQ_TYPE_KERNEL;
 
@@ -905,6 +907,7 @@ struct ib_cq *qedr_create_cq(struct ib_device *ibdev,
 
 		page_cnt = qed_chain_get_page_cnt(&cq->pbl);
 		pbl_ptr = qed_chain_get_pbl_phys(&cq->pbl);
+		cq->ibcq.cqe = cq->pbl.capacity;
 	}
 
 	qedr_init_cq_params(cq, ctx, dev, vector, chain_entries, page_cnt,
@@ -982,8 +985,13 @@ int qedr_destroy_cq(struct ib_cq *ibcq)
 
 	/* GSIs CQs are handled by driver, so they don't exist in the FW */
 	if (cq->cq_type != QEDR_CQ_TYPE_GSI) {
+		int rc;
+
 		iparams.icid = cq->icid;
-		dev->ops->rdma_destroy_cq(dev->rdma_ctx, &iparams, &oparams);
+		rc = dev->ops->rdma_destroy_cq(dev->rdma_ctx, &iparams,
+					       &oparams);
+		if (rc)
+			return rc;
 		dev->ops->common->chain_free(dev->cdev, &cq->pbl);
 	}
 
@@ -1966,7 +1974,7 @@ int qedr_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
 
 	if (attr_mask & IB_QP_STATE) {
 		if ((qp->qp_type != IB_QPT_GSI) && (!udata))
-			qedr_update_qp_state(dev, qp, qp_params.new_state);
+			rc = qedr_update_qp_state(dev, qp, qp_params.new_state);
 		qp->state = qp_params.new_state;
 	}
 
@@ -2070,8 +2078,10 @@ int qedr_destroy_qp(struct ib_qp *ibqp)
 	DP_DEBUG(dev, QEDR_MSG_QP, "destroy qp: destroying %p, qp type=%d\n",
 		 qp, qp->qp_type);
 
-	if (qp->state != (QED_ROCE_QP_STATE_RESET | QED_ROCE_QP_STATE_ERR |
-			  QED_ROCE_QP_STATE_INIT)) {
+	if ((qp->state != QED_ROCE_QP_STATE_RESET) &&
+	    (qp->state != QED_ROCE_QP_STATE_ERR) &&
+	    (qp->state != QED_ROCE_QP_STATE_INIT)) {
+
 		attr.qp_state = IB_QPS_ERR;
 		attr_mask |= IB_QP_STATE;
 
@@ -2626,7 +2636,9 @@ static u32 qedr_prepare_sq_rdma_data(struct qedr_dev *dev,
 	rwqe2->r_key = cpu_to_le32(rdma_wr(wr)->rkey);
 	DMA_REGPAIR_LE(rwqe2->remote_va, rdma_wr(wr)->remote_addr);
 
-	if (wr->send_flags & IB_SEND_INLINE) {
+	if (wr->send_flags & IB_SEND_INLINE &&
+	    (wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM ||
+	     wr->opcode == IB_WR_RDMA_WRITE)) {
 		u8 flags = 0;
 
 		SET_FIELD2(flags, RDMA_SQ_RDMA_WQE_1ST_INLINE_FLG, 1);
@@ -2977,8 +2989,9 @@ int qedr_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 
 	spin_lock_irqsave(&qp->q_lock, flags);
 
-	if ((qp->state == QED_ROCE_QP_STATE_RESET) ||
-	    (qp->state == QED_ROCE_QP_STATE_ERR)) {
+	if ((qp->state != QED_ROCE_QP_STATE_RTS) &&
+	    (qp->state != QED_ROCE_QP_STATE_ERR) &&
+	    (qp->state != QED_ROCE_QP_STATE_SQD)) {
 		spin_unlock_irqrestore(&qp->q_lock, flags);
 		*bad_wr = wr;
 		DP_DEBUG(dev, QEDR_MSG_CQ,
@@ -3031,8 +3044,7 @@ int qedr_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
 
 	spin_lock_irqsave(&qp->q_lock, flags);
 
-	if ((qp->state == QED_ROCE_QP_STATE_RESET) ||
-	    (qp->state == QED_ROCE_QP_STATE_ERR)) {
+	if (qp->state == QED_ROCE_QP_STATE_RESET) {
 		spin_unlock_irqrestore(&qp->q_lock, flags);
 		*bad_wr = wr;
 		return -EINVAL;
@@ -3174,6 +3186,7 @@ static int process_req(struct qedr_dev *dev, struct qedr_qp *qp,
 
 		/* fill WC */
 		wc->status = status;
+		wc->vendor_err = 0;
 		wc->wc_flags = 0;
 		wc->src_qp = qp->id;
 		wc->qp = &qp->ibqp;
@@ -3225,7 +3238,7 @@ static int qedr_poll_cq_req(struct qedr_dev *dev,
 		       "Error: POLL CQ with RDMA_CQE_REQ_STS_WORK_REQUEST_FLUSHED_ERR. CQ icid=0x%x, QP icid=0x%x\n",
 		       cq->icid, qp->icid);
 		cnt = process_req(dev, qp, cq, num_entries, wc, req->sq_cons,
-				  IB_WC_WR_FLUSH_ERR, 0);
+				  IB_WC_WR_FLUSH_ERR, 1);
 		break;
 	default:
 		/* process all WQE before the cosumer */
@@ -3363,6 +3376,7 @@ static void __process_resp_one(struct qedr_dev *dev, struct qedr_qp *qp,
 
 	/* fill WC */
 	wc->status = wc_status;
+	wc->vendor_err = 0;
 	wc->src_qp = qp->id;
 	wc->qp = &qp->ibqp;
 	wc->wr_id = wr_id;
@@ -3391,6 +3405,7 @@ static int process_resp_flush(struct qedr_qp *qp, struct qedr_cq *cq,
 	while (num_entries && qp->rq.wqe_cons != hw_cons) {
 		/* fill WC */
 		wc->status = IB_WC_WR_FLUSH_ERR;
+		wc->vendor_err = 0;
 		wc->wc_flags = 0;
 		wc->src_qp = qp->id;
 		wc->byte_len = 0;
diff --git a/drivers/infiniband/sw/rxe/rxe_comp.c b/drivers/infiniband/sw/rxe/rxe_comp.c
index cd27cbd..d369f24 100644
--- a/drivers/infiniband/sw/rxe/rxe_comp.c
+++ b/drivers/infiniband/sw/rxe/rxe_comp.c
@@ -224,7 +224,7 @@ static inline enum comp_state check_psn(struct rxe_qp *qp,
 		else
 			return COMPST_DONE;
 	} else if ((diff > 0) && (wqe->mask & WR_ATOMIC_OR_READ_MASK)) {
-		return COMPST_ERROR_RETRY;
+		return COMPST_DONE;
 	} else {
 		return COMPST_CHECK_ACK;
 	}
diff --git a/drivers/infiniband/sw/rxe/rxe_net.c b/drivers/infiniband/sw/rxe/rxe_net.c
index 16967cd..342e781 100644
--- a/drivers/infiniband/sw/rxe/rxe_net.c
+++ b/drivers/infiniband/sw/rxe/rxe_net.c
@@ -455,8 +455,7 @@ static int send(struct rxe_dev *rxe, struct rxe_pkt_info *pkt,
 		return -EAGAIN;
 	}
 
-	if (pkt->qp)
-		atomic_inc(&pkt->qp->skb_out);
+	atomic_inc(&pkt->qp->skb_out);
 	kfree_skb(skb);
 
 	return 0;
diff --git a/drivers/infiniband/sw/rxe/rxe_qp.c b/drivers/infiniband/sw/rxe/rxe_qp.c
index c3e60e4..486d576 100644
--- a/drivers/infiniband/sw/rxe/rxe_qp.c
+++ b/drivers/infiniband/sw/rxe/rxe_qp.c
@@ -855,4 +855,5 @@ void rxe_qp_cleanup(void *arg)
 	free_rd_atomic_resources(qp);
 
 	kernel_sock_shutdown(qp->sk, SHUT_RDWR);
+	sock_release(qp->sk);
 }
diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c
index 7a36ec9..3435eff 100644
--- a/drivers/infiniband/sw/rxe/rxe_resp.c
+++ b/drivers/infiniband/sw/rxe/rxe_resp.c
@@ -1070,12 +1070,13 @@ static enum resp_states duplicate_request(struct rxe_qp *qp,
 					  struct rxe_pkt_info *pkt)
 {
 	enum resp_states rc;
+	u32 prev_psn = (qp->resp.psn - 1) & BTH_PSN_MASK;
 
 	if (pkt->mask & RXE_SEND_MASK ||
 	    pkt->mask & RXE_WRITE_MASK) {
 		/* SEND. Ack again and cleanup. C9-105. */
 		if (bth_ack(pkt))
-			send_ack(qp, pkt, AETH_ACK_UNLIMITED, qp->resp.psn - 1);
+			send_ack(qp, pkt, AETH_ACK_UNLIMITED, prev_psn);
 		rc = RESPST_CLEANUP;
 		goto out;
 	} else if (pkt->mask & RXE_READ_MASK) {
diff --git a/include/rdma/ib_addr.h b/include/rdma/ib_addr.h
index 931a47b..1beab55 100644
--- a/include/rdma/ib_addr.h
+++ b/include/rdma/ib_addr.h
@@ -205,10 +205,12 @@ static inline void iboe_addr_get_sgid(struct rdma_dev_addr *dev_addr,
 
 	dev = dev_get_by_index(&init_net, dev_addr->bound_dev_if);
 	if (dev) {
-		ip4 = (struct in_device *)dev->ip_ptr;
-		if (ip4 && ip4->ifa_list && ip4->ifa_list->ifa_address)
+		ip4 = in_dev_get(dev);
+		if (ip4 && ip4->ifa_list && ip4->ifa_list->ifa_address) {
 			ipv6_addr_set_v4mapped(ip4->ifa_list->ifa_address,
 					       (struct in6_addr *)gid);
+			in_dev_put(ip4);
+		}
 		dev_put(dev);
 	}
 }