IB/hfi1: Remove dependence on qp->s_hdrwords

The s_hdrwords variable was used to indicate whether a
packet was already built on a previous iteration of the
send engine. This variable assumed the protection of the
QP's RVT_S_BUSY flag, which was required since the the
QP's s_lock was dropped just prior to the packet being
queued on the one of the egress mechanisms.

Support for multiple send engine instantiations require
that the field not be used due to concurency issues.
The ps.txreq signals the "already built" without the
potential concurency issues.

Fix by getting rid of all s_hdrword usage.   A wrapper
is added to test for the already built case that used to
use s_hdrwords.

What used to be stored in s_hdrwords is now in the txreq.
The PBC is not counted, but is added in the pio/sdma code
paths prior to posting the packet.

Reviewed-by: Don Hiatt <don.hiatt@intel.com>
Signed-off-by: Mitko Haralanov <mitko.haralanov@intel.com>
Signed-off-by: Mike Marciniszyn <mike.marciniszyn@intel.com>
Signed-off-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
diff --git a/drivers/infiniband/hw/hfi1/ruc.c b/drivers/infiniband/hw/hfi1/ruc.c
index 2c7fc6e..0cced9a 100644
--- a/drivers/infiniband/hw/hfi1/ruc.c
+++ b/drivers/infiniband/hw/hfi1/ruc.c
@@ -757,8 +757,9 @@ static inline void hfi1_make_ruc_header_16B(struct rvt_qp *qp,
 	u32 slid;
 	u16 pkey = hfi1_get_pkey(ibp, qp->s_pkey_index);
 	u8 l4 = OPA_16B_L4_IB_LOCAL;
-	u8 extra_bytes = hfi1_get_16b_padding((qp->s_hdrwords << 2),
-				   ps->s_txreq->s_cur_size);
+	u8 extra_bytes = hfi1_get_16b_padding(
+				(ps->s_txreq->hdr_dwords << 2),
+				ps->s_txreq->s_cur_size);
 	u32 nwords = SIZE_OF_CRC + ((ps->s_txreq->s_cur_size +
 				 extra_bytes + SIZE_OF_LT) >> 2);
 	u8 becn = 0;
@@ -778,9 +779,9 @@ static inline void hfi1_make_ruc_header_16B(struct rvt_qp *qp,
 			grd->sgid_index = 0;
 		grh = &ps->s_txreq->phdr.hdr.opah.u.l.grh;
 		l4 = OPA_16B_L4_IB_GLOBAL;
-		hdrwords = qp->s_hdrwords - 4;
-		qp->s_hdrwords += hfi1_make_grh(ibp, grh, grd,
-						hdrwords, nwords);
+		hdrwords = ps->s_txreq->hdr_dwords - 4;
+		ps->s_txreq->hdr_dwords += hfi1_make_grh(ibp, grh, grd,
+							 hdrwords, nwords);
 		middle = 0;
 	}
 
@@ -814,7 +815,7 @@ static inline void hfi1_make_ruc_header_16B(struct rvt_qp *qp,
 			  slid,
 			  opa_get_lid(rdma_ah_get_dlid(&qp->remote_ah_attr),
 				      16B),
-			  (qp->s_hdrwords + nwords) >> 1,
+			  (ps->s_txreq->hdr_dwords + nwords) >> 1,
 			  pkey, becn, 0, l4, priv->s_sc);
 }
 
@@ -834,10 +835,10 @@ static inline void hfi1_make_ruc_header_9B(struct rvt_qp *qp,
 
 	if (unlikely(rdma_ah_get_ah_flags(&qp->remote_ah_attr) & IB_AH_GRH)) {
 		struct ib_grh *grh = &ps->s_txreq->phdr.hdr.ibh.u.l.grh;
-		int hdrwords = qp->s_hdrwords - 2;
+		int hdrwords = ps->s_txreq->hdr_dwords - 2;
 
 		lrh0 = HFI1_LRH_GRH;
-		qp->s_hdrwords +=
+		ps->s_txreq->hdr_dwords +=
 			hfi1_make_grh(ibp, grh,
 				      rdma_ah_read_grh(&qp->remote_ah_attr),
 				      hdrwords, nwords);
@@ -866,7 +867,7 @@ static inline void hfi1_make_ruc_header_9B(struct rvt_qp *qp,
 	hfi1_make_ruc_bth(qp, ohdr, bth0, bth1, bth2);
 	hfi1_make_ib_hdr(&ps->s_txreq->phdr.hdr.ibh,
 			 lrh0,
-			 qp->s_hdrwords + nwords,
+			 ps->s_txreq->hdr_dwords + nwords,
 			 opa_get_lid(rdma_ah_get_dlid(&qp->remote_ah_attr), 9B),
 			 ppd_from_ibp(ibp)->lid |
 				rdma_ah_get_path_bits(&qp->remote_ah_attr));
@@ -1031,7 +1032,7 @@ void hfi1_do_send(struct rvt_qp *qp, bool in_thread)
 	ps.s_txreq = get_waiting_verbs_txreq(qp);
 	do {
 		/* Check for a constructed packet to be sent. */
-		if (qp->s_hdrwords != 0) {
+		if (ps.s_txreq) {
 			spin_unlock_irqrestore(&qp->s_lock, ps.flags);
 			/*
 			 * If the packet cannot be sent now, return and
@@ -1039,8 +1040,6 @@ void hfi1_do_send(struct rvt_qp *qp, bool in_thread)
 			 */
 			if (hfi1_verbs_send(qp, &ps))
 				return;
-			/* Record that s_ahg is empty. */
-			qp->s_hdrwords = 0;
 			/* allow other tasks to run */
 			if (schedule_send_yield(qp, &ps))
 				return;