ib_isert: Add support for completion interrupt coalescing
This patch adds support for completion interrupt coalescing that
allows only every ISERT_COMP_BATCH_COUNT (8) to set IB_SEND_SIGNALED,
thus avoiding completion interrupts for every posted iser_tx_desc.
The batch processing is done using a per isert_conn llist that once
IB_SEND_SIGNALED has been set is saved to tx_desc->comp_llnode_batch,
and completion processing of previously posted iser_tx_descs is done
in a single shot from within isert_send_completion() code.
Note this is only done for response PDUs from ISCSI_OP_SCSI_CMD, and
all other control type of PDU responses will force an implicit batch
drain to occur.
Cc: Or Gerlitz <ogerlitz@mellanox.com>
Cc: Sagi Grimberg <sagig@mellanox.com>
Cc: Kent Overstreet <kmo@daterainc.com>
Cc: Roland Dreier <roland@purestorage.com>
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c
index 3591855..27708c3 100644
--- a/drivers/infiniband/ulp/isert/ib_isert.c
+++ b/drivers/infiniband/ulp/isert/ib_isert.c
@@ -22,6 +22,7 @@
#include <linux/socket.h>
#include <linux/in.h>
#include <linux/in6.h>
+#include <linux/llist.h>
#include <rdma/ib_verbs.h>
#include <rdma/rdma_cm.h>
#include <target/target_core_base.h>
@@ -489,6 +490,7 @@
kref_init(&isert_conn->conn_kref);
kref_get(&isert_conn->conn_kref);
mutex_init(&isert_conn->conn_mutex);
+ mutex_init(&isert_conn->conn_comp_mutex);
spin_lock_init(&isert_conn->conn_lock);
cma_id->context = isert_conn;
@@ -843,14 +845,32 @@
}
static void
-isert_init_send_wr(struct isert_cmd *isert_cmd, struct ib_send_wr *send_wr)
+isert_init_send_wr(struct isert_conn *isert_conn, struct isert_cmd *isert_cmd,
+ struct ib_send_wr *send_wr, bool coalesce)
{
+ struct iser_tx_desc *tx_desc = &isert_cmd->tx_desc;
+
isert_cmd->rdma_wr.iser_ib_op = ISER_IB_SEND;
send_wr->wr_id = (unsigned long)&isert_cmd->tx_desc;
send_wr->opcode = IB_WR_SEND;
- send_wr->send_flags = IB_SEND_SIGNALED;
- send_wr->sg_list = &isert_cmd->tx_desc.tx_sg[0];
+ send_wr->sg_list = &tx_desc->tx_sg[0];
send_wr->num_sge = isert_cmd->tx_desc.num_sge;
+ /*
+ * Coalesce send completion interrupts by only setting IB_SEND_SIGNALED
+ * bit for every ISERT_COMP_BATCH_COUNT number of ib_post_send() calls.
+ */
+ mutex_lock(&isert_conn->conn_comp_mutex);
+ if (coalesce &&
+ ++isert_conn->conn_comp_batch < ISERT_COMP_BATCH_COUNT) {
+ llist_add(&tx_desc->comp_llnode, &isert_conn->conn_comp_llist);
+ mutex_unlock(&isert_conn->conn_comp_mutex);
+ return;
+ }
+ isert_conn->conn_comp_batch = 0;
+ tx_desc->comp_llnode_batch = llist_del_all(&isert_conn->conn_comp_llist);
+ mutex_unlock(&isert_conn->conn_comp_mutex);
+
+ send_wr->send_flags = IB_SEND_SIGNALED;
}
static int
@@ -1582,8 +1602,8 @@
}
static void
-isert_send_completion(struct iser_tx_desc *tx_desc,
- struct isert_conn *isert_conn)
+__isert_send_completion(struct iser_tx_desc *tx_desc,
+ struct isert_conn *isert_conn)
{
struct ib_device *ib_dev = isert_conn->conn_cm_id->device;
struct isert_cmd *isert_cmd = tx_desc->isert_cmd;
@@ -1624,6 +1644,24 @@
}
static void
+isert_send_completion(struct iser_tx_desc *tx_desc,
+ struct isert_conn *isert_conn)
+{
+ struct llist_node *llnode = tx_desc->comp_llnode_batch;
+ struct iser_tx_desc *t;
+ /*
+ * Drain coalesced completion llist starting from comp_llnode_batch
+ * setup in isert_init_send_wr(), and then complete trailing tx_desc.
+ */
+ while (llnode) {
+ t = llist_entry(llnode, struct iser_tx_desc, comp_llnode);
+ llnode = llist_next(llnode);
+ __isert_send_completion(t, isert_conn);
+ }
+ __isert_send_completion(tx_desc, isert_conn);
+}
+
+static void
isert_cq_comp_err(struct iser_tx_desc *tx_desc, struct isert_conn *isert_conn)
{
struct ib_device *ib_dev = isert_conn->conn_cm_id->device;
@@ -1793,7 +1831,7 @@
isert_cmd->tx_desc.num_sge = 2;
}
- isert_init_send_wr(isert_cmd, send_wr);
+ isert_init_send_wr(isert_conn, isert_cmd, send_wr, true);
pr_debug("Posting SCSI Response IB_WR_SEND >>>>>>>>>>>>>>>>>>>>>>\n");
@@ -1813,7 +1851,7 @@
&isert_cmd->tx_desc.iscsi_header,
nopout_response);
isert_init_tx_hdrs(isert_conn, &isert_cmd->tx_desc);
- isert_init_send_wr(isert_cmd, send_wr);
+ isert_init_send_wr(isert_conn, isert_cmd, send_wr, false);
pr_debug("Posting NOPIN Response IB_WR_SEND >>>>>>>>>>>>>>>>>>>>>>\n");
@@ -1831,7 +1869,7 @@
iscsit_build_logout_rsp(cmd, conn, (struct iscsi_logout_rsp *)
&isert_cmd->tx_desc.iscsi_header);
isert_init_tx_hdrs(isert_conn, &isert_cmd->tx_desc);
- isert_init_send_wr(isert_cmd, send_wr);
+ isert_init_send_wr(isert_conn, isert_cmd, send_wr, false);
pr_debug("Posting Logout Response IB_WR_SEND >>>>>>>>>>>>>>>>>>>>>>\n");
@@ -1849,7 +1887,7 @@
iscsit_build_task_mgt_rsp(cmd, conn, (struct iscsi_tm_rsp *)
&isert_cmd->tx_desc.iscsi_header);
isert_init_tx_hdrs(isert_conn, &isert_cmd->tx_desc);
- isert_init_send_wr(isert_cmd, send_wr);
+ isert_init_send_wr(isert_conn, isert_cmd, send_wr, false);
pr_debug("Posting Task Management Response IB_WR_SEND >>>>>>>>>>>>>>>>>>>>>>\n");
@@ -1881,7 +1919,7 @@
tx_dsg->lkey = isert_conn->conn_mr->lkey;
isert_cmd->tx_desc.num_sge = 2;
- isert_init_send_wr(isert_cmd, send_wr);
+ isert_init_send_wr(isert_conn, isert_cmd, send_wr, false);
pr_debug("Posting Reject IB_WR_SEND >>>>>>>>>>>>>>>>>>>>>>\n");
@@ -1921,7 +1959,7 @@
tx_dsg->lkey = isert_conn->conn_mr->lkey;
isert_cmd->tx_desc.num_sge = 2;
}
- isert_init_send_wr(isert_cmd, send_wr);
+ isert_init_send_wr(isert_conn, isert_cmd, send_wr, false);
pr_debug("Posting Text Response IB_WR_SEND >>>>>>>>>>>>>>>>>>>>>>\n");
@@ -2309,7 +2347,8 @@
iscsit_build_rsp_pdu(cmd, conn, false, (struct iscsi_scsi_rsp *)
&isert_cmd->tx_desc.iscsi_header);
isert_init_tx_hdrs(isert_conn, &isert_cmd->tx_desc);
- isert_init_send_wr(isert_cmd, &isert_cmd->tx_desc.send_wr);
+ isert_init_send_wr(isert_conn, isert_cmd,
+ &isert_cmd->tx_desc.send_wr, true);
atomic_inc(&isert_conn->post_send_buf_count);