scsi: lpfc: Rework EQ/CQ processing to address interrupt coalescing When driving high iop counts, auto_imax coalescing kicks in and drives the performance to extremely small iops levels. There are two issues: 1) auto_imax is enabled by default. The auto algorithm, when iops gets high, divides the iops by the hdwq count and uses that value to calculate EQ_Delay. The EQ_Delay is set uniformly on all EQs whether they have load or not. The EQ_delay is only manipulated every 5s (a long time). Thus there were large 5s swings of no interrupt delay followed by large/maximum delay, before repeating. 2) When processing a CQ, the driver got mixed up on the rate of when to ring the doorbell to keep the chip appraised of the eqe or cqe consumption as well as how how long to sit in the thread and process queue entries. Currently, the driver capped its work at 64 entries (very small) and exited/rearmed the CQ. Thus, on heavy loads, additional overheads were taken to exit and re-enter the interrupt handler. Worse, if in the large/maximum coalescing windows,k it could be a while before getting back to servicing. The issues are corrected by the following: - A change in defaults. Auto_imax is turned OFF and fcp_imax is set to 0. Thus all interrupts are immediate. - Cleanup of field names and their meanings. Existing names were non-intuitive or used for duplicate things. - Added max_proc_limit field, to control the length of time the handlers would service completions. - Reworked EQ handling: Added common routine that walks eq, applying notify interval and max processing limits. Use queue_claimed to claim ownership of the queue while processing. Always rearm the queue whenever the common routine is called. Rework queue element processing, namely to eliminate hba_index vs host_index. Only one index is necessary. The queue entry can be marked invalid and the host_index updated immediately after eqe processing. After rework, xx_release routines are now DB write functions. Renamed the routines as such. Moved lpfc_sli4_eq_flush(), which does similar action, to same area. Replaced the 2 individual loops that walk an eq with a call to the common routine. Slightly revised lpfc_sli4_hba_handle_eqe() calling syntax. Added per-cpu counters to detect interrupt rates and scale interrupt coalescing values. - Reworked CQ handling: Added common routine that walks cq, applying notify interval and max processing limits. Use queue_claimed to claim ownership of the queue while processing. Always rearm the queue whenever the common routine is called. Rework queue element processing, namely to eliminate hba_index vs host_index. Only one index is necessary. The queue entry can be marked invalid and the host_index updated immediately after cqe processing. After rework, xx_release routines are now DB write functions. Renamed the routines as such. Replaced the 3 individual loops that walk a cq with a call to the common routine. Redefined lpfc_sli4_sp_handle_mcqe() to commong handler definition with queue reference. Add increment for mbox completion to handler. - Added a new module/sysfs attribute: lpfc_cq_max_proc_limit To allow dynamic changing of the CQ max_proc_limit value being used. Although this leaves an EQ as an immediate interrupt, that interrupt will only occur if a CQ bound to it is in an armed state and has cqe's to process. By staying in the cq processing routine longer, high loads will avoid generating more interrupts as they will only rearm as the processing thread exits. The immediately interrupt is also beneficial to idle or lower-processing CQ's as they get serviced immediately without being penalized by sharing an EQ with a more loaded CQ. Signed-off-by: Dick Kennedy <dick.kennedy@broadcom.com> Signed-off-by: James Smart <jsmart2021@gmail.com> Reviewed-by: Hannes Reinecke <hare@suse.com> Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>

commit: 32517fc0975bf8dd3967e43c2a6350f038a3af28 [log] [tgz]
author: James Smart <jsmart2021@gmail.com> Mon Jan 28 11:14:33 2019 -0800
committer: Martin K. Petersen <martin.petersen@oracle.com> Tue Feb 05 22:29:49 2019 -0500
tree: 564dc9138630ddef0af8fcc83dbc4ef1c57b1acb
parent: cb733e358787b7386bd1aeec088a35e03c53da3b [diff] [blame]
diff --git a/drivers/scsi/lpfc/lpfc_attr.c b/drivers/scsi/lpfc/lpfc_attr.c
index ed8caee..2864cb5 100644
--- a/drivers/scsi/lpfc/lpfc_attr.c
+++ b/drivers/scsi/lpfc/lpfc_attr.c

@@ -4935,6 +4935,7 @@ lpfc_fcp_imax_store(struct device *dev, struct device_attribute *attr,
 	struct Scsi_Host *shost = class_to_shost(dev);
 	struct lpfc_vport *vport = (struct lpfc_vport *)shost->hostdata;
 	struct lpfc_hba *phba = vport->phba;
+	struct lpfc_eq_intr_info *eqi;
 	uint32_t usdelay;
 	int val = 0, i;
 
@@ -4956,8 +4957,18 @@ lpfc_fcp_imax_store(struct device *dev, struct device_attribute *attr,
 	if (val && (val < LPFC_MIN_IMAX || val > LPFC_MAX_IMAX))
 		return -EINVAL;
 
+	phba->cfg_auto_imax = (val) ? 0 : 1;
+	if (phba->cfg_fcp_imax && !val) {
+		queue_delayed_work(phba->wq, &phba->eq_delay_work,
+				   msecs_to_jiffies(LPFC_EQ_DELAY_MSECS));
+
+		for_each_present_cpu(i) {
+			eqi = per_cpu_ptr(phba->sli4_hba.eq_info, i);
+			eqi->icnt = 0;
+		}
+	}
+
 	phba->cfg_fcp_imax = (uint32_t)val;
-	phba->initial_imax = phba->cfg_fcp_imax;
 
 	if (phba->cfg_fcp_imax)
 		usdelay = LPFC_SEC_TO_USEC / phba->cfg_fcp_imax;
@@ -5020,15 +5031,119 @@ lpfc_fcp_imax_init(struct lpfc_hba *phba, int val)
 
 static DEVICE_ATTR_RW(lpfc_fcp_imax);
 
+/**
+ * lpfc_cq_max_proc_limit_store
+ *
+ * @dev: class device that is converted into a Scsi_host.
+ * @attr: device attribute, not used.
+ * @buf: string with the cq max processing limit of cqes
+ * @count: unused variable.
+ *
+ * Description:
+ * If val is in a valid range, then set value on each cq
+ *
+ * Returns:
+ * The length of the buf: if successful
+ * -ERANGE: if val is not in the valid range
+ * -EINVAL: if bad value format or intended mode is not supported.
+ **/
+static ssize_t
+lpfc_cq_max_proc_limit_store(struct device *dev, struct device_attribute *attr,
+			 const char *buf, size_t count)
+{
+	struct Scsi_Host *shost = class_to_shost(dev);
+	struct lpfc_vport *vport = (struct lpfc_vport *)shost->hostdata;
+	struct lpfc_hba *phba = vport->phba;
+	struct lpfc_queue *eq, *cq;
+	unsigned long val;
+	int i;
+
+	/* cq_max_proc_limit is only valid for SLI4 */
+	if (phba->sli_rev != LPFC_SLI_REV4)
+		return -EINVAL;
+
+	/* Sanity check on user data */
+	if (!isdigit(buf[0]))
+		return -EINVAL;
+	if (kstrtoul(buf, 0, &val))
+		return -EINVAL;
+
+	if (val < LPFC_CQ_MIN_PROC_LIMIT || val > LPFC_CQ_MAX_PROC_LIMIT)
+		return -ERANGE;
+
+	phba->cfg_cq_max_proc_limit = (uint32_t)val;
+
+	/* set the values on the cq's */
+	for (i = 0; i < phba->cfg_irq_chann; i++) {
+		eq = phba->sli4_hba.hdwq[i].hba_eq;
+		if (!eq)
+			continue;
+
+		list_for_each_entry(cq, &eq->child_list, list)
+			cq->max_proc_limit = min(phba->cfg_cq_max_proc_limit,
+						 cq->entry_count);
+	}
+
+	return strlen(buf);
+}
+
 /*
- * lpfc_auto_imax: Controls Auto-interrupt coalescing values support.
- *       0       No auto_imax support
- *       1       auto imax on
- * Auto imax will change the value of fcp_imax on a per EQ basis, using
- * the EQ Delay Multiplier, depending on the activity for that EQ.
- * Value range [0,1]. Default value is 1.
+ * lpfc_cq_max_proc_limit: The maximum number CQE entries processed in an
+ *   itteration of CQ processing.
  */
-LPFC_ATTR_RW(auto_imax, 1, 0, 1, "Enable Auto imax");
+static int lpfc_cq_max_proc_limit = LPFC_CQ_DEF_MAX_PROC_LIMIT;
+module_param(lpfc_cq_max_proc_limit, int, 0644);
+MODULE_PARM_DESC(lpfc_cq_max_proc_limit,
+	    "Set the maximum number CQEs processed in an iteration of "
+	    "CQ processing");
+lpfc_param_show(cq_max_proc_limit)
+
+/*
+ * lpfc_cq_poll_threshold: Set the threshold of CQE completions in a
+ *   single handler call which should request a polled completion rather
+ *   than re-enabling interrupts.
+ */
+LPFC_ATTR_RW(cq_poll_threshold, LPFC_CQ_DEF_THRESHOLD_TO_POLL,
+	     LPFC_CQ_MIN_THRESHOLD_TO_POLL,
+	     LPFC_CQ_MAX_THRESHOLD_TO_POLL,
+	     "CQE Processing Threshold to enable Polling");
+
+/**
+ * lpfc_cq_max_proc_limit_init - Set the initial cq max_proc_limit
+ * @phba: lpfc_hba pointer.
+ * @val: entry limit
+ *
+ * Description:
+ * If val is in a valid range, then initialize the adapter's maximum
+ * value.
+ *
+ * Returns:
+ *  Always returns 0 for success, even if value not always set to
+ *  requested value. If value out of range or not supported, will fall
+ *  back to default.
+ **/
+static int
+lpfc_cq_max_proc_limit_init(struct lpfc_hba *phba, int val)
+{
+	phba->cfg_cq_max_proc_limit = LPFC_CQ_DEF_MAX_PROC_LIMIT;
+
+	if (phba->sli_rev != LPFC_SLI_REV4)
+		return 0;
+
+	if (val >= LPFC_CQ_MIN_PROC_LIMIT && val <= LPFC_CQ_MAX_PROC_LIMIT) {
+		phba->cfg_cq_max_proc_limit = val;
+		return 0;
+	}
+
+	lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+			"0371 "LPFC_DRIVER_NAME"_cq_max_proc_limit: "
+			"%d out of range, using default\n",
+			phba->cfg_cq_max_proc_limit);
+
+	return 0;
+}
+
+static DEVICE_ATTR_RW(lpfc_cq_max_proc_limit);
 
 /**
  * lpfc_state_show - Display current driver CPU affinity
@@ -5788,8 +5903,9 @@ struct device_attribute *lpfc_hba_attrs[] = {
 	&dev_attr_lpfc_use_msi,
 	&dev_attr_lpfc_nvme_oas,
 	&dev_attr_lpfc_nvme_embed_cmd,
-	&dev_attr_lpfc_auto_imax,
 	&dev_attr_lpfc_fcp_imax,
+	&dev_attr_lpfc_cq_poll_threshold,
+	&dev_attr_lpfc_cq_max_proc_limit,
 	&dev_attr_lpfc_fcp_cpu_map,
 	&dev_attr_lpfc_hdw_queue,
 	&dev_attr_lpfc_irq_chann,
@@ -6834,8 +6950,9 @@ lpfc_get_cfgparam(struct lpfc_hba *phba)
 	lpfc_use_msi_init(phba, lpfc_use_msi);
 	lpfc_nvme_oas_init(phba, lpfc_nvme_oas);
 	lpfc_nvme_embed_cmd_init(phba, lpfc_nvme_embed_cmd);
-	lpfc_auto_imax_init(phba, lpfc_auto_imax);
 	lpfc_fcp_imax_init(phba, lpfc_fcp_imax);
+	lpfc_cq_poll_threshold_init(phba, lpfc_cq_poll_threshold);
+	lpfc_cq_max_proc_limit_init(phba, lpfc_cq_max_proc_limit);
 	lpfc_fcp_cpu_map_init(phba, lpfc_fcp_cpu_map);
 	lpfc_enable_hba_reset_init(phba, lpfc_enable_hba_reset);
 	lpfc_enable_hba_heartbeat_init(phba, lpfc_enable_hba_heartbeat);
@@ -6888,9 +7005,7 @@ lpfc_get_cfgparam(struct lpfc_hba *phba)
 			phba->cfg_enable_fc4_type |= LPFC_ENABLE_FCP;
 	}
 
-	if (phba->cfg_auto_imax && !phba->cfg_fcp_imax)
-		phba->cfg_auto_imax = 0;
-	phba->initial_imax = phba->cfg_fcp_imax;
+	phba->cfg_auto_imax = (phba->cfg_fcp_imax) ? 0 : 1;
 
 	phba->cfg_enable_pbde = 0;
commit	32517fc0975bf8dd3967e43c2a6350f038a3af28	[log] [tgz]
author	James Smart <jsmart2021@gmail.com>	Mon Jan 28 11:14:33 2019 -0800
committer	Martin K. Petersen <martin.petersen@oracle.com>	Tue Feb 05 22:29:49 2019 -0500
tree	564dc9138630ddef0af8fcc83dbc4ef1c57b1acb
parent	cb733e358787b7386bd1aeec088a35e03c53da3b [diff] [blame]