scsi: lpfc: Synchronize hardware queues with SCSI MQ interface

Now that the lower half has much better per-cpu parallelization using the
hardware queues, the SCSI MQ support needs to be tied into it.

The involves the following mods:

 - Use the hardware queue info from the midlayer to help select the
   hardware queue to utilize. This required change to the get_scsi-buf_xxx
   routines.

 - Remove lpfc_sli4_scmd_to_wqidx_distr() routine. No longer needed.

 - Includes fix for SLI-3 that does not have multi queue parallelization.

Signed-off-by: Dick Kennedy <dick.kennedy@broadcom.com>
Signed-off-by: James Smart <jsmart2021@gmail.com>
Reviewed-by: Hannes Reinecke <hare@suse.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
diff --git a/drivers/scsi/lpfc/lpfc.h b/drivers/scsi/lpfc/lpfc.h
index 9262c52..755bf49 100644
--- a/drivers/scsi/lpfc/lpfc.h
+++ b/drivers/scsi/lpfc/lpfc.h
@@ -619,7 +619,8 @@ struct lpfc_ras_fwlog {
 struct lpfc_hba {
 	/* SCSI interface function jump table entries */
 	struct lpfc_scsi_buf * (*lpfc_get_scsi_buf)
-		(struct lpfc_hba *, struct lpfc_nodelist *);
+		(struct lpfc_hba *phba, struct lpfc_nodelist *ndlp,
+		struct scsi_cmnd *cmnd);
 	int (*lpfc_scsi_prep_dma_buf)
 		(struct lpfc_hba *, struct lpfc_scsi_buf *);
 	void (*lpfc_scsi_unprep_dma_buf)
diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c
index 36d9c32..88b1c3c 100644
--- a/drivers/scsi/lpfc/lpfc_init.c
+++ b/drivers/scsi/lpfc/lpfc_init.c
@@ -4063,12 +4063,16 @@ lpfc_create_port(struct lpfc_hba *phba, int instance, struct device *dev)
 	shost->max_lun = vport->cfg_max_luns;
 	shost->this_id = -1;
 	shost->max_cmd_len = 16;
-	shost->nr_hw_queues = phba->cfg_hdw_queue;
 	if (phba->sli_rev == LPFC_SLI_REV4) {
+		shost->nr_hw_queues = phba->cfg_hdw_queue;
 		shost->dma_boundary =
 			phba->sli4_hba.pc_sli4_params.sge_supp_len-1;
 		shost->sg_tablesize = phba->cfg_scsi_seg_cnt;
-	}
+	} else
+		/* SLI-3 has a limited number of hardware queues (3),
+		 * thus there is only one for FCP processing.
+		 */
+		shost->nr_hw_queues = 1;
 
 	/*
 	 * Set initial can_queue value since 0 is no longer supported and
diff --git a/drivers/scsi/lpfc/lpfc_scsi.c b/drivers/scsi/lpfc/lpfc_scsi.c
index 55c58bb..79a3765 100644
--- a/drivers/scsi/lpfc/lpfc_scsi.c
+++ b/drivers/scsi/lpfc/lpfc_scsi.c
@@ -636,7 +636,8 @@ lpfc_sli4_fcp_xri_aborted(struct lpfc_hba *phba,
  *   Pointer to lpfc_scsi_buf - Success
  **/
 static struct lpfc_scsi_buf*
-lpfc_get_scsi_buf_s3(struct lpfc_hba *phba, struct lpfc_nodelist *ndlp)
+lpfc_get_scsi_buf_s3(struct lpfc_hba *phba, struct lpfc_nodelist *ndlp,
+		     struct scsi_cmnd *cmnd)
 {
 	struct  lpfc_scsi_buf * lpfc_cmd = NULL;
 	struct list_head *scsi_buf_list_get = &phba->lpfc_scsi_buf_list_get;
@@ -674,7 +675,8 @@ lpfc_get_scsi_buf_s3(struct lpfc_hba *phba, struct lpfc_nodelist *ndlp)
  *   Pointer to lpfc_scsi_buf - Success
  **/
 static struct lpfc_scsi_buf*
-lpfc_get_scsi_buf_s4(struct lpfc_hba *phba, struct lpfc_nodelist *ndlp)
+lpfc_get_scsi_buf_s4(struct lpfc_hba *phba, struct lpfc_nodelist *ndlp,
+		     struct scsi_cmnd *cmnd)
 {
 	struct lpfc_scsi_buf *lpfc_cmd, *lpfc_cmd_next;
 	struct lpfc_sli4_hdw_queue *qp;
@@ -685,12 +687,18 @@ lpfc_get_scsi_buf_s4(struct lpfc_hba *phba, struct lpfc_nodelist *ndlp)
 	dma_addr_t pdma_phys_fcp_cmd;
 	uint32_t sgl_size, cpu, idx;
 	int found = 0;
+	int tag;
 
 	cpu = smp_processor_id();
-	if (cpu < phba->cfg_hdw_queue)
-		idx = cpu;
-	else
-		idx = cpu % phba->cfg_hdw_queue;
+	if (cmnd) {
+		tag = blk_mq_unique_tag(cmnd->request);
+		idx = blk_mq_unique_tag_to_hwq(tag);
+	} else {
+		if (cpu < phba->cfg_hdw_queue)
+			idx = cpu;
+		else
+			idx = cpu % phba->cfg_hdw_queue;
+	}
 
 	qp = &phba->sli4_hba.hdwq[idx];
 	spin_lock_irqsave(&qp->io_buf_list_get_lock, iflag);
@@ -815,9 +823,10 @@ lpfc_get_scsi_buf_s4(struct lpfc_hba *phba, struct lpfc_nodelist *ndlp)
  *   Pointer to lpfc_scsi_buf - Success
  **/
 static struct lpfc_scsi_buf*
-lpfc_get_scsi_buf(struct lpfc_hba *phba, struct lpfc_nodelist *ndlp)
+lpfc_get_scsi_buf(struct lpfc_hba *phba, struct lpfc_nodelist *ndlp,
+		  struct scsi_cmnd *cmnd)
 {
-	return  phba->lpfc_get_scsi_buf(phba, ndlp);
+	return  phba->lpfc_get_scsi_buf(phba, ndlp, cmnd);
 }
 
 /**
@@ -3658,49 +3667,6 @@ lpfc_handle_fcp_err(struct lpfc_vport *vport, struct lpfc_scsi_buf *lpfc_cmd,
 }
 
 /**
- * lpfc_sli4_scmd_to_wqidx_distr - scsi command to SLI4 WQ index distribution
- * @phba: Pointer to HBA context object.
- *
- * This routine performs a roundrobin SCSI command to SLI4 FCP WQ index
- * distribution.  This is called by __lpfc_sli_issue_iocb_s4() with the hbalock
- * held.
- * If scsi-mq is enabled, get the default block layer mapping of software queues
- * to hardware queues. This information is saved in request tag.
- *
- * Return: index into SLI4 fast-path FCP queue index.
- **/
-int lpfc_sli4_scmd_to_wqidx_distr(struct lpfc_hba *phba,
-				  struct lpfc_scsi_buf *lpfc_cmd)
-{
-	struct scsi_cmnd *cmnd = lpfc_cmd->pCmd;
-	struct lpfc_vector_map_info *cpup;
-	int chann, cpu;
-	uint32_t tag;
-	uint16_t hwq;
-
-	if (cmnd) {
-		tag = blk_mq_unique_tag(cmnd->request);
-		hwq = blk_mq_unique_tag_to_hwq(tag);
-
-		return hwq;
-	}
-
-	if (phba->cfg_fcp_io_sched == LPFC_FCP_SCHED_BY_CPU &&
-	    phba->cfg_hdw_queue > 1) {
-		cpu = lpfc_cmd->cpu;
-		if (cpu < phba->sli4_hba.num_present_cpu) {
-			cpup = phba->sli4_hba.cpu_map;
-			cpup += cpu;
-			return cpup->channel_id;
-		}
-	}
-	chann = atomic_add_return(1, &phba->fcp_qidx);
-	chann = chann % phba->cfg_hdw_queue;
-	return chann;
-}
-
-
-/**
  * lpfc_scsi_cmd_iocb_cmpl - Scsi cmnd IOCB completion routine
  * @phba: The Hba for which this call is being executed.
  * @pIocbIn: The command IOCBQ for the scsi cmnd.
@@ -4474,7 +4440,7 @@ lpfc_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *cmnd)
 		}
 	}
 
-	lpfc_cmd = lpfc_get_scsi_buf(phba, ndlp);
+	lpfc_cmd = lpfc_get_scsi_buf(phba, ndlp, cmnd);
 	if (lpfc_cmd == NULL) {
 		lpfc_rampdown_queue_depth(phba);
 
@@ -4913,7 +4879,7 @@ lpfc_send_taskmgmt(struct lpfc_vport *vport, struct scsi_cmnd *cmnd,
 		return FAILED;
 	pnode = rdata->pnode;
 
-	lpfc_cmd = lpfc_get_scsi_buf(phba, pnode);
+	lpfc_cmd = lpfc_get_scsi_buf(phba, pnode, NULL);
 	if (lpfc_cmd == NULL)
 		return FAILED;
 	lpfc_cmd->timeout = phba->cfg_task_mgmt_tmo;
diff --git a/drivers/scsi/lpfc/lpfc_scsi.h b/drivers/scsi/lpfc/lpfc_scsi.h
index 58cb6fd..69689808 100644
--- a/drivers/scsi/lpfc/lpfc_scsi.h
+++ b/drivers/scsi/lpfc/lpfc_scsi.h
@@ -200,5 +200,3 @@ struct lpfc_scsi_buf {
 /* For sysfs/debugfs tmp string max len */
 #define LPFC_MAX_SCSI_INFO_TMP_LEN	79
 
-int lpfc_sli4_scmd_to_wqidx_distr(struct lpfc_hba *phba,
-				  struct lpfc_scsi_buf *lpfc_cmd);