scsi: lpfc: Adapt partitioned XRI lists to efficient sharing

The XRI get/put lists were partitioned per hardware queue. However, the
adapter rarely had sufficient resources to give a large number of resources
per queue. As such, it became common for a cpu to encounter a lack of XRI
resource and request the upper io stack to retry after returning a BUSY
condition. This occurred even though other cpus were idle and not using
their resources.

Create as efficient a scheme as possible to move resources to the cpus that
need them. Each cpu maintains a small private pool which it allocates from
for io. There is a watermark that the cpu attempts to keep in the private
pool.  The private pool, when empty, pulls from a global pool from the
cpu. When the cpu's global pool is empty it will pull from other cpu's
global pool. As there many cpu global pools (1 per cpu or hardware queue
count) and as each cpu selects what cpu to pull from at different rates and
at different times, it creates a radomizing effect that minimizes the
number of cpu's that will contend with each other when the steal XRI's from
another cpu's global pool.

On io completion, a cpu will push the XRI back on to its private pool.  A
watermark level is maintained for the private pool such that when it is
exceeded it will move XRI's to the CPU global pool so that other cpu's may
allocate them.

On NVME, as heartbeat commands are critical to get placed on the wire, a
single expedite pool is maintained. When a heartbeat is to be sent, it will
allocate an XRI from the expedite pool rather than the normal cpu
private/global pools. On any io completion, if a reduction in the expedite
pools is seen, it will be replenished before the XRI is placed on the cpu
private pool.

Statistics are added to aid understanding the XRI levels on each cpu and
their behaviors.

Signed-off-by: Dick Kennedy <dick.kennedy@broadcom.com>
Signed-off-by: James Smart <jsmart2021@gmail.com>
Reviewed-by: Hannes Reinecke <hare@suse.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
diff --git a/drivers/scsi/lpfc/lpfc.h b/drivers/scsi/lpfc/lpfc.h
index 755bf49..0f8964f 100644
--- a/drivers/scsi/lpfc/lpfc.h
+++ b/drivers/scsi/lpfc/lpfc.h
@@ -235,8 +235,6 @@ typedef struct lpfc_vpd {
 	} sli3Feat;
 } lpfc_vpd_t;
 
-struct lpfc_scsi_buf;
-
 
 /*
  * lpfc stat counters
@@ -597,6 +595,13 @@ struct lpfc_mbox_ext_buf_ctx {
 	struct list_head ext_dmabuf_list;
 };
 
+struct lpfc_epd_pool {
+	/* Expedite pool */
+	struct list_head list;
+	u32 count;
+	spinlock_t lock;	/* lock for expedite pool */
+};
+
 struct lpfc_ras_fwlog {
 	uint8_t *fwlog_buff;
 	uint32_t fw_buffcount; /* Buffer size posted to FW */
@@ -618,19 +623,19 @@ struct lpfc_ras_fwlog {
 
 struct lpfc_hba {
 	/* SCSI interface function jump table entries */
-	struct lpfc_scsi_buf * (*lpfc_get_scsi_buf)
+	struct lpfc_io_buf * (*lpfc_get_scsi_buf)
 		(struct lpfc_hba *phba, struct lpfc_nodelist *ndlp,
 		struct scsi_cmnd *cmnd);
 	int (*lpfc_scsi_prep_dma_buf)
-		(struct lpfc_hba *, struct lpfc_scsi_buf *);
+		(struct lpfc_hba *, struct lpfc_io_buf *);
 	void (*lpfc_scsi_unprep_dma_buf)
-		(struct lpfc_hba *, struct lpfc_scsi_buf *);
+		(struct lpfc_hba *, struct lpfc_io_buf *);
 	void (*lpfc_release_scsi_buf)
-		(struct lpfc_hba *, struct lpfc_scsi_buf *);
+		(struct lpfc_hba *, struct lpfc_io_buf *);
 	void (*lpfc_rampdown_queue_depth)
 		(struct lpfc_hba *);
 	void (*lpfc_scsi_prep_cmnd)
-		(struct lpfc_vport *, struct lpfc_scsi_buf *,
+		(struct lpfc_vport *, struct lpfc_io_buf *,
 		 struct lpfc_nodelist *);
 
 	/* IOCB interface function jump table entries */
@@ -673,9 +678,12 @@ struct lpfc_hba {
 		(struct lpfc_hba *);
 
 	int (*lpfc_bg_scsi_prep_dma_buf)
-		(struct lpfc_hba *, struct lpfc_scsi_buf *);
+		(struct lpfc_hba *, struct lpfc_io_buf *);
 	/* Add new entries here */
 
+	/* expedite pool */
+	struct lpfc_epd_pool epd_pool;
+
 	/* SLI4 specific HBA data structure */
 	struct lpfc_sli4_hba sli4_hba;
 
@@ -789,6 +797,7 @@ struct lpfc_hba {
 
 	/* HBA Config Parameters */
 	uint32_t cfg_ack0;
+	uint32_t cfg_xri_rebalancing;
 	uint32_t cfg_enable_npiv;
 	uint32_t cfg_enable_rrq;
 	uint32_t cfg_topology;
@@ -1014,6 +1023,7 @@ struct lpfc_hba {
 #ifdef CONFIG_SCSI_LPFC_DEBUG_FS
 	struct dentry *hba_debugfs_root;
 	atomic_t debugfs_vport_count;
+	struct dentry *debug_multixri_pools;
 	struct dentry *debug_hbqinfo;
 	struct dentry *debug_dumpHostSlim;
 	struct dentry *debug_dumpHBASlim;