target: Drop se_lun->lun_active for existing percpu lun_ref

With se_port_t and t10_alua_tg_pt_gp_member being absored into se_lun,
there is no need for an extra atomic_t based reference count for PR
ALL_TG_PT=1 and ALUA access state transition.

Go ahead and use the existing percpu se_lun->lun_ref instead, and
convert the two special cases to percpu_ref_tryget_live() to avoid
se_lun if transport_clear_lun_ref() has already been invoked to
shutdown the se_lun.

Cc: Christoph Hellwig <hch@lst.de>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
diff --git a/drivers/target/target_core_alua.c b/drivers/target/target_core_alua.c
index 0a00873..02d8e1a 100644
--- a/drivers/target/target_core_alua.c
+++ b/drivers/target/target_core_alua.c
@@ -969,7 +969,8 @@
 		 * every I_T nexus other than the I_T nexus on which the SET
 		 * TARGET PORT GROUPS command
 		 */
-		atomic_inc_mb(&lun->lun_active);
+		if (!percpu_ref_tryget_live(&lun->lun_ref))
+			continue;
 		spin_unlock(&tg_pt_gp->tg_pt_gp_lock);
 
 		spin_lock_bh(&lun->lun_deve_lock);
@@ -998,7 +999,7 @@
 		spin_unlock_bh(&lun->lun_deve_lock);
 
 		spin_lock(&tg_pt_gp->tg_pt_gp_lock);
-		atomic_dec_mb(&lun->lun_active);
+		percpu_ref_put(&lun->lun_ref);
 	}
 	spin_unlock(&tg_pt_gp->tg_pt_gp_lock);
 	/*
diff --git a/drivers/target/target_core_pr.c b/drivers/target/target_core_pr.c
index 60624bb..94935ea 100644
--- a/drivers/target/target_core_pr.c
+++ b/drivers/target/target_core_pr.c
@@ -704,7 +704,8 @@
 	 */
 	spin_lock(&dev->se_port_lock);
 	list_for_each_entry_safe(lun_tmp, next, &dev->dev_sep_list, lun_dev_link) {
-		atomic_inc_mb(&lun_tmp->lun_active);
+		if (!percpu_ref_tryget_live(&lun_tmp->lun_ref))
+			continue;
 		spin_unlock(&dev->se_port_lock);
 
 		spin_lock_bh(&lun_tmp->lun_deve_lock);
@@ -751,7 +752,7 @@
 			if (ret < 0) {
 				pr_err("core_scsi3_lunacl_depend"
 						"_item() failed\n");
-				atomic_dec_mb(&lun->lun_active);
+				percpu_ref_put(&lun_tmp->lun_ref);
 				kref_put(&deve_tmp->pr_kref, target_pr_kref_release);
 				goto out;
 			}
@@ -770,7 +771,7 @@
 						deve_tmp->mapped_lun, NULL,
 						sa_res_key, all_tg_pt, aptpl);
 			if (!pr_reg_atp) {
-				atomic_dec_mb(&lun_tmp->lun_active);
+				percpu_ref_put(&lun_tmp->lun_ref);
 				core_scsi3_lunacl_undepend_item(deve_tmp);
 				goto out;
 			}
@@ -782,7 +783,7 @@
 		spin_unlock_bh(&lun_tmp->lun_deve_lock);
 
 		spin_lock(&dev->se_port_lock);
-		atomic_dec_mb(&lun_tmp->lun_active);
+		percpu_ref_put(&lun_tmp->lun_ref);
 	}
 	spin_unlock(&dev->se_port_lock);
 
diff --git a/drivers/target/target_core_tpg.c b/drivers/target/target_core_tpg.c
index 5b30940..b9fcf2c 100644
--- a/drivers/target/target_core_tpg.c
+++ b/drivers/target/target_core_tpg.c
@@ -607,7 +607,6 @@
 	mutex_init(&lun->lun_tg_pt_md_mutex);
 	INIT_LIST_HEAD(&lun->lun_tg_pt_gp_link);
 	spin_lock_init(&lun->lun_tg_pt_gp_lock);
-	atomic_set(&lun->lun_active, 0);
 	lun->lun_tpg = tpg;
 
 	return lun;
@@ -667,13 +666,16 @@
 	struct se_device *dev = rcu_dereference_raw(lun->lun_se_dev);
 
 	core_clear_lun_from_tpg(lun, tpg);
+	/*
+	 * Wait for any active I/O references to percpu se_lun->lun_ref to
+	 * be released.  Also, se_lun->lun_ref is now used by PR and ALUA
+	 * logic when referencing a remote target port during ALL_TGT_PT=1
+	 * and generating UNIT_ATTENTIONs for ALUA access state transition.
+	 */
 	transport_clear_lun_ref(lun);
 
 	mutex_lock(&tpg->tpg_lun_mutex);
 	if (lun->lun_se_dev) {
-		while (atomic_read(&lun->lun_active))
-			cpu_relax();
-
 		target_detach_tg_pt_gp(lun);
 
 		spin_lock(&dev->se_port_lock);
diff --git a/include/target/target_core_base.h b/include/target/target_core_base.h
index b82a989a..eefc2b0 100644
--- a/include/target/target_core_base.h
+++ b/include/target/target_core_base.h
@@ -721,7 +721,6 @@
 	struct t10_alua_tg_pt_gp *lun_tg_pt_gp;
 	spinlock_t		lun_tg_pt_gp_lock;
 
-	atomic_t		lun_active;
 	struct se_portal_group	*lun_tpg;
 	struct scsi_port_stats	lun_stats;
 	struct config_group	lun_group;