[SCSI] qla2xxx: Extend base EEH support in qla2xxx.
Signed-off-by: Giridhar Malavali <giridhar.malavali@qlogic.com>
Signed-off-by: James Bottomley <James.Bottomley@suse.de>
diff --git a/drivers/scsi/qla2xxx/qla_attr.c b/drivers/scsi/qla2xxx/qla_attr.c
index 21e2bc4..3a9f5b2 100644
--- a/drivers/scsi/qla2xxx/qla_attr.c
+++ b/drivers/scsi/qla2xxx/qla_attr.c
@@ -232,6 +232,9 @@
if (off)
return 0;
+ if (unlikely(pci_channel_offline(ha->pdev)))
+ return 0;
+
if (sscanf(buf, "%d:%x:%x", &val, &start, &size) < 1)
return -EINVAL;
if (start > ha->optrom_size)
@@ -379,6 +382,9 @@
struct device, kobj)));
struct qla_hw_data *ha = vha->hw;
+ if (unlikely(pci_channel_offline(ha->pdev)))
+ return 0;
+
if (!capable(CAP_SYS_ADMIN))
return 0;
@@ -398,6 +404,9 @@
struct qla_hw_data *ha = vha->hw;
uint8_t *tmp_data;
+ if (unlikely(pci_channel_offline(ha->pdev)))
+ return 0;
+
if (!capable(CAP_SYS_ADMIN) || off != 0 || count != ha->vpd_size ||
!ha->isp_ops->write_nvram)
return 0;
@@ -1238,10 +1247,11 @@
char *buf)
{
scsi_qla_host_t *vha = shost_priv(class_to_shost(dev));
- int rval;
+ int rval = QLA_FUNCTION_FAILED;
uint16_t state[5];
- rval = qla2x00_get_firmware_state(vha, state);
+ if (!vha->hw->flags.eeh_busy)
+ rval = qla2x00_get_firmware_state(vha, state);
if (rval != QLA_SUCCESS)
memset(state, -1, sizeof(state));
@@ -1452,10 +1462,13 @@
if (!fcport)
return;
- if (unlikely(pci_channel_offline(fcport->vha->hw->pdev)))
+ if (test_bit(ABORT_ISP_ACTIVE, &fcport->vha->dpc_flags))
+ return;
+
+ if (unlikely(pci_channel_offline(fcport->vha->hw->pdev))) {
qla2x00_abort_all_cmds(fcport->vha, DID_NO_CONNECT << 16);
- else
- qla2x00_abort_fcport_cmds(fcport);
+ return;
+ }
/*
* Transport has effectively 'deleted' the rport, clear
@@ -1475,6 +1488,9 @@
if (!fcport)
return;
+ if (test_bit(ABORT_ISP_ACTIVE, &fcport->vha->dpc_flags))
+ return;
+
if (unlikely(pci_channel_offline(fcport->vha->hw->pdev))) {
qla2x00_abort_all_cmds(fcport->vha, DID_NO_CONNECT << 16);
return;
@@ -1515,6 +1531,12 @@
pfc_host_stat = &ha->fc_host_stat;
memset(pfc_host_stat, -1, sizeof(struct fc_host_statistics));
+ if (test_bit(UNLOADING, &vha->dpc_flags))
+ goto done;
+
+ if (unlikely(pci_channel_offline(ha->pdev)))
+ goto done;
+
stats = dma_pool_alloc(ha->s_dma_pool, GFP_KERNEL, &stats_dma);
if (stats == NULL) {
DEBUG2_3_11(printk("%s(%ld): Failed to allocate memory.\n",
diff --git a/drivers/scsi/qla2xxx/qla_dbg.h b/drivers/scsi/qla2xxx/qla_dbg.h
index f660dd7..d6d9c86 100644
--- a/drivers/scsi/qla2xxx/qla_dbg.h
+++ b/drivers/scsi/qla2xxx/qla_dbg.h
@@ -26,7 +26,7 @@
/* #define QL_DEBUG_LEVEL_14 */ /* Output RSCN trace msgs */
/* #define QL_DEBUG_LEVEL_15 */ /* Output NPIV trace msgs */
/* #define QL_DEBUG_LEVEL_16 */ /* Output ISP84XX trace msgs */
-/* #define QL_DEBUG_LEVEL_17 */ /* Output MULTI-Q trace messages */
+/* #define QL_DEBUG_LEVEL_17 */ /* Output EEH trace messages */
/*
* Macros use for debugging the driver.
@@ -132,6 +132,13 @@
#else
#define DEBUG16(x) do {} while (0)
#endif
+
+#if defined(QL_DEBUG_LEVEL_17)
+#define DEBUG17(x) do {x;} while (0)
+#else
+#define DEBUG17(x) do {} while (0)
+#endif
+
/*
* Firmware Dump structure definition
*/
diff --git a/drivers/scsi/qla2xxx/qla_def.h b/drivers/scsi/qla2xxx/qla_def.h
index 384afda..608e675 100644
--- a/drivers/scsi/qla2xxx/qla_def.h
+++ b/drivers/scsi/qla2xxx/qla_def.h
@@ -2256,11 +2256,13 @@
uint32_t disable_serdes :1;
uint32_t gpsc_supported :1;
uint32_t npiv_supported :1;
+ uint32_t pci_channel_io_perm_failure :1;
uint32_t fce_enabled :1;
uint32_t fac_supported :1;
uint32_t chip_reset_done :1;
uint32_t port0 :1;
uint32_t running_gold_fw :1;
+ uint32_t eeh_busy :1;
uint32_t cpu_affinity_enabled :1;
uint32_t disable_msix_handshake :1;
} flags;
diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c
index 0f7ea6c..b4a0eac 100644
--- a/drivers/scsi/qla2xxx/qla_init.c
+++ b/drivers/scsi/qla2xxx/qla_init.c
@@ -269,6 +269,8 @@
vha->flags.online = 0;
ha->flags.chip_reset_done = 0;
vha->flags.reset_active = 0;
+ ha->flags.pci_channel_io_perm_failure = 0;
+ ha->flags.eeh_busy = 0;
atomic_set(&vha->loop_down_timer, LOOP_DOWN_TIME);
atomic_set(&vha->loop_state, LOOP_DOWN);
vha->device_flags = DFLG_NO_CABLE;
@@ -581,6 +583,9 @@
uint32_t cnt;
uint16_t cmd;
+ if (unlikely(pci_channel_offline(ha->pdev)))
+ return;
+
ha->isp_ops->disable_intrs(ha);
spin_lock_irqsave(&ha->hardware_lock, flags);
@@ -786,6 +791,12 @@
qla24xx_reset_chip(scsi_qla_host_t *vha)
{
struct qla_hw_data *ha = vha->hw;
+
+ if (pci_channel_offline(ha->pdev) &&
+ ha->flags.pci_channel_io_perm_failure) {
+ return;
+ }
+
ha->isp_ops->disable_intrs(ha);
/* Perform RISC reset. */
@@ -3562,6 +3573,13 @@
/* Requeue all commands in outstanding command list. */
qla2x00_abort_all_cmds(vha, DID_RESET << 16);
+ if (unlikely(pci_channel_offline(ha->pdev) &&
+ ha->flags.pci_channel_io_perm_failure)) {
+ clear_bit(ISP_ABORT_RETRY, &vha->dpc_flags);
+ status = 0;
+ return status;
+ }
+
ha->isp_ops->get_flash_version(vha, req->ring);
ha->isp_ops->nvram_config(vha);
@@ -4460,6 +4478,8 @@
int ret, retries;
struct qla_hw_data *ha = vha->hw;
+ if (ha->flags.pci_channel_io_perm_failure)
+ return;
if (!IS_FWI2_CAPABLE(ha))
return;
if (!ha->fw_major_version)
diff --git a/drivers/scsi/qla2xxx/qla_isr.c b/drivers/scsi/qla2xxx/qla_isr.c
index 1692a88..ffd0efd 100644
--- a/drivers/scsi/qla2xxx/qla_isr.c
+++ b/drivers/scsi/qla2xxx/qla_isr.c
@@ -152,7 +152,7 @@
for (iter = 50; iter--; ) {
stat = RD_REG_DWORD(&reg->u.isp2300.host_status);
if (stat & HSR_RISC_PAUSED) {
- if (pci_channel_offline(ha->pdev))
+ if (unlikely(pci_channel_offline(ha->pdev)))
break;
hccr = RD_REG_WORD(&reg->hccr);
@@ -1846,12 +1846,15 @@
reg = &ha->iobase->isp24;
status = 0;
+ if (unlikely(pci_channel_offline(ha->pdev)))
+ return IRQ_HANDLED;
+
spin_lock_irqsave(&ha->hardware_lock, flags);
vha = pci_get_drvdata(ha->pdev);
for (iter = 50; iter--; ) {
stat = RD_REG_DWORD(&reg->host_status);
if (stat & HSRX_RISC_PAUSED) {
- if (pci_channel_offline(ha->pdev))
+ if (unlikely(pci_channel_offline(ha->pdev)))
break;
hccr = RD_REG_DWORD(&reg->hccr);
@@ -1992,7 +1995,7 @@
do {
stat = RD_REG_DWORD(&reg->host_status);
if (stat & HSRX_RISC_PAUSED) {
- if (pci_channel_offline(ha->pdev))
+ if (unlikely(pci_channel_offline(ha->pdev)))
break;
hccr = RD_REG_DWORD(&reg->hccr);
diff --git a/drivers/scsi/qla2xxx/qla_mbx.c b/drivers/scsi/qla2xxx/qla_mbx.c
index e91f3d8..056e4d4 100644
--- a/drivers/scsi/qla2xxx/qla_mbx.c
+++ b/drivers/scsi/qla2xxx/qla_mbx.c
@@ -56,6 +56,12 @@
DEBUG11(printk("%s(%ld): entered.\n", __func__, base_vha->host_no));
+ if (ha->flags.pci_channel_io_perm_failure) {
+ DEBUG(printk("%s(%ld): Perm failure on EEH, timeout MBX "
+ "Exiting.\n", __func__, vha->host_no));
+ return QLA_FUNCTION_TIMEOUT;
+ }
+
/*
* Wait for active mailbox commands to finish by waiting at most tov
* seconds. This is to serialize actual issuing of mailbox cmds during
@@ -154,10 +160,14 @@
/* Check for pending interrupts. */
qla2x00_poll(ha->rsp_q_map[0]);
- if (command != MBC_LOAD_RISC_RAM_EXTENDED &&
- !ha->flags.mbox_int)
+ if (!ha->flags.mbox_int &&
+ !(IS_QLA2200(ha) &&
+ command == MBC_LOAD_RISC_RAM_EXTENDED))
msleep(10);
} /* while */
+ DEBUG17(qla_printk(KERN_WARNING, ha,
+ "Waited %d sec\n",
+ (uint)((jiffies - (wait_time - (mcp->tov * HZ)))/HZ)));
}
/* Check whether we timed out */
@@ -227,7 +237,8 @@
if (rval == QLA_FUNCTION_TIMEOUT &&
mcp->mb[0] != MBC_GEN_SYSTEM_ERROR) {
- if (!io_lock_on || (mcp->flags & IOCTL_CMD)) {
+ if (!io_lock_on || (mcp->flags & IOCTL_CMD) ||
+ ha->flags.eeh_busy) {
/* not in dpc. schedule it for dpc to take over. */
DEBUG(printk("%s(%ld): timeout schedule "
"isp_abort_needed.\n", __func__,
@@ -237,7 +248,7 @@
base_vha->host_no));
qla_printk(KERN_WARNING, ha,
"Mailbox command timeout occurred. Scheduling ISP "
- "abort.\n");
+ "abort. eeh_busy: 0x%x\n", ha->flags.eeh_busy);
set_bit(ISP_ABORT_NEEDED, &base_vha->dpc_flags);
qla2xxx_wake_dpc(vha);
} else if (!abort_active) {
@@ -2530,6 +2541,9 @@
if (!IS_FWI2_CAPABLE(vha->hw))
return QLA_FUNCTION_FAILED;
+ if (unlikely(pci_channel_offline(vha->hw->pdev)))
+ return QLA_FUNCTION_FAILED;
+
DEBUG11(printk("%s(%ld): entered.\n", __func__, vha->host_no));
mcp->mb[0] = MBC_TRACE_CONTROL;
@@ -2565,6 +2579,9 @@
if (!IS_FWI2_CAPABLE(vha->hw))
return QLA_FUNCTION_FAILED;
+ if (unlikely(pci_channel_offline(vha->hw->pdev)))
+ return QLA_FUNCTION_FAILED;
+
DEBUG11(printk("%s(%ld): entered.\n", __func__, vha->host_no));
mcp->mb[0] = MBC_TRACE_CONTROL;
@@ -2595,6 +2612,9 @@
if (!IS_QLA25XX(vha->hw) && !IS_QLA81XX(vha->hw))
return QLA_FUNCTION_FAILED;
+ if (unlikely(pci_channel_offline(vha->hw->pdev)))
+ return QLA_FUNCTION_FAILED;
+
DEBUG11(printk("%s(%ld): entered.\n", __func__, vha->host_no));
mcp->mb[0] = MBC_TRACE_CONTROL;
@@ -2639,6 +2659,9 @@
if (!IS_FWI2_CAPABLE(vha->hw))
return QLA_FUNCTION_FAILED;
+ if (unlikely(pci_channel_offline(vha->hw->pdev)))
+ return QLA_FUNCTION_FAILED;
+
DEBUG11(printk("%s(%ld): entered.\n", __func__, vha->host_no));
mcp->mb[0] = MBC_TRACE_CONTROL;
diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c
index 2f873d2..1ab3582 100644
--- a/drivers/scsi/qla2xxx/qla_os.c
+++ b/drivers/scsi/qla2xxx/qla_os.c
@@ -475,11 +475,11 @@
srb_t *sp;
int rval;
- if (unlikely(pci_channel_offline(ha->pdev))) {
- if (ha->pdev->error_state == pci_channel_io_frozen)
- cmd->result = DID_REQUEUE << 16;
- else
+ if (ha->flags.eeh_busy) {
+ if (ha->flags.pci_channel_io_perm_failure)
cmd->result = DID_NO_CONNECT << 16;
+ else
+ cmd->result = DID_REQUEUE << 16;
goto qc24_fail_command;
}
@@ -552,8 +552,15 @@
#define ABORT_POLLING_PERIOD 1000
#define ABORT_WAIT_ITER ((10 * 1000) / (ABORT_POLLING_PERIOD))
unsigned long wait_iter = ABORT_WAIT_ITER;
+ scsi_qla_host_t *vha = shost_priv(cmd->device->host);
+ struct qla_hw_data *ha = vha->hw;
int ret = QLA_SUCCESS;
+ if (unlikely(pci_channel_offline(ha->pdev)) || ha->flags.eeh_busy) {
+ DEBUG17(qla_printk(KERN_WARNING, ha, "return:eh_wait\n"));
+ return ret;
+ }
+
while (CMD_SP(cmd) && wait_iter--) {
msleep(ABORT_POLLING_PERIOD);
}
@@ -2174,6 +2181,24 @@
{
struct qla_hw_data *ha = vha->hw;
+ qla2x00_abort_all_cmds(vha, DID_NO_CONNECT << 16);
+
+ /* Disable timer */
+ if (vha->timer_active)
+ qla2x00_stop_timer(vha);
+
+ /* Kill the kernel thread for this host */
+ if (ha->dpc_thread) {
+ struct task_struct *t = ha->dpc_thread;
+
+ /*
+ * qla2xxx_wake_dpc checks for ->dpc_thread
+ * so we need to zero it out.
+ */
+ ha->dpc_thread = NULL;
+ kthread_stop(t);
+ }
+
qla25xx_delete_queues(vha);
if (ha->flags.fce_enabled)
@@ -2185,6 +2210,8 @@
/* Stop currently executing firmware. */
qla2x00_try_to_stop_firmware(vha);
+ vha->flags.online = 0;
+
/* turn-off interrupts on the card */
if (ha->interrupts_on)
ha->isp_ops->disable_intrs(ha);
@@ -2859,6 +2886,13 @@
if (!base_vha->flags.init_done)
continue;
+ if (ha->flags.eeh_busy) {
+ DEBUG17(qla_printk(KERN_WARNING, ha,
+ "qla2x00_do_dpc: dpc_flags: %lx\n",
+ base_vha->dpc_flags));
+ continue;
+ }
+
DEBUG3(printk("scsi(%ld): DPC handler\n", base_vha->host_no));
ha->dpc_active = 1;
@@ -3049,8 +3083,13 @@
int index;
srb_t *sp;
int t;
+ uint16_t w;
struct qla_hw_data *ha = vha->hw;
struct req_que *req;
+
+ /* Hardware read to raise pending EEH errors during mailbox waits. */
+ if (!pci_channel_offline(ha->pdev))
+ pci_read_config_word(ha->pdev, PCI_VENDOR_ID, &w);
/*
* Ports - Port down timer.
*
@@ -3252,16 +3291,23 @@
static pci_ers_result_t
qla2xxx_pci_error_detected(struct pci_dev *pdev, pci_channel_state_t state)
{
- scsi_qla_host_t *base_vha = pci_get_drvdata(pdev);
+ scsi_qla_host_t *vha = pci_get_drvdata(pdev);
+ struct qla_hw_data *ha = vha->hw;
+
+ DEBUG2(qla_printk(KERN_WARNING, ha, "error_detected:state %x\n",
+ state));
switch (state) {
case pci_channel_io_normal:
+ ha->flags.eeh_busy = 0;
return PCI_ERS_RESULT_CAN_RECOVER;
case pci_channel_io_frozen:
+ ha->flags.eeh_busy = 1;
pci_disable_device(pdev);
return PCI_ERS_RESULT_NEED_RESET;
case pci_channel_io_perm_failure:
- qla2x00_abort_all_cmds(base_vha, DID_NO_CONNECT << 16);
+ ha->flags.pci_channel_io_perm_failure = 1;
+ qla2x00_abort_all_cmds(vha, DID_NO_CONNECT << 16);
return PCI_ERS_RESULT_DISCONNECT;
}
return PCI_ERS_RESULT_NEED_RESET;
@@ -3312,6 +3358,8 @@
struct qla_hw_data *ha = base_vha->hw;
int rc;
+ DEBUG17(qla_printk(KERN_WARNING, ha, "slot_reset\n"));
+
if (ha->mem_only)
rc = pci_enable_device_mem(pdev);
else
@@ -3320,19 +3368,33 @@
if (rc) {
qla_printk(KERN_WARNING, ha,
"Can't re-enable PCI device after reset.\n");
-
return ret;
}
- pci_set_master(pdev);
if (ha->isp_ops->pci_config(base_vha))
return ret;
+#ifdef QL_DEBUG_LEVEL_17
+ {
+ uint8_t b;
+ uint32_t i;
+
+ printk("slot_reset_1: ");
+ for (i = 0; i < 256; i++) {
+ pci_read_config_byte(ha->pdev, i, &b);
+ printk("%s%02x", (i%16) ? " " : "\n", b);
+ }
+ printk("\n");
+ }
+#endif
set_bit(ABORT_ISP_ACTIVE, &base_vha->dpc_flags);
if (qla2x00_abort_isp(base_vha) == QLA_SUCCESS)
ret = PCI_ERS_RESULT_RECOVERED;
clear_bit(ABORT_ISP_ACTIVE, &base_vha->dpc_flags);
+ DEBUG17(qla_printk(KERN_WARNING, ha,
+ "slot_reset-return:ret=%x\n", ret));
+
return ret;
}
@@ -3343,12 +3405,17 @@
struct qla_hw_data *ha = base_vha->hw;
int ret;
+ DEBUG17(qla_printk(KERN_WARNING, ha, "pci_resume\n"));
+
ret = qla2x00_wait_for_hba_online(base_vha);
if (ret != QLA_SUCCESS) {
qla_printk(KERN_ERR, ha,
"the device failed to resume I/O "
"from slot/link_reset");
}
+
+ ha->flags.eeh_busy = 0;
+
pci_cleanup_aer_uncorrect_error_status(pdev);
}