[SCSI] megaraid_sas: Add Online Controller Reset to MegaRAID SAS driver
To add Online Controller Reset (OCR) support, the driver needs to:
a). Reset the controller chips -- Xscale and Gen2. This changes the
function calls and adds the reset functions related to these two
chips.
b). During the reset, the driver stores the pending cmds that were not
returned by FW in the driver's pending queue. The driver re-issues those
pending cmds to FW after the OCR has finished.
c). In the driver's timeout routine, the driver reports a reset to the OS.
The driver's queue routine also blocks cmds until the OCR has
finished.
d). In the driver's ISR routine, if the driver sees a FW state
change, the FW is in the Failure state and the FW supports online controller
reset (OCR), the driver starts the controller reset.
e). In the driver's IOCTL routine, application cmds wait for the
OCR to finish and are then issued to FW.
Signed-off-by: Bo Yang <bo.yang@lsi.com>
Signed-off-by: James Bottomley <James.Bottomley@suse.de>
diff --git a/drivers/scsi/megaraid/megaraid_sas.c b/drivers/scsi/megaraid/megaraid_sas.c
index 99e4478..55951f4 100644
--- a/drivers/scsi/megaraid/megaraid_sas.c
+++ b/drivers/scsi/megaraid/megaraid_sas.c
@@ -62,6 +62,11 @@
MODULE_AUTHOR("megaraidlinux@lsi.com");
MODULE_DESCRIPTION("LSI MegaRAID SAS Driver");
+static int megasas_transition_to_ready(struct megasas_instance *instance);
+static int megasas_get_pd_list(struct megasas_instance *instance);
+static int megasas_issue_init_mfi(struct megasas_instance *instance);
+static int megasas_register_aen(struct megasas_instance *instance,
+ u32 seq_num, u32 class_locale_word);
/*
* PCI ID table for all supported controllers
*/
@@ -164,7 +169,7 @@
static inline void
megasas_enable_intr_xscale(struct megasas_register_set __iomem * regs)
{
- writel(1, &(regs)->outbound_intr_mask);
+ writel(0, &(regs)->outbound_intr_mask);
/* Dummy readl to force pci flush */
readl(&regs->outbound_intr_mask);
@@ -200,24 +205,27 @@
megasas_clear_intr_xscale(struct megasas_register_set __iomem * regs)
{
u32 status;
+ u32 mfiStatus = 0;
/*
* Check if it is our interrupt
*/
status = readl(&regs->outbound_intr_status);
- if (!(status & MFI_OB_INTR_STATUS_MASK)) {
- return 1;
- }
+ if (status & MFI_OB_INTR_STATUS_MASK)
+ mfiStatus = MFI_INTR_FLAG_REPLY_MESSAGE;
+ if (status & MFI_XSCALE_OMR0_CHANGE_INTERRUPT)
+ mfiStatus |= MFI_INTR_FLAG_FIRMWARE_STATE_CHANGE;
/*
* Clear the interrupt by writing back the same value
*/
- writel(status, &regs->outbound_intr_status);
+ if (mfiStatus)
+ writel(status, &regs->outbound_intr_status);
/* Dummy readl to force pci flush */
readl(&regs->outbound_intr_status);
- return 0;
+ return mfiStatus;
}
/**
@@ -232,8 +240,69 @@
u32 frame_count,
struct megasas_register_set __iomem *regs)
{
+ unsigned long flags;
+ spin_lock_irqsave(&instance->hba_lock, flags);
writel((frame_phys_addr >> 3)|(frame_count),
&(regs)->inbound_queue_port);
+ spin_unlock_irqrestore(&instance->hba_lock, flags);
+}
+
+/**
+ * megasas_adp_reset_xscale -	Reset an xscale controller
+ * @instance:			Adapter soft state
+ * @regs:			MFI register set
+ *
+ * Writes MFI_ADP_RESET to the inbound doorbell and waits. If bit 1 of the
+ * PCI config dword at MFI_1068_PCSR_OFFSET is then set, that bit is cleared
+ * and, after a further wait, the dword at MFI_1068_FW_HANDSHAKE_OFFSET is
+ * zeroed when its upper 16 bits equal MFI_1068_FW_READY.
+ *
+ * Return: always 0.
+ */
+static int
+megasas_adp_reset_xscale(struct megasas_instance *instance,
+	struct megasas_register_set __iomem *regs)
+{
+	u32 i;
+	u32 pcidata;
+	writel(MFI_ADP_RESET, &regs->inbound_doorbell);
+
+	for (i = 0; i < 3; i++)
+		msleep(1000); /* sleep for 3 secs */
+	pcidata = 0;
+	pci_read_config_dword(instance->pdev, MFI_1068_PCSR_OFFSET, &pcidata);
+	printk(KERN_NOTICE "pcidata = %x\n", pcidata);
+	if (pcidata & 0x2) {
+		printk(KERN_NOTICE "mfi 1068 offset read=%x\n", pcidata);
+		/* Clear bit 1 and write the dword back */
+		pcidata &= ~0x2;
+		pci_write_config_dword(instance->pdev,
+				MFI_1068_PCSR_OFFSET, pcidata);
+
+		for (i = 0; i < 2; i++)
+			msleep(1000); /* need to wait 2 secs again */
+
+		pcidata = 0;
+		pci_read_config_dword(instance->pdev,
+				MFI_1068_FW_HANDSHAKE_OFFSET, &pcidata);
+		printk(KERN_NOTICE "1068 offset handshake read=%x\n", pcidata);
+		if ((pcidata & 0xffff0000) == MFI_1068_FW_READY) {
+			printk(KERN_NOTICE "1068 offset pcidt=%x\n", pcidata);
+			/* Zero the handshake dword */
+			pcidata = 0;
+			pci_write_config_dword(instance->pdev,
+				MFI_1068_FW_HANDSHAKE_OFFSET, pcidata);
+		}
+	}
+	return 0;
+}
+
+/**
+ * megasas_check_reset_xscale -	Check whether a controller reset is in progress
+ * @instance:			Adapter soft state
+ * @regs:			MFI register set (not used here)
+ *
+ * Return: 1 when the adapter is not in the MEGASAS_HBA_OPERATIONAL state and
+ * the value at instance->consumer is MEGASAS_ADPRESET_INPROG_SIGN,
+ * otherwise 0.
+ */
+static int
+megasas_check_reset_xscale(struct megasas_instance *instance,
+	struct megasas_register_set __iomem *regs)
+{
+	if ((instance->adprecovery != MEGASAS_HBA_OPERATIONAL) &&
+	    (*instance->consumer == MEGASAS_ADPRESET_INPROG_SIGN)) {
+		return 1;
+	}
+	return 0;
}
static struct megasas_instance_template megasas_instance_template_xscale = {
@@ -243,6 +312,8 @@
.disable_intr = megasas_disable_intr_xscale,
.clear_intr = megasas_clear_intr_xscale,
.read_fw_status_reg = megasas_read_fw_status_reg_xscale,
+ .adp_reset = megasas_adp_reset_xscale,
+ .check_reset = megasas_check_reset_xscale,
};
/**
@@ -264,7 +335,7 @@
{
writel(0xFFFFFFFF, &(regs)->outbound_doorbell_clear);
- writel(~0x80000004, &(regs)->outbound_intr_mask);
+ writel(~0x80000000, &(regs)->outbound_intr_mask);
/* Dummy readl to force pci flush */
readl(&regs->outbound_intr_mask);
@@ -307,7 +378,7 @@
status = readl(&regs->outbound_intr_status);
if (!(status & MFI_REPLY_1078_MESSAGE_INTERRUPT)) {
- return 1;
+ return 0;
}
/*
@@ -318,7 +389,7 @@
/* Dummy readl to force pci flush */
readl(&regs->outbound_doorbell_clear);
- return 0;
+ return 1;
}
/**
* megasas_fire_cmd_ppc - Sends command to the FW
@@ -332,10 +403,34 @@
u32 frame_count,
struct megasas_register_set __iomem *regs)
{
+ unsigned long flags;
+ spin_lock_irqsave(&instance->hba_lock, flags);
writel((frame_phys_addr | (frame_count<<1))|1,
&(regs)->inbound_queue_port);
+ spin_unlock_irqrestore(&instance->hba_lock, flags);
}
+/**
+ * megasas_adp_reset_ppc -	Adapter reset hook for the ppc template
+ * @instance:			Adapter soft state (not used here)
+ * @regs:			MFI register set (not used here)
+ *
+ * Performs no reset sequence; it only reports 0 to the caller.
+ */
+static int megasas_adp_reset_ppc(struct megasas_instance *instance,
+				 struct megasas_register_set __iomem *regs)
+{
+	return 0;
+}
+
+/**
+ * megasas_check_reset_ppc -	Reset-in-progress hook for the ppc template
+ * @instance:			Adapter soft state (not used here)
+ * @regs:			MFI register set (not used here)
+ *
+ * Always reports 0, i.e. never signals a reset in progress.
+ */
+static int megasas_check_reset_ppc(struct megasas_instance *instance,
+				   struct megasas_register_set __iomem *regs)
+{
+	return 0;
+}
static struct megasas_instance_template megasas_instance_template_ppc = {
.fire_cmd = megasas_fire_cmd_ppc,
@@ -343,6 +438,8 @@
.disable_intr = megasas_disable_intr_ppc,
.clear_intr = megasas_clear_intr_ppc,
.read_fw_status_reg = megasas_read_fw_status_reg_ppc,
+ .adp_reset = megasas_adp_reset_ppc,
+ .check_reset = megasas_check_reset_ppc,
};
/**
@@ -397,7 +494,7 @@
status = readl(&regs->outbound_intr_status);
if (!(status & MFI_SKINNY_ENABLE_INTERRUPT_MASK)) {
- return 1;
+ return 0;
}
/*
@@ -410,7 +507,7 @@
*/
readl(&regs->outbound_intr_status);
- return 0;
+ return 1;
}
/**
@@ -426,11 +523,33 @@
struct megasas_register_set __iomem *regs)
{
unsigned long flags;
- spin_lock_irqsave(&instance->fire_lock, flags);
+ spin_lock_irqsave(&instance->hba_lock, flags);
writel(0, &(regs)->inbound_high_queue_port);
writel((frame_phys_addr | (frame_count<<1))|1,
&(regs)->inbound_low_queue_port);
- spin_unlock_irqrestore(&instance->fire_lock, flags);
+ spin_unlock_irqrestore(&instance->hba_lock, flags);
+}
+
+/**
+ * megasas_adp_reset_skinny -	Adapter reset hook for the skinny template
+ * @instance:			Adapter soft state (not used here)
+ * @regs:			MFI register set (not used here)
+ *
+ * Performs no reset sequence; it only reports 0 to the caller.
+ */
+static int megasas_adp_reset_skinny(struct megasas_instance *instance,
+				    struct megasas_register_set __iomem *regs)
+{
+	return 0;
+}
+
+/**
+ * megasas_check_reset_skinny -	Reset-in-progress hook for the skinny template
+ * @instance:			Adapter soft state (not used here)
+ * @regs:			MFI register set (not used here)
+ *
+ * Always reports 0, i.e. never signals a reset in progress.
+ */
+static int megasas_check_reset_skinny(struct megasas_instance *instance,
+				      struct megasas_register_set __iomem *regs)
+{
+	return 0;
}
static struct megasas_instance_template megasas_instance_template_skinny = {
@@ -440,6 +559,8 @@
.disable_intr = megasas_disable_intr_skinny,
.clear_intr = megasas_clear_intr_skinny,
.read_fw_status_reg = megasas_read_fw_status_reg_skinny,
+ .adp_reset = megasas_adp_reset_skinny,
+ .check_reset = megasas_check_reset_skinny,
};
@@ -495,23 +616,29 @@
megasas_clear_intr_gen2(struct megasas_register_set __iomem *regs)
{
u32 status;
+ u32 mfiStatus = 0;
/*
* Check if it is our interrupt
*/
status = readl(&regs->outbound_intr_status);
- if (!(status & MFI_GEN2_ENABLE_INTERRUPT_MASK))
- return 1;
+ if (status & MFI_GEN2_ENABLE_INTERRUPT_MASK) {
+ mfiStatus = MFI_INTR_FLAG_REPLY_MESSAGE;
+ }
+ if (status & MFI_G2_OUTBOUND_DOORBELL_CHANGE_INTERRUPT) {
+ mfiStatus |= MFI_INTR_FLAG_FIRMWARE_STATE_CHANGE;
+ }
/*
* Clear the interrupt by writing back the same value
*/
- writel(status, &regs->outbound_doorbell_clear);
+ if (mfiStatus)
+ writel(status, &regs->outbound_doorbell_clear);
/* Dummy readl to force pci flush */
readl(&regs->outbound_intr_status);
- return 0;
+ return mfiStatus;
}
/**
* megasas_fire_cmd_gen2 - Sends command to the FW
@@ -525,8 +652,74 @@
u32 frame_count,
struct megasas_register_set __iomem *regs)
{
+ unsigned long flags;
+ spin_lock_irqsave(&instance->hba_lock, flags);
writel((frame_phys_addr | (frame_count<<1))|1,
&(regs)->inbound_queue_port);
+ spin_unlock_irqrestore(&instance->hba_lock, flags);
+}
+
+/**
+ * megasas_adp_reset_gen2 -	Reset a gen2 controller
+ * @instance:			Adapter soft state (not used here)
+ * @reg_set:			MFI register set
+ *
+ * Writes the values 0, 4, 0xb, 2, 7 and 0xd to seq_offset, polls host_diag
+ * until DIAG_WRITE_ENABLE is set, then sets DIAG_RESET_ADAPTER and polls
+ * until that bit reads back as clear.
+ *
+ * Return: 0 when both polls complete, 1 when either runs out of retries.
+ */
+static int
+megasas_adp_reset_gen2(struct megasas_instance *instance,
+	struct megasas_register_set __iomem *reg_set)
+{
+	/* One counter is shared by both polling loops below */
+	u32 retry = 0;
+	u32 HostDiag;
+
+	writel(0, &reg_set->seq_offset);
+	writel(4, &reg_set->seq_offset);
+	writel(0xb, &reg_set->seq_offset);
+	writel(2, &reg_set->seq_offset);
+	writel(7, &reg_set->seq_offset);
+	writel(0xd, &reg_set->seq_offset);
+	msleep(1000);
+
+	HostDiag = (u32)readl(&reg_set->host_diag);
+
+	while (!(HostDiag & DIAG_WRITE_ENABLE)) {
+		msleep(100);
+		HostDiag = (u32)readl(&reg_set->host_diag);
+		printk(KERN_NOTICE "RESETGEN2: retry=%x, hostdiag=%x\n",
+				retry, HostDiag);
+
+		if (retry++ >= 100)
+			return 1;
+
+	}
+
+	printk(KERN_NOTICE "ADP_RESET_GEN2: HostDiag=%x\n", HostDiag);
+
+	writel((HostDiag | DIAG_RESET_ADAPTER), &reg_set->host_diag);
+
+	ssleep(10);
+
+	HostDiag = (u32)readl(&reg_set->host_diag);
+	while (HostDiag & DIAG_RESET_ADAPTER) {
+		msleep(100);
+		HostDiag = (u32)readl(&reg_set->host_diag);
+		printk(KERN_NOTICE "RESET_GEN2: retry=%x, hostdiag=%x\n",
+				retry, HostDiag);
+
+		/* retry carries on from the count reached in the first loop */
+		if (retry++ >= 1000)
+			return 1;
+
+	}
+	return 0;
+}
+
+/**
+ * megasas_check_reset_gen2 -	Reset-in-progress hook for the gen2 template
+ * @instance:			Adapter soft state (not used here)
+ * @regs:			MFI register set (not used here)
+ *
+ * Always reports 0, i.e. never signals a reset in progress.
+ */
+static int megasas_check_reset_gen2(struct megasas_instance *instance,
+				    struct megasas_register_set __iomem *regs)
+{
+	return 0;
}
static struct megasas_instance_template megasas_instance_template_gen2 = {
@@ -536,11 +729,13 @@
.disable_intr = megasas_disable_intr_gen2,
.clear_intr = megasas_clear_intr_gen2,
.read_fw_status_reg = megasas_read_fw_status_reg_gen2,
+ .adp_reset = megasas_adp_reset_gen2,
+ .check_reset = megasas_check_reset_gen2,
};
/**
* This is the end of set of functions & definitions
-* specific to ppc (deviceid : 0x60) controllers
+* specific to gen2 (deviceid : 0x78, 0x79) controllers
*/
/**
@@ -599,8 +794,7 @@
instance->instancet->fire_cmd(instance,
cmd->frame_phys_addr, 0, instance->reg_set);
- wait_event_timeout(instance->int_cmd_wait_q, (cmd->cmd_status != ENODATA),
- MEGASAS_INTERNAL_CMD_WAIT_TIME*HZ);
+ wait_event(instance->int_cmd_wait_q, cmd->cmd_status != ENODATA);
return 0;
}
@@ -648,8 +842,8 @@
/*
* Wait for this cmd to complete
*/
- wait_event_timeout(instance->abort_cmd_wait_q, (cmd->cmd_status != 0xFF),
- MEGASAS_INTERNAL_CMD_WAIT_TIME*HZ);
+ wait_event(instance->abort_cmd_wait_q, cmd->cmd_status != 0xFF);
+ cmd->sync_cmd = 0;
megasas_return_cmd(instance, cmd);
return 0;
@@ -1131,14 +1325,22 @@
u32 frame_count;
struct megasas_cmd *cmd;
struct megasas_instance *instance;
+ unsigned long flags;
instance = (struct megasas_instance *)
scmd->device->host->hostdata;
- /* Don't process if we have already declared adapter dead */
- if (instance->hw_crit_error)
+ if (instance->issuepend_done == 0)
return SCSI_MLQUEUE_HOST_BUSY;
+ spin_lock_irqsave(&instance->hba_lock, flags);
+ if (instance->adprecovery != MEGASAS_HBA_OPERATIONAL) {
+ spin_unlock_irqrestore(&instance->hba_lock, flags);
+ return SCSI_MLQUEUE_HOST_BUSY;
+ }
+
+ spin_unlock_irqrestore(&instance->hba_lock, flags);
+
scmd->scsi_done = done;
scmd->result = 0;
@@ -1274,6 +1476,18 @@
return 0;
}
+/*
+ * megaraid_sas_kill_hba - write MFI_STOP_ADP to the adapter.
+ *
+ * The two SKINNY device ids take the value in reserved_0[0]; every other
+ * device takes it in the inbound doorbell.
+ */
+static void megaraid_sas_kill_hba(struct megasas_instance *instance)
+{
+	switch (instance->pdev->device) {
+	case PCI_DEVICE_ID_LSI_SAS0073SKINNY:
+	case PCI_DEVICE_ID_LSI_SAS0071SKINNY:
+		writel(MFI_STOP_ADP, &instance->reg_set->reserved_0[0]);
+		break;
+	default:
+		writel(MFI_STOP_ADP, &instance->reg_set->inbound_doorbell);
+		break;
+	}
+}
+
/**
* megasas_complete_cmd_dpc - Returns FW's controller structure
* @instance_addr: Address of adapter soft state
@@ -1291,7 +1505,7 @@
unsigned long flags;
/* If we have already declared adapter dead, donot complete cmds */
- if (instance->hw_crit_error)
+ if (instance->adprecovery == MEGASAS_HW_CRITICAL_ERROR )
return;
spin_lock_irqsave(&instance->completion_lock, flags);
@@ -1301,6 +1515,11 @@
while (consumer != producer) {
context = instance->reply_queue[consumer];
+ if (context >= instance->max_fw_cmds) {
+ printk(KERN_ERR "Unexpected context value %x\n",
+ context);
+ BUG();
+ }
cmd = instance->cmd_list[context];
@@ -1350,7 +1569,76 @@
static int megasas_wait_for_outstanding(struct megasas_instance *instance)
{
int i;
+ u32 reset_index;
u32 wait_time = MEGASAS_RESET_WAIT_TIME;
+ u8 adprecovery;
+ unsigned long flags;
+ struct list_head clist_local;
+ struct megasas_cmd *reset_cmd;
+
+ spin_lock_irqsave(&instance->hba_lock, flags);
+ adprecovery = instance->adprecovery;
+ spin_unlock_irqrestore(&instance->hba_lock, flags);
+
+ if (adprecovery != MEGASAS_HBA_OPERATIONAL) {
+
+ INIT_LIST_HEAD(&clist_local);
+ spin_lock_irqsave(&instance->hba_lock, flags);
+ list_splice_init(&instance->internal_reset_pending_q,
+ &clist_local);
+ spin_unlock_irqrestore(&instance->hba_lock, flags);
+
+ printk(KERN_NOTICE "megasas: HBA reset wait ...\n");
+ for (i = 0; i < wait_time; i++) {
+ msleep(1000);
+ spin_lock_irqsave(&instance->hba_lock, flags);
+ adprecovery = instance->adprecovery;
+ spin_unlock_irqrestore(&instance->hba_lock, flags);
+ if (adprecovery == MEGASAS_HBA_OPERATIONAL)
+ break;
+ }
+
+ if (adprecovery != MEGASAS_HBA_OPERATIONAL) {
+ printk(KERN_NOTICE "megasas: reset: Stopping HBA.\n");
+ spin_lock_irqsave(&instance->hba_lock, flags);
+ instance->adprecovery = MEGASAS_HW_CRITICAL_ERROR;
+ spin_unlock_irqrestore(&instance->hba_lock, flags);
+ return FAILED;
+ }
+
+ reset_index = 0;
+ while (!list_empty(&clist_local)) {
+ reset_cmd = list_entry((&clist_local)->next,
+ struct megasas_cmd, list);
+ list_del_init(&reset_cmd->list);
+ if (reset_cmd->scmd) {
+ reset_cmd->scmd->result = DID_RESET << 16;
+ printk(KERN_NOTICE "%d:%p reset [%02x], %#lx\n",
+ reset_index, reset_cmd,
+ reset_cmd->scmd->cmnd[0],
+ reset_cmd->scmd->serial_number);
+
+ reset_cmd->scmd->scsi_done(reset_cmd->scmd);
+ megasas_return_cmd(instance, reset_cmd);
+ } else if (reset_cmd->sync_cmd) {
+ printk(KERN_NOTICE "megasas:%p synch cmds"
+ "reset queue\n",
+ reset_cmd);
+
+ reset_cmd->cmd_status = ENODATA;
+ instance->instancet->fire_cmd(instance,
+ reset_cmd->frame_phys_addr,
+ 0, instance->reg_set);
+ } else {
+ printk(KERN_NOTICE "megasas: %p unexpected"
+ "cmds lst\n",
+ reset_cmd);
+ }
+ reset_index++;
+ }
+
+ return SUCCESS;
+ }
for (i = 0; i < wait_time; i++) {
@@ -1373,6 +1661,7 @@
}
if (atomic_read(&instance->fw_outstanding)) {
+ printk(KERN_NOTICE "megaraid_sas: pending cmds after reset\n");
/*
* Send signal to FW to stop processing any pending cmds.
* The controller will be taken offline by the OS now.
@@ -1388,10 +1677,14 @@
&instance->reg_set->inbound_doorbell);
}
megasas_dump_pending_frames(instance);
- instance->hw_crit_error = 1;
+ spin_lock_irqsave(&instance->hba_lock, flags);
+ instance->adprecovery = MEGASAS_HW_CRITICAL_ERROR;
+ spin_unlock_irqrestore(&instance->hba_lock, flags);
return FAILED;
}
+ printk(KERN_NOTICE "megaraid_sas: no pending cmds after reset\n");
+
return SUCCESS;
}
@@ -1413,7 +1706,7 @@
scmd_printk(KERN_NOTICE, scmd, "megasas: RESET -%ld cmd=%x retries=%x\n",
scmd->serial_number, scmd->cmnd[0], scmd->retries);
- if (instance->hw_crit_error) {
+ if (instance->adprecovery == MEGASAS_HW_CRITICAL_ERROR) {
printk(KERN_ERR "megasas: cannot recover from previous reset "
"failures\n");
return FAILED;
@@ -1568,7 +1861,8 @@
instance->aen_cmd = NULL;
megasas_return_cmd(instance, cmd);
- if (instance->unload == 0) {
+ if ((instance->unload == 0) &&
+ ((instance->issuepend_done == 1))) {
struct megasas_aen_event *ev;
ev = kzalloc(sizeof(*ev), GFP_ATOMIC);
if (!ev) {
@@ -1663,6 +1957,9 @@
struct megasas_header *hdr = &cmd->frame->hdr;
unsigned long flags;
+ /* flag for the retry reset */
+ cmd->retry_for_fw_reset = 0;
+
if (cmd->scmd)
cmd->scmd->SCp.ptr = NULL;
@@ -1783,39 +2080,301 @@
}
/**
+ * megasas_issue_pending_cmds_again -	Re-issue commands deferred by a FW reset
+ * @instance:				Adapter soft state
+ *
+ * Splices internal_reset_pending_q onto a local list under hba_lock and
+ * fires every synchronous or SCSI command found there at the firmware
+ * again. A command that reaches its third retry stops the adapter through
+ * megaraid_sas_kill_hba() and marks it MEGASAS_HW_CRITICAL_ERROR; the
+ * function then returns without touching the remaining entries.
+ *
+ * After the list is drained, a still-registered AEN command is returned to
+ * the pool and AEN is registered again from instance->last_seq_num.
+ */
+static inline void
+megasas_issue_pending_cmds_again(struct megasas_instance *instance)
+{
+	struct megasas_cmd *cmd;
+	struct list_head clist_local;
+	union megasas_evt_class_locale class_locale;
+	unsigned long flags;
+	u32 seq_num;
+
+	INIT_LIST_HEAD(&clist_local);
+	spin_lock_irqsave(&instance->hba_lock, flags);
+	list_splice_init(&instance->internal_reset_pending_q, &clist_local);
+	spin_unlock_irqrestore(&instance->hba_lock, flags);
+
+	while (!list_empty(&clist_local)) {
+		cmd = list_entry((&clist_local)->next,
+					struct megasas_cmd, list);
+		list_del_init(&cmd->list);
+
+		if (cmd->sync_cmd || cmd->scmd) {
+			printk(KERN_NOTICE "megaraid_sas: command %p, %p:%d "
+				"detected to be pending while HBA reset.\n",
+					cmd, cmd->scmd, cmd->sync_cmd);
+
+			cmd->retry_for_fw_reset++;
+
+			/* Third attempt for the same command: give up on the HBA */
+			if (cmd->retry_for_fw_reset == 3) {
+				printk(KERN_NOTICE "megaraid_sas: cmd %p, %p:%d "
+					"was tried multiple times during reset. "
+					"Shutting down the HBA\n",
+					cmd, cmd->scmd, cmd->sync_cmd);
+				megaraid_sas_kill_hba(instance);
+
+				instance->adprecovery =
+						MEGASAS_HW_CRITICAL_ERROR;
+				return;
+			}
+		}
+
+		if (cmd->sync_cmd == 1) {
+			if (cmd->scmd) {
+				printk(KERN_NOTICE "megaraid_sas: unexpected "
+					"cmd attached to internal command!\n");
+			}
+			printk(KERN_NOTICE "megasas: %p synchronous cmd "
+						"on the internal reset queue, "
+						"issue it again.\n", cmd);
+			cmd->cmd_status = ENODATA;
+			instance->instancet->fire_cmd(instance,
+							cmd->frame_phys_addr,
+							0, instance->reg_set);
+		} else if (cmd->scmd) {
+			printk(KERN_NOTICE "megasas: %p scsi cmd [%02x],%#lx "
+			"detected on the internal queue, issue again.\n",
+			cmd, cmd->scmd->cmnd[0], cmd->scmd->serial_number);
+
+			/* Only SCSI commands are counted in fw_outstanding */
+			atomic_inc(&instance->fw_outstanding);
+			instance->instancet->fire_cmd(instance,
+					cmd->frame_phys_addr,
+					cmd->frame_count-1, instance->reg_set);
+		} else {
+			printk(KERN_NOTICE "megasas: %p unexpected cmd on the "
+				"internal reset defer list while re-issue!!\n",
+				cmd);
+		}
+	}
+
+	if (instance->aen_cmd) {
+		printk(KERN_NOTICE "megaraid_sas: aen_cmd in def process\n");
+		megasas_return_cmd(instance, instance->aen_cmd);
+
+		instance->aen_cmd	= NULL;
+	}
+
+	/*
+	* Initiate AEN (Asynchronous Event Notification)
+	*/
+	seq_num = instance->last_seq_num;
+	class_locale.members.reserved = 0;
+	class_locale.members.locale = MR_EVT_LOCALE_ALL;
+	class_locale.members.class = MR_EVT_CLASS_DEBUG;
+
+	megasas_register_aen(instance, seq_num, class_locale.word);
+}
+
+/**
+ * megasas_internal_reset_defer_cmds -	Move the internal reset pending
+ *					commands to a deferred queue
+ * @instance:				Adapter soft state
+ *
+ * Walks the whole command list under cmd_pool_lock and appends every
+ * command that is synchronous (sync_cmd == 1) or has a SCSI command
+ * attached to internal_reset_pending_q. A command that is still linked on
+ * some other list is logged and unlinked first.
+ *
+ * This queue would be flushed after successful completion of the internal
+ * reset sequence. If the internal reset did not complete in time, the
+ * kernel reset handler would flush these commands.
+ */
+static void
+megasas_internal_reset_defer_cmds(struct megasas_instance *instance)
+{
+	struct megasas_cmd *cmd;
+	u32 i;
+	u32 max_cmd = instance->max_fw_cmds;
+	u32 defer_index;
+	unsigned long flags;
+
+	defer_index     = 0;
+	spin_lock_irqsave(&instance->cmd_pool_lock, flags);
+	for (i = 0; i < max_cmd; i++) {
+		cmd = instance->cmd_list[i];
+		if (cmd->sync_cmd == 1 || cmd->scmd) {
+			printk(KERN_NOTICE "megasas: moving cmd[%d]:%p:%d:%p "
+					"on the defer queue as internal\n",
+				defer_index, cmd, cmd->sync_cmd, cmd->scmd);
+
+			if (!list_empty(&cmd->list)) {
+				printk(KERN_NOTICE "megaraid_sas: ERROR while"
+					" moving this cmd:%p, %d %p, it was "
+					"discovered on some list?\n",
+					cmd, cmd->sync_cmd, cmd->scmd);
+
+				list_del_init(&cmd->list);
+			}
+			defer_index++;
+			list_add_tail(&cmd->list,
+				&instance->internal_reset_pending_q);
+		}
+	}
+	spin_unlock_irqrestore(&instance->cmd_pool_lock, flags);
+}
+
+
+/**
+ * process_fw_state_change_wq -	Work handler that restarts a faulted adapter
+ * @work:			work_init member of the adapter soft state
+ *
+ * Does nothing except log when the adapter is not in the
+ * MEGASAS_ADPRESET_SM_INFAULT state. Otherwise it disables interrupts,
+ * zeroes fw_outstanding, runs the adp_reset hook with
+ * fw_reset_no_pci_access set, waits, and brings the firmware back to ready.
+ * If the firmware does not become ready the adapter is stopped and marked
+ * MEGASAS_HW_CRITICAL_ERROR. On success the reply queue indices are reset,
+ * the init frame is issued, the adapter is marked MEGASAS_HBA_OPERATIONAL,
+ * interrupts are enabled and the deferred commands are issued again.
+ */
+static void
+process_fw_state_change_wq(struct work_struct *work)
+{
+	struct megasas_instance *instance =
+		container_of(work, struct megasas_instance, work_init);
+	u32 wait;
+	unsigned long flags;
+
+	if (instance->adprecovery != MEGASAS_ADPRESET_SM_INFAULT) {
+		printk(KERN_NOTICE "megaraid_sas: error, recovery st %x \n",
+				instance->adprecovery);
+		return ;
+	}
+
+	if (instance->adprecovery == MEGASAS_ADPRESET_SM_INFAULT) {
+		printk(KERN_NOTICE "megaraid_sas: FW detected to be in fault "
+					"state, restarting it...\n");
+
+		instance->instancet->disable_intr(instance->reg_set);
+		atomic_set(&instance->fw_outstanding, 0);
+
+		/* megasas_isr returns early while this flag is non-zero */
+		atomic_set(&instance->fw_reset_no_pci_access, 1);
+		/* NOTE: the return value of adp_reset is not checked */
+		instance->instancet->adp_reset(instance, instance->reg_set);
+		atomic_set(&instance->fw_reset_no_pci_access, 0);
+
+		printk(KERN_NOTICE "megaraid_sas: FW restarted successfully, "
+					"initiating next stage...\n");
+
+		printk(KERN_NOTICE "megaraid_sas: HBA recovery state machine, "
+					"state 2 starting...\n");
+
+		/* wait about 30 seconds before starting the second init */
+		for (wait = 0; wait < 30; wait++) {
+			msleep(1000);
+		}
+
+		if (megasas_transition_to_ready(instance)) {
+			printk(KERN_NOTICE "megaraid_sas:adapter not ready\n");
+
+			megaraid_sas_kill_hba(instance);
+			instance->adprecovery	= MEGASAS_HW_CRITICAL_ERROR;
+			return ;
+		}
+
+		if ((instance->pdev->device == PCI_DEVICE_ID_LSI_SAS1064R) ||
+			(instance->pdev->device == PCI_DEVICE_ID_DELL_PERC5) ||
+			(instance->pdev->device == PCI_DEVICE_ID_LSI_VERDE_ZCR)
+			) {
+			*instance->consumer = *instance->producer;
+		} else {
+			*instance->consumer = 0;
+			*instance->producer = 0;
+		}
+
+		megasas_issue_init_mfi(instance);
+
+		spin_lock_irqsave(&instance->hba_lock, flags);
+		instance->adprecovery	= MEGASAS_HBA_OPERATIONAL;
+		spin_unlock_irqrestore(&instance->hba_lock, flags);
+		instance->instancet->enable_intr(instance->reg_set);
+
+		megasas_issue_pending_cmds_again(instance);
+		instance->issuepend_done = 1;
+	}
+	return ;
+}
+
+/**
* megasas_deplete_reply_queue - Processes all completed commands
* @instance: Adapter soft state
* @alt_status: Alternate status to be returned to
* SCSI mid-layer instead of the status
* returned by the FW
+ * Note: this must be called with hba lock held
*/
static int
-megasas_deplete_reply_queue(struct megasas_instance *instance, u8 alt_status)
+megasas_deplete_reply_queue(struct megasas_instance *instance,
+ u8 alt_status)
{
- /*
- * Check if it is our interrupt
- * Clear the interrupt
- */
- if(instance->instancet->clear_intr(instance->reg_set))
- return IRQ_NONE;
+ u32 mfiStatus;
+ u32 fw_state;
- if (instance->hw_crit_error)
- goto out_done;
- /*
- * Schedule the tasklet for cmd completion
- */
+ if ((mfiStatus = instance->instancet->check_reset(instance,
+ instance->reg_set)) == 1) {
+ return IRQ_HANDLED;
+ }
+
+ if ((mfiStatus = instance->instancet->clear_intr(
+ instance->reg_set)
+ ) == 0) {
+ return IRQ_NONE;
+ }
+
+ instance->mfiStatus = mfiStatus;
+
+ if ((mfiStatus & MFI_INTR_FLAG_FIRMWARE_STATE_CHANGE)) {
+ fw_state = instance->instancet->read_fw_status_reg(
+ instance->reg_set) & MFI_STATE_MASK;
+
+ if (fw_state != MFI_STATE_FAULT) {
+ printk(KERN_NOTICE "megaraid_sas: fw state:%x\n",
+ fw_state);
+ }
+
+ if ((fw_state == MFI_STATE_FAULT) &&
+ (instance->disableOnlineCtrlReset == 0)) {
+ printk(KERN_NOTICE "megaraid_sas: wait adp restart\n");
+
+ if ((instance->pdev->device ==
+ PCI_DEVICE_ID_LSI_SAS1064R) ||
+ (instance->pdev->device ==
+ PCI_DEVICE_ID_DELL_PERC5) ||
+ (instance->pdev->device ==
+ PCI_DEVICE_ID_LSI_VERDE_ZCR)) {
+
+ *instance->consumer =
+ MEGASAS_ADPRESET_INPROG_SIGN;
+ }
+
+
+ instance->instancet->disable_intr(instance->reg_set);
+ instance->adprecovery = MEGASAS_ADPRESET_SM_INFAULT;
+ instance->issuepend_done = 0;
+
+ atomic_set(&instance->fw_outstanding, 0);
+ megasas_internal_reset_defer_cmds(instance);
+
+ printk(KERN_NOTICE "megasas: fwState=%x, stage:%d\n",
+ fw_state, instance->adprecovery);
+
+ schedule_work(&instance->work_init);
+ return IRQ_HANDLED;
+
+ } else {
+ printk(KERN_NOTICE "megasas: fwstate:%x, dis_OCR=%x\n",
+ fw_state, instance->disableOnlineCtrlReset);
+ }
+ }
+
tasklet_schedule(&instance->isr_tasklet);
-out_done:
return IRQ_HANDLED;
}
-
/**
* megasas_isr - isr entry point
*/
static irqreturn_t megasas_isr(int irq, void *devp)
{
- return megasas_deplete_reply_queue((struct megasas_instance *)devp,
- DID_OK);
+ struct megasas_instance *instance;
+ unsigned long flags;
+ irqreturn_t rc;
+
+ if (atomic_read(
+ &(((struct megasas_instance *)devp)->fw_reset_no_pci_access)))
+ return IRQ_HANDLED;
+
+ instance = (struct megasas_instance *)devp;
+
+ spin_lock_irqsave(&instance->hba_lock, flags);
+ rc = megasas_deplete_reply_queue(instance, DID_OK);
+ spin_unlock_irqrestore(&instance->hba_lock, flags);
+
+ return rc;
}
/**
@@ -1972,7 +2531,7 @@
"in %d secs\n", fw_state, max_wait);
return -ENODEV;
}
- };
+ }
printk(KERN_INFO "megasas: FW now in Ready state\n");
return 0;
@@ -2054,6 +2613,7 @@
*/
sgl_sz = sge_sz * instance->max_num_sge;
frame_count = (sgl_sz + MEGAMFI_FRAME_SIZE - 1) / MEGAMFI_FRAME_SIZE;
+ frame_count = 15;
/*
* We need one extra frame for the MFI command
@@ -2201,6 +2761,7 @@
cmd = instance->cmd_list[i];
memset(cmd, 0, sizeof(struct megasas_cmd));
cmd->index = i;
+ cmd->scmd = NULL;
cmd->instance = instance;
list_add_tail(&cmd->list, &instance->cmd_pool);
@@ -2368,7 +2929,7 @@
/* the following function will get the instance PD LIST */
- if ((ret == 0) && (ci->ldCount < MAX_LOGICAL_DRIVES)) {
+ if ((ret == 0) && (ci->ldCount <= MAX_LOGICAL_DRIVES)) {
memset(instance->ld_ids, 0xff, MEGASAS_MAX_LD_IDS);
for (ld_index = 0; ld_index < ci->ldCount; ld_index++) {
@@ -2682,6 +3243,21 @@
if (megasas_issue_init_mfi(instance))
goto fail_fw_init;
+ instance->fw_support_ieee = 0;
+ instance->fw_support_ieee =
+ (instance->instancet->read_fw_status_reg(reg_set) &
+ 0x04000000);
+
+ printk(KERN_NOTICE "megasas_init_mfi: fw_support_ieee=%d",
+ instance->fw_support_ieee);
+
+ if (instance->fw_support_ieee)
+ instance->flag_ieee = 1;
+
+ /** for passthrough
+ * the following function will get the PD LIST.
+ */
+
memset(instance->pd_list, 0 ,
(MEGASAS_MAX_PD * sizeof(struct megasas_pd_list)));
megasas_get_pd_list(instance);
@@ -2708,6 +3284,8 @@
max_sectors_2 = ctrl_info->max_request_size;
tmp_sectors = min_t(u32, max_sectors_1 , max_sectors_2);
+ instance->disableOnlineCtrlReset =
+ ctrl_info->properties.OnOffProperties.disableOnlineCtrlReset;
}
instance->max_sectors_per_req = instance->max_num_sge *
@@ -2929,6 +3507,7 @@
dcmd->flags = MFI_FRAME_DIR_READ;
dcmd->timeout = 0;
dcmd->pad_0 = 0;
+ instance->last_seq_num = seq_num;
dcmd->data_xfer_len = sizeof(struct megasas_evt_detail);
dcmd->opcode = MR_DCMD_CTRL_EVENT_WAIT;
dcmd->mbox.w[0] = seq_num;
@@ -3097,6 +3676,7 @@
instance = (struct megasas_instance *)host->hostdata;
memset(instance, 0, sizeof(*instance));
+ atomic_set( &instance->fw_reset_no_pci_access, 0 );
instance->producer = pci_alloc_consistent(pdev, sizeof(u32),
&instance->producer_h);
@@ -3114,6 +3694,9 @@
megasas_poll_wait_aen = 0;
instance->flag_ieee = 0;
instance->ev = NULL;
+ instance->issuepend_done = 1;
+ instance->adprecovery = MEGASAS_HBA_OPERATIONAL;
+ megasas_poll_wait_aen = 0;
instance->evt_detail = pci_alloc_consistent(pdev,
sizeof(struct
@@ -3130,6 +3713,7 @@
* Initialize locks and queues
*/
INIT_LIST_HEAD(&instance->cmd_pool);
+ INIT_LIST_HEAD(&instance->internal_reset_pending_q);
atomic_set(&instance->fw_outstanding,0);
@@ -3137,7 +3721,7 @@
init_waitqueue_head(&instance->abort_cmd_wait_q);
spin_lock_init(&instance->cmd_pool_lock);
- spin_lock_init(&instance->fire_lock);
+ spin_lock_init(&instance->hba_lock);
spin_lock_init(&instance->completion_lock);
spin_lock_init(&poll_aen_lock);
@@ -3162,6 +3746,9 @@
instance->flag = 0;
instance->unload = 1;
instance->last_time = 0;
+ instance->disableOnlineCtrlReset = 1;
+
+ INIT_WORK(&instance->work_init, process_fw_state_change_wq);
/*
* Initialize MFI Firmware
@@ -3253,6 +3840,9 @@
struct megasas_cmd *cmd;
struct megasas_dcmd_frame *dcmd;
+ if (instance->adprecovery == MEGASAS_HW_CRITICAL_ERROR)
+ return;
+
cmd = megasas_get_cmd(instance);
if (!cmd)
@@ -3290,6 +3880,9 @@
struct megasas_cmd *cmd;
struct megasas_dcmd_frame *dcmd;
+ if (instance->adprecovery == MEGASAS_HW_CRITICAL_ERROR)
+ return;
+
cmd = megasas_get_cmd(instance);
if (!cmd)
@@ -3781,6 +4374,9 @@
struct megasas_iocpacket *ioc;
struct megasas_instance *instance;
int error;
+ int i;
+ unsigned long flags;
+ u32 wait_time = MEGASAS_RESET_WAIT_TIME;
ioc = kmalloc(sizeof(*ioc), GFP_KERNEL);
if (!ioc)
@@ -3797,8 +4393,8 @@
goto out_kfree_ioc;
}
- if (instance->hw_crit_error == 1) {
- printk(KERN_DEBUG "Controller in Crit ERROR\n");
+ if (instance->adprecovery == MEGASAS_HW_CRITICAL_ERROR) {
+ printk(KERN_ERR "Controller in crit error\n");
error = -ENODEV;
goto out_kfree_ioc;
}
@@ -3815,6 +4411,35 @@
error = -ERESTARTSYS;
goto out_kfree_ioc;
}
+
+ for (i = 0; i < wait_time; i++) {
+
+ spin_lock_irqsave(&instance->hba_lock, flags);
+ if (instance->adprecovery == MEGASAS_HBA_OPERATIONAL) {
+ spin_unlock_irqrestore(&instance->hba_lock, flags);
+ break;
+ }
+ spin_unlock_irqrestore(&instance->hba_lock, flags);
+
+ if (!(i % MEGASAS_RESET_NOTICE_INTERVAL)) {
+ printk(KERN_NOTICE "megasas: waiting"
+ "for controller reset to finish\n");
+ }
+
+ msleep(1000);
+ }
+
+ spin_lock_irqsave(&instance->hba_lock, flags);
+ if (instance->adprecovery != MEGASAS_HBA_OPERATIONAL) {
+ spin_unlock_irqrestore(&instance->hba_lock, flags);
+
+ printk(KERN_ERR "megaraid_sas: timed out while"
+ "waiting for HBA to recover\n");
+ error = -ENODEV;
+ goto out_kfree_ioc;
+ }
+ spin_unlock_irqrestore(&instance->hba_lock, flags);
+
error = megasas_mgmt_fw_ioctl(instance, user_ioc, ioc);
up(&instance->ioctl_sem);
@@ -3828,6 +4453,9 @@
struct megasas_instance *instance;
struct megasas_aen aen;
int error;
+ int i;
+ unsigned long flags;
+ u32 wait_time = MEGASAS_RESET_WAIT_TIME;
if (file->private_data != file) {
printk(KERN_DEBUG "megasas: fasync_helper was not "
@@ -3843,14 +4471,42 @@
if (!instance)
return -ENODEV;
- if (instance->hw_crit_error == 1) {
- error = -ENODEV;
+ if (instance->adprecovery == MEGASAS_HW_CRITICAL_ERROR) {
+ return -ENODEV;
}
if (instance->unload == 1) {
return -ENODEV;
}
+ for (i = 0; i < wait_time; i++) {
+
+ spin_lock_irqsave(&instance->hba_lock, flags);
+ if (instance->adprecovery == MEGASAS_HBA_OPERATIONAL) {
+ spin_unlock_irqrestore(&instance->hba_lock,
+ flags);
+ break;
+ }
+
+ spin_unlock_irqrestore(&instance->hba_lock, flags);
+
+ if (!(i % MEGASAS_RESET_NOTICE_INTERVAL)) {
+ printk(KERN_NOTICE "megasas: waiting for"
+ "controller reset to finish\n");
+ }
+
+ msleep(1000);
+ }
+
+ spin_lock_irqsave(&instance->hba_lock, flags);
+ if (instance->adprecovery != MEGASAS_HBA_OPERATIONAL) {
+ spin_unlock_irqrestore(&instance->hba_lock, flags);
+ printk(KERN_ERR "megaraid_sas: timed out while waiting"
+ "for HBA to recover.\n");
+ return -ENODEV;
+ }
+ spin_unlock_irqrestore(&instance->hba_lock, flags);
+
mutex_lock(&instance->aen_mutex);
error = megasas_register_aen(instance, aen.seq_num,
aen.class_locale_word);