Message ID | 20210323044257.26664-10-njavali@marvell.com |
---|---|
State | Superseded |
Headers | show |
Series | qla2xxx driver bug fixes | expand |
> On Mar 22, 2021, at 11:42 PM, Nilesh Javali <njavali@marvell.com> wrote: > > From: Quinn Tran <qutran@marvell.com> > > For the mailbox thread that encounter PCIe error, pause that > thread until PCIe link reset/recovery completed to prevent > the thread from possibly unmapping any type of DMA resource that might > be in progress at the same time. > > Signed-off-by: Quinn Tran <qutran@marvell.com> > Signed-off-by: Nilesh Javali <njavali@marvell.com> > --- > drivers/scsi/qla2xxx/qla_mbx.c | 38 ++++++++++++++++++++++++++-------- > 1 file changed, 29 insertions(+), 9 deletions(-) > > diff --git a/drivers/scsi/qla2xxx/qla_mbx.c b/drivers/scsi/qla2xxx/qla_mbx.c > index 0149f84cdd8e..3bc6020cfb8d 100644 > --- a/drivers/scsi/qla2xxx/qla_mbx.c > +++ b/drivers/scsi/qla2xxx/qla_mbx.c > @@ -102,7 +102,7 @@ qla2x00_mailbox_command(scsi_qla_host_t *vha, mbx_cmd_t *mcp) > int rval, i; > unsigned long flags = 0; > device_reg_t *reg; > - uint8_t abort_active; > + uint8_t abort_active, eeh_delay; > uint8_t io_lock_on; > uint16_t command = 0; > uint16_t *iptr; > @@ -136,7 +136,7 @@ qla2x00_mailbox_command(scsi_qla_host_t *vha, mbx_cmd_t *mcp) > "PCI error, exiting.\n"); > return QLA_FUNCTION_TIMEOUT; > } > - > + eeh_delay = 0; > reg = ha->iobase; > io_lock_on = base_vha->flags.init_done; > > @@ -159,11 +159,10 @@ qla2x00_mailbox_command(scsi_qla_host_t *vha, mbx_cmd_t *mcp) > } > > /* check if ISP abort is active and return cmd with timeout */ > - if ((test_bit(ABORT_ISP_ACTIVE, &base_vha->dpc_flags) || > - test_bit(ISP_ABORT_RETRY, &base_vha->dpc_flags) || > - test_bit(ISP_ABORT_NEEDED, &base_vha->dpc_flags) || > - ha->flags.eeh_busy) && > - !is_rom_cmd(mcp->mb[0])) { > + if (((test_bit(ABORT_ISP_ACTIVE, &base_vha->dpc_flags) || > + test_bit(ISP_ABORT_RETRY, &base_vha->dpc_flags) || > + test_bit(ISP_ABORT_NEEDED, &base_vha->dpc_flags)) && > + !is_rom_cmd(mcp->mb[0])) || ha->flags.eeh_busy) { > ql_log(ql_log_info, vha, 0x1005, > "Cmd 0x%x aborted with timeout since ISP Abort is 
pending\n", > mcp->mb[0]); > @@ -186,7 +185,11 @@ qla2x00_mailbox_command(scsi_qla_host_t *vha, mbx_cmd_t *mcp) > return QLA_FUNCTION_TIMEOUT; > } > atomic_dec(&ha->num_pend_mbx_stage1); > - if (ha->flags.purge_mbox || chip_reset != ha->chip_reset) { > + if (ha->flags.purge_mbox || chip_reset != ha->chip_reset || > + ha->flags.eeh_busy) { > + ql_log(ql_log_warn, vha, 0xd035, > + "Error detected: purge[%d] eeh[%d] cmd=0x%x, Exiting.\n", > + ha->flags.purge_mbox, ha->flags.eeh_busy, mcp->mb[0]); > rval = QLA_ABORTED; > goto premature_exit; > } > @@ -266,6 +269,8 @@ qla2x00_mailbox_command(scsi_qla_host_t *vha, mbx_cmd_t *mcp) > if (!wait_for_completion_timeout(&ha->mbx_intr_comp, > mcp->tov * HZ)) { > if (chip_reset != ha->chip_reset) { > + eeh_delay = ha->flags.eeh_busy ? 1 : 0; > + > spin_lock_irqsave(&ha->hardware_lock, flags); > ha->flags.mbox_busy = 0; > spin_unlock_irqrestore(&ha->hardware_lock, > @@ -283,6 +288,8 @@ qla2x00_mailbox_command(scsi_qla_host_t *vha, mbx_cmd_t *mcp) > > } else if (ha->flags.purge_mbox || > chip_reset != ha->chip_reset) { > + eeh_delay = ha->flags.eeh_busy ? 1 : 0; > + > spin_lock_irqsave(&ha->hardware_lock, flags); > ha->flags.mbox_busy = 0; > spin_unlock_irqrestore(&ha->hardware_lock, flags); > @@ -324,6 +331,8 @@ qla2x00_mailbox_command(scsi_qla_host_t *vha, mbx_cmd_t *mcp) > while (!ha->flags.mbox_int) { > if (ha->flags.purge_mbox || > chip_reset != ha->chip_reset) { > + eeh_delay = ha->flags.eeh_busy ? 1 : 0; > + > spin_lock_irqsave(&ha->hardware_lock, flags); > ha->flags.mbox_busy = 0; > spin_unlock_irqrestore(&ha->hardware_lock, > @@ -532,7 +541,8 @@ qla2x00_mailbox_command(scsi_qla_host_t *vha, mbx_cmd_t *mcp) > clear_bit(ISP_ABORT_NEEDED, &vha->dpc_flags); > /* Allow next mbx cmd to come in. */ > complete(&ha->mbx_cmd_comp); > - if (ha->isp_ops->abort_isp(vha)) { > + if (ha->isp_ops->abort_isp(vha) && > + !ha->flags.eeh_busy) { > /* Failed. retry later. 
*/ > set_bit(ISP_ABORT_NEEDED, > &vha->dpc_flags); > @@ -585,6 +595,16 @@ qla2x00_mailbox_command(scsi_qla_host_t *vha, mbx_cmd_t *mcp) > ql_dbg(ql_dbg_mbx, base_vha, 0x1021, "Done %s.\n", __func__); > } > > + i = 500; > + while (i && eeh_delay && (ha->pci_error_state < QLA_PCI_SLOT_RESET)) { > + /* The caller of this mailbox encounter pci error. > + * Hold the thread until PCIE link reset complete to make > + * sure caller does not unmap dma while recovery is > + * in progress. > + */ Small nit: fix the comment formatting for the multi-line comment. > + msleep(1); > + i--; > + } > return rval; > } > > -- > 2.19.0.rc0 > Code itself looks good. After fixing the comment, you can add Reviewed-by: Himanshu Madhani <himanshu.madhani@oracle.com> -- Himanshu Madhani Oracle Linux Engineering
diff --git a/drivers/scsi/qla2xxx/qla_mbx.c b/drivers/scsi/qla2xxx/qla_mbx.c index 0149f84cdd8e..3bc6020cfb8d 100644 --- a/drivers/scsi/qla2xxx/qla_mbx.c +++ b/drivers/scsi/qla2xxx/qla_mbx.c @@ -102,7 +102,7 @@ qla2x00_mailbox_command(scsi_qla_host_t *vha, mbx_cmd_t *mcp) int rval, i; unsigned long flags = 0; device_reg_t *reg; - uint8_t abort_active; + uint8_t abort_active, eeh_delay; uint8_t io_lock_on; uint16_t command = 0; uint16_t *iptr; @@ -136,7 +136,7 @@ qla2x00_mailbox_command(scsi_qla_host_t *vha, mbx_cmd_t *mcp) "PCI error, exiting.\n"); return QLA_FUNCTION_TIMEOUT; } - + eeh_delay = 0; reg = ha->iobase; io_lock_on = base_vha->flags.init_done; @@ -159,11 +159,10 @@ qla2x00_mailbox_command(scsi_qla_host_t *vha, mbx_cmd_t *mcp) } /* check if ISP abort is active and return cmd with timeout */ - if ((test_bit(ABORT_ISP_ACTIVE, &base_vha->dpc_flags) || - test_bit(ISP_ABORT_RETRY, &base_vha->dpc_flags) || - test_bit(ISP_ABORT_NEEDED, &base_vha->dpc_flags) || - ha->flags.eeh_busy) && - !is_rom_cmd(mcp->mb[0])) { + if (((test_bit(ABORT_ISP_ACTIVE, &base_vha->dpc_flags) || + test_bit(ISP_ABORT_RETRY, &base_vha->dpc_flags) || + test_bit(ISP_ABORT_NEEDED, &base_vha->dpc_flags)) && + !is_rom_cmd(mcp->mb[0])) || ha->flags.eeh_busy) { ql_log(ql_log_info, vha, 0x1005, "Cmd 0x%x aborted with timeout since ISP Abort is pending\n", mcp->mb[0]); @@ -186,7 +185,11 @@ qla2x00_mailbox_command(scsi_qla_host_t *vha, mbx_cmd_t *mcp) return QLA_FUNCTION_TIMEOUT; } atomic_dec(&ha->num_pend_mbx_stage1); - if (ha->flags.purge_mbox || chip_reset != ha->chip_reset) { + if (ha->flags.purge_mbox || chip_reset != ha->chip_reset || + ha->flags.eeh_busy) { + ql_log(ql_log_warn, vha, 0xd035, + "Error detected: purge[%d] eeh[%d] cmd=0x%x, Exiting.\n", + ha->flags.purge_mbox, ha->flags.eeh_busy, mcp->mb[0]); rval = QLA_ABORTED; goto premature_exit; } @@ -266,6 +269,8 @@ qla2x00_mailbox_command(scsi_qla_host_t *vha, mbx_cmd_t *mcp) if (!wait_for_completion_timeout(&ha->mbx_intr_comp, mcp->tov 
* HZ)) { if (chip_reset != ha->chip_reset) { + eeh_delay = ha->flags.eeh_busy ? 1 : 0; + spin_lock_irqsave(&ha->hardware_lock, flags); ha->flags.mbox_busy = 0; spin_unlock_irqrestore(&ha->hardware_lock, @@ -283,6 +288,8 @@ qla2x00_mailbox_command(scsi_qla_host_t *vha, mbx_cmd_t *mcp) } else if (ha->flags.purge_mbox || chip_reset != ha->chip_reset) { + eeh_delay = ha->flags.eeh_busy ? 1 : 0; + spin_lock_irqsave(&ha->hardware_lock, flags); ha->flags.mbox_busy = 0; spin_unlock_irqrestore(&ha->hardware_lock, flags); @@ -324,6 +331,8 @@ qla2x00_mailbox_command(scsi_qla_host_t *vha, mbx_cmd_t *mcp) while (!ha->flags.mbox_int) { if (ha->flags.purge_mbox || chip_reset != ha->chip_reset) { + eeh_delay = ha->flags.eeh_busy ? 1 : 0; + spin_lock_irqsave(&ha->hardware_lock, flags); ha->flags.mbox_busy = 0; spin_unlock_irqrestore(&ha->hardware_lock, @@ -532,7 +541,8 @@ qla2x00_mailbox_command(scsi_qla_host_t *vha, mbx_cmd_t *mcp) clear_bit(ISP_ABORT_NEEDED, &vha->dpc_flags); /* Allow next mbx cmd to come in. */ complete(&ha->mbx_cmd_comp); - if (ha->isp_ops->abort_isp(vha)) { + if (ha->isp_ops->abort_isp(vha) && + !ha->flags.eeh_busy) { /* Failed. retry later. */ set_bit(ISP_ABORT_NEEDED, &vha->dpc_flags); @@ -585,6 +595,16 @@ qla2x00_mailbox_command(scsi_qla_host_t *vha, mbx_cmd_t *mcp) ql_dbg(ql_dbg_mbx, base_vha, 0x1021, "Done %s.\n", __func__); } + i = 500; + while (i && eeh_delay && (ha->pci_error_state < QLA_PCI_SLOT_RESET)) { + /* The caller of this mailbox encounter pci error. + * Hold the thread until PCIE link reset complete to make + * sure caller does not unmap dma while recovery is + * in progress. + */ + msleep(1); + i--; + } return rval; }