diff mbox series

[02/10] qla2xxx: Flush mailbox commands on chip reset

Message ID 20230801114057.27039-3-njavali@marvell.com
State Superseded
Headers show
Series qla2xxx driver misc features | expand

Commit Message

Nilesh Javali Aug. 1, 2023, 11:40 a.m. UTC
From: Quinn Tran <qutran@marvell.com>

Fix race condition between Interrupt thread and Chip reset
thread in trying to flush the same mailbox. With the race
condition, the "ha->mbx_intr_comp" will get an extra complete()
call. The extra complete call create erroneous mailbox timeout
condition when the next mailbox is sent where the mailbox call
does not wait for interrupt to arrive. Instead, it advance
without waiting.

Add lock protection around the check for mailbox completion.

Cc: stable@vger.kernel.org
Fixes: b2000805a9759 ("scsi: qla2xxx: Flush mailbox commands on chip reset")
Signed-off-by: Quinn Tran <quinn.tran@marvell.com>
Signed-off-by: Nilesh Javali <njavali@marvell.com>
---
 drivers/scsi/qla2xxx/qla_def.h  | 1 -
 drivers/scsi/qla2xxx/qla_init.c | 7 ++++---
 drivers/scsi/qla2xxx/qla_mbx.c  | 4 ----
 drivers/scsi/qla2xxx/qla_os.c   | 1 -
 4 files changed, 4 insertions(+), 9 deletions(-)

Comments

Himanshu Madhani Aug. 2, 2023, 7:41 p.m. UTC | #1
On 8/1/23 04:40, Nilesh Javali wrote:
> From: Quinn Tran <qutran@marvell.com>
> 
> Fix race condition between Interrupt thread and Chip reset
> thread in trying to flush the same mailbox. With the race
> condition, the "ha->mbx_intr_comp" will get an extra complete()
> call. The extra complete call create erroneous mailbox timeout
> condition when the next mailbox is sent where the mailbox call
> does not wait for interrupt to arrive. Instead, it advance
> without waiting.
> 
> Add lock protection around the check for mailbox completion.
> 
> Cc: stable@vger.kernel.org
> Fixes: b2000805a9759 ("scsi: qla2xxx: Flush mailbox commands on chip reset")
> Signed-off-by: Quinn Tran <quinn.tran@marvell.com>
> Signed-off-by: Nilesh Javali <njavali@marvell.com>
> ---
>   drivers/scsi/qla2xxx/qla_def.h  | 1 -
>   drivers/scsi/qla2xxx/qla_init.c | 7 ++++---
>   drivers/scsi/qla2xxx/qla_mbx.c  | 4 ----
>   drivers/scsi/qla2xxx/qla_os.c   | 1 -
>   4 files changed, 4 insertions(+), 9 deletions(-)
> 
> diff --git a/drivers/scsi/qla2xxx/qla_def.h b/drivers/scsi/qla2xxx/qla_def.h
> index 806d08f4f310..5882e61141e6 100644
> --- a/drivers/scsi/qla2xxx/qla_def.h
> +++ b/drivers/scsi/qla2xxx/qla_def.h
> @@ -4412,7 +4412,6 @@ struct qla_hw_data {
>   	uint8_t		aen_mbx_count;
>   	atomic_t	num_pend_mbx_stage1;
>   	atomic_t	num_pend_mbx_stage2;
> -	atomic_t	num_pend_mbx_stage3;
>   	uint16_t	frame_payload_size;
>   
>   	uint32_t	login_retry_count;
> diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c
> index 2a9fbb3e12c9..ddc9b54f5703 100644
> --- a/drivers/scsi/qla2xxx/qla_init.c
> +++ b/drivers/scsi/qla2xxx/qla_init.c
> @@ -7391,14 +7391,15 @@ qla2x00_abort_isp_cleanup(scsi_qla_host_t *vha)
>   	}
>   
>   	/* purge MBox commands */
> -	if (atomic_read(&ha->num_pend_mbx_stage3)) {
> +	spin_lock_irqsave(&ha->hardware_lock, flags);
> +	if (test_bit(MBX_INTR_WAIT, &ha->mbx_cmd_flags)) {
>   		clear_bit(MBX_INTR_WAIT, &ha->mbx_cmd_flags);
>   		complete(&ha->mbx_intr_comp);
>   	}
> +	spin_unlock_irqrestore(&ha->hardware_lock, flags);
>   
>   	i = 0;
> -	while (atomic_read(&ha->num_pend_mbx_stage3) ||
> -	    atomic_read(&ha->num_pend_mbx_stage2) ||
> +	while (atomic_read(&ha->num_pend_mbx_stage2) ||
>   	    atomic_read(&ha->num_pend_mbx_stage1)) {
>   		msleep(20);
>   		i++;
> diff --git a/drivers/scsi/qla2xxx/qla_mbx.c b/drivers/scsi/qla2xxx/qla_mbx.c
> index b05f93037875..21ec32b4fb28 100644
> --- a/drivers/scsi/qla2xxx/qla_mbx.c
> +++ b/drivers/scsi/qla2xxx/qla_mbx.c
> @@ -273,7 +273,6 @@ qla2x00_mailbox_command(scsi_qla_host_t *vha, mbx_cmd_t *mcp)
>   		spin_unlock_irqrestore(&ha->hardware_lock, flags);
>   
>   		wait_time = jiffies;
> -		atomic_inc(&ha->num_pend_mbx_stage3);
>   		if (!wait_for_completion_timeout(&ha->mbx_intr_comp,
>   		    mcp->tov * HZ)) {
>   			ql_dbg(ql_dbg_mbx, vha, 0x117a,
> @@ -290,7 +289,6 @@ qla2x00_mailbox_command(scsi_qla_host_t *vha, mbx_cmd_t *mcp)
>   				spin_unlock_irqrestore(&ha->hardware_lock,
>   				    flags);
>   				atomic_dec(&ha->num_pend_mbx_stage2);
> -				atomic_dec(&ha->num_pend_mbx_stage3);
>   				rval = QLA_ABORTED;
>   				goto premature_exit;
>   			}
> @@ -302,11 +300,9 @@ qla2x00_mailbox_command(scsi_qla_host_t *vha, mbx_cmd_t *mcp)
>   			ha->flags.mbox_busy = 0;
>   			spin_unlock_irqrestore(&ha->hardware_lock, flags);
>   			atomic_dec(&ha->num_pend_mbx_stage2);
> -			atomic_dec(&ha->num_pend_mbx_stage3);
>   			rval = QLA_ABORTED;
>   			goto premature_exit;
>   		}
> -		atomic_dec(&ha->num_pend_mbx_stage3);
>   
>   		if (time_after(jiffies, wait_time + 5 * HZ))
>   			ql_log(ql_log_warn, vha, 0x1015, "cmd=0x%x, waited %d msecs\n",
> diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c
> index d622d415a3c1..a18bcc86a21a 100644
> --- a/drivers/scsi/qla2xxx/qla_os.c
> +++ b/drivers/scsi/qla2xxx/qla_os.c
> @@ -3007,7 +3007,6 @@ qla2x00_probe_one(struct pci_dev *pdev, const struct pci_device_id *id)
>   	ha->max_exchg = FW_MAX_EXCHANGES_CNT;
>   	atomic_set(&ha->num_pend_mbx_stage1, 0);
>   	atomic_set(&ha->num_pend_mbx_stage2, 0);
> -	atomic_set(&ha->num_pend_mbx_stage3, 0);
>   	atomic_set(&ha->zio_threshold, DEFAULT_ZIO_THRESHOLD);
>   	ha->last_zio_threshold = DEFAULT_ZIO_THRESHOLD;
>   	INIT_LIST_HEAD(&ha->tmf_pending);


Reviewed-by: Himanshu Madhani <himanshu.madhani@oracle.com>
diff mbox series

Patch

diff --git a/drivers/scsi/qla2xxx/qla_def.h b/drivers/scsi/qla2xxx/qla_def.h
index 806d08f4f310..5882e61141e6 100644
--- a/drivers/scsi/qla2xxx/qla_def.h
+++ b/drivers/scsi/qla2xxx/qla_def.h
@@ -4412,7 +4412,6 @@  struct qla_hw_data {
 	uint8_t		aen_mbx_count;
 	atomic_t	num_pend_mbx_stage1;
 	atomic_t	num_pend_mbx_stage2;
-	atomic_t	num_pend_mbx_stage3;
 	uint16_t	frame_payload_size;
 
 	uint32_t	login_retry_count;
diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c
index 2a9fbb3e12c9..ddc9b54f5703 100644
--- a/drivers/scsi/qla2xxx/qla_init.c
+++ b/drivers/scsi/qla2xxx/qla_init.c
@@ -7391,14 +7391,15 @@  qla2x00_abort_isp_cleanup(scsi_qla_host_t *vha)
 	}
 
 	/* purge MBox commands */
-	if (atomic_read(&ha->num_pend_mbx_stage3)) {
+	spin_lock_irqsave(&ha->hardware_lock, flags);
+	if (test_bit(MBX_INTR_WAIT, &ha->mbx_cmd_flags)) {
 		clear_bit(MBX_INTR_WAIT, &ha->mbx_cmd_flags);
 		complete(&ha->mbx_intr_comp);
 	}
+	spin_unlock_irqrestore(&ha->hardware_lock, flags);
 
 	i = 0;
-	while (atomic_read(&ha->num_pend_mbx_stage3) ||
-	    atomic_read(&ha->num_pend_mbx_stage2) ||
+	while (atomic_read(&ha->num_pend_mbx_stage2) ||
 	    atomic_read(&ha->num_pend_mbx_stage1)) {
 		msleep(20);
 		i++;
diff --git a/drivers/scsi/qla2xxx/qla_mbx.c b/drivers/scsi/qla2xxx/qla_mbx.c
index b05f93037875..21ec32b4fb28 100644
--- a/drivers/scsi/qla2xxx/qla_mbx.c
+++ b/drivers/scsi/qla2xxx/qla_mbx.c
@@ -273,7 +273,6 @@  qla2x00_mailbox_command(scsi_qla_host_t *vha, mbx_cmd_t *mcp)
 		spin_unlock_irqrestore(&ha->hardware_lock, flags);
 
 		wait_time = jiffies;
-		atomic_inc(&ha->num_pend_mbx_stage3);
 		if (!wait_for_completion_timeout(&ha->mbx_intr_comp,
 		    mcp->tov * HZ)) {
 			ql_dbg(ql_dbg_mbx, vha, 0x117a,
@@ -290,7 +289,6 @@  qla2x00_mailbox_command(scsi_qla_host_t *vha, mbx_cmd_t *mcp)
 				spin_unlock_irqrestore(&ha->hardware_lock,
 				    flags);
 				atomic_dec(&ha->num_pend_mbx_stage2);
-				atomic_dec(&ha->num_pend_mbx_stage3);
 				rval = QLA_ABORTED;
 				goto premature_exit;
 			}
@@ -302,11 +300,9 @@  qla2x00_mailbox_command(scsi_qla_host_t *vha, mbx_cmd_t *mcp)
 			ha->flags.mbox_busy = 0;
 			spin_unlock_irqrestore(&ha->hardware_lock, flags);
 			atomic_dec(&ha->num_pend_mbx_stage2);
-			atomic_dec(&ha->num_pend_mbx_stage3);
 			rval = QLA_ABORTED;
 			goto premature_exit;
 		}
-		atomic_dec(&ha->num_pend_mbx_stage3);
 
 		if (time_after(jiffies, wait_time + 5 * HZ))
 			ql_log(ql_log_warn, vha, 0x1015, "cmd=0x%x, waited %d msecs\n",
diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c
index d622d415a3c1..a18bcc86a21a 100644
--- a/drivers/scsi/qla2xxx/qla_os.c
+++ b/drivers/scsi/qla2xxx/qla_os.c
@@ -3007,7 +3007,6 @@  qla2x00_probe_one(struct pci_dev *pdev, const struct pci_device_id *id)
 	ha->max_exchg = FW_MAX_EXCHANGES_CNT;
 	atomic_set(&ha->num_pend_mbx_stage1, 0);
 	atomic_set(&ha->num_pend_mbx_stage2, 0);
-	atomic_set(&ha->num_pend_mbx_stage3, 0);
 	atomic_set(&ha->zio_threshold, DEFAULT_ZIO_THRESHOLD);
 	ha->last_zio_threshold = DEFAULT_ZIO_THRESHOLD;
 	INIT_LIST_HEAD(&ha->tmf_pending);