diff mbox series

[v7,09/23] scsi: sd: Do not issue commands to suspended disks on shutdown

Message ID 20230926081507.69346-10-dlemoal@kernel.org
State Superseded
Headers show
Series Fix libata suspend/resume handling and code cleanup | expand

Commit Message

Damien Le Moal Sept. 26, 2023, 8:14 a.m. UTC
If an error occurs when resuming a host adapter before the devices
attached to the adapter are resumed, the adapter low level driver may
remove the scsi host, resulting in a call to sd_remove() for the
disks of the host. This in turn results in a call to sd_shutdown() which
will issue a synchronize cache command and a start stop unit command to
spindown the disk. sd_shutdown() issues the commands only if the device
is not already runtime suspended but does not check the power state for
system-wide suspend/resume. That is, the commands may be issued with the
device in a suspended state, which causes PM resume to hang, forcing a
reset of the machine to recover.

Fix this by tracking the suspended state of a disk by introducing the
suspended boolean field in the scsi_disk structure. This flag is set to
true when the disk is suspended is sd_suspend_common() and resumed with
sd_resume(). When suspended is true, sd_shutdown() is not executed from
sd_remove().

Cc: stable@vger.kernel.org
Signed-off-by: Damien Le Moal <dlemoal@kernel.org>
---
 drivers/scsi/sd.c | 17 +++++++++++++----
 drivers/scsi/sd.h |  1 +
 2 files changed, 14 insertions(+), 4 deletions(-)

Comments

Bart Van Assche Sept. 27, 2023, 2:55 p.m. UTC | #1
On 9/26/23 01:14, Damien Le Moal wrote:
> @@ -3891,21 +3895,26 @@ static int sd_suspend_runtime(struct device *dev)
>   static int sd_resume(struct device *dev, bool runtime)
>   {
>   	struct scsi_disk *sdkp = dev_get_drvdata(dev);
> -	int ret;
> +	int ret = 0;
>   
>   	if (!sdkp)	/* E.g.: runtime resume at the start of sd_probe() */
>   		return 0;

As far as I can tell there is nothing that prevents system wide
suspend or resume after a SCSI disk has been discovered and before
sd_probe() is called(). So I think that "sdkp->suspended = false;"
has to be added in the above if-statement. This is the how SCSI
disks are registered (synchronous case only):

scsi_probe_and_add_lun(target, bflagsp, sdevp, rescan, hostdata)
   scsi_alloc_sdev(starget, lun, hostdata)
     __scsi_init_queue(&sdev->host)
     scsi_sysfs_device_initialize(sdev)
     shost->hostt->slave_alloc(sdev)
   scsi_probe_lun(sdev, ...)
     scsi_execute_req(sdev, INQUIRY)
   scsi_add_lun(sdev, ...)
     scsi_device_set_state(sdev, SDEV_RUNNING)
     sdev->host->hostt->slave_configure(sdev) /* may do I/O */
     scsi_sysfs_add_sdev(sdev) /* enables runtime PM */
       scsi_target_add(starget)
       device_add(&sdev->sdev_gendev)
         kobject_add(&dev->kobj, ...)
         bus_add_device(dev)
         bus_probe_device(dev)
           device_initial_probe(dev)
             __device_attach(dev, /*allow_async=*/true)
               __device_attach_driver(drv, dev, ...)
                 driver_probe_device(drv, dev)
                   really_probe(dev, drv)
                     dev->bus->probe(dev) = sd_probe(dev)
                       gd = blk_mq_alloc_disk_for_queue()
                       device_add(&sdkp->dev)
                       sd_revalidate_disk(gd)
                       device_add_disk(dev, gd, NULL)
       device_add(&sdev->sdev_dev)
       bsg_scsi_register_queue(rq, &sdev->sdev_gendev)

Thanks,

Bart.
diff mbox series

Patch

diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index a1ef4eef904f..f911a64521e6 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -3741,7 +3741,8 @@  static int sd_remove(struct device *dev)
 
 	device_del(&sdkp->disk_dev);
 	del_gendisk(sdkp->disk);
-	sd_shutdown(dev);
+	if (!sdkp->suspended)
+		sd_shutdown(dev);
 
 	put_disk(sdkp->disk);
 	return 0;
@@ -3872,6 +3873,9 @@  static int sd_suspend_common(struct device *dev, bool runtime)
 			ret = 0;
 	}
 
+	if (!ret)
+		sdkp->suspended = true;
+
 	return ret;
 }
 
@@ -3891,21 +3895,26 @@  static int sd_suspend_runtime(struct device *dev)
 static int sd_resume(struct device *dev, bool runtime)
 {
 	struct scsi_disk *sdkp = dev_get_drvdata(dev);
-	int ret;
+	int ret = 0;
 
 	if (!sdkp)	/* E.g.: runtime resume at the start of sd_probe() */
 		return 0;
 
-	if (!sd_do_start_stop(sdkp->device, runtime))
+	if (!sd_do_start_stop(sdkp->device, runtime)) {
+		sdkp->suspended = false;
 		return 0;
+	}
 
 	if (!sdkp->device->no_start_on_resume) {
 		sd_printk(KERN_NOTICE, sdkp, "Starting disk\n");
 		ret = sd_start_stop_device(sdkp, 1);
 	}
 
-	if (!ret)
+	if (!ret) {
 		opal_unlock_from_suspend(sdkp->opal_dev);
+		sdkp->suspended = false;
+	}
+
 	return ret;
 }
 
diff --git a/drivers/scsi/sd.h b/drivers/scsi/sd.h
index 5eea762f84d1..14153ef7a414 100644
--- a/drivers/scsi/sd.h
+++ b/drivers/scsi/sd.h
@@ -131,6 +131,7 @@  struct scsi_disk {
 	u8		provisioning_mode;
 	u8		zeroing_mode;
 	u8		nr_actuators;		/* Number of actuators */
+	bool		suspended;	/* Disk is supended (stopped) */
 	unsigned	ATO : 1;	/* state of disk ATO bit */
 	unsigned	cache_override : 1; /* temp override of WCE,RCD */
 	unsigned	WCE : 1;	/* state of disk WCE bit */