diff mbox series

[V2] scsi: ufs: Fix runtime PM messages never-ending cycle

Message ID 20220214121941.296034-1-adrian.hunter@intel.com
State New
Headers show
Series [V2] scsi: ufs: Fix runtime PM messages never-ending cycle | expand

Commit Message

Adrian Hunter Feb. 14, 2022, 12:19 p.m. UTC
Kernel messages produced during runtime PM can cause a never-ending
cycle because user space utilities (e.g. journald or rsyslog) write the
messages back to storage, causing runtime resume, more messages, and so
on.

Messages that tell of things that are expected to happen, are arguably
unnecessary, so suppress them.

 Example messages from Ubuntu 21.10:

 $ dmesg | tail
 [ 1620.380071] ufshcd 0000:00:12.5: ufshcd_print_pwr_info:[RX, TX]: gear=[1, 1], lane[1, 1], pwr[SLOWAUTO_MODE, SLOWAUTO_MODE], rate = 0
 [ 1620.408825] ufshcd 0000:00:12.5: ufshcd_print_pwr_info:[RX, TX]: gear=[4, 4], lane[2, 2], pwr[FAST MODE, FAST MODE], rate = 2
 [ 1620.409020] ufshcd 0000:00:12.5: ufshcd_find_max_sup_active_icc_level: Regulator capability was not set, actvIccLevel=0
 [ 1620.409524] sd 0:0:0:0: Power-on or device reset occurred
 [ 1622.938794] sd 0:0:0:0: [sda] Synchronizing SCSI cache
 [ 1622.939184] ufs_device_wlun 0:0:0:49488: Power-on or device reset occurred
 [ 1625.183175] ufshcd 0000:00:12.5: ufshcd_print_pwr_info:[RX, TX]: gear=[1, 1], lane[1, 1], pwr[SLOWAUTO_MODE, SLOWAUTO_MODE], rate = 0
 [ 1625.208041] ufshcd 0000:00:12.5: ufshcd_print_pwr_info:[RX, TX]: gear=[4, 4], lane[2, 2], pwr[FAST MODE, FAST MODE], rate = 2
 [ 1625.208311] ufshcd 0000:00:12.5: ufshcd_find_max_sup_active_icc_level: Regulator capability was not set, actvIccLevel=0
 [ 1625.209035] sd 0:0:0:0: Power-on or device reset occurred

Cc: stable@vger.kernel.org
Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
---


Changes in V2:

	Remove offending SCSI messages
	Use dev_dbg for offending UFSHCD messages


 drivers/scsi/scsi_error.c | 10 ++++++++--
 drivers/scsi/sd.c         |  7 +++++--
 drivers/scsi/ufs/ufshcd.c | 15 +++++++++++++--
 3 files changed, 26 insertions(+), 6 deletions(-)

Comments

Martin K. Petersen Feb. 15, 2022, 3:29 a.m. UTC | #1
Adrian,

> Kernel messages produced during runtime PM can cause a never-ending
> cycle because user space utilities (e.g. journald or rsyslog) write the
> messages back to storage, causing runtime resume, more messages, and so
> on.
>
> Messages that tell of things that are expected to happen, are arguably
> unnecessary, so suppress them.

I don't have a major objection to the sd_suspend changes although I
wonder if there is a log level configuration problem with these systems?
Would KERN_INFO instead of KERN_NOTICE help?

I do not like the report sense change. We see this message all the time
in the field and it is very useful for debugging problems. So that
message should be made conditional based on PM state.

> diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c
> index 60a6ae9d1219..e177dc5cc69a 100644
> --- a/drivers/scsi/scsi_error.c
> +++ b/drivers/scsi/scsi_error.c
> @@ -484,8 +484,14 @@ static void scsi_report_sense(struct scsi_device *sdev,
>  
>  		if (sshdr->asc == 0x29) {
>  			evt_type = SDEV_EVT_POWER_ON_RESET_OCCURRED;
> -			sdev_printk(KERN_WARNING, sdev,
> -				    "Power-on or device reset occurred\n");
> +			/*
> +			 * Do not print a message here because reset can be an
> +			 * expected side-effect of runtime PM. Do not print
> +			 * messages due to runtime PM to avoid never-ending
> +			 * cycles of messages written back to storage by user
> +			 * space causing runtime resume, causing more messages
> +			 * and so on.
> +			 */
>  		}
>  
>  		if (sshdr->asc == 0x2a && sshdr->ascq == 0x01) {
Adrian Hunter Feb. 16, 2022, 7:56 a.m. UTC | #2
On 15/02/2022 05:29, Martin K. Petersen wrote:
> 
> Adrian,
> 
>> Kernel messages produced during runtime PM can cause a never-ending
>> cycle because user space utilities (e.g. journald or rsyslog) write the
>> messages back to storage, causing runtime resume, more messages, and so
>> on.
>>
>> Messages that tell of things that are expected to happen, are arguably
>> unnecessary, so suppress them.
> 
> I don't have a major objection to the sd_suspend changes although I
> wonder if there is a log level configuration problem with these systems?
> Would KERN_INFO instead of KERN_NOTICE help?

I tried KERN_DEBUG but it did not help, but we also do not want to produce
large numbers of unnecessary messages.

> 
> I do not like the report sense change. We see this message all the time
> in the field and it is very useful for debugging problems. So that
> message should be made conditional based on PM state.

It can happen after runtime PM when a LUN is accessed, so a flag set by
the driver seems better.  Please see V3.

> 
>> diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c
>> index 60a6ae9d1219..e177dc5cc69a 100644
>> --- a/drivers/scsi/scsi_error.c
>> +++ b/drivers/scsi/scsi_error.c
>> @@ -484,8 +484,14 @@ static void scsi_report_sense(struct scsi_device *sdev,
>>  
>>  		if (sshdr->asc == 0x29) {
>>  			evt_type = SDEV_EVT_POWER_ON_RESET_OCCURRED;
>> -			sdev_printk(KERN_WARNING, sdev,
>> -				    "Power-on or device reset occurred\n");
>> +			/*
>> +			 * Do not print a message here because reset can be an
>> +			 * expected side-effect of runtime PM. Do not print
>> +			 * messages due to runtime PM to avoid never-ending
>> +			 * cycles of messages written back to storage by user
>> +			 * space causing runtime resume, causing more messages
>> +			 * and so on.
>> +			 */
>>  		}
>>  
>>  		if (sshdr->asc == 0x2a && sshdr->ascq == 0x01) {
>
diff mbox series

Patch

diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c
index 60a6ae9d1219..e177dc5cc69a 100644
--- a/drivers/scsi/scsi_error.c
+++ b/drivers/scsi/scsi_error.c
@@ -484,8 +484,14 @@  static void scsi_report_sense(struct scsi_device *sdev,
 
 		if (sshdr->asc == 0x29) {
 			evt_type = SDEV_EVT_POWER_ON_RESET_OCCURRED;
-			sdev_printk(KERN_WARNING, sdev,
-				    "Power-on or device reset occurred\n");
+			/*
+			 * Do not print a message here because reset can be an
+			 * expected side-effect of runtime PM. Do not print
+			 * messages due to runtime PM to avoid never-ending
+			 * cycles of messages written back to storage by user
+			 * space causing runtime resume, causing more messages
+			 * and so on.
+			 */
 		}
 
 		if (sshdr->asc == 0x2a && sshdr->ascq == 0x01) {
diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index 62eb9921cc94..18cdf5f9415a 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -3742,6 +3742,11 @@  static void sd_shutdown(struct device *dev)
 	}
 }
 
+/*
+ * Do not print messages during runtime PM to avoid never-ending cycles of
+ * messages written back to storage by user space causing runtime resume,
+ * causing more messages and so on.
+ */
 static int sd_suspend_common(struct device *dev, bool ignore_stop_errors)
 {
 	struct scsi_disk *sdkp = dev_get_drvdata(dev);
@@ -3752,7 +3757,6 @@  static int sd_suspend_common(struct device *dev, bool ignore_stop_errors)
 		return 0;
 
 	if (sdkp->WCE && sdkp->media_present) {
-		sd_printk(KERN_NOTICE, sdkp, "Synchronizing SCSI cache\n");
 		ret = sd_sync_cache(sdkp, &sshdr);
 
 		if (ret) {
@@ -3774,7 +3778,6 @@  static int sd_suspend_common(struct device *dev, bool ignore_stop_errors)
 	}
 
 	if (sdkp->device->manage_start_stop) {
-		sd_printk(KERN_NOTICE, sdkp, "Stopping disk\n");
 		/* an error is not worth aborting a system sleep */
 		ret = sd_start_stop_device(sdkp, 0);
 		if (ignore_stop_errors)
diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c
index 50b12d60dc1b..27d5b9f75644 100644
--- a/drivers/scsi/ufs/ufshcd.c
+++ b/drivers/scsi/ufs/ufshcd.c
@@ -585,7 +585,12 @@  static void ufshcd_print_pwr_info(struct ufs_hba *hba)
 		"INVALID MODE",
 	};
 
-	dev_err(hba->dev, "%s:[RX, TX]: gear=[%d, %d], lane[%d, %d], pwr[%s, %s], rate = %d\n",
+	/*
+	 * Using dev_dbg to avoid messages during runtime PM to avoid
+	 * never-ending cycles of messages written back to storage by user space
+	 * causing runtime resume, causing more messages and so on.
+	 */
+	dev_dbg(hba->dev, "%s:[RX, TX]: gear=[%d, %d], lane[%d, %d], pwr[%s, %s], rate = %d\n",
 		 __func__,
 		 hba->pwr_info.gear_rx, hba->pwr_info.gear_tx,
 		 hba->pwr_info.lane_rx, hba->pwr_info.lane_tx,
@@ -7339,7 +7344,13 @@  static u32 ufshcd_find_max_sup_active_icc_level(struct ufs_hba *hba,
 
 	if (!hba->vreg_info.vcc || !hba->vreg_info.vccq ||
 						!hba->vreg_info.vccq2) {
-		dev_err(hba->dev,
+		/*
+		 * Using dev_dbg to avoid messages during runtime PM to avoid
+		 * never-ending cycles of messages written back to storage by
+		 * user space causing runtime resume, causing more messages and
+		 * so on.
+		 */
+		dev_dbg(hba->dev,
 			"%s: Regulator capability was not set, actvIccLevel=%d",
 							__func__, icc_level);
 		goto out;