diff mbox series

scsi: ufs: initialize devfreq synchronously

Message ID 20230209211456.54250-1-athierry@redhat.com
State New
Headers show
Series scsi: ufs: initialize devfreq synchronously | expand

Commit Message

Adrien Thierry Feb. 9, 2023, 9:14 p.m. UTC
During ufs initialization, devfreq initialization is asynchronous:
ufshcd_async_scan() calls ufshcd_add_lus(), which in turn initializes
devfreq for ufs. The simple ondemand governor is then loaded. If it is
build as a module, request_module() is called and throws a warning:

  WARNING: CPU: 7 PID: 167 at kernel/kmod.c:136 __request_module+0x1e0/0x460
  Modules linked in: crct10dif_ce llcc_qcom phy_qcom_qmp_usb ufs_qcom phy_qcom_snps_femto_v2 ufshcd_pltfrm phy_qcom_qmp_combo ufshcd_core phy_qcom_qmp_ufs qcom_wdt socinfo fuse ipv6
  CPU: 7 PID: 167 Comm: kworker/u16:3 Not tainted 6.2.0-rc6-00009-g58706f7fb045 #1
  Hardware name: Qualcomm SA8540P Ride (DT)
  Workqueue: events_unbound async_run_entry_fn
  pstate: 00400005 (nzcv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--)
  pc : __request_module+0x1e0/0x460
  lr : __request_module+0x1d8/0x460
  sp : ffff800009323b90
  x29: ffff800009323b90 x28: 0000000000000000 x27: 0000000000000000
  x26: ffff800009323d50 x25: ffff7b9045f57810 x24: ffff7b9045f57830
  x23: ffffdc5a83e426e8 x22: ffffdc5ae80a9818 x21: 0000000000000001
  x20: ffffdc5ae7502f98 x19: ffff7b9045f57800 x18: ffffffffffffffff
  x17: 312f716572667665 x16: 642f7366752e3030 x15: 0000000000000000
  x14: 000000000000021c x13: 0000000000005400 x12: ffff7b9042ed7614
  x11: ffff7b9042ed7600 x10: 00000000636c0890 x9 : 0000000000000038
  x8 : ffff7b9045f2c880 x7 : ffff7b9045f57c68 x6 : 0000000000000080
  x5 : 0000000000000000 x4 : 8000000000000000 x3 : 0000000000000000
  x2 : 0000000000000000 x1 : ffffdc5ae5d382f0 x0 : 0000000000000001
  Call trace:
   __request_module+0x1e0/0x460
   try_then_request_governor+0x7c/0x100
   devfreq_add_device+0x4b0/0x5fc
   ufshcd_async_scan+0x1d4/0x310 [ufshcd_core]
   async_run_entry_fn+0x34/0xe0
   process_one_work+0x1d0/0x320
   worker_thread+0x14c/0x444
   kthread+0x10c/0x110
   ret_from_fork+0x10/0x20

This occurs because synchronous module loading from async is not
allowed. According to __request_module():

  /*
   * We don't allow synchronous module loading from async.  Module
   * init may invoke async_synchronize_full() which will end up
   * waiting for this task which already is waiting for the module
   * loading to complete, leading to a deadlock.
   */

I experienced such a deadlock on the Qualcomm QDrive3/sa8540p-ride. With
DEVFREQ_GOV_SIMPLE_ONDEMAND=m, the boot hangs after the warning.

This patch fixes both the warning and the deadlock, by moving devfreq
initialization out of the async routine.

I tested this on the sa8540p-ride by using fio to put the UFS under
load, and printing the trace generated by
/sys/kernel/tracing/events/ufs/ufshcd_clk_scaling events. The trace
looks similar with and without the change.

Signed-off-by: Adrien Thierry <athierry@redhat.com>
---
 drivers/ufs/core/ufshcd.c | 40 ++++++++++++++++++++++-----------------
 include/ufs/ufshcd.h      |  1 +
 2 files changed, 24 insertions(+), 17 deletions(-)

Comments

Bean Huo Feb. 10, 2023, 5:51 p.m. UTC | #1
Adrien,

On 09.02.23 10:14 PM, Adrien Thierry wrote:
>   
> -	/* Initialize devfreq after UFS device is detected */
> -	if (ufshcd_is_clkscaling_supported(hba)) {
> -		memcpy(&hba->clk_scaling.saved_pwr_info.info,
> -			&hba->pwr_info,
> -			sizeof(struct ufs_pa_layer_attr));
> -		hba->clk_scaling.saved_pwr_info.is_valid = true;
> -		hba->clk_scaling.is_allowed = true;
> -
> -		ret = ufshcd_devfreq_init(hba);
> -		if (ret)
> -			goto out;
> -
> -		hba->clk_scaling.is_enabled = true;
> -		ufshcd_init_clk_scaling_sysfs(hba);
> -	}
> -
>   	ufs_bsg_probe(hba);
>   	ufshpb_init(hba);
>   	scsi_scan_host(hba->host);
> @@ -8290,7 +8277,8 @@ static void ufshcd_async_scan(void *data, async_cookie_t cookie)
>   	if (ret) {
>   		pm_runtime_put_sync(hba->dev);
>   		ufshcd_hba_exit(hba);
> -	}
> +	} else
> +		hba->is_initialized = true;

after moving devfreq initialization out of the async routine, still has deadlock issue?


>   }
>   
>   static enum scsi_timeout_action ufshcd_eh_timed_out(struct scsi_cmnd *scmd)
> @@ -9896,12 +9884,30 @@ int ufshcd_init(struct ufs_hba *hba, void __iomem *mmio_base, unsigned int irq)
>   	 */
>   	ufshcd_set_ufs_dev_active(hba);
>   
> +	/* Initialize devfreq */
> +	if (ufshcd_is_clkscaling_supported(hba)) {
> +		memcpy(&hba->clk_scaling.saved_pwr_info.info,
> +			&hba->pwr_info,
> +			sizeof(struct ufs_pa_layer_attr));
> +		hba->clk_scaling.saved_pwr_info.is_valid = true;
> +		hba->clk_scaling.is_allowed = true;
> +
> +		err = ufshcd_devfreq_init(hba);
> +		if (err)
> +			goto out_power_off;
> +
> +		hba->clk_scaling.is_enabled = true;
> +		ufshcd_init_clk_scaling_sysfs(hba);
> +	}
Adrien Thierry Feb. 10, 2023, 8:23 p.m. UTC | #2
Hi Bean,

> On 09.02.23 10:14 PM, Adrien Thierry wrote:
> > -	/* Initialize devfreq after UFS device is detected */
> > -	if (ufshcd_is_clkscaling_supported(hba)) {
> > -		memcpy(&hba->clk_scaling.saved_pwr_info.info,
> > -			&hba->pwr_info,
> > -			sizeof(struct ufs_pa_layer_attr));
> > -		hba->clk_scaling.saved_pwr_info.is_valid = true;
> > -		hba->clk_scaling.is_allowed = true;
> > -
> > -		ret = ufshcd_devfreq_init(hba);
> > -		if (ret)
> > -			goto out;
> > -
> > -		hba->clk_scaling.is_enabled = true;
> > -		ufshcd_init_clk_scaling_sysfs(hba);
> > -	}
> > -
> >   	ufs_bsg_probe(hba);
> >   	ufshpb_init(hba);
> >   	scsi_scan_host(hba->host);
> > @@ -8290,7 +8277,8 @@ static void ufshcd_async_scan(void *data, async_cookie_t cookie)
> >   	if (ret) {
> >   		pm_runtime_put_sync(hba->dev);
> >   		ufshcd_hba_exit(hba);
> > -	}
> > +	} else
> > +		hba->is_initialized = true;
> 
> after moving devfreq initialization out of the async routine, still has deadlock issue?
>

No more deadlock after moving devfreq initialization out of the async
routine. The reason I added the hba->is_initialized variable is to prevent
devfreq_monitor to update the ufs frequency until the async routine had a
chance to run and the ufs is initialized. Otherwise I would sometimes get
"dvfs failed with (-16) error" on boot.

Best,

Adrien
Bart Van Assche Feb. 15, 2023, 6:50 p.m. UTC | #3
On 2/9/23 13:14, Adrien Thierry wrote:
> During ufs initialization, devfreq initialization is asynchronous:
> ufshcd_async_scan() calls ufshcd_add_lus(), which in turn initializes
> devfreq for ufs. The simple ondemand governor is then loaded. If it is
> build as a module, request_module() is called and throws a warning:
> 
>    WARNING: CPU: 7 PID: 167 at kernel/kmod.c:136 __request_module+0x1e0/0x460
>    Modules linked in: crct10dif_ce llcc_qcom phy_qcom_qmp_usb ufs_qcom phy_qcom_snps_femto_v2 ufshcd_pltfrm phy_qcom_qmp_combo ufshcd_core phy_qcom_qmp_ufs qcom_wdt socinfo fuse ipv6
>    CPU: 7 PID: 167 Comm: kworker/u16:3 Not tainted 6.2.0-rc6-00009-g58706f7fb045 #1
>    Hardware name: Qualcomm SA8540P Ride (DT)
>    Workqueue: events_unbound async_run_entry_fn
>    pstate: 00400005 (nzcv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--)
>    pc : __request_module+0x1e0/0x460
>    lr : __request_module+0x1d8/0x460
>    sp : ffff800009323b90
>    x29: ffff800009323b90 x28: 0000000000000000 x27: 0000000000000000
>    x26: ffff800009323d50 x25: ffff7b9045f57810 x24: ffff7b9045f57830
>    x23: ffffdc5a83e426e8 x22: ffffdc5ae80a9818 x21: 0000000000000001
>    x20: ffffdc5ae7502f98 x19: ffff7b9045f57800 x18: ffffffffffffffff
>    x17: 312f716572667665 x16: 642f7366752e3030 x15: 0000000000000000
>    x14: 000000000000021c x13: 0000000000005400 x12: ffff7b9042ed7614
>    x11: ffff7b9042ed7600 x10: 00000000636c0890 x9 : 0000000000000038
>    x8 : ffff7b9045f2c880 x7 : ffff7b9045f57c68 x6 : 0000000000000080
>    x5 : 0000000000000000 x4 : 8000000000000000 x3 : 0000000000000000
>    x2 : 0000000000000000 x1 : ffffdc5ae5d382f0 x0 : 0000000000000001
>    Call trace:
>     __request_module+0x1e0/0x460
>     try_then_request_governor+0x7c/0x100
>     devfreq_add_device+0x4b0/0x5fc
>     ufshcd_async_scan+0x1d4/0x310 [ufshcd_core]
>     async_run_entry_fn+0x34/0xe0
>     process_one_work+0x1d0/0x320
>     worker_thread+0x14c/0x444
>     kthread+0x10c/0x110
>     ret_from_fork+0x10/0x20
> 
> This occurs because synchronous module loading from async is not
> allowed. According to __request_module():
> 
>    /*
>     * We don't allow synchronous module loading from async.  Module
>     * init may invoke async_synchronize_full() which will end up
>     * waiting for this task which already is waiting for the module
>     * loading to complete, leading to a deadlock.
>     */
> 
> I experienced such a deadlock on the Qualcomm QDrive3/sa8540p-ride. With
> DEVFREQ_GOV_SIMPLE_ONDEMAND=m, the boot hangs after the warning.
> 
> This patch fixes both the warning and the deadlock, by moving devfreq
> initialization out of the async routine.
> 
> I tested this on the sa8540p-ride by using fio to put the UFS under
> load, and printing the trace generated by
> /sys/kernel/tracing/events/ufs/ufshcd_clk_scaling events. The trace
> looks similar with and without the change.
> 
> Signed-off-by: Adrien Thierry <athierry@redhat.com>
> ---
>   drivers/ufs/core/ufshcd.c | 40 ++++++++++++++++++++++-----------------
>   include/ufs/ufshcd.h      |  1 +
>   2 files changed, 24 insertions(+), 17 deletions(-)
> 
> diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c
> index 3a1c4d31e010..17189934d1ae 100644
> --- a/drivers/ufs/core/ufshcd.c
> +++ b/drivers/ufs/core/ufshcd.c
> @@ -1357,6 +1357,9 @@ static int ufshcd_devfreq_target(struct device *dev,
>   	struct ufs_clk_info *clki;
>   	unsigned long irq_flags;
>   
> +	if (!hba->is_initialized)
> +		return 0;
> +
>   	if (!ufshcd_is_clkscaling_supported(hba))
>   		return -EINVAL;
>   
> @@ -8136,22 +8139,6 @@ static int ufshcd_add_lus(struct ufs_hba *hba)
>   	if (ret)
>   		goto out;
>   
> -	/* Initialize devfreq after UFS device is detected */
> -	if (ufshcd_is_clkscaling_supported(hba)) {
> -		memcpy(&hba->clk_scaling.saved_pwr_info.info,
> -			&hba->pwr_info,
> -			sizeof(struct ufs_pa_layer_attr));
> -		hba->clk_scaling.saved_pwr_info.is_valid = true;
> -		hba->clk_scaling.is_allowed = true;
> -
> -		ret = ufshcd_devfreq_init(hba);
> -		if (ret)
> -			goto out;
> -
> -		hba->clk_scaling.is_enabled = true;
> -		ufshcd_init_clk_scaling_sysfs(hba);
> -	}
> -
>   	ufs_bsg_probe(hba);
>   	ufshpb_init(hba);
>   	scsi_scan_host(hba->host);
> @@ -8290,7 +8277,8 @@ static void ufshcd_async_scan(void *data, async_cookie_t cookie)
>   	if (ret) {
>   		pm_runtime_put_sync(hba->dev);
>   		ufshcd_hba_exit(hba);
> -	}
> +	} else
> +		hba->is_initialized = true;
>   }
>   
>   static enum scsi_timeout_action ufshcd_eh_timed_out(struct scsi_cmnd *scmd)
> @@ -9896,12 +9884,30 @@ int ufshcd_init(struct ufs_hba *hba, void __iomem *mmio_base, unsigned int irq)
>   	 */
>   	ufshcd_set_ufs_dev_active(hba);
>   
> +	/* Initialize devfreq */
> +	if (ufshcd_is_clkscaling_supported(hba)) {
> +		memcpy(&hba->clk_scaling.saved_pwr_info.info,
> +			&hba->pwr_info,
> +			sizeof(struct ufs_pa_layer_attr));
> +		hba->clk_scaling.saved_pwr_info.is_valid = true;
> +		hba->clk_scaling.is_allowed = true;
> +
> +		err = ufshcd_devfreq_init(hba);
> +		if (err)
> +			goto out_power_off;
> +
> +		hba->clk_scaling.is_enabled = true;
> +		ufshcd_init_clk_scaling_sysfs(hba);
> +	}
> +
>   	async_schedule(ufshcd_async_scan, hba);
>   	ufs_sysfs_add_nodes(hba->dev);
>   
>   	device_enable_async_suspend(dev);
>   	return 0;
>   
> +out_power_off:
> +	pm_runtime_put_sync(dev);
>   free_tmf_queue:
>   	blk_mq_destroy_queue(hba->tmf_queue);
>   	blk_put_queue(hba->tmf_queue);
> diff --git a/include/ufs/ufshcd.h b/include/ufs/ufshcd.h
> index 727084cd79be..58a78dcd3472 100644
> --- a/include/ufs/ufshcd.h
> +++ b/include/ufs/ufshcd.h
> @@ -896,6 +896,7 @@ struct ufs_hba {
>   	struct completion *uic_async_done;
>   
>   	enum ufshcd_state ufshcd_state;
> +	bool is_initialized;
>   	u32 eh_flags;
>   	u32 intr_mask;
>   	u16 ee_ctrl_mask;

Please make the following changes:
* Rename is_initialized into logical_unit_scan_finished or another name 
that describes the purpose of this variable more clearly.
* Fix the races between the code that sets the is_initialized variable 
and the code that reads it by using smp_load_acquire() and 
smp_store_release() instead of an ordinary load and store.
* Document in ufshcd_devfreq_target() why the logical unit scan must 
have completed.

Thanks,

Bart.
diff mbox series

Patch

diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c
index 3a1c4d31e010..17189934d1ae 100644
--- a/drivers/ufs/core/ufshcd.c
+++ b/drivers/ufs/core/ufshcd.c
@@ -1357,6 +1357,9 @@  static int ufshcd_devfreq_target(struct device *dev,
 	struct ufs_clk_info *clki;
 	unsigned long irq_flags;
 
+	if (!hba->is_initialized)
+		return 0;
+
 	if (!ufshcd_is_clkscaling_supported(hba))
 		return -EINVAL;
 
@@ -8136,22 +8139,6 @@  static int ufshcd_add_lus(struct ufs_hba *hba)
 	if (ret)
 		goto out;
 
-	/* Initialize devfreq after UFS device is detected */
-	if (ufshcd_is_clkscaling_supported(hba)) {
-		memcpy(&hba->clk_scaling.saved_pwr_info.info,
-			&hba->pwr_info,
-			sizeof(struct ufs_pa_layer_attr));
-		hba->clk_scaling.saved_pwr_info.is_valid = true;
-		hba->clk_scaling.is_allowed = true;
-
-		ret = ufshcd_devfreq_init(hba);
-		if (ret)
-			goto out;
-
-		hba->clk_scaling.is_enabled = true;
-		ufshcd_init_clk_scaling_sysfs(hba);
-	}
-
 	ufs_bsg_probe(hba);
 	ufshpb_init(hba);
 	scsi_scan_host(hba->host);
@@ -8290,7 +8277,8 @@  static void ufshcd_async_scan(void *data, async_cookie_t cookie)
 	if (ret) {
 		pm_runtime_put_sync(hba->dev);
 		ufshcd_hba_exit(hba);
-	}
+	} else
+		hba->is_initialized = true;
 }
 
 static enum scsi_timeout_action ufshcd_eh_timed_out(struct scsi_cmnd *scmd)
@@ -9896,12 +9884,30 @@  int ufshcd_init(struct ufs_hba *hba, void __iomem *mmio_base, unsigned int irq)
 	 */
 	ufshcd_set_ufs_dev_active(hba);
 
+	/* Initialize devfreq */
+	if (ufshcd_is_clkscaling_supported(hba)) {
+		memcpy(&hba->clk_scaling.saved_pwr_info.info,
+			&hba->pwr_info,
+			sizeof(struct ufs_pa_layer_attr));
+		hba->clk_scaling.saved_pwr_info.is_valid = true;
+		hba->clk_scaling.is_allowed = true;
+
+		err = ufshcd_devfreq_init(hba);
+		if (err)
+			goto out_power_off;
+
+		hba->clk_scaling.is_enabled = true;
+		ufshcd_init_clk_scaling_sysfs(hba);
+	}
+
 	async_schedule(ufshcd_async_scan, hba);
 	ufs_sysfs_add_nodes(hba->dev);
 
 	device_enable_async_suspend(dev);
 	return 0;
 
+out_power_off:
+	pm_runtime_put_sync(dev);
 free_tmf_queue:
 	blk_mq_destroy_queue(hba->tmf_queue);
 	blk_put_queue(hba->tmf_queue);
diff --git a/include/ufs/ufshcd.h b/include/ufs/ufshcd.h
index 727084cd79be..58a78dcd3472 100644
--- a/include/ufs/ufshcd.h
+++ b/include/ufs/ufshcd.h
@@ -896,6 +896,7 @@  struct ufs_hba {
 	struct completion *uic_async_done;
 
 	enum ufshcd_state ufshcd_state;
+	bool is_initialized;
 	u32 eh_flags;
 	u32 intr_mask;
 	u16 ee_ctrl_mask;