diff mbox series

ufs: fix deadlock for the case of pm shutdown during suspend transition to resume

Message ID 20250311084257.8989-1-qiwu.chen@transsion.com
State New
Headers show
Series ufs: fix deadlock for the case of pm shutdown during suspend transition to resume | expand

Commit Message

qiwu.chen March 11, 2025, 8:42 a.m. UTC
There is a deadlock when triggers pm shutdown during suspend-to-idle state transition
to resume. Here are the issue reproduce steps:
1. System enter suspend-to-idle transition and execute suspend callbacks for given devices
in dpm_suspend(). The suspend callback of ufshcd_wl device - ufshcd_wl_suspend() will down
hba->host_sem which is supposed to up in ufshcd_wl_resume(). Here is main call trace:
enter_state
    suspend_devices_and_enter
        dpm_suspend_start
	    dpm_suspend
		device_suspend
		    ufshcd_wl_suspend
			down(&hba->host_sem) //hold host_sem

2. Someone triggers shutdown due to low battery during suspend transition.
The shutdown flow will hold device lock and execute shutdown callback for given devices
in device_shutdown(). The shutdown callback of ufshcd_wl device - ufshcd_wl_shutdown()
will hold ufshcd_wl's device lock and blocked to down hba->host_sem unfortunately.
Here is the blocked trace of shutdown thread:
__switch_to+0x174/0x338
__schedule+0x608/0x9f0
schedule+0x7c/0xe8
schedule_timeout+0x44/0x1c8
__down_common+0xbc/0x238
__down+0x18/0x28
down+0x50/0x54
ufshcd_wl_shutdown+0x28/0xb4   //blocked to down host_sem
device_shutdown+0x1a0/0x254    //get device lock
kernel_power_off+0x3c/0xf0
power_misc_routine_thread+0x814/0x970
kthread+0x104/0x1d4
ret_from_fork+0x10/0x20

3. Meanwhile, the suspend-to-idle transition is aborted by a wakeup source and go to handle
resume works, the resume work will be blocked in holding ufshcd_wl device lock forever.
Here is the blocked trace of resume work:
__switch_to+0x174/0x338
__schedule+0x608/0x9f0
schedule+0x7c/0xe8
schedule_preempt_disabled+0x24/0x40
__mutex_lock+0x408/0xdac
__mutex_lock_slowpath+0x14/0x24
mutex_lock+0x40/0xec
__device_resume+0x50/0x360    //blocked in holding device lock, deadlock!
async_resume+0x24/0x3c
async_run_entry_fn+0x44/0x118
process_one_work+0x1e4/0x43c
worker_thread+0x25c/0x430
kthread+0x104/0x1d4
ret_from_fork+0x10/0x20

Fix the deadlock issue by using atomic operation instead of sleep lock for shutting_down state check.

Signed-off-by: qiwu.chen <qiwu.chen@transsion.com>
---
 drivers/ufs/core/ufshcd-priv.h |  2 +-
 drivers/ufs/core/ufshcd.c      | 21 +++++++++------------
 include/ufs/ufshcd.h           |  2 +-
 3 files changed, 11 insertions(+), 14 deletions(-)
diff mbox series

Patch

diff --git a/drivers/ufs/core/ufshcd-priv.h b/drivers/ufs/core/ufshcd-priv.h
index 786f20ef2238..76d323de42f9 100644
--- a/drivers/ufs/core/ufshcd-priv.h
+++ b/drivers/ufs/core/ufshcd-priv.h
@@ -8,7 +8,7 @@ 
 
 static inline bool ufshcd_is_user_access_allowed(struct ufs_hba *hba)
 {
-	return !hba->shutting_down;
+	return !atomic_read(&hba->shutting_down);
 }
 
 void ufshcd_schedule_eh_work(struct ufs_hba *hba);
diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c
index 3094f3c89e82..b0f5152e5b04 100644
--- a/drivers/ufs/core/ufshcd.c
+++ b/drivers/ufs/core/ufshcd.c
@@ -1729,12 +1729,10 @@  static ssize_t ufshcd_clkscale_enable_store(struct device *dev,
 	if (kstrtou32(buf, 0, &value))
 		return -EINVAL;
 
-	down(&hba->host_sem);
-	if (!ufshcd_is_user_access_allowed(hba)) {
-		err = -EBUSY;
-		goto out;
-	}
+	if (!ufshcd_is_user_access_allowed(hba))
+		return -EBUSY;
 
+	down(&hba->host_sem);
 	value = !!value;
 	if (value == hba->clk_scaling.is_enabled)
 		goto out;
@@ -6416,8 +6414,7 @@  static void ufshcd_err_handling_unprepare(struct ufs_hba *hba)
 
 static inline bool ufshcd_err_handling_should_stop(struct ufs_hba *hba)
 {
-	return (!hba->is_powered || hba->shutting_down ||
-		!hba->ufs_device_wlun ||
+	return (!hba->is_powered || !hba->ufs_device_wlun ||
 		hba->ufshcd_state == UFSHCD_STATE_ERROR ||
 		(!(hba->saved_err || hba->saved_uic_err || hba->force_reset ||
 		   ufshcd_is_link_broken(hba))));
@@ -6541,10 +6538,13 @@  static void ufshcd_err_handler(struct work_struct *work)
 	dev_info(hba->dev,
 		 "%s started; HBA state %s; powered %d; shutting down %d; saved_err = %d; saved_uic_err = %d; force_reset = %d%s\n",
 		 __func__, ufshcd_state_name[hba->ufshcd_state],
-		 hba->is_powered, hba->shutting_down, hba->saved_err,
+		 hba->is_powered, atomic_read(&hba->shutting_down), hba->saved_err,
 		 hba->saved_uic_err, hba->force_reset,
 		 ufshcd_is_link_broken(hba) ? "; link is broken" : "");
 
+	if (!ufshcd_is_user_access_allowed(hba))
+		return;
+
 	down(&hba->host_sem);
 	spin_lock_irqsave(hba->host->host_lock, flags);
 	if (ufshcd_err_handling_should_stop(hba)) {
@@ -10194,10 +10194,7 @@  static void ufshcd_wl_shutdown(struct device *dev)
 	struct scsi_device *sdev = to_scsi_device(dev);
 	struct ufs_hba *hba = shost_priv(sdev->host);
 
-	down(&hba->host_sem);
-	hba->shutting_down = true;
-	up(&hba->host_sem);
-
+	atomic_set(&hba->shutting_down, 1);
 	/* Turn on everything while shutting down */
 	ufshcd_rpm_get_sync(hba);
 	scsi_device_quiesce(sdev);
diff --git a/include/ufs/ufshcd.h b/include/ufs/ufshcd.h
index 74e5b9960c54..db38141278c7 100644
--- a/include/ufs/ufshcd.h
+++ b/include/ufs/ufshcd.h
@@ -1034,7 +1034,7 @@  struct ufs_hba {
 	u16 ee_usr_mask;
 	struct mutex ee_ctrl_mutex;
 	bool is_powered;
-	bool shutting_down;
+	atomic_t shutting_down;
 	struct semaphore host_sem;
 
 	/* Work Queues */