[RFC,4/4] ufs: Make host controller state change handling more systematic

Message ID 20210619005228.28569-5-bvanassche@acm.org
State New
Headers show
Series
  • UFS patches for Linux kernel v5.14
Related show

Commit Message

Bart Van Assche June 19, 2021, 12:52 a.m.
Introduce a function that reports whether or not a controller state change
is allowed instead of duplicating these checks in every context that
changes the host controller state. This patch includes a behavior change:
ufshcd_link_recovery() no longer can change the host controller state from
UFSHCD_STATE_ERROR into UFSHCD_STATE_RESET.

Cc: Can Guo <cang@codeaurora.org>
Signed-off-by: Bart Van Assche <bvanassche@acm.org>
---
 drivers/scsi/ufs/ufshcd.c | 59 ++++++++++++++++++++++++---------------
 1 file changed, 36 insertions(+), 23 deletions(-)

Comments

Avri Altman June 21, 2021, 8:55 a.m. | #1
> Introduce a function that reports whether or not a controller state change

> is allowed instead of duplicating these checks in every context that

> changes the host controller state. This patch includes a behavior change:

> ufshcd_link_recovery() no longer can change the host controller state from

> UFSHCD_STATE_ERROR into UFSHCD_STATE_RESET.

> 

> Cc: Can Guo <cang@codeaurora.org>

> Signed-off-by: Bart Van Assche <bvanassche@acm.org>

> ---

>  drivers/scsi/ufs/ufshcd.c | 59 ++++++++++++++++++++++++---------------

>  1 file changed, 36 insertions(+), 23 deletions(-)

> 

> diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c

> index c213daec20f7..a10c8ec28468 100644

> --- a/drivers/scsi/ufs/ufshcd.c

> +++ b/drivers/scsi/ufs/ufshcd.c

> @@ -4070,16 +4070,32 @@ static int ufshcd_uic_change_pwr_mode(struct

> ufs_hba *hba, u8 mode)

>         return ret;

>  }

> 

> +static bool ufshcd_set_state(struct ufs_hba *hba, enum ufshcd_state

> new_state)

> +{

> +       lockdep_assert_held(hba->host->host_lock);

> +

> +       if (old_state != UFSHCD_STATE_ERROR || new_state ==

> UFSHCD_STATE_ERROR)

old_state ?

Thanks,
Avri

> +               hba->ufshcd_state = new_state;

> +               return true;

> +       }

> +       return false;

> +}

> +

>  int ufshcd_link_recovery(struct ufs_hba *hba)

>  {

>         int ret;

>         unsigned long flags;

> +       bool proceed;

> 

>         spin_lock_irqsave(hba->host->host_lock, flags);

> -       hba->ufshcd_state = UFSHCD_STATE_RESET;

> -       ufshcd_set_eh_in_progress(hba);

> +       proceed = ufshcd_set_state(hba, UFSHCD_STATE_RESET);

> +       if (proceed)

> +               ufshcd_set_eh_in_progress(hba);

>         spin_unlock_irqrestore(hba->host->host_lock, flags);

> 

> +       if (!proceed)

> +               return -EPERM;

> +

>         /* Reset the attached device */

>         ufshcd_device_reset(hba);

> 

> @@ -4087,7 +4103,7 @@ int ufshcd_link_recovery(struct ufs_hba *hba)

> 

>         spin_lock_irqsave(hba->host->host_lock, flags);

>         if (ret)

> -               hba->ufshcd_state = UFSHCD_STATE_ERROR;

> +               ufshcd_set_state(hba, UFSHCD_STATE_ERROR);

>         ufshcd_clear_eh_in_progress(hba);

>         spin_unlock_irqrestore(hba->host->host_lock, flags);

> 

> @@ -5856,15 +5872,17 @@ static inline bool

> ufshcd_is_saved_err_fatal(struct ufs_hba *hba)

>  /* host lock must be held before calling this func */

>  static inline void ufshcd_schedule_eh_work(struct ufs_hba *hba)

>  {

> -       /* handle fatal errors only when link is not in error state */

> -       if (hba->ufshcd_state != UFSHCD_STATE_ERROR) {

> -               if (hba->force_reset || ufshcd_is_link_broken(hba) ||

> -                   ufshcd_is_saved_err_fatal(hba))

> -                       hba->ufshcd_state = UFSHCD_STATE_EH_SCHEDULED_FATAL;

> -               else

> -                       hba->ufshcd_state =

> UFSHCD_STATE_EH_SCHEDULED_NON_FATAL;

> +       bool proceed;

> +

> +       if (hba->force_reset || ufshcd_is_link_broken(hba) ||

> +           ufshcd_is_saved_err_fatal(hba))

> +               proceed = ufshcd_set_state(hba,

> +                                          UFSHCD_STATE_EH_SCHEDULED_FATAL);

> +       else

> +               proceed = ufshcd_set_state(hba,

> +                                          UFSHCD_STATE_EH_SCHEDULED_NON_FATAL);

> +       if (proceed)

>                 queue_work(hba->eh_wq, &hba->eh_work);

> -       }

>  }

> 

>  static void ufshcd_clk_scaling_allow(struct ufs_hba *hba, bool allow)

> @@ -6017,8 +6035,7 @@ static void ufshcd_err_handler(struct work_struct

> *work)

>         down(&hba->host_sem);

>         spin_lock_irqsave(hba->host->host_lock, flags);

>         if (ufshcd_err_handling_should_stop(hba)) {

> -               if (hba->ufshcd_state != UFSHCD_STATE_ERROR)

> -                       hba->ufshcd_state = UFSHCD_STATE_OPERATIONAL;

> +               ufshcd_set_state(hba, UFSHCD_STATE_OPERATIONAL);

>                 spin_unlock_irqrestore(hba->host->host_lock, flags);

>                 up(&hba->host_sem);

>                 return;

> @@ -6029,8 +6046,7 @@ static void ufshcd_err_handler(struct work_struct

> *work)

>         /* Complete requests that have door-bell cleared by h/w */

>         ufshcd_complete_requests(hba);

>         spin_lock_irqsave(hba->host->host_lock, flags);

> -       if (hba->ufshcd_state != UFSHCD_STATE_ERROR)

> -               hba->ufshcd_state = UFSHCD_STATE_RESET;

> +       ufshcd_set_state(hba, UFSHCD_STATE_RESET);

>         /*

>          * A full reset and restore might have happened after preparation

>          * is finished, double check whether we should stop.

> @@ -6163,8 +6179,7 @@ static void ufshcd_err_handler(struct work_struct

> *work)

> 

>  skip_err_handling:

>         if (!needs_reset) {

> -               if (hba->ufshcd_state == UFSHCD_STATE_RESET)

> -                       hba->ufshcd_state = UFSHCD_STATE_OPERATIONAL;

> +               ufshcd_set_state(hba, UFSHCD_STATE_OPERATIONAL);

>                 if (hba->saved_err || hba->saved_uic_err)

>                         dev_err_ratelimited(hba->dev, "%s: exit: saved_err 0x%x

> saved_uic_err 0x%x",

>                             __func__, hba->saved_err, hba->saved_uic_err);

> @@ -7135,7 +7150,7 @@ static int ufshcd_reset_and_restore(struct ufs_hba

> *hba)

>          */

>         scsi_report_bus_reset(hba->host, 0);

>         if (err) {

> -               hba->ufshcd_state = UFSHCD_STATE_ERROR;

> +               ufshcd_set_state(hba, UFSHCD_STATE_ERROR);

>                 hba->saved_err |= saved_err;

>                 hba->saved_uic_err |= saved_uic_err;

>         }

> @@ -7951,7 +7966,7 @@ static int ufshcd_probe_hba(struct ufs_hba *hba,

> bool init_dev_params)

>         unsigned long flags;

>         ktime_t start = ktime_get();

> 

> -       hba->ufshcd_state = UFSHCD_STATE_RESET;

> +       ufshcd_set_state(hba, UFSHCD_STATE_RESET);

> 

>         ret = ufshcd_link_startup(hba);

>         if (ret)

> @@ -8024,10 +8039,8 @@ static int ufshcd_probe_hba(struct ufs_hba *hba,

> bool init_dev_params)

> 

>  out:

>         spin_lock_irqsave(hba->host->host_lock, flags);

> -       if (ret)

> -               hba->ufshcd_state = UFSHCD_STATE_ERROR;

> -       else if (hba->ufshcd_state == UFSHCD_STATE_RESET)

> -               hba->ufshcd_state = UFSHCD_STATE_OPERATIONAL;

> +       ufshcd_set_state(hba, ret ? UFSHCD_STATE_ERROR :

> +                        UFSHCD_STATE_OPERATIONAL);

>         spin_unlock_irqrestore(hba->host->host_lock, flags);

> 

>         trace_ufshcd_init(dev_name(hba->dev), ret,
Bart Van Assche June 21, 2021, 5:38 p.m. | #2
On 6/21/21 1:55 AM, Avri Altman wrote:
>> +static bool ufshcd_set_state(struct ufs_hba *hba, enum ufshcd_state

>> new_state)

>> +{

>> +       lockdep_assert_held(hba->host->host_lock);

>> +

>> +       if (old_state != UFSHCD_STATE_ERROR || new_state ==

>> UFSHCD_STATE_ERROR)

>

> old_state ?


Thanks for having taken a look. This function should look as follows:

static bool ufshcd_set_state(struct ufs_hba *hba, enum ufshcd_state new_state)
{
	lockdep_assert_held(hba->host->host_lock);

	if (hba->ufshcd_state != UFSHCD_STATE_ERROR ||
	    new_state == UFSHCD_STATE_ERROR) {
		hba->ufshcd_state = new_state;
		return true;
	}
	return false;
}
Can Guo June 23, 2021, 8:08 a.m. | #3
Hi Bart,

On 2021-06-19 08:52, Bart Van Assche wrote:
> Introduce a function that reports whether or not a controller state 

> change

> is allowed instead of duplicating these checks in every context that

> changes the host controller state. This patch includes a behavior 

> change:

> ufshcd_link_recovery() no longer can change the host controller state 

> from

> UFSHCD_STATE_ERROR into UFSHCD_STATE_RESET.

> 

> Cc: Can Guo <cang@codeaurora.org>

> Signed-off-by: Bart Van Assche <bvanassche@acm.org>

> ---

>  drivers/scsi/ufs/ufshcd.c | 59 ++++++++++++++++++++++++---------------

>  1 file changed, 36 insertions(+), 23 deletions(-)

> 

> diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c

> index c213daec20f7..a10c8ec28468 100644

> --- a/drivers/scsi/ufs/ufshcd.c

> +++ b/drivers/scsi/ufs/ufshcd.c

> @@ -4070,16 +4070,32 @@ static int ufshcd_uic_change_pwr_mode(struct

> ufs_hba *hba, u8 mode)

>  	return ret;

>  }

> 

> +static bool ufshcd_set_state(struct ufs_hba *hba, enum ufshcd_state 

> new_state)

> +{

> +	lockdep_assert_held(hba->host->host_lock);

> +

> +	if (old_state != UFSHCD_STATE_ERROR || new_state == 

> UFSHCD_STATE_ERROR)

> +		hba->ufshcd_state = new_state;

> +		return true;

> +	}

> +	return false;

> +}

> +

>  int ufshcd_link_recovery(struct ufs_hba *hba)

>  {

>  	int ret;

>  	unsigned long flags;

> +	bool proceed;

> 

>  	spin_lock_irqsave(hba->host->host_lock, flags);

> -	hba->ufshcd_state = UFSHCD_STATE_RESET;

> -	ufshcd_set_eh_in_progress(hba);

> +	proceed = ufshcd_set_state(hba, UFSHCD_STATE_RESET);

> +	if (proceed)

> +		ufshcd_set_eh_in_progress(hba);

>  	spin_unlock_irqrestore(hba->host->host_lock, flags);

> 

> +	if (!proceed)

> +		return -EPERM;

> +

>  	/* Reset the attached device */

>  	ufshcd_device_reset(hba);

> 

> @@ -4087,7 +4103,7 @@ int ufshcd_link_recovery(struct ufs_hba *hba)


I want to remove this function since it is only used by ufs-mediatek.c
through ufshcd_vops_resume(). Althrough it does not race with error
handling as of now, by removing it we can reduce one more caller of
full reset. I will ask Stanley help comment on this.

> 

>  	spin_lock_irqsave(hba->host->host_lock, flags);

>  	if (ret)

> -		hba->ufshcd_state = UFSHCD_STATE_ERROR;

> +		ufshcd_set_state(hba, UFSHCD_STATE_ERROR);

>  	ufshcd_clear_eh_in_progress(hba);

>  	spin_unlock_irqrestore(hba->host->host_lock, flags);

> 

> @@ -5856,15 +5872,17 @@ static inline bool

> ufshcd_is_saved_err_fatal(struct ufs_hba *hba)

>  /* host lock must be held before calling this func */

>  static inline void ufshcd_schedule_eh_work(struct ufs_hba *hba)

>  {

> -	/* handle fatal errors only when link is not in error state */

> -	if (hba->ufshcd_state != UFSHCD_STATE_ERROR) {

> -		if (hba->force_reset || ufshcd_is_link_broken(hba) ||

> -		    ufshcd_is_saved_err_fatal(hba))

> -			hba->ufshcd_state = UFSHCD_STATE_EH_SCHEDULED_FATAL;

> -		else

> -			hba->ufshcd_state = UFSHCD_STATE_EH_SCHEDULED_NON_FATAL;

> +	bool proceed;

> +

> +	if (hba->force_reset || ufshcd_is_link_broken(hba) ||

> +	    ufshcd_is_saved_err_fatal(hba))

> +		proceed = ufshcd_set_state(hba,

> +					   UFSHCD_STATE_EH_SCHEDULED_FATAL);

> +	else

> +		proceed = ufshcd_set_state(hba,

> +					   UFSHCD_STATE_EH_SCHEDULED_NON_FATAL);

> +	if (proceed)

>  		queue_work(hba->eh_wq, &hba->eh_work);

> -	}

>  }

> 

>  static void ufshcd_clk_scaling_allow(struct ufs_hba *hba, bool allow)

> @@ -6017,8 +6035,7 @@ static void ufshcd_err_handler(struct work_struct 

> *work)

>  	down(&hba->host_sem);

>  	spin_lock_irqsave(hba->host->host_lock, flags);

>  	if (ufshcd_err_handling_should_stop(hba)) {

> -		if (hba->ufshcd_state != UFSHCD_STATE_ERROR)

> -			hba->ufshcd_state = UFSHCD_STATE_OPERATIONAL;

> +		ufshcd_set_state(hba, UFSHCD_STATE_OPERATIONAL);

>  		spin_unlock_irqrestore(hba->host->host_lock, flags);

>  		up(&hba->host_sem);

>  		return;

> @@ -6029,8 +6046,7 @@ static void ufshcd_err_handler(struct work_struct 

> *work)

>  	/* Complete requests that have door-bell cleared by h/w */

>  	ufshcd_complete_requests(hba);

>  	spin_lock_irqsave(hba->host->host_lock, flags);

> -	if (hba->ufshcd_state != UFSHCD_STATE_ERROR)

> -		hba->ufshcd_state = UFSHCD_STATE_RESET;

> +	ufshcd_set_state(hba, UFSHCD_STATE_RESET);

>  	/*

>  	 * A full reset and restore might have happened after preparation

>  	 * is finished, double check whether we should stop.

> @@ -6163,8 +6179,7 @@ static void ufshcd_err_handler(struct work_struct 

> *work)

> 

>  skip_err_handling:

>  	if (!needs_reset) {

> -		if (hba->ufshcd_state == UFSHCD_STATE_RESET)

> -			hba->ufshcd_state = UFSHCD_STATE_OPERATIONAL;

> +		ufshcd_set_state(hba, UFSHCD_STATE_OPERATIONAL);

>  		if (hba->saved_err || hba->saved_uic_err)

>  			dev_err_ratelimited(hba->dev, "%s: exit: saved_err 0x%x 

> saved_uic_err 0x%x",

>  			    __func__, hba->saved_err, hba->saved_uic_err);

> @@ -7135,7 +7150,7 @@ static int ufshcd_reset_and_restore(struct 

> ufs_hba *hba)

>  	 */

>  	scsi_report_bus_reset(hba->host, 0);

>  	if (err) {

> -		hba->ufshcd_state = UFSHCD_STATE_ERROR;

> +		ufshcd_set_state(hba, UFSHCD_STATE_ERROR);

>  		hba->saved_err |= saved_err;

>  		hba->saved_uic_err |= saved_uic_err;

>  	}

> @@ -7951,7 +7966,7 @@ static int ufshcd_probe_hba(struct ufs_hba *hba,

> bool init_dev_params)

>  	unsigned long flags;

>  	ktime_t start = ktime_get();

> 

> -	hba->ufshcd_state = UFSHCD_STATE_RESET;

> +	ufshcd_set_state(hba, UFSHCD_STATE_RESET);


1. We don't hold host lock here, but ufshcd_set_state() is expecting it.

2. Here we need to override the state to RESET anyways even state is 
ERROR,
because in ufshcd_reset_and_restore(), ufshcd_probe_hba() can be called 
5
times (retries == 5, see below code snippet). Otherwise, say on the 2nd
retry of ufshcd_probe_hba(), even if ufshcd_probe_hba() pass through 
without
an error (ret == 0), the ufshcd_set_state() (down below) cannot set the
state back to OPERATIONAL.

7042 	do {
7043 		/* Reset the attached device */
7044 		ufshcd_vops_device_reset(hba);
7045
7046 		err = ufshcd_host_reset_and_restore(hba);
7047 	} while (err && --retries);

Thanks,

Can Guo.

> 

>  	ret = ufshcd_link_startup(hba);

>  	if (ret)

> @@ -8024,10 +8039,8 @@ static int ufshcd_probe_hba(struct ufs_hba

> *hba, bool init_dev_params)

> 

>  out:

>  	spin_lock_irqsave(hba->host->host_lock, flags);

> -	if (ret)

> -		hba->ufshcd_state = UFSHCD_STATE_ERROR;

> -	else if (hba->ufshcd_state == UFSHCD_STATE_RESET)

> -		hba->ufshcd_state = UFSHCD_STATE_OPERATIONAL;

> +	ufshcd_set_state(hba, ret ? UFSHCD_STATE_ERROR :

> +			 UFSHCD_STATE_OPERATIONAL);

>  	spin_unlock_irqrestore(hba->host->host_lock, flags);

> 

>  	trace_ufshcd_init(dev_name(hba->dev), ret,
Can Guo June 23, 2021, 12:02 p.m. | #4
Hi Bart,

On 2021-06-19 08:52, Bart Van Assche wrote:
> Introduce a function that reports whether or not a controller state 

> change

> is allowed instead of duplicating these checks in every context that

> changes the host controller state. This patch includes a behavior 

> change:

> ufshcd_link_recovery() no longer can change the host controller state 

> from

> UFSHCD_STATE_ERROR into UFSHCD_STATE_RESET.

> 

> Cc: Can Guo <cang@codeaurora.org>

> Signed-off-by: Bart Van Assche <bvanassche@acm.org>

> ---

>  drivers/scsi/ufs/ufshcd.c | 59 ++++++++++++++++++++++++---------------

>  1 file changed, 36 insertions(+), 23 deletions(-)

> 

> diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c

> index c213daec20f7..a10c8ec28468 100644

> --- a/drivers/scsi/ufs/ufshcd.c

> +++ b/drivers/scsi/ufs/ufshcd.c

> @@ -4070,16 +4070,32 @@ static int ufshcd_uic_change_pwr_mode(struct

> ufs_hba *hba, u8 mode)

>  	return ret;

>  }

> 

> +static bool ufshcd_set_state(struct ufs_hba *hba, enum ufshcd_state 

> new_state)

> +{

> +	lockdep_assert_held(hba->host->host_lock);

> +

> +	if (old_state != UFSHCD_STATE_ERROR || new_state == 

> UFSHCD_STATE_ERROR)

> +		hba->ufshcd_state = new_state;

> +		return true;

> +	}

> +	return false;

> +}

> +

>  int ufshcd_link_recovery(struct ufs_hba *hba)

>  {

>  	int ret;

>  	unsigned long flags;

> +	bool proceed;

> 

>  	spin_lock_irqsave(hba->host->host_lock, flags);

> -	hba->ufshcd_state = UFSHCD_STATE_RESET;

> -	ufshcd_set_eh_in_progress(hba);

> +	proceed = ufshcd_set_state(hba, UFSHCD_STATE_RESET);

> +	if (proceed)

> +		ufshcd_set_eh_in_progress(hba);

>  	spin_unlock_irqrestore(hba->host->host_lock, flags);

> 

> +	if (!proceed)

> +		return -EPERM;

> +

>  	/* Reset the attached device */

>  	ufshcd_device_reset(hba);

> 

> @@ -4087,7 +4103,7 @@ int ufshcd_link_recovery(struct ufs_hba *hba)

> 

>  	spin_lock_irqsave(hba->host->host_lock, flags);

>  	if (ret)

> -		hba->ufshcd_state = UFSHCD_STATE_ERROR;

> +		ufshcd_set_state(hba, UFSHCD_STATE_ERROR);

>  	ufshcd_clear_eh_in_progress(hba);

>  	spin_unlock_irqrestore(hba->host->host_lock, flags);

> 

> @@ -5856,15 +5872,17 @@ static inline bool

> ufshcd_is_saved_err_fatal(struct ufs_hba *hba)

>  /* host lock must be held before calling this func */

>  static inline void ufshcd_schedule_eh_work(struct ufs_hba *hba)

>  {

> -	/* handle fatal errors only when link is not in error state */

> -	if (hba->ufshcd_state != UFSHCD_STATE_ERROR) {

> -		if (hba->force_reset || ufshcd_is_link_broken(hba) ||

> -		    ufshcd_is_saved_err_fatal(hba))

> -			hba->ufshcd_state = UFSHCD_STATE_EH_SCHEDULED_FATAL;

> -		else

> -			hba->ufshcd_state = UFSHCD_STATE_EH_SCHEDULED_NON_FATAL;

> +	bool proceed;

> +

> +	if (hba->force_reset || ufshcd_is_link_broken(hba) ||

> +	    ufshcd_is_saved_err_fatal(hba))

> +		proceed = ufshcd_set_state(hba,

> +					   UFSHCD_STATE_EH_SCHEDULED_FATAL);

> +	else

> +		proceed = ufshcd_set_state(hba,

> +					   UFSHCD_STATE_EH_SCHEDULED_NON_FATAL);

> +	if (proceed)

>  		queue_work(hba->eh_wq, &hba->eh_work);

> -	}

>  }

> 

>  static void ufshcd_clk_scaling_allow(struct ufs_hba *hba, bool allow)

> @@ -6017,8 +6035,7 @@ static void ufshcd_err_handler(struct work_struct 

> *work)

>  	down(&hba->host_sem);

>  	spin_lock_irqsave(hba->host->host_lock, flags);

>  	if (ufshcd_err_handling_should_stop(hba)) {

> -		if (hba->ufshcd_state != UFSHCD_STATE_ERROR)

> -			hba->ufshcd_state = UFSHCD_STATE_OPERATIONAL;

> +		ufshcd_set_state(hba, UFSHCD_STATE_OPERATIONAL);

>  		spin_unlock_irqrestore(hba->host->host_lock, flags);

>  		up(&hba->host_sem);

>  		return;

> @@ -6029,8 +6046,7 @@ static void ufshcd_err_handler(struct work_struct 

> *work)

>  	/* Complete requests that have door-bell cleared by h/w */

>  	ufshcd_complete_requests(hba);

>  	spin_lock_irqsave(hba->host->host_lock, flags);

> -	if (hba->ufshcd_state != UFSHCD_STATE_ERROR)

> -		hba->ufshcd_state = UFSHCD_STATE_RESET;

> +	ufshcd_set_state(hba, UFSHCD_STATE_RESET);

>  	/*

>  	 * A full reset and restore might have happened after preparation

>  	 * is finished, double check whether we should stop.

> @@ -6163,8 +6179,7 @@ static void ufshcd_err_handler(struct work_struct 

> *work)

> 

>  skip_err_handling:

>  	if (!needs_reset) {

> -		if (hba->ufshcd_state == UFSHCD_STATE_RESET)

> -			hba->ufshcd_state = UFSHCD_STATE_OPERATIONAL;


State transition to OPERATIONAL should only be from RESET, this
is to make sure that we don't miss any new error (fatal or non-fatal)
occurs while error handling is running.

> +		ufshcd_set_state(hba, UFSHCD_STATE_OPERATIONAL);

>  		if (hba->saved_err || hba->saved_uic_err)

>  			dev_err_ratelimited(hba->dev, "%s: exit: saved_err 0x%x 

> saved_uic_err 0x%x",

>  			    __func__, hba->saved_err, hba->saved_uic_err);

> @@ -7135,7 +7150,7 @@ static int ufshcd_reset_and_restore(struct 

> ufs_hba *hba)

>  	 */

>  	scsi_report_bus_reset(hba->host, 0);

>  	if (err) {

> -		hba->ufshcd_state = UFSHCD_STATE_ERROR;

> +		ufshcd_set_state(hba, UFSHCD_STATE_ERROR);

>  		hba->saved_err |= saved_err;

>  		hba->saved_uic_err |= saved_uic_err;

>  	}

> @@ -7951,7 +7966,7 @@ static int ufshcd_probe_hba(struct ufs_hba *hba,

> bool init_dev_params)

>  	unsigned long flags;

>  	ktime_t start = ktime_get();

> 

> -	hba->ufshcd_state = UFSHCD_STATE_RESET;

> +	ufshcd_set_state(hba, UFSHCD_STATE_RESET);

> 

>  	ret = ufshcd_link_startup(hba);

>  	if (ret)

> @@ -8024,10 +8039,8 @@ static int ufshcd_probe_hba(struct ufs_hba

> *hba, bool init_dev_params)

> 

>  out:

>  	spin_lock_irqsave(hba->host->host_lock, flags);

> -	if (ret)

> -		hba->ufshcd_state = UFSHCD_STATE_ERROR;

> -	else if (hba->ufshcd_state == UFSHCD_STATE_RESET)

> -		hba->ufshcd_state = UFSHCD_STATE_OPERATIONAL;


State transition to OPERATIONAL should only be from RESET, this
is to make sure that we don't miss any new error (fatal or non-fatal)
occurs during re-probe.

Thanks,

Can Guo.

> +	ufshcd_set_state(hba, ret ? UFSHCD_STATE_ERROR :

> +			 UFSHCD_STATE_OPERATIONAL);

>  	spin_unlock_irqrestore(hba->host->host_lock, flags);

> 

>  	trace_ufshcd_init(dev_name(hba->dev), ret,

Patch

diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c
index c213daec20f7..a10c8ec28468 100644
--- a/drivers/scsi/ufs/ufshcd.c
+++ b/drivers/scsi/ufs/ufshcd.c
@@ -4070,16 +4070,32 @@  static int ufshcd_uic_change_pwr_mode(struct ufs_hba *hba, u8 mode)
 	return ret;
 }
 
+static bool ufshcd_set_state(struct ufs_hba *hba, enum ufshcd_state new_state)
+{
+	lockdep_assert_held(hba->host->host_lock);
+
+	if (old_state != UFSHCD_STATE_ERROR || new_state == UFSHCD_STATE_ERROR)
+		hba->ufshcd_state = new_state;
+		return true;
+	}
+	return false;
+}
+
 int ufshcd_link_recovery(struct ufs_hba *hba)
 {
 	int ret;
 	unsigned long flags;
+	bool proceed;
 
 	spin_lock_irqsave(hba->host->host_lock, flags);
-	hba->ufshcd_state = UFSHCD_STATE_RESET;
-	ufshcd_set_eh_in_progress(hba);
+	proceed = ufshcd_set_state(hba, UFSHCD_STATE_RESET);
+	if (proceed)
+		ufshcd_set_eh_in_progress(hba);
 	spin_unlock_irqrestore(hba->host->host_lock, flags);
 
+	if (!proceed)
+		return -EPERM;
+
 	/* Reset the attached device */
 	ufshcd_device_reset(hba);
 
@@ -4087,7 +4103,7 @@  int ufshcd_link_recovery(struct ufs_hba *hba)
 
 	spin_lock_irqsave(hba->host->host_lock, flags);
 	if (ret)
-		hba->ufshcd_state = UFSHCD_STATE_ERROR;
+		ufshcd_set_state(hba, UFSHCD_STATE_ERROR);
 	ufshcd_clear_eh_in_progress(hba);
 	spin_unlock_irqrestore(hba->host->host_lock, flags);
 
@@ -5856,15 +5872,17 @@  static inline bool ufshcd_is_saved_err_fatal(struct ufs_hba *hba)
 /* host lock must be held before calling this func */
 static inline void ufshcd_schedule_eh_work(struct ufs_hba *hba)
 {
-	/* handle fatal errors only when link is not in error state */
-	if (hba->ufshcd_state != UFSHCD_STATE_ERROR) {
-		if (hba->force_reset || ufshcd_is_link_broken(hba) ||
-		    ufshcd_is_saved_err_fatal(hba))
-			hba->ufshcd_state = UFSHCD_STATE_EH_SCHEDULED_FATAL;
-		else
-			hba->ufshcd_state = UFSHCD_STATE_EH_SCHEDULED_NON_FATAL;
+	bool proceed;
+
+	if (hba->force_reset || ufshcd_is_link_broken(hba) ||
+	    ufshcd_is_saved_err_fatal(hba))
+		proceed = ufshcd_set_state(hba,
+					   UFSHCD_STATE_EH_SCHEDULED_FATAL);
+	else
+		proceed = ufshcd_set_state(hba,
+					   UFSHCD_STATE_EH_SCHEDULED_NON_FATAL);
+	if (proceed)
 		queue_work(hba->eh_wq, &hba->eh_work);
-	}
 }
 
 static void ufshcd_clk_scaling_allow(struct ufs_hba *hba, bool allow)
@@ -6017,8 +6035,7 @@  static void ufshcd_err_handler(struct work_struct *work)
 	down(&hba->host_sem);
 	spin_lock_irqsave(hba->host->host_lock, flags);
 	if (ufshcd_err_handling_should_stop(hba)) {
-		if (hba->ufshcd_state != UFSHCD_STATE_ERROR)
-			hba->ufshcd_state = UFSHCD_STATE_OPERATIONAL;
+		ufshcd_set_state(hba, UFSHCD_STATE_OPERATIONAL);
 		spin_unlock_irqrestore(hba->host->host_lock, flags);
 		up(&hba->host_sem);
 		return;
@@ -6029,8 +6046,7 @@  static void ufshcd_err_handler(struct work_struct *work)
 	/* Complete requests that have door-bell cleared by h/w */
 	ufshcd_complete_requests(hba);
 	spin_lock_irqsave(hba->host->host_lock, flags);
-	if (hba->ufshcd_state != UFSHCD_STATE_ERROR)
-		hba->ufshcd_state = UFSHCD_STATE_RESET;
+	ufshcd_set_state(hba, UFSHCD_STATE_RESET);
 	/*
 	 * A full reset and restore might have happened after preparation
 	 * is finished, double check whether we should stop.
@@ -6163,8 +6179,7 @@  static void ufshcd_err_handler(struct work_struct *work)
 
 skip_err_handling:
 	if (!needs_reset) {
-		if (hba->ufshcd_state == UFSHCD_STATE_RESET)
-			hba->ufshcd_state = UFSHCD_STATE_OPERATIONAL;
+		ufshcd_set_state(hba, UFSHCD_STATE_OPERATIONAL);
 		if (hba->saved_err || hba->saved_uic_err)
 			dev_err_ratelimited(hba->dev, "%s: exit: saved_err 0x%x saved_uic_err 0x%x",
 			    __func__, hba->saved_err, hba->saved_uic_err);
@@ -7135,7 +7150,7 @@  static int ufshcd_reset_and_restore(struct ufs_hba *hba)
 	 */
 	scsi_report_bus_reset(hba->host, 0);
 	if (err) {
-		hba->ufshcd_state = UFSHCD_STATE_ERROR;
+		ufshcd_set_state(hba, UFSHCD_STATE_ERROR);
 		hba->saved_err |= saved_err;
 		hba->saved_uic_err |= saved_uic_err;
 	}
@@ -7951,7 +7966,7 @@  static int ufshcd_probe_hba(struct ufs_hba *hba, bool init_dev_params)
 	unsigned long flags;
 	ktime_t start = ktime_get();
 
-	hba->ufshcd_state = UFSHCD_STATE_RESET;
+	ufshcd_set_state(hba, UFSHCD_STATE_RESET);
 
 	ret = ufshcd_link_startup(hba);
 	if (ret)
@@ -8024,10 +8039,8 @@  static int ufshcd_probe_hba(struct ufs_hba *hba, bool init_dev_params)
 
 out:
 	spin_lock_irqsave(hba->host->host_lock, flags);
-	if (ret)
-		hba->ufshcd_state = UFSHCD_STATE_ERROR;
-	else if (hba->ufshcd_state == UFSHCD_STATE_RESET)
-		hba->ufshcd_state = UFSHCD_STATE_OPERATIONAL;
+	ufshcd_set_state(hba, ret ? UFSHCD_STATE_ERROR :
+			 UFSHCD_STATE_OPERATIONAL);
 	spin_unlock_irqrestore(hba->host->host_lock, flags);
 
 	trace_ufshcd_init(dev_name(hba->dev), ret,