diff mbox series

mmc: race condition between "sdcard hot plug out" and "system reboot"

Message ID 20240408103824.11476-1-Joe.Zhou@mediatek.com
State New
Headers show
Series mmc: race condition between "sdcard hot plug out" and "system reboot" | expand

Commit Message

Joe.Zhou April 8, 2024, 10:38 a.m. UTC
In mmc driver, a race condition may occur between "sdcard hot plug out" and "system reboot".
How it happen?

sdcard hot pulg out:                SyS_reboot:
CPU0                               CPU1
mmc_sd_detect()                    _mmc_sd_suspend
{                                  {

......
#Step1: detect SD card removed
if (err) {                          ......
    #Step2: host->card is NULL
    mmc_sd_remove(host);
                                    #Step3:_mmc_sd_suspend claimed host
                                    mmc_claim_host(host);
                                    #Step4: use host->card(NULL pointer)
                                    if (mmc_card_suspended(host->card))
                                    ......
                                    }
    mmc_claim_host(host);
    mmc_detach_bus(host);
 }
 ......
 }
we can prevent it occuring by add claim for "host->card = NULL" and add "host->card" validity check in mmc_sd_suspend.

Signed-off-by: Joe.Zhou <Joe.Zhou@mediatek.com>
---
 drivers/mmc/core/sd.c | 25 ++++++++++++++-----------
 1 file changed, 14 insertions(+), 11 deletions(-)

Comments

Ulf Hansson April 25, 2024, 9:49 a.m. UTC | #1
On Mon, 8 Apr 2024 at 12:38, Joe.Zhou <Joe.Zhou@mediatek.com> wrote:
>
> In mmc driver, a race condition may occur between "sdcard hot plug out" and "system reboot".
> How it happen?
>
> sdcard hot pulg out:                SyS_reboot:
> CPU0                               CPU1
> mmc_sd_detect()                    _mmc_sd_suspend
> {                                  {
>
> ......
> #Step1: detect SD card removed
> if (err) {                          ......
>     #Step2: host->card is NULL
>     mmc_sd_remove(host);
>                                     #Step3:_mmc_sd_suspend claimed host
>                                     mmc_claim_host(host);
>                                     #Step4: use host->card(NULL pointer)
>                                     if (mmc_card_suspended(host->card))
>                                     ......
>                                     }
>     mmc_claim_host(host);
>     mmc_detach_bus(host);
>  }
>  ......
>  }
> we can prevent it occuring by add claim for "host->card = NULL" and add "host->card" validity check in mmc_sd_suspend.
>
> Signed-off-by: Joe.Zhou <Joe.Zhou@mediatek.com>

Thanks for your patch!

Doesn't commit 66c915d09b94 ("mmc: core: Disable card detect during
shutdown") take care of this problem?

Kind regards
Uffe

> ---
>  drivers/mmc/core/sd.c | 25 ++++++++++++++-----------
>  1 file changed, 14 insertions(+), 11 deletions(-)
>
> diff --git a/drivers/mmc/core/sd.c b/drivers/mmc/core/sd.c
> index 1c8148cdda50..38c0b271283a 100644
> --- a/drivers/mmc/core/sd.c
> +++ b/drivers/mmc/core/sd.c
> @@ -1593,7 +1593,9 @@ static int mmc_sd_init_card(struct mmc_host *host, u32 ocr,
>  static void mmc_sd_remove(struct mmc_host *host)
>  {
>         mmc_remove_card(host->card);
> +       mmc_claim_host(host);
>         host->card = NULL;
> +       mmc_release_host(host);
>  }
>
>  /*
> @@ -1702,18 +1704,19 @@ static int _mmc_sd_suspend(struct mmc_host *host)
>         int err = 0;
>
>         mmc_claim_host(host);
> +       if (host->card) {
> +               if (mmc_card_suspended(card))
> +                       goto out;
>
> -       if (mmc_card_suspended(card))
> -               goto out;
> -
> -       if (sd_can_poweroff_notify(card))
> -               err = sd_poweroff_notify(card);
> -       else if (!mmc_host_is_spi(host))
> -               err = mmc_deselect_cards(host);
> +               if (sd_can_poweroff_notify(card))
> +                       err = sd_poweroff_notify(card);
> +               else if (!mmc_host_is_spi(host))
> +                       err = mmc_deselect_cards(host);
>
> -       if (!err) {
> -               mmc_power_off(host);
> -               mmc_card_set_suspended(card);
> +               if (!err) {
> +                       mmc_power_off(host);
> +                       mmc_card_set_suspended(card);
> +               }
>         }
>
>  out:
> @@ -1729,7 +1732,7 @@ static int mmc_sd_suspend(struct mmc_host *host)
>         int err;
>
>         err = _mmc_sd_suspend(host);
> -       if (!err) {
> +       if (!err && host->card) {
>                 pm_runtime_disable(&host->card->dev);
>                 pm_runtime_set_suspended(&host->card->dev);
>         }
> --
> 2.18.0
>
Joe.Zhou May 6, 2024, 3:36 a.m. UTC | #2
From: Joe Zhou <Joe.Zhou@mediatek.com>

> Thanks for your patch!

> Doesn't commit 66c915d09b94 ("mmc: core: Disable card detect during
> shutdown") take care of this problem?

> Kind regards
> Uffe


Dear Ulf,
     Thank you for your replay!

     I think that commit66c915d09b94 ("mmc: core: Disable card detect during shutdown") doesn't reslove this issue.
     1. Issues may asise in the following processing.
     sdcard hot pulg out:                                  SyS_reboot:
     CPU0                                                  CPU1
     _mmc_detect_change() {
     ......
     mmc_schedule_delayed_work(&host->detect, delay)
     #Step1: call delay work &host->detect
         mmc_rescan()
         {
          .......
              #Step2: detect SD card removed
              mmc_sd_detect() {                              ......
                                                             _mmc_stop_host (.pre_shutdown)
                                                            {
              ......                                        #Step3:_mmc_stop_host() cancel detect use sync
                                                            cancel_delayed_work_sync(&host->detect)
                                                            #Step4: wait delay work complete
                                                            }
                 if (err) {
                 #Step5: host->card is NULL
                 mmc_sd_remove(host);                        ......
                                                            #Step6: wait delay work complete
                                                            mmc_sd_suspend (.shutdown)
                                                            {
                                                             ......

                                                            #Step7:_mmc_sd_suspend claimed host
                                                            mmc_claim_host(host);
                                                            #Step8: use host-card(NULL pointer)
                                                            if (mmc_card_suspended(host->card))
                                                             ......
                                                            }
                 mmc_claim_host(host);
                 mmc_detach_bus(host);
                }
             }
          }
       ......
      }

     2. And in the version that includes the patch, we have reproduced the issue.

Best regards,
Joe
Ulf Hansson May 6, 2024, 9:53 a.m. UTC | #3
On Mon, 6 May 2024 at 05:36, Joe.Zhou <Joe.Zhou@mediatek.com> wrote:
>
> From: Joe Zhou <Joe.Zhou@mediatek.com>
>
> > Thanks for your patch!
>
> > Doesn't commit 66c915d09b94 ("mmc: core: Disable card detect during
> > shutdown") take care of this problem?
>
> > Kind regards
> > Uffe
>
>
> Dear Ulf,
>      Thank you for your replay!
>
>      I think that commit66c915d09b94 ("mmc: core: Disable card detect during shutdown") doesn't reslove this issue.
>      1. Issues may asise in the following processing.
>      sdcard hot pulg out:                                  SyS_reboot:
>      CPU0                                                  CPU1
>      _mmc_detect_change() {
>      ......
>      mmc_schedule_delayed_work(&host->detect, delay)
>      #Step1: call delay work &host->detect
>          mmc_rescan()
>          {
>           .......
>               #Step2: detect SD card removed
>               mmc_sd_detect() {                              ......
>                                                              _mmc_stop_host (.pre_shutdown)
>                                                             {
>               ......                                        #Step3:_mmc_stop_host() cancel detect use sync
>                                                             cancel_delayed_work_sync(&host->detect)
>                                                             #Step4: wait delay work complete
>                                                             }
>                  if (err) {
>                  #Step5: host->card is NULL
>                  mmc_sd_remove(host);                        ......

Via mmc_sd_detect() we are also calling device_del(card) and
mmc_detach_bus(). In other words, when _mmc_stop_host() has been
completed, the struct device corresponding to the card should have
been unregistered and host->bus_ops should be NULL.

In the later phase, mmc_bus_shutdown() seems to be called, which is
weird in the first place as the struct device should have been removed
from the bus. Then, even if that is the case, the host->bus_ops should
be NULL, thus it should rather lead to NULL pointer dereference splat
when accessing host->bus_ops->shutdown() callback.

What am I missing here?

>                                                             #Step6: wait delay work complete
>                                                             mmc_sd_suspend (.shutdown)
>                                                             {
>                                                              ......
>
>                                                             #Step7:_mmc_sd_suspend claimed host
>                                                             mmc_claim_host(host);
>                                                             #Step8: use host-card(NULL pointer)
>                                                             if (mmc_card_suspended(host->card))
>                                                              ......
>                                                             }
>                  mmc_claim_host(host);
>                  mmc_detach_bus(host);
>                 }
>              }
>           }
>        ......
>       }
>
>      2. And in the version that includes the patch, we have reproduced the issue.

Can you please send a detailed log-splat of what is happening?
Otherwise I may not be able to help.

>
> Best regards,
> Joe

Kind regards
Uffe
diff mbox series

Patch

diff --git a/drivers/mmc/core/sd.c b/drivers/mmc/core/sd.c
index 1c8148cdda50..38c0b271283a 100644
--- a/drivers/mmc/core/sd.c
+++ b/drivers/mmc/core/sd.c
@@ -1593,7 +1593,9 @@  static int mmc_sd_init_card(struct mmc_host *host, u32 ocr,
 static void mmc_sd_remove(struct mmc_host *host)
 {
 	mmc_remove_card(host->card);
+	mmc_claim_host(host);
 	host->card = NULL;
+	mmc_release_host(host);
 }
 
 /*
@@ -1702,18 +1704,19 @@  static int _mmc_sd_suspend(struct mmc_host *host)
 	int err = 0;
 
 	mmc_claim_host(host);
+	if (host->card) {
+		if (mmc_card_suspended(card))
+			goto out;
 
-	if (mmc_card_suspended(card))
-		goto out;
-
-	if (sd_can_poweroff_notify(card))
-		err = sd_poweroff_notify(card);
-	else if (!mmc_host_is_spi(host))
-		err = mmc_deselect_cards(host);
+		if (sd_can_poweroff_notify(card))
+			err = sd_poweroff_notify(card);
+		else if (!mmc_host_is_spi(host))
+			err = mmc_deselect_cards(host);
 
-	if (!err) {
-		mmc_power_off(host);
-		mmc_card_set_suspended(card);
+		if (!err) {
+			mmc_power_off(host);
+			mmc_card_set_suspended(card);
+		}
 	}
 
 out:
@@ -1729,7 +1732,7 @@  static int mmc_sd_suspend(struct mmc_host *host)
 	int err;
 
 	err = _mmc_sd_suspend(host);
-	if (!err) {
+	if (!err && host->card) {
 		pm_runtime_disable(&host->card->dev);
 		pm_runtime_set_suspended(&host->card->dev);
 	}