diff mbox series

[V3,1/4] mmc: sdhci: Capture eMMC and SD card errors

Message ID 1642699582-14785-2-git-send-email-quic_c_sbhanu@quicinc.com
State New
Headers show
Series mmc: add error statistics for eMMC and SD card | expand

Commit Message

Shaik Sajida Bhanu Jan. 20, 2022, 5:26 p.m. UTC
Add changes to capture eMMC and SD card errors.
This is useful for debug and testing.

Signed-off-by: Shaik Sajida Bhanu <quic_c_sbhanu@quicinc.com>
Signed-off-by: Liangliang Lu <luliang@codeaurora.org>
Signed-off-by: Sayali Lokhande <sayalil@codeaurora.org>
Signed-off-by: Bao D. Nguyen <nguyenb@codeaurora.org>
---
 drivers/mmc/host/sdhci-msm.c |  3 ++
 drivers/mmc/host/sdhci.c     | 72 ++++++++++++++++++++++++++++++++++++--------
 include/linux/mmc/host.h     | 31 +++++++++++++++++++
 3 files changed, 94 insertions(+), 12 deletions(-)

Comments

Adrian Hunter Jan. 21, 2022, 7:08 a.m. UTC | #1
On 20/01/2022 19:26, Shaik Sajida Bhanu wrote:
> Add changes to capture eMMC and SD card errors.
> This is useful for debug and testing.
> 
> Signed-off-by: Shaik Sajida Bhanu <quic_c_sbhanu@quicinc.com>
> Signed-off-by: Liangliang Lu <luliang@codeaurora.org>
> Signed-off-by: Sayali Lokhande <sayalil@codeaurora.org>
> Signed-off-by: Bao D. Nguyen <nguyenb@codeaurora.org>
> ---
>  drivers/mmc/host/sdhci-msm.c |  3 ++
>  drivers/mmc/host/sdhci.c     | 72 ++++++++++++++++++++++++++++++++++++--------
>  include/linux/mmc/host.h     | 31 +++++++++++++++++++
>  3 files changed, 94 insertions(+), 12 deletions(-)
> 
> diff --git a/drivers/mmc/host/sdhci-msm.c b/drivers/mmc/host/sdhci-msm.c
> index 50c71e0..309eb7b 100644
> --- a/drivers/mmc/host/sdhci-msm.c
> +++ b/drivers/mmc/host/sdhci-msm.c
> @@ -128,6 +128,8 @@
>  
>  #define MSM_MMC_AUTOSUSPEND_DELAY_MS	50
>  
> +#define MSM_MMC_ERR_STATS_ENABLE 1
> +
>  /* Timeout value to avoid infinite waiting for pwr_irq */
>  #define MSM_PWR_IRQ_TIMEOUT_MS 5000
>  
> @@ -2734,6 +2736,7 @@ static int sdhci_msm_probe(struct platform_device *pdev)
>  	if (ret)
>  		goto pm_runtime_disable;
>  
> +	host->mmc->err_stats_enabled = MSM_MMC_ERR_STATS_ENABLE;

Please remove this. SDHCI will enable error stats.

>  	pm_runtime_mark_last_busy(&pdev->dev);
>  	pm_runtime_put_autosuspend(&pdev->dev);
>  
> diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c
> index 07c6da1..74b356e 100644
> --- a/drivers/mmc/host/sdhci.c
> +++ b/drivers/mmc/host/sdhci.c
> @@ -113,6 +113,8 @@ void sdhci_dumpregs(struct sdhci_host *host)
>  	if (host->ops->dump_vendor_regs)
>  		host->ops->dump_vendor_regs(host);
>  
> +	if (host->mmc->err_stats_enabled)
> +		mmc_debugfs_err_stats_enable(host->mmc);

Please move this to sdhci_setup_host() and call it unconditionally i.e. just

	mmc_debugfs_err_stats_enable(host->mmc);

>  	SDHCI_DUMP("============================================\n");
>  }
>  EXPORT_SYMBOL_GPL(sdhci_dumpregs);
> @@ -3159,6 +3161,8 @@ static void sdhci_timeout_timer(struct timer_list *t)
>  	spin_lock_irqsave(&host->lock, flags);
>  
>  	if (host->cmd && !sdhci_data_line_cmd(host->cmd)) {
> +		if (host->mmc && host->mmc->err_stats_enabled)
> +			mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_REQ_TIMEOUT);

Please remove the 'if ()', i.e. just make it, unconditionally:

		mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_REQ_TIMEOUT);

Same for other calls to mmc_debugfs_err_stats_inc()

>  		pr_err("%s: Timeout waiting for hardware cmd interrupt.\n",
>  		       mmc_hostname(host->mmc));
>  		sdhci_dumpregs(host);
> @@ -3181,6 +3185,8 @@ static void sdhci_timeout_data_timer(struct timer_list *t)
>  
>  	if (host->data || host->data_cmd ||
>  	    (host->cmd && sdhci_data_line_cmd(host->cmd))) {
> +		if (host->mmc && host->mmc->err_stats_enabled)
> +			mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_REQ_TIMEOUT);
>  		pr_err("%s: Timeout waiting for hardware interrupt.\n",
>  		       mmc_hostname(host->mmc));
>  		sdhci_dumpregs(host);
> @@ -3240,11 +3246,18 @@ static void sdhci_cmd_irq(struct sdhci_host *host, u32 intmask, u32 *intmask_p)
>  
>  	if (intmask & (SDHCI_INT_TIMEOUT | SDHCI_INT_CRC |
>  		       SDHCI_INT_END_BIT | SDHCI_INT_INDEX)) {
> -		if (intmask & SDHCI_INT_TIMEOUT)
> +		if (intmask & SDHCI_INT_TIMEOUT) {
>  			host->cmd->error = -ETIMEDOUT;
> -		else
> +			if (host->mmc && host->mmc->err_stats_enabled)
> +				mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_CMD_TIMEOUT);
> +		} else {
>  			host->cmd->error = -EILSEQ;
> -
> +			if (host->cmd->opcode != MMC_SEND_TUNING_BLOCK ||
> +					host->cmd->opcode != MMC_SEND_TUNING_BLOCK_HS200) {
> +				if (host->mmc && host->mmc->err_stats_enabled)
> +					mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_CMD_CRC);
> +			}
> +		}
>  		/* Treat data command CRC error the same as data CRC error */
>  		if (host->cmd->data &&
>  		    (intmask & (SDHCI_INT_CRC | SDHCI_INT_TIMEOUT)) ==
> @@ -3265,6 +3278,8 @@ static void sdhci_cmd_irq(struct sdhci_host *host, u32 intmask, u32 *intmask_p)
>  		int err = (auto_cmd_status & SDHCI_AUTO_CMD_TIMEOUT) ?
>  			  -ETIMEDOUT :
>  			  -EILSEQ;
> +		if (host->mmc && host->mmc->err_stats_enabled)
> +			mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_AUTO_CMD);
>  
>  		if (sdhci_auto_cmd23(host, mrq)) {
>  			mrq->sbc->error = err;
> @@ -3342,6 +3357,8 @@ static void sdhci_data_irq(struct sdhci_host *host, u32 intmask)
>  			if (intmask & SDHCI_INT_DATA_TIMEOUT) {
>  				host->data_cmd = NULL;
>  				data_cmd->error = -ETIMEDOUT;
> +				if (host->mmc && host->mmc->err_stats_enabled)
> +					mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_CMD_TIMEOUT);
>  				__sdhci_finish_mrq(host, data_cmd->mrq);
>  				return;
>  			}
> @@ -3375,18 +3392,29 @@ static void sdhci_data_irq(struct sdhci_host *host, u32 intmask)
>  		return;
>  	}
>  
> -	if (intmask & SDHCI_INT_DATA_TIMEOUT)
> +	if (intmask & SDHCI_INT_DATA_TIMEOUT) {
>  		host->data->error = -ETIMEDOUT;
> +		if (host->mmc && host->mmc->err_stats_enabled)
> +			mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_DAT_TIMEOUT);
> +	}
>  	else if (intmask & SDHCI_INT_DATA_END_BIT)
>  		host->data->error = -EILSEQ;
>  	else if ((intmask & SDHCI_INT_DATA_CRC) &&
>  		SDHCI_GET_CMD(sdhci_readw(host, SDHCI_COMMAND))
> -			!= MMC_BUS_TEST_R)
> +			!= MMC_BUS_TEST_R) {
>  		host->data->error = -EILSEQ;
> +		if (host->cmd->opcode != MMC_SEND_TUNING_BLOCK ||
> +				host->cmd->opcode != MMC_SEND_TUNING_BLOCK_HS200) {
> +			if (host->mmc && host->mmc->err_stats_enabled)
> +				mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_DAT_CRC);
> +		}
> +	}
>  	else if (intmask & SDHCI_INT_ADMA_ERROR) {
>  		pr_err("%s: ADMA error: 0x%08x\n", mmc_hostname(host->mmc),
>  		       intmask);
>  		sdhci_adma_show_error(host);
> +		if (host->mmc && host->mmc->err_stats_enabled)
> +			mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_ADMA);
>  		host->data->error = -EIO;
>  		if (host->ops->adma_workaround)
>  			host->ops->adma_workaround(host, intmask);
> @@ -3905,20 +3933,40 @@ bool sdhci_cqe_irq(struct sdhci_host *host, u32 intmask, int *cmd_error,
>  	if (!host->cqe_on)
>  		return false;
>  
> -	if (intmask & (SDHCI_INT_INDEX | SDHCI_INT_END_BIT | SDHCI_INT_CRC))
> +	if (intmask & (SDHCI_INT_INDEX | SDHCI_INT_END_BIT | SDHCI_INT_CRC)) {
>  		*cmd_error = -EILSEQ;
> -	else if (intmask & SDHCI_INT_TIMEOUT)
> +		if (intmask & SDHCI_INT_CRC) {
> +			if (host->cmd->opcode != MMC_SEND_TUNING_BLOCK ||
> +					host->cmd->opcode != MMC_SEND_TUNING_BLOCK_HS200) {
> +				if (host->mmc && host->mmc->err_stats_enabled)
> +					mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_CMD_CRC);
> +			}
> +		}
> +	} else if (intmask & SDHCI_INT_TIMEOUT) {
>  		*cmd_error = -ETIMEDOUT;
> -	else
> +		if (host->mmc && host->mmc->err_stats_enabled)
> +			mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_CMD_TIMEOUT);
> +	} else
>  		*cmd_error = 0;
>  
> -	if (intmask & (SDHCI_INT_DATA_END_BIT | SDHCI_INT_DATA_CRC))
> +	if (intmask & (SDHCI_INT_DATA_END_BIT | SDHCI_INT_DATA_CRC)) {
>  		*data_error = -EILSEQ;
> -	else if (intmask & SDHCI_INT_DATA_TIMEOUT)
> +		if (intmask & SDHCI_INT_DATA_CRC) {
> +			if (host->cmd->opcode != MMC_SEND_TUNING_BLOCK ||
> +					host->cmd->opcode != MMC_SEND_TUNING_BLOCK_HS200) {
> +				if (host->mmc && host->mmc->err_stats_enabled)
> +					mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_DAT_CRC);
> +			}
> +		}
> +	} else if (intmask & SDHCI_INT_DATA_TIMEOUT) {
>  		*data_error = -ETIMEDOUT;
> -	else if (intmask & SDHCI_INT_ADMA_ERROR)
> +		if (host->mmc && host->mmc->err_stats_enabled)
> +			mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_DAT_TIMEOUT);
> +	} else if (intmask & SDHCI_INT_ADMA_ERROR) {
>  		*data_error = -EIO;
> -	else
> +		if (host->mmc && host->mmc->err_stats_enabled)
> +			mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_ADMA);
> +	} else
>  		*data_error = 0;
>  
>  	/* Clear selected interrupts. */
> diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h

Changes to host.h are core changes and belong in patch 3,
which should be the first patch.

> index 7afb57c..883b50b 100644
> --- a/include/linux/mmc/host.h
> +++ b/include/linux/mmc/host.h
> @@ -93,6 +93,23 @@ struct mmc_clk_phase_map {
>  
>  struct mmc_host;
>  
> +enum mmc_err_stat {
> +	MMC_ERR_CMD_TIMEOUT,
> +	MMC_ERR_CMD_CRC,
> +	MMC_ERR_DAT_TIMEOUT,
> +	MMC_ERR_DAT_CRC,
> +	MMC_ERR_AUTO_CMD,
> +	MMC_ERR_ADMA,
> +	MMC_ERR_TUNING,
> +	MMC_ERR_CMDQ_RED,
> +	MMC_ERR_CMDQ_GCE,
> +	MMC_ERR_CMDQ_ICCE,
> +	MMC_ERR_REQ_TIMEOUT,
> +	MMC_ERR_CMDQ_REQ_TIMEOUT,
> +	MMC_ERR_ICE_CFG,
> +	MMC_ERR_MAX,
> +};
> +
>  struct mmc_host_ops {
>  	/*
>  	 * It is optional for the host to implement pre_req and post_req in
> @@ -500,6 +517,9 @@ struct mmc_host {
>  
>  	/* Host Software Queue support */
>  	bool			hsq_enabled;
> +	u32                     err_stats[MMC_ERR_MAX];
> +	bool 			err_stats_enabled;
> +	bool			err_state;

Please drop err_state for now

>  
>  	unsigned long		private[] ____cacheline_aligned;
>  };
> @@ -635,6 +655,17 @@ static inline enum dma_data_direction mmc_get_dma_dir(struct mmc_data *data)
>  	return data->flags & MMC_DATA_WRITE ? DMA_TO_DEVICE : DMA_FROM_DEVICE;
>  }
>  
> +static inline void mmc_debugfs_err_stats_enable(struct mmc_host *mmc)

Please use 'host' as the mmc_host parameter in this file.

> +{
> +	mmc->err_state = true;

Let's make this:

	host->err_stats_enabled = true;

> +}
> +
> +static inline void mmc_debugfs_err_stats_inc(struct mmc_host *mmc,
> +		enum mmc_err_stat stat) {
> +

Please remove blank line here

> +	mmc->err_stats[stat] += 1;
> +}
> +
>  int mmc_send_tuning(struct mmc_host *host, u32 opcode, int *cmd_error);
>  int mmc_send_abort_tuning(struct mmc_host *host, u32 opcode);
>  int mmc_get_ext_csd(struct mmc_card *card, u8 **new_ext_csd);
>
Sajida Bhanu (Temp) Jan. 25, 2022, 6:17 p.m. UTC | #2
Hi,

Thanks for  the Review.

Please find the inline comments.

Thanks,
Sajida

-----Original Message-----
From: Adrian Hunter <adrian.hunter@intel.com> 
Sent: Friday, January 21, 2022 12:38 PM
To: Sajida Bhanu (Temp) (QUIC) <quic_c_sbhanu@quicinc.com>; Asutosh Das (QUIC) <quic_asutoshd@quicinc.com>; ulf.hansson@linaro.org; agross@kernel.org; bjorn.andersson@linaro.org; linux-mmc@vger.kernel.org; linux-arm-msm@vger.kernel.org; linux-kernel@vger.kernel.org
Cc: stummala@codeaurora.org; vbadigan@codeaurora.org; Ram Prakash Gupta (QUIC) <quic_rampraka@quicinc.com>; Pradeep Pragallapati (QUIC) <quic_pragalla@quicinc.com>; sartgarg@codeaurora.org; nitirawa@codeaurora.org; sayalil@codeaurora.org; Liangliang Lu <luliang@codeaurora.org>; Bao D . Nguyen <nguyenb@codeaurora.org>
Subject: Re: [PATCH V3 1/4] mmc: sdhci: Capture eMMC and SD card errors

On 20/01/2022 19:26, Shaik Sajida Bhanu wrote:
> Add changes to capture eMMC and SD card errors.
> This is useful for debug and testing.
> 
> Signed-off-by: Shaik Sajida Bhanu <quic_c_sbhanu@quicinc.com>
> Signed-off-by: Liangliang Lu <luliang@codeaurora.org>
> Signed-off-by: Sayali Lokhande <sayalil@codeaurora.org>
> Signed-off-by: Bao D. Nguyen <nguyenb@codeaurora.org>
> ---
>  drivers/mmc/host/sdhci-msm.c |  3 ++
>  drivers/mmc/host/sdhci.c     | 72 ++++++++++++++++++++++++++++++++++++--------
>  include/linux/mmc/host.h     | 31 +++++++++++++++++++
>  3 files changed, 94 insertions(+), 12 deletions(-)
> 
> diff --git a/drivers/mmc/host/sdhci-msm.c 
> b/drivers/mmc/host/sdhci-msm.c index 50c71e0..309eb7b 100644
> --- a/drivers/mmc/host/sdhci-msm.c
> +++ b/drivers/mmc/host/sdhci-msm.c
> @@ -128,6 +128,8 @@
>  
>  #define MSM_MMC_AUTOSUSPEND_DELAY_MS	50
>  
> +#define MSM_MMC_ERR_STATS_ENABLE 1
> +
>  /* Timeout value to avoid infinite waiting for pwr_irq */  #define 
> MSM_PWR_IRQ_TIMEOUT_MS 5000
>  
> @@ -2734,6 +2736,7 @@ static int sdhci_msm_probe(struct platform_device *pdev)
>  	if (ret)
>  		goto pm_runtime_disable;
>  
> +	host->mmc->err_stats_enabled = MSM_MMC_ERR_STATS_ENABLE;

Please remove this. SDHCI will enable error stats.

>>>>>> Sure.

>  	pm_runtime_mark_last_busy(&pdev->dev);
>  	pm_runtime_put_autosuspend(&pdev->dev);
>  
> diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c index 
> 07c6da1..74b356e 100644
> --- a/drivers/mmc/host/sdhci.c
> +++ b/drivers/mmc/host/sdhci.c
> @@ -113,6 +113,8 @@ void sdhci_dumpregs(struct sdhci_host *host)
>  	if (host->ops->dump_vendor_regs)
>  		host->ops->dump_vendor_regs(host);
>  
> +	if (host->mmc->err_stats_enabled)
> +		mmc_debugfs_err_stats_enable(host->mmc);

Please move this to sdhci_setup_host() and call it unconditionally i.e. just

	mmc_debugfs_err_stats_enable(host->mmc);


>>>>>>> mmc_debugfs_err_stats_enable() will set err_state , that means some errors occurred in driver level.
If we move this call to sdhci_setup_host(), then it will set if no errors also right?


>  	SDHCI_DUMP("============================================\n");
>  }
>  EXPORT_SYMBOL_GPL(sdhci_dumpregs);
> @@ -3159,6 +3161,8 @@ static void sdhci_timeout_timer(struct timer_list *t)
>  	spin_lock_irqsave(&host->lock, flags);
>  
>  	if (host->cmd && !sdhci_data_line_cmd(host->cmd)) {
> +		if (host->mmc && host->mmc->err_stats_enabled)
> +			mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_REQ_TIMEOUT);

Please remove the 'if ()', i.e. just make it, unconditionally:

		mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_REQ_TIMEOUT);

Same for other calls to mmc_debugfs_err_stats_inc()

>>>>>>>>Sure.

>  		pr_err("%s: Timeout waiting for hardware cmd interrupt.\n",
>  		       mmc_hostname(host->mmc));
>  		sdhci_dumpregs(host);
> @@ -3181,6 +3185,8 @@ static void sdhci_timeout_data_timer(struct 
> timer_list *t)
>  
>  	if (host->data || host->data_cmd ||
>  	    (host->cmd && sdhci_data_line_cmd(host->cmd))) {
> +		if (host->mmc && host->mmc->err_stats_enabled)
> +			mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_REQ_TIMEOUT);
>  		pr_err("%s: Timeout waiting for hardware interrupt.\n",
>  		       mmc_hostname(host->mmc));
>  		sdhci_dumpregs(host);
> @@ -3240,11 +3246,18 @@ static void sdhci_cmd_irq(struct sdhci_host 
> *host, u32 intmask, u32 *intmask_p)
>  
>  	if (intmask & (SDHCI_INT_TIMEOUT | SDHCI_INT_CRC |
>  		       SDHCI_INT_END_BIT | SDHCI_INT_INDEX)) {
> -		if (intmask & SDHCI_INT_TIMEOUT)
> +		if (intmask & SDHCI_INT_TIMEOUT) {
>  			host->cmd->error = -ETIMEDOUT;
> -		else
> +			if (host->mmc && host->mmc->err_stats_enabled)
> +				mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_CMD_TIMEOUT);
> +		} else {
>  			host->cmd->error = -EILSEQ;
> -
> +			if (host->cmd->opcode != MMC_SEND_TUNING_BLOCK ||
> +					host->cmd->opcode != MMC_SEND_TUNING_BLOCK_HS200) {
> +				if (host->mmc && host->mmc->err_stats_enabled)
> +					mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_CMD_CRC);
> +			}
> +		}
>  		/* Treat data command CRC error the same as data CRC error */
>  		if (host->cmd->data &&
>  		    (intmask & (SDHCI_INT_CRC | SDHCI_INT_TIMEOUT)) == @@ -3265,6 
> +3278,8 @@ static void sdhci_cmd_irq(struct sdhci_host *host, u32 intmask, u32 *intmask_p)
>  		int err = (auto_cmd_status & SDHCI_AUTO_CMD_TIMEOUT) ?
>  			  -ETIMEDOUT :
>  			  -EILSEQ;
> +		if (host->mmc && host->mmc->err_stats_enabled)
> +			mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_AUTO_CMD);
>  
>  		if (sdhci_auto_cmd23(host, mrq)) {
>  			mrq->sbc->error = err;
> @@ -3342,6 +3357,8 @@ static void sdhci_data_irq(struct sdhci_host *host, u32 intmask)
>  			if (intmask & SDHCI_INT_DATA_TIMEOUT) {
>  				host->data_cmd = NULL;
>  				data_cmd->error = -ETIMEDOUT;
> +				if (host->mmc && host->mmc->err_stats_enabled)
> +					mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_CMD_TIMEOUT);
>  				__sdhci_finish_mrq(host, data_cmd->mrq);
>  				return;
>  			}
> @@ -3375,18 +3392,29 @@ static void sdhci_data_irq(struct sdhci_host *host, u32 intmask)
>  		return;
>  	}
>  
> -	if (intmask & SDHCI_INT_DATA_TIMEOUT)
> +	if (intmask & SDHCI_INT_DATA_TIMEOUT) {
>  		host->data->error = -ETIMEDOUT;
> +		if (host->mmc && host->mmc->err_stats_enabled)
> +			mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_DAT_TIMEOUT);
> +	}
>  	else if (intmask & SDHCI_INT_DATA_END_BIT)
>  		host->data->error = -EILSEQ;
>  	else if ((intmask & SDHCI_INT_DATA_CRC) &&
>  		SDHCI_GET_CMD(sdhci_readw(host, SDHCI_COMMAND))
> -			!= MMC_BUS_TEST_R)
> +			!= MMC_BUS_TEST_R) {
>  		host->data->error = -EILSEQ;
> +		if (host->cmd->opcode != MMC_SEND_TUNING_BLOCK ||
> +				host->cmd->opcode != MMC_SEND_TUNING_BLOCK_HS200) {
> +			if (host->mmc && host->mmc->err_stats_enabled)
> +				mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_DAT_CRC);
> +		}
> +	}
>  	else if (intmask & SDHCI_INT_ADMA_ERROR) {
>  		pr_err("%s: ADMA error: 0x%08x\n", mmc_hostname(host->mmc),
>  		       intmask);
>  		sdhci_adma_show_error(host);
> +		if (host->mmc && host->mmc->err_stats_enabled)
> +			mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_ADMA);
>  		host->data->error = -EIO;
>  		if (host->ops->adma_workaround)
>  			host->ops->adma_workaround(host, intmask); @@ -3905,20 +3933,40 @@ 
> bool sdhci_cqe_irq(struct sdhci_host *host, u32 intmask, int *cmd_error,
>  	if (!host->cqe_on)
>  		return false;
>  
> -	if (intmask & (SDHCI_INT_INDEX | SDHCI_INT_END_BIT | SDHCI_INT_CRC))
> +	if (intmask & (SDHCI_INT_INDEX | SDHCI_INT_END_BIT | SDHCI_INT_CRC)) 
> +{
>  		*cmd_error = -EILSEQ;
> -	else if (intmask & SDHCI_INT_TIMEOUT)
> +		if (intmask & SDHCI_INT_CRC) {
> +			if (host->cmd->opcode != MMC_SEND_TUNING_BLOCK ||
> +					host->cmd->opcode != MMC_SEND_TUNING_BLOCK_HS200) {
> +				if (host->mmc && host->mmc->err_stats_enabled)
> +					mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_CMD_CRC);
> +			}
> +		}
> +	} else if (intmask & SDHCI_INT_TIMEOUT) {
>  		*cmd_error = -ETIMEDOUT;
> -	else
> +		if (host->mmc && host->mmc->err_stats_enabled)
> +			mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_CMD_TIMEOUT);
> +	} else
>  		*cmd_error = 0;
>  
> -	if (intmask & (SDHCI_INT_DATA_END_BIT | SDHCI_INT_DATA_CRC))
> +	if (intmask & (SDHCI_INT_DATA_END_BIT | SDHCI_INT_DATA_CRC)) {
>  		*data_error = -EILSEQ;
> -	else if (intmask & SDHCI_INT_DATA_TIMEOUT)
> +		if (intmask & SDHCI_INT_DATA_CRC) {
> +			if (host->cmd->opcode != MMC_SEND_TUNING_BLOCK ||
> +					host->cmd->opcode != MMC_SEND_TUNING_BLOCK_HS200) {
> +				if (host->mmc && host->mmc->err_stats_enabled)
> +					mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_DAT_CRC);
> +			}
> +		}
> +	} else if (intmask & SDHCI_INT_DATA_TIMEOUT) {
>  		*data_error = -ETIMEDOUT;
> -	else if (intmask & SDHCI_INT_ADMA_ERROR)
> +		if (host->mmc && host->mmc->err_stats_enabled)
> +			mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_DAT_TIMEOUT);
> +	} else if (intmask & SDHCI_INT_ADMA_ERROR) {
>  		*data_error = -EIO;
> -	else
> +		if (host->mmc && host->mmc->err_stats_enabled)
> +			mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_ADMA);
> +	} else
>  		*data_error = 0;
>  
>  	/* Clear selected interrupts. */
> diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h

Changes to host.h are core changes and belong in patch 3, which should be the first patch.

>>>>>Sure.

> index 7afb57c..883b50b 100644
> --- a/include/linux/mmc/host.h
> +++ b/include/linux/mmc/host.h
> @@ -93,6 +93,23 @@ struct mmc_clk_phase_map {
>  
>  struct mmc_host;
>  
> +enum mmc_err_stat {
> +	MMC_ERR_CMD_TIMEOUT,
> +	MMC_ERR_CMD_CRC,
> +	MMC_ERR_DAT_TIMEOUT,
> +	MMC_ERR_DAT_CRC,
> +	MMC_ERR_AUTO_CMD,
> +	MMC_ERR_ADMA,
> +	MMC_ERR_TUNING,
> +	MMC_ERR_CMDQ_RED,
> +	MMC_ERR_CMDQ_GCE,
> +	MMC_ERR_CMDQ_ICCE,
> +	MMC_ERR_REQ_TIMEOUT,
> +	MMC_ERR_CMDQ_REQ_TIMEOUT,
> +	MMC_ERR_ICE_CFG,
> +	MMC_ERR_MAX,
> +};
> +
>  struct mmc_host_ops {
>  	/*
>  	 * It is optional for the host to implement pre_req and post_req in 
> @@ -500,6 +517,9 @@ struct mmc_host {
>  
>  	/* Host Software Queue support */
>  	bool			hsq_enabled;
> +	u32                     err_stats[MMC_ERR_MAX];
> +	bool 			err_stats_enabled;
> +	bool			err_state;

Please drop err_state for now

>>>>>>>first we can check this variable right,  if it is set then we can go and check err_stats[] to know more on type of error (data /cmd timeout or CRC errors etc.).
Please let me know your opinion on this.

>  
>  	unsigned long		private[] ____cacheline_aligned;
>  };
> @@ -635,6 +655,17 @@ static inline enum dma_data_direction mmc_get_dma_dir(struct mmc_data *data)
>  	return data->flags & MMC_DATA_WRITE ? DMA_TO_DEVICE : 
> DMA_FROM_DEVICE;  }
>  
> +static inline void mmc_debugfs_err_stats_enable(struct mmc_host *mmc)

Please use 'host' as the mmc_host parameter in this file.

> +{
> +	mmc->err_state = true;

Let's make this:

	host->err_stats_enabled = true;

>>>>>>Sure.

> +}
> +
> +static inline void mmc_debugfs_err_stats_inc(struct mmc_host *mmc,
> +		enum mmc_err_stat stat) {
> +

Please remove blank line here

>>>>>>sure.

> +	mmc->err_stats[stat] += 1;
> +}
> +
>  int mmc_send_tuning(struct mmc_host *host, u32 opcode, int 
> *cmd_error);  int mmc_send_abort_tuning(struct mmc_host *host, u32 
> opcode);  int mmc_get_ext_csd(struct mmc_card *card, u8 
> **new_ext_csd);
>
Adrian Hunter Feb. 1, 2022, 1:58 p.m. UTC | #3
On 25/01/2022 20:17, Sajida Bhanu (Temp) wrote:
> Hi,
> 
> Thanks for  the Review.
> 
> Please find the inline comments.
> 
> Thanks,
> Sajida
> 
> -----Original Message-----
> From: Adrian Hunter <adrian.hunter@intel.com> 
> Sent: Friday, January 21, 2022 12:38 PM
> To: Sajida Bhanu (Temp) (QUIC) <quic_c_sbhanu@quicinc.com>; Asutosh Das (QUIC) <quic_asutoshd@quicinc.com>; ulf.hansson@linaro.org; agross@kernel.org; bjorn.andersson@linaro.org; linux-mmc@vger.kernel.org; linux-arm-msm@vger.kernel.org; linux-kernel@vger.kernel.org
> Cc: stummala@codeaurora.org; vbadigan@codeaurora.org; Ram Prakash Gupta (QUIC) <quic_rampraka@quicinc.com>; Pradeep Pragallapati (QUIC) <quic_pragalla@quicinc.com>; sartgarg@codeaurora.org; nitirawa@codeaurora.org; sayalil@codeaurora.org; Liangliang Lu <luliang@codeaurora.org>; Bao D . Nguyen <nguyenb@codeaurora.org>
> Subject: Re: [PATCH V3 1/4] mmc: sdhci: Capture eMMC and SD card errors
> 
> On 20/01/2022 19:26, Shaik Sajida Bhanu wrote:
>> Add changes to capture eMMC and SD card errors.
>> This is useful for debug and testing.
>>
>> Signed-off-by: Shaik Sajida Bhanu <quic_c_sbhanu@quicinc.com>
>> Signed-off-by: Liangliang Lu <luliang@codeaurora.org>
>> Signed-off-by: Sayali Lokhande <sayalil@codeaurora.org>
>> Signed-off-by: Bao D. Nguyen <nguyenb@codeaurora.org>
>> ---
>>  drivers/mmc/host/sdhci-msm.c |  3 ++
>>  drivers/mmc/host/sdhci.c     | 72 ++++++++++++++++++++++++++++++++++++--------
>>  include/linux/mmc/host.h     | 31 +++++++++++++++++++
>>  3 files changed, 94 insertions(+), 12 deletions(-)
>>
>> diff --git a/drivers/mmc/host/sdhci-msm.c 
>> b/drivers/mmc/host/sdhci-msm.c index 50c71e0..309eb7b 100644
>> --- a/drivers/mmc/host/sdhci-msm.c
>> +++ b/drivers/mmc/host/sdhci-msm.c
>> @@ -128,6 +128,8 @@
>>  
>>  #define MSM_MMC_AUTOSUSPEND_DELAY_MS	50
>>  
>> +#define MSM_MMC_ERR_STATS_ENABLE 1
>> +
>>  /* Timeout value to avoid infinite waiting for pwr_irq */  #define 
>> MSM_PWR_IRQ_TIMEOUT_MS 5000
>>  
>> @@ -2734,6 +2736,7 @@ static int sdhci_msm_probe(struct platform_device *pdev)
>>  	if (ret)
>>  		goto pm_runtime_disable;
>>  
>> +	host->mmc->err_stats_enabled = MSM_MMC_ERR_STATS_ENABLE;
> 
> Please remove this. SDHCI will enable error stats.
> 
>>>>>>> Sure.
> 
>>  	pm_runtime_mark_last_busy(&pdev->dev);
>>  	pm_runtime_put_autosuspend(&pdev->dev);
>>  
>> diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c index 
>> 07c6da1..74b356e 100644
>> --- a/drivers/mmc/host/sdhci.c
>> +++ b/drivers/mmc/host/sdhci.c
>> @@ -113,6 +113,8 @@ void sdhci_dumpregs(struct sdhci_host *host)
>>  	if (host->ops->dump_vendor_regs)
>>  		host->ops->dump_vendor_regs(host);
>>  
>> +	if (host->mmc->err_stats_enabled)
>> +		mmc_debugfs_err_stats_enable(host->mmc);
> 
> Please move this to sdhci_setup_host() and call it unconditionally i.e. just
> 
> 	mmc_debugfs_err_stats_enable(host->mmc);
> 
> 
>>>>>>>> mmc_debugfs_err_stats_enable() will set err_state , that means some errors occurred in driver level.
> If we move this call to sdhci_setup_host(), then it will set if no errors also right?

Then it seems like you want to set err_state = true in mmc_debugfs_err_stats_inc()

> 
> 
>>  	SDHCI_DUMP("============================================\n");
>>  }
>>  EXPORT_SYMBOL_GPL(sdhci_dumpregs);
>> @@ -3159,6 +3161,8 @@ static void sdhci_timeout_timer(struct timer_list *t)
>>  	spin_lock_irqsave(&host->lock, flags);
>>  
>>  	if (host->cmd && !sdhci_data_line_cmd(host->cmd)) {
>> +		if (host->mmc && host->mmc->err_stats_enabled)
>> +			mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_REQ_TIMEOUT);
> 
> Please remove the 'if ()', i.e. just make it, unconditionally:
> 
> 		mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_REQ_TIMEOUT);
> 
> Same for other calls to mmc_debugfs_err_stats_inc()
> 
>>>>>>>>> Sure.
> 
>>  		pr_err("%s: Timeout waiting for hardware cmd interrupt.\n",
>>  		       mmc_hostname(host->mmc));
>>  		sdhci_dumpregs(host);
>> @@ -3181,6 +3185,8 @@ static void sdhci_timeout_data_timer(struct 
>> timer_list *t)
>>  
>>  	if (host->data || host->data_cmd ||
>>  	    (host->cmd && sdhci_data_line_cmd(host->cmd))) {
>> +		if (host->mmc && host->mmc->err_stats_enabled)
>> +			mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_REQ_TIMEOUT);
>>  		pr_err("%s: Timeout waiting for hardware interrupt.\n",
>>  		       mmc_hostname(host->mmc));
>>  		sdhci_dumpregs(host);
>> @@ -3240,11 +3246,18 @@ static void sdhci_cmd_irq(struct sdhci_host 
>> *host, u32 intmask, u32 *intmask_p)
>>  
>>  	if (intmask & (SDHCI_INT_TIMEOUT | SDHCI_INT_CRC |
>>  		       SDHCI_INT_END_BIT | SDHCI_INT_INDEX)) {
>> -		if (intmask & SDHCI_INT_TIMEOUT)
>> +		if (intmask & SDHCI_INT_TIMEOUT) {
>>  			host->cmd->error = -ETIMEDOUT;
>> -		else
>> +			if (host->mmc && host->mmc->err_stats_enabled)
>> +				mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_CMD_TIMEOUT);
>> +		} else {
>>  			host->cmd->error = -EILSEQ;
>> -
>> +			if (host->cmd->opcode != MMC_SEND_TUNING_BLOCK ||
>> +					host->cmd->opcode != MMC_SEND_TUNING_BLOCK_HS200) {
>> +				if (host->mmc && host->mmc->err_stats_enabled)
>> +					mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_CMD_CRC);
>> +			}
>> +		}
>>  		/* Treat data command CRC error the same as data CRC error */
>>  		if (host->cmd->data &&
>>  		    (intmask & (SDHCI_INT_CRC | SDHCI_INT_TIMEOUT)) == @@ -3265,6 
>> +3278,8 @@ static void sdhci_cmd_irq(struct sdhci_host *host, u32 intmask, u32 *intmask_p)
>>  		int err = (auto_cmd_status & SDHCI_AUTO_CMD_TIMEOUT) ?
>>  			  -ETIMEDOUT :
>>  			  -EILSEQ;
>> +		if (host->mmc && host->mmc->err_stats_enabled)
>> +			mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_AUTO_CMD);
>>  
>>  		if (sdhci_auto_cmd23(host, mrq)) {
>>  			mrq->sbc->error = err;
>> @@ -3342,6 +3357,8 @@ static void sdhci_data_irq(struct sdhci_host *host, u32 intmask)
>>  			if (intmask & SDHCI_INT_DATA_TIMEOUT) {
>>  				host->data_cmd = NULL;
>>  				data_cmd->error = -ETIMEDOUT;
>> +				if (host->mmc && host->mmc->err_stats_enabled)
>> +					mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_CMD_TIMEOUT);
>>  				__sdhci_finish_mrq(host, data_cmd->mrq);
>>  				return;
>>  			}
>> @@ -3375,18 +3392,29 @@ static void sdhci_data_irq(struct sdhci_host *host, u32 intmask)
>>  		return;
>>  	}
>>  
>> -	if (intmask & SDHCI_INT_DATA_TIMEOUT)
>> +	if (intmask & SDHCI_INT_DATA_TIMEOUT) {
>>  		host->data->error = -ETIMEDOUT;
>> +		if (host->mmc && host->mmc->err_stats_enabled)
>> +			mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_DAT_TIMEOUT);
>> +	}
>>  	else if (intmask & SDHCI_INT_DATA_END_BIT)
>>  		host->data->error = -EILSEQ;
>>  	else if ((intmask & SDHCI_INT_DATA_CRC) &&
>>  		SDHCI_GET_CMD(sdhci_readw(host, SDHCI_COMMAND))
>> -			!= MMC_BUS_TEST_R)
>> +			!= MMC_BUS_TEST_R) {
>>  		host->data->error = -EILSEQ;
>> +		if (host->cmd->opcode != MMC_SEND_TUNING_BLOCK ||
>> +				host->cmd->opcode != MMC_SEND_TUNING_BLOCK_HS200) {
>> +			if (host->mmc && host->mmc->err_stats_enabled)
>> +				mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_DAT_CRC);
>> +		}
>> +	}
>>  	else if (intmask & SDHCI_INT_ADMA_ERROR) {
>>  		pr_err("%s: ADMA error: 0x%08x\n", mmc_hostname(host->mmc),
>>  		       intmask);
>>  		sdhci_adma_show_error(host);
>> +		if (host->mmc && host->mmc->err_stats_enabled)
>> +			mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_ADMA);
>>  		host->data->error = -EIO;
>>  		if (host->ops->adma_workaround)
>>  			host->ops->adma_workaround(host, intmask); @@ -3905,20 +3933,40 @@ 
>> bool sdhci_cqe_irq(struct sdhci_host *host, u32 intmask, int *cmd_error,
>>  	if (!host->cqe_on)
>>  		return false;
>>  
>> -	if (intmask & (SDHCI_INT_INDEX | SDHCI_INT_END_BIT | SDHCI_INT_CRC))
>> +	if (intmask & (SDHCI_INT_INDEX | SDHCI_INT_END_BIT | SDHCI_INT_CRC)) 
>> +{
>>  		*cmd_error = -EILSEQ;
>> -	else if (intmask & SDHCI_INT_TIMEOUT)
>> +		if (intmask & SDHCI_INT_CRC) {
>> +			if (host->cmd->opcode != MMC_SEND_TUNING_BLOCK ||
>> +					host->cmd->opcode != MMC_SEND_TUNING_BLOCK_HS200) {
>> +				if (host->mmc && host->mmc->err_stats_enabled)
>> +					mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_CMD_CRC);
>> +			}
>> +		}
>> +	} else if (intmask & SDHCI_INT_TIMEOUT) {
>>  		*cmd_error = -ETIMEDOUT;
>> -	else
>> +		if (host->mmc && host->mmc->err_stats_enabled)
>> +			mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_CMD_TIMEOUT);
>> +	} else
>>  		*cmd_error = 0;
>>  
>> -	if (intmask & (SDHCI_INT_DATA_END_BIT | SDHCI_INT_DATA_CRC))
>> +	if (intmask & (SDHCI_INT_DATA_END_BIT | SDHCI_INT_DATA_CRC)) {
>>  		*data_error = -EILSEQ;
>> -	else if (intmask & SDHCI_INT_DATA_TIMEOUT)
>> +		if (intmask & SDHCI_INT_DATA_CRC) {
>> +			if (host->cmd->opcode != MMC_SEND_TUNING_BLOCK ||
>> +					host->cmd->opcode != MMC_SEND_TUNING_BLOCK_HS200) {
>> +				if (host->mmc && host->mmc->err_stats_enabled)
>> +					mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_DAT_CRC);
>> +			}
>> +		}
>> +	} else if (intmask & SDHCI_INT_DATA_TIMEOUT) {
>>  		*data_error = -ETIMEDOUT;
>> -	else if (intmask & SDHCI_INT_ADMA_ERROR)
>> +		if (host->mmc && host->mmc->err_stats_enabled)
>> +			mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_DAT_TIMEOUT);
>> +	} else if (intmask & SDHCI_INT_ADMA_ERROR) {
>>  		*data_error = -EIO;
>> -	else
>> +		if (host->mmc && host->mmc->err_stats_enabled)
>> +			mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_ADMA);
>> +	} else
>>  		*data_error = 0;
>>  
>>  	/* Clear selected interrupts. */
>> diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h
> 
> Changes to host.h are core changes and belong in patch 3, which should be the first patch.
> 
>>>>>> Sure.
> 
>> index 7afb57c..883b50b 100644
>> --- a/include/linux/mmc/host.h
>> +++ b/include/linux/mmc/host.h
>> @@ -93,6 +93,23 @@ struct mmc_clk_phase_map {
>>  
>>  struct mmc_host;
>>  
>> +enum mmc_err_stat {
>> +	MMC_ERR_CMD_TIMEOUT,
>> +	MMC_ERR_CMD_CRC,
>> +	MMC_ERR_DAT_TIMEOUT,
>> +	MMC_ERR_DAT_CRC,
>> +	MMC_ERR_AUTO_CMD,
>> +	MMC_ERR_ADMA,
>> +	MMC_ERR_TUNING,
>> +	MMC_ERR_CMDQ_RED,
>> +	MMC_ERR_CMDQ_GCE,
>> +	MMC_ERR_CMDQ_ICCE,
>> +	MMC_ERR_REQ_TIMEOUT,
>> +	MMC_ERR_CMDQ_REQ_TIMEOUT,
>> +	MMC_ERR_ICE_CFG,
>> +	MMC_ERR_MAX,
>> +};
>> +
>>  struct mmc_host_ops {
>>  	/*
>>  	 * It is optional for the host to implement pre_req and post_req in 
>> @@ -500,6 +517,9 @@ struct mmc_host {
>>  
>>  	/* Host Software Queue support */
>>  	bool			hsq_enabled;
>> +	u32                     err_stats[MMC_ERR_MAX];
>> +	bool 			err_stats_enabled;
>> +	bool			err_state;
> 
> Please drop err_state for now
> 
>>>>>>>> first we can check this variable right,  if it is set then we can go and check err_stats[] to know more on type of error (data /cmd timeout or CRC errors etc.).
> Please let me know your opinion on this.

As I wrote above, you could set err_state in mmc_debugfs_err_stats_inc().
But maybe make the err_state addition a separate patch so it is easy to see how it works.

> 
>>  
>>  	unsigned long		private[] ____cacheline_aligned;
>>  };
>> @@ -635,6 +655,17 @@ static inline enum dma_data_direction mmc_get_dma_dir(struct mmc_data *data)
>>  	return data->flags & MMC_DATA_WRITE ? DMA_TO_DEVICE : 
>> DMA_FROM_DEVICE;  }
>>  
>> +static inline void mmc_debugfs_err_stats_enable(struct mmc_host *mmc)
> 
> Please use 'host' as the mmc_host parameter in this file.
> 
>> +{
>> +	mmc->err_state = true;
> 
> Let's make this:
> 
> 	host->err_stats_enabled = true;
> 
>>>>>>> Sure.
> 
>> +}
>> +
>> +static inline void mmc_debugfs_err_stats_inc(struct mmc_host *mmc,
>> +		enum mmc_err_stat stat) {
>> +
> 
> Please remove blank line here
> 
>>>>>>> sure.
> 
>> +	mmc->err_stats[stat] += 1;
>> +}
>> +
>>  int mmc_send_tuning(struct mmc_host *host, u32 opcode, int 
>> *cmd_error);  int mmc_send_abort_tuning(struct mmc_host *host, u32 
>> opcode);  int mmc_get_ext_csd(struct mmc_card *card, u8 
>> **new_ext_csd);
>>
>
Shaik Sajida Bhanu Feb. 8, 2022, 7:04 p.m. UTC | #4
Hi,

Thanks for the review.

Please find the inline comments

Thanks,
Sajida
-----Original Message-----
From: Adrian Hunter <adrian.hunter@intel.com> 
Sent: Tuesday, February 1, 2022 7:28 PM
To: Sajida Bhanu (Temp) <c_sbhanu@qti.qualcomm.com>; Sajida Bhanu (Temp) (QUIC) <quic_c_sbhanu@quicinc.com>; Asutosh Das (QUIC) <quic_asutoshd@quicinc.com>; ulf.hansson@linaro.org; agross@kernel.org; bjorn.andersson@linaro.org; linux-mmc@vger.kernel.org; linux-arm-msm@vger.kernel.org; linux-kernel@vger.kernel.org
Cc: stummala@codeaurora.org; vbadigan@codeaurora.org; Ram Prakash Gupta (QUIC) <quic_rampraka@quicinc.com>; Pradeep Pragallapati (QUIC) <quic_pragalla@quicinc.com>; sartgarg@codeaurora.org; nitirawa@codeaurora.org; sayalil@codeaurora.org; Liangliang Lu <luliang@codeaurora.org>; Bao D . Nguyen <nguyenb@codeaurora.org>
Subject: Re: [PATCH V3 1/4] mmc: sdhci: Capture eMMC and SD card errors

On 25/01/2022 20:17, Sajida Bhanu (Temp) wrote:
> Hi,
> 
> Thanks for  the Review.
> 
> Please find the inline comments.
> 
> Thanks,
> Sajida
> 
> -----Original Message-----
> From: Adrian Hunter <adrian.hunter@intel.com>
> Sent: Friday, January 21, 2022 12:38 PM
> To: Sajida Bhanu (Temp) (QUIC) <quic_c_sbhanu@quicinc.com>; Asutosh 
> Das (QUIC) <quic_asutoshd@quicinc.com>; ulf.hansson@linaro.org; 
> agross@kernel.org; bjorn.andersson@linaro.org; 
> linux-mmc@vger.kernel.org; linux-arm-msm@vger.kernel.org; 
> linux-kernel@vger.kernel.org
> Cc: stummala@codeaurora.org; vbadigan@codeaurora.org; Ram Prakash 
> Gupta (QUIC) <quic_rampraka@quicinc.com>; Pradeep Pragallapati (QUIC) 
> <quic_pragalla@quicinc.com>; sartgarg@codeaurora.org; 
> nitirawa@codeaurora.org; sayalil@codeaurora.org; Liangliang Lu 
> <luliang@codeaurora.org>; Bao D . Nguyen <nguyenb@codeaurora.org>
> Subject: Re: [PATCH V3 1/4] mmc: sdhci: Capture eMMC and SD card 
> errors
> 
> On 20/01/2022 19:26, Shaik Sajida Bhanu wrote:
>> Add changes to capture eMMC and SD card errors.
>> This is useful for debug and testing.
>>
>> Signed-off-by: Shaik Sajida Bhanu <quic_c_sbhanu@quicinc.com>
>> Signed-off-by: Liangliang Lu <luliang@codeaurora.org>
>> Signed-off-by: Sayali Lokhande <sayalil@codeaurora.org>
>> Signed-off-by: Bao D. Nguyen <nguyenb@codeaurora.org>
>> ---
>>  drivers/mmc/host/sdhci-msm.c |  3 ++
>>  drivers/mmc/host/sdhci.c     | 72 ++++++++++++++++++++++++++++++++++++--------
>>  include/linux/mmc/host.h     | 31 +++++++++++++++++++
>>  3 files changed, 94 insertions(+), 12 deletions(-)
>>
>> diff --git a/drivers/mmc/host/sdhci-msm.c 
>> b/drivers/mmc/host/sdhci-msm.c index 50c71e0..309eb7b 100644
>> --- a/drivers/mmc/host/sdhci-msm.c
>> +++ b/drivers/mmc/host/sdhci-msm.c
>> @@ -128,6 +128,8 @@
>>  
>>  #define MSM_MMC_AUTOSUSPEND_DELAY_MS	50
>>  
>> +#define MSM_MMC_ERR_STATS_ENABLE 1
>> +
>>  /* Timeout value to avoid infinite waiting for pwr_irq */  #define 
>> MSM_PWR_IRQ_TIMEOUT_MS 5000
>>  
>> @@ -2734,6 +2736,7 @@ static int sdhci_msm_probe(struct platform_device *pdev)
>>  	if (ret)
>>  		goto pm_runtime_disable;
>>  
>> +	host->mmc->err_stats_enabled = MSM_MMC_ERR_STATS_ENABLE;
> 
> Please remove this. SDHCI will enable error stats.
> 
>>>>>>> Sure.
> 
>>  	pm_runtime_mark_last_busy(&pdev->dev);
>>  	pm_runtime_put_autosuspend(&pdev->dev);
>>  
>> diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c 
>> index 07c6da1..74b356e 100644
>> --- a/drivers/mmc/host/sdhci.c
>> +++ b/drivers/mmc/host/sdhci.c
>> @@ -113,6 +113,8 @@ void sdhci_dumpregs(struct sdhci_host *host)
>>  	if (host->ops->dump_vendor_regs)
>>  		host->ops->dump_vendor_regs(host);
>>  
>> +	if (host->mmc->err_stats_enabled)
>> +		mmc_debugfs_err_stats_enable(host->mmc);
> 
> Please move this to sdhci_setup_host() and call it unconditionally 
> i.e. just
> 
> 	mmc_debugfs_err_stats_enable(host->mmc);
> 
> 
>>>>>>>> mmc_debugfs_err_stats_enable() will set err_state , that means some errors occurred in driver level.
> If we move this call to sdhci_setup_host(), then it will set if no errors also right?

Then it seems like you want to set err_state = true in mmc_debugfs_err_stats_inc()

>>>>>No ..I have updated  err_state = true in sdhci_dumpregs() because if any errors (serious) in driver,  we are calling sdhci_dumpregs().

> 
> 
>>  	SDHCI_DUMP("============================================\n");
>>  }
>>  EXPORT_SYMBOL_GPL(sdhci_dumpregs);
>> @@ -3159,6 +3161,8 @@ static void sdhci_timeout_timer(struct timer_list *t)
>>  	spin_lock_irqsave(&host->lock, flags);
>>  
>>  	if (host->cmd && !sdhci_data_line_cmd(host->cmd)) {
>> +		if (host->mmc && host->mmc->err_stats_enabled)
>> +			mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_REQ_TIMEOUT);
> 
> Please remove the 'if ()', i.e. just make it, unconditionally:
> 
> 		mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_REQ_TIMEOUT);
> 
> Same for other calls to mmc_debugfs_err_stats_inc()
> 
>>>>>>>>> Sure.
> 
>>  		pr_err("%s: Timeout waiting for hardware cmd interrupt.\n",
>>  		       mmc_hostname(host->mmc));
>>  		sdhci_dumpregs(host);
>> @@ -3181,6 +3185,8 @@ static void sdhci_timeout_data_timer(struct 
>> timer_list *t)
>>  
>>  	if (host->data || host->data_cmd ||
>>  	    (host->cmd && sdhci_data_line_cmd(host->cmd))) {
>> +		if (host->mmc && host->mmc->err_stats_enabled)
>> +			mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_REQ_TIMEOUT);
>>  		pr_err("%s: Timeout waiting for hardware interrupt.\n",
>>  		       mmc_hostname(host->mmc));
>>  		sdhci_dumpregs(host);
>> @@ -3240,11 +3246,18 @@ static void sdhci_cmd_irq(struct sdhci_host 
>> *host, u32 intmask, u32 *intmask_p)
>>  
>>  	if (intmask & (SDHCI_INT_TIMEOUT | SDHCI_INT_CRC |
>>  		       SDHCI_INT_END_BIT | SDHCI_INT_INDEX)) {
>> -		if (intmask & SDHCI_INT_TIMEOUT)
>> +		if (intmask & SDHCI_INT_TIMEOUT) {
>>  			host->cmd->error = -ETIMEDOUT;
>> -		else
>> +			if (host->mmc && host->mmc->err_stats_enabled)
>> +				mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_CMD_TIMEOUT);
>> +		} else {
>>  			host->cmd->error = -EILSEQ;
>> -
>> +			if (host->cmd->opcode != MMC_SEND_TUNING_BLOCK ||
>> +					host->cmd->opcode != MMC_SEND_TUNING_BLOCK_HS200) {
>> +				if (host->mmc && host->mmc->err_stats_enabled)
>> +					mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_CMD_CRC);
>> +			}
>> +		}
>>  		/* Treat data command CRC error the same as data CRC error */
>>  		if (host->cmd->data &&
>>  		    (intmask & (SDHCI_INT_CRC | SDHCI_INT_TIMEOUT)) == @@ -3265,6
>> +3278,8 @@ static void sdhci_cmd_irq(struct sdhci_host *host, u32 
>> +intmask, u32 *intmask_p)
>>  		int err = (auto_cmd_status & SDHCI_AUTO_CMD_TIMEOUT) ?
>>  			  -ETIMEDOUT :
>>  			  -EILSEQ;
>> +		if (host->mmc && host->mmc->err_stats_enabled)
>> +			mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_AUTO_CMD);
>>  
>>  		if (sdhci_auto_cmd23(host, mrq)) {
>>  			mrq->sbc->error = err;
>> @@ -3342,6 +3357,8 @@ static void sdhci_data_irq(struct sdhci_host *host, u32 intmask)
>>  			if (intmask & SDHCI_INT_DATA_TIMEOUT) {
>>  				host->data_cmd = NULL;
>>  				data_cmd->error = -ETIMEDOUT;
>> +				if (host->mmc && host->mmc->err_stats_enabled)
>> +					mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_CMD_TIMEOUT);
>>  				__sdhci_finish_mrq(host, data_cmd->mrq);
>>  				return;
>>  			}
>> @@ -3375,18 +3392,29 @@ static void sdhci_data_irq(struct sdhci_host *host, u32 intmask)
>>  		return;
>>  	}
>>  
>> -	if (intmask & SDHCI_INT_DATA_TIMEOUT)
>> +	if (intmask & SDHCI_INT_DATA_TIMEOUT) {
>>  		host->data->error = -ETIMEDOUT;
>> +		if (host->mmc && host->mmc->err_stats_enabled)
>> +			mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_DAT_TIMEOUT);
>> +	}
>>  	else if (intmask & SDHCI_INT_DATA_END_BIT)
>>  		host->data->error = -EILSEQ;
>>  	else if ((intmask & SDHCI_INT_DATA_CRC) &&
>>  		SDHCI_GET_CMD(sdhci_readw(host, SDHCI_COMMAND))
>> -			!= MMC_BUS_TEST_R)
>> +			!= MMC_BUS_TEST_R) {
>>  		host->data->error = -EILSEQ;
>> +		if (host->cmd->opcode != MMC_SEND_TUNING_BLOCK ||
>> +				host->cmd->opcode != MMC_SEND_TUNING_BLOCK_HS200) {
>> +			if (host->mmc && host->mmc->err_stats_enabled)
>> +				mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_DAT_CRC);
>> +		}
>> +	}
>>  	else if (intmask & SDHCI_INT_ADMA_ERROR) {
>>  		pr_err("%s: ADMA error: 0x%08x\n", mmc_hostname(host->mmc),
>>  		       intmask);
>>  		sdhci_adma_show_error(host);
>> +		if (host->mmc && host->mmc->err_stats_enabled)
>> +			mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_ADMA);
>>  		host->data->error = -EIO;
>>  		if (host->ops->adma_workaround)
>>  			host->ops->adma_workaround(host, intmask); @@ -3905,20 +3933,40 
>> @@ bool sdhci_cqe_irq(struct sdhci_host *host, u32 intmask, int *cmd_error,
>>  	if (!host->cqe_on)
>>  		return false;
>>  
>> -	if (intmask & (SDHCI_INT_INDEX | SDHCI_INT_END_BIT | SDHCI_INT_CRC))
>> +	if (intmask & (SDHCI_INT_INDEX | SDHCI_INT_END_BIT | 
>> +SDHCI_INT_CRC)) {
>>  		*cmd_error = -EILSEQ;
>> -	else if (intmask & SDHCI_INT_TIMEOUT)
>> +		if (intmask & SDHCI_INT_CRC) {
>> +			if (host->cmd->opcode != MMC_SEND_TUNING_BLOCK ||
>> +					host->cmd->opcode != MMC_SEND_TUNING_BLOCK_HS200) {
>> +				if (host->mmc && host->mmc->err_stats_enabled)
>> +					mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_CMD_CRC);
>> +			}
>> +		}
>> +	} else if (intmask & SDHCI_INT_TIMEOUT) {
>>  		*cmd_error = -ETIMEDOUT;
>> -	else
>> +		if (host->mmc && host->mmc->err_stats_enabled)
>> +			mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_CMD_TIMEOUT);
>> +	} else
>>  		*cmd_error = 0;
>>  
>> -	if (intmask & (SDHCI_INT_DATA_END_BIT | SDHCI_INT_DATA_CRC))
>> +	if (intmask & (SDHCI_INT_DATA_END_BIT | SDHCI_INT_DATA_CRC)) {
>>  		*data_error = -EILSEQ;
>> -	else if (intmask & SDHCI_INT_DATA_TIMEOUT)
>> +		if (intmask & SDHCI_INT_DATA_CRC) {
>> +			if (host->cmd->opcode != MMC_SEND_TUNING_BLOCK ||
>> +					host->cmd->opcode != MMC_SEND_TUNING_BLOCK_HS200) {
>> +				if (host->mmc && host->mmc->err_stats_enabled)
>> +					mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_DAT_CRC);
>> +			}
>> +		}
>> +	} else if (intmask & SDHCI_INT_DATA_TIMEOUT) {
>>  		*data_error = -ETIMEDOUT;
>> -	else if (intmask & SDHCI_INT_ADMA_ERROR)
>> +		if (host->mmc && host->mmc->err_stats_enabled)
>> +			mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_DAT_TIMEOUT);
>> +	} else if (intmask & SDHCI_INT_ADMA_ERROR) {
>>  		*data_error = -EIO;
>> -	else
>> +		if (host->mmc && host->mmc->err_stats_enabled)
>> +			mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_ADMA);
>> +	} else
>>  		*data_error = 0;
>>  
>>  	/* Clear selected interrupts. */
>> diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h
> 
> Changes to host.h are core changes and belong in patch 3, which should be the first patch.
> 
>>>>>> Sure.
> 
>> index 7afb57c..883b50b 100644
>> --- a/include/linux/mmc/host.h
>> +++ b/include/linux/mmc/host.h
>> @@ -93,6 +93,23 @@ struct mmc_clk_phase_map {
>>  
>>  struct mmc_host;
>>  
>> +enum mmc_err_stat {
>> +	MMC_ERR_CMD_TIMEOUT,
>> +	MMC_ERR_CMD_CRC,
>> +	MMC_ERR_DAT_TIMEOUT,
>> +	MMC_ERR_DAT_CRC,
>> +	MMC_ERR_AUTO_CMD,
>> +	MMC_ERR_ADMA,
>> +	MMC_ERR_TUNING,
>> +	MMC_ERR_CMDQ_RED,
>> +	MMC_ERR_CMDQ_GCE,
>> +	MMC_ERR_CMDQ_ICCE,
>> +	MMC_ERR_REQ_TIMEOUT,
>> +	MMC_ERR_CMDQ_REQ_TIMEOUT,
>> +	MMC_ERR_ICE_CFG,
>> +	MMC_ERR_MAX,
>> +};
>> +
>>  struct mmc_host_ops {
>>  	/*
>>  	 * It is optional for the host to implement pre_req and post_req in 
>> @@ -500,6 +517,9 @@ struct mmc_host {
>>  
>>  	/* Host Software Queue support */
>>  	bool			hsq_enabled;
>> +	u32                     err_stats[MMC_ERR_MAX];
>> +	bool 			err_stats_enabled;
>> +	bool			err_state;
> 
> Please drop err_state for now
> 
>>>>>>>> first we can check this variable right,  if it is set then we can go and check err_stats[] to know more on type of error (data /cmd timeout or CRC errors etc.).
> Please let me know your opinion on this.

As I wrote above, you could set err_state in mmc_debugfs_err_stats_inc().
But maybe make the err_state addition a separate patch so it is easy to see how it works.

>>>>> Sure will post separate patch for err_state settings.

> 
>>  
>>  	unsigned long		private[] ____cacheline_aligned;
>>  };
>> @@ -635,6 +655,17 @@ static inline enum dma_data_direction mmc_get_dma_dir(struct mmc_data *data)
>>  	return data->flags & MMC_DATA_WRITE ? DMA_TO_DEVICE : 
>> DMA_FROM_DEVICE;  }
>>  
>> +static inline void mmc_debugfs_err_stats_enable(struct mmc_host 
>> +*mmc)
> 
> Please use 'host' as the mmc_host parameter in this file.
> 
>> +{
>> +	mmc->err_state = true;
> 
> Let's make this:
> 
> 	host->err_stats_enabled = true;
> 
>>>>>>> Sure.
> 
>> +}
>> +
>> +static inline void mmc_debugfs_err_stats_inc(struct mmc_host *mmc,
>> +		enum mmc_err_stat stat) {
>> +
> 
> Please remove blank line here
> 
>>>>>>> sure.
> 
>> +	mmc->err_stats[stat] += 1;
>> +}
>> +
>>  int mmc_send_tuning(struct mmc_host *host, u32 opcode, int 
>> *cmd_error);  int mmc_send_abort_tuning(struct mmc_host *host, u32 
>> opcode);  int mmc_get_ext_csd(struct mmc_card *card, u8 
>> **new_ext_csd);
>>
>
Adrian Hunter Feb. 11, 2022, 5:51 a.m. UTC | #5
On 08/02/2022 21:04, Sajida Bhanu (Temp) (QUIC) wrote:
> Hi,
> 
> Thanks for the review.
> 
> Please find the inline comments
> 
> Thanks,
> Sajida
> -----Original Message-----
> From: Adrian Hunter <adrian.hunter@intel.com> 
> Sent: Tuesday, February 1, 2022 7:28 PM
> To: Sajida Bhanu (Temp) <c_sbhanu@qti.qualcomm.com>; Sajida Bhanu (Temp) (QUIC) <quic_c_sbhanu@quicinc.com>; Asutosh Das (QUIC) <quic_asutoshd@quicinc.com>; ulf.hansson@linaro.org; agross@kernel.org; bjorn.andersson@linaro.org; linux-mmc@vger.kernel.org; linux-arm-msm@vger.kernel.org; linux-kernel@vger.kernel.org
> Cc: stummala@codeaurora.org; vbadigan@codeaurora.org; Ram Prakash Gupta (QUIC) <quic_rampraka@quicinc.com>; Pradeep Pragallapati (QUIC) <quic_pragalla@quicinc.com>; sartgarg@codeaurora.org; nitirawa@codeaurora.org; sayalil@codeaurora.org; Liangliang Lu <luliang@codeaurora.org>; Bao D . Nguyen <nguyenb@codeaurora.org>
> Subject: Re: [PATCH V3 1/4] mmc: sdhci: Capture eMMC and SD card errors
> 
> On 25/01/2022 20:17, Sajida Bhanu (Temp) wrote:
>> Hi,
>>
>> Thanks for  the Review.
>>
>> Please find the inline comments.
>>
>> Thanks,
>> Sajida
>>
>> -----Original Message-----
>> From: Adrian Hunter <adrian.hunter@intel.com>
>> Sent: Friday, January 21, 2022 12:38 PM
>> To: Sajida Bhanu (Temp) (QUIC) <quic_c_sbhanu@quicinc.com>; Asutosh 
>> Das (QUIC) <quic_asutoshd@quicinc.com>; ulf.hansson@linaro.org; 
>> agross@kernel.org; bjorn.andersson@linaro.org; 
>> linux-mmc@vger.kernel.org; linux-arm-msm@vger.kernel.org; 
>> linux-kernel@vger.kernel.org
>> Cc: stummala@codeaurora.org; vbadigan@codeaurora.org; Ram Prakash 
>> Gupta (QUIC) <quic_rampraka@quicinc.com>; Pradeep Pragallapati (QUIC) 
>> <quic_pragalla@quicinc.com>; sartgarg@codeaurora.org; 
>> nitirawa@codeaurora.org; sayalil@codeaurora.org; Liangliang Lu 
>> <luliang@codeaurora.org>; Bao D . Nguyen <nguyenb@codeaurora.org>
>> Subject: Re: [PATCH V3 1/4] mmc: sdhci: Capture eMMC and SD card 
>> errors
>>
>> On 20/01/2022 19:26, Shaik Sajida Bhanu wrote:
>>> Add changes to capture eMMC and SD card errors.
>>> This is useful for debug and testing.
>>>
>>> Signed-off-by: Shaik Sajida Bhanu <quic_c_sbhanu@quicinc.com>
>>> Signed-off-by: Liangliang Lu <luliang@codeaurora.org>
>>> Signed-off-by: Sayali Lokhande <sayalil@codeaurora.org>
>>> Signed-off-by: Bao D. Nguyen <nguyenb@codeaurora.org>
>>> ---
>>>  drivers/mmc/host/sdhci-msm.c |  3 ++
>>>  drivers/mmc/host/sdhci.c     | 72 ++++++++++++++++++++++++++++++++++++--------
>>>  include/linux/mmc/host.h     | 31 +++++++++++++++++++
>>>  3 files changed, 94 insertions(+), 12 deletions(-)
>>>
>>> diff --git a/drivers/mmc/host/sdhci-msm.c 
>>> b/drivers/mmc/host/sdhci-msm.c index 50c71e0..309eb7b 100644
>>> --- a/drivers/mmc/host/sdhci-msm.c
>>> +++ b/drivers/mmc/host/sdhci-msm.c
>>> @@ -128,6 +128,8 @@
>>>  
>>>  #define MSM_MMC_AUTOSUSPEND_DELAY_MS	50
>>>  
>>> +#define MSM_MMC_ERR_STATS_ENABLE 1
>>> +
>>>  /* Timeout value to avoid infinite waiting for pwr_irq */  #define 
>>> MSM_PWR_IRQ_TIMEOUT_MS 5000
>>>  
>>> @@ -2734,6 +2736,7 @@ static int sdhci_msm_probe(struct platform_device *pdev)
>>>  	if (ret)
>>>  		goto pm_runtime_disable;
>>>  
>>> +	host->mmc->err_stats_enabled = MSM_MMC_ERR_STATS_ENABLE;
>>
>> Please remove this. SDHCI will enable error stats.
>>
>>>>>>>> Sure.
>>
>>>  	pm_runtime_mark_last_busy(&pdev->dev);
>>>  	pm_runtime_put_autosuspend(&pdev->dev);
>>>  
>>> diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c 
>>> index 07c6da1..74b356e 100644
>>> --- a/drivers/mmc/host/sdhci.c
>>> +++ b/drivers/mmc/host/sdhci.c
>>> @@ -113,6 +113,8 @@ void sdhci_dumpregs(struct sdhci_host *host)
>>>  	if (host->ops->dump_vendor_regs)
>>>  		host->ops->dump_vendor_regs(host);
>>>  
>>> +	if (host->mmc->err_stats_enabled)
>>> +		mmc_debugfs_err_stats_enable(host->mmc);
>>
>> Please move this to sdhci_setup_host() and call it unconditionally 
>> i.e. just
>>
>> 	mmc_debugfs_err_stats_enable(host->mmc);
>>
>>
>>>>>>>>> mmc_debugfs_err_stats_enable() will set err_state , that means some errors occurred in driver level.
>> If we move this call to sdhci_setup_host(), then it will set if no errors also right?
> 
> Then it seems like you want to set err_state = true in mmc_debugfs_err_stats_inc()
> 
>>>>>> No ..I have updated  err_state = true in sdhci_dumpregs() because if any errors (serious) in driver,  we are calling sdhci_dumpregs().

I see, but it is not OK to mix up the register dump with error logging.
Perhaps add another error type and increment that when needed.

> 
>>
>>
>>>  	SDHCI_DUMP("============================================\n");
>>>  }
>>>  EXPORT_SYMBOL_GPL(sdhci_dumpregs);
>>> @@ -3159,6 +3161,8 @@ static void sdhci_timeout_timer(struct timer_list *t)
>>>  	spin_lock_irqsave(&host->lock, flags);
>>>  
>>>  	if (host->cmd && !sdhci_data_line_cmd(host->cmd)) {
>>> +		if (host->mmc && host->mmc->err_stats_enabled)
>>> +			mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_REQ_TIMEOUT);
>>
>> Please remove the 'if ()', i.e. just make it, unconditionally:
>>
>> 		mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_REQ_TIMEOUT);
>>
>> Same for other calls to mmc_debugfs_err_stats_inc()
>>
>>>>>>>>>> Sure.
>>
>>>  		pr_err("%s: Timeout waiting for hardware cmd interrupt.\n",
>>>  		       mmc_hostname(host->mmc));
>>>  		sdhci_dumpregs(host);
>>> @@ -3181,6 +3185,8 @@ static void sdhci_timeout_data_timer(struct 
>>> timer_list *t)
>>>  
>>>  	if (host->data || host->data_cmd ||
>>>  	    (host->cmd && sdhci_data_line_cmd(host->cmd))) {
>>> +		if (host->mmc && host->mmc->err_stats_enabled)
>>> +			mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_REQ_TIMEOUT);
>>>  		pr_err("%s: Timeout waiting for hardware interrupt.\n",
>>>  		       mmc_hostname(host->mmc));
>>>  		sdhci_dumpregs(host);
>>> @@ -3240,11 +3246,18 @@ static void sdhci_cmd_irq(struct sdhci_host 
>>> *host, u32 intmask, u32 *intmask_p)
>>>  
>>>  	if (intmask & (SDHCI_INT_TIMEOUT | SDHCI_INT_CRC |
>>>  		       SDHCI_INT_END_BIT | SDHCI_INT_INDEX)) {
>>> -		if (intmask & SDHCI_INT_TIMEOUT)
>>> +		if (intmask & SDHCI_INT_TIMEOUT) {
>>>  			host->cmd->error = -ETIMEDOUT;
>>> -		else
>>> +			if (host->mmc && host->mmc->err_stats_enabled)
>>> +				mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_CMD_TIMEOUT);
>>> +		} else {
>>>  			host->cmd->error = -EILSEQ;
>>> -
>>> +			if (host->cmd->opcode != MMC_SEND_TUNING_BLOCK ||
>>> +					host->cmd->opcode != MMC_SEND_TUNING_BLOCK_HS200) {
>>> +				if (host->mmc && host->mmc->err_stats_enabled)
>>> +					mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_CMD_CRC);
>>> +			}
>>> +		}
>>>  		/* Treat data command CRC error the same as data CRC error */
>>>  		if (host->cmd->data &&
>>>  		    (intmask & (SDHCI_INT_CRC | SDHCI_INT_TIMEOUT)) == @@ -3265,6
>>> +3278,8 @@ static void sdhci_cmd_irq(struct sdhci_host *host, u32 
>>> +intmask, u32 *intmask_p)
>>>  		int err = (auto_cmd_status & SDHCI_AUTO_CMD_TIMEOUT) ?
>>>  			  -ETIMEDOUT :
>>>  			  -EILSEQ;
>>> +		if (host->mmc && host->mmc->err_stats_enabled)
>>> +			mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_AUTO_CMD);
>>>  
>>>  		if (sdhci_auto_cmd23(host, mrq)) {
>>>  			mrq->sbc->error = err;
>>> @@ -3342,6 +3357,8 @@ static void sdhci_data_irq(struct sdhci_host *host, u32 intmask)
>>>  			if (intmask & SDHCI_INT_DATA_TIMEOUT) {
>>>  				host->data_cmd = NULL;
>>>  				data_cmd->error = -ETIMEDOUT;
>>> +				if (host->mmc && host->mmc->err_stats_enabled)
>>> +					mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_CMD_TIMEOUT);
>>>  				__sdhci_finish_mrq(host, data_cmd->mrq);
>>>  				return;
>>>  			}
>>> @@ -3375,18 +3392,29 @@ static void sdhci_data_irq(struct sdhci_host *host, u32 intmask)
>>>  		return;
>>>  	}
>>>  
>>> -	if (intmask & SDHCI_INT_DATA_TIMEOUT)
>>> +	if (intmask & SDHCI_INT_DATA_TIMEOUT) {
>>>  		host->data->error = -ETIMEDOUT;
>>> +		if (host->mmc && host->mmc->err_stats_enabled)
>>> +			mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_DAT_TIMEOUT);
>>> +	}
>>>  	else if (intmask & SDHCI_INT_DATA_END_BIT)
>>>  		host->data->error = -EILSEQ;
>>>  	else if ((intmask & SDHCI_INT_DATA_CRC) &&
>>>  		SDHCI_GET_CMD(sdhci_readw(host, SDHCI_COMMAND))
>>> -			!= MMC_BUS_TEST_R)
>>> +			!= MMC_BUS_TEST_R) {
>>>  		host->data->error = -EILSEQ;
>>> +		if (host->cmd->opcode != MMC_SEND_TUNING_BLOCK ||
>>> +				host->cmd->opcode != MMC_SEND_TUNING_BLOCK_HS200) {
>>> +			if (host->mmc && host->mmc->err_stats_enabled)
>>> +				mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_DAT_CRC);
>>> +		}
>>> +	}
>>>  	else if (intmask & SDHCI_INT_ADMA_ERROR) {
>>>  		pr_err("%s: ADMA error: 0x%08x\n", mmc_hostname(host->mmc),
>>>  		       intmask);
>>>  		sdhci_adma_show_error(host);
>>> +		if (host->mmc && host->mmc->err_stats_enabled)
>>> +			mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_ADMA);
>>>  		host->data->error = -EIO;
>>>  		if (host->ops->adma_workaround)
>>>  			host->ops->adma_workaround(host, intmask); @@ -3905,20 +3933,40 
>>> @@ bool sdhci_cqe_irq(struct sdhci_host *host, u32 intmask, int *cmd_error,
>>>  	if (!host->cqe_on)
>>>  		return false;
>>>  
>>> -	if (intmask & (SDHCI_INT_INDEX | SDHCI_INT_END_BIT | SDHCI_INT_CRC))
>>> +	if (intmask & (SDHCI_INT_INDEX | SDHCI_INT_END_BIT | 
>>> +SDHCI_INT_CRC)) {
>>>  		*cmd_error = -EILSEQ;
>>> -	else if (intmask & SDHCI_INT_TIMEOUT)
>>> +		if (intmask & SDHCI_INT_CRC) {
>>> +			if (host->cmd->opcode != MMC_SEND_TUNING_BLOCK ||
>>> +					host->cmd->opcode != MMC_SEND_TUNING_BLOCK_HS200) {
>>> +				if (host->mmc && host->mmc->err_stats_enabled)
>>> +					mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_CMD_CRC);
>>> +			}
>>> +		}
>>> +	} else if (intmask & SDHCI_INT_TIMEOUT) {
>>>  		*cmd_error = -ETIMEDOUT;
>>> -	else
>>> +		if (host->mmc && host->mmc->err_stats_enabled)
>>> +			mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_CMD_TIMEOUT);
>>> +	} else
>>>  		*cmd_error = 0;
>>>  
>>> -	if (intmask & (SDHCI_INT_DATA_END_BIT | SDHCI_INT_DATA_CRC))
>>> +	if (intmask & (SDHCI_INT_DATA_END_BIT | SDHCI_INT_DATA_CRC)) {
>>>  		*data_error = -EILSEQ;
>>> -	else if (intmask & SDHCI_INT_DATA_TIMEOUT)
>>> +		if (intmask & SDHCI_INT_DATA_CRC) {
>>> +			if (host->cmd->opcode != MMC_SEND_TUNING_BLOCK ||
>>> +					host->cmd->opcode != MMC_SEND_TUNING_BLOCK_HS200) {
>>> +				if (host->mmc && host->mmc->err_stats_enabled)
>>> +					mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_DAT_CRC);
>>> +			}
>>> +		}
>>> +	} else if (intmask & SDHCI_INT_DATA_TIMEOUT) {
>>>  		*data_error = -ETIMEDOUT;
>>> -	else if (intmask & SDHCI_INT_ADMA_ERROR)
>>> +		if (host->mmc && host->mmc->err_stats_enabled)
>>> +			mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_DAT_TIMEOUT);
>>> +	} else if (intmask & SDHCI_INT_ADMA_ERROR) {
>>>  		*data_error = -EIO;
>>> -	else
>>> +		if (host->mmc && host->mmc->err_stats_enabled)
>>> +			mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_ADMA);
>>> +	} else
>>>  		*data_error = 0;
>>>  
>>>  	/* Clear selected interrupts. */
>>> diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h
>>
>> Changes to host.h are core changes and belong in patch 3, which should be the first patch.
>>
>>>>>>> Sure.
>>
>>> index 7afb57c..883b50b 100644
>>> --- a/include/linux/mmc/host.h
>>> +++ b/include/linux/mmc/host.h
>>> @@ -93,6 +93,23 @@ struct mmc_clk_phase_map {
>>>  
>>>  struct mmc_host;
>>>  
>>> +enum mmc_err_stat {
>>> +	MMC_ERR_CMD_TIMEOUT,
>>> +	MMC_ERR_CMD_CRC,
>>> +	MMC_ERR_DAT_TIMEOUT,
>>> +	MMC_ERR_DAT_CRC,
>>> +	MMC_ERR_AUTO_CMD,
>>> +	MMC_ERR_ADMA,
>>> +	MMC_ERR_TUNING,
>>> +	MMC_ERR_CMDQ_RED,
>>> +	MMC_ERR_CMDQ_GCE,
>>> +	MMC_ERR_CMDQ_ICCE,
>>> +	MMC_ERR_REQ_TIMEOUT,
>>> +	MMC_ERR_CMDQ_REQ_TIMEOUT,
>>> +	MMC_ERR_ICE_CFG,
>>> +	MMC_ERR_MAX,
>>> +};
>>> +
>>>  struct mmc_host_ops {
>>>  	/*
>>>  	 * It is optional for the host to implement pre_req and post_req in 
>>> @@ -500,6 +517,9 @@ struct mmc_host {
>>>  
>>>  	/* Host Software Queue support */
>>>  	bool			hsq_enabled;
>>> +	u32                     err_stats[MMC_ERR_MAX];
>>> +	bool 			err_stats_enabled;
>>> +	bool			err_state;
>>
>> Please drop err_state for now
>>
>>>>>>>>> first we can check this variable right,  if it is set then we can go and check err_stats[] to know more on type of error (data /cmd timeout or CRC errors etc.).
>> Please let me know your opinion on this.
> 
> As I wrote above, you could set err_state in mmc_debugfs_err_stats_inc().
> But maybe make the err_state addition a separate patch so it is easy to see how it works.
> 
>>>>>> Sure will post separate patch for err_state settings.
> 
>>
>>>  
>>>  	unsigned long		private[] ____cacheline_aligned;
>>>  };
>>> @@ -635,6 +655,17 @@ static inline enum dma_data_direction mmc_get_dma_dir(struct mmc_data *data)
>>>  	return data->flags & MMC_DATA_WRITE ? DMA_TO_DEVICE : 
>>> DMA_FROM_DEVICE;  }
>>>  
>>> +static inline void mmc_debugfs_err_stats_enable(struct mmc_host 
>>> +*mmc)
>>
>> Please use 'host' as the mmc_host parameter in this file.
>>
>>> +{
>>> +	mmc->err_state = true;
>>
>> Let's make this:
>>
>> 	host->err_stats_enabled = true;
>>
>>>>>>>> Sure.
>>
>>> +}
>>> +
>>> +static inline void mmc_debugfs_err_stats_inc(struct mmc_host *mmc,
>>> +		enum mmc_err_stat stat) {
>>> +
>>
>> Please remove blank line here
>>
>>>>>>>> sure.
>>
>>> +	mmc->err_stats[stat] += 1;
>>> +}
>>> +
>>>  int mmc_send_tuning(struct mmc_host *host, u32 opcode, int 
>>> *cmd_error);  int mmc_send_abort_tuning(struct mmc_host *host, u32 
>>> opcode);  int mmc_get_ext_csd(struct mmc_card *card, u8 
>>> **new_ext_csd);
>>>
>>
>
Sajida Bhanu (Temp) Feb. 15, 2022, 12:28 p.m. UTC | #6
Hi Adrian,

Thanks for the review.

Please find the inline comments.

Thanks,
Sajida
-----Original Message-----
From: Adrian Hunter <adrian.hunter@intel.com> 
Sent: Friday, February 11, 2022 11:21 AM
To: Sajida Bhanu (Temp) (QUIC) <quic_c_sbhanu@quicinc.com>; Asutosh Das (QUIC) <quic_asutoshd@quicinc.com>; ulf.hansson@linaro.org; agross@kernel.org; bjorn.andersson@linaro.org; linux-mmc@vger.kernel.org; linux-arm-msm@vger.kernel.org; linux-kernel@vger.kernel.org
Cc: stummala@codeaurora.org; vbadigan@codeaurora.org; Ram Prakash Gupta (QUIC) <quic_rampraka@quicinc.com>; Pradeep Pragallapati (QUIC) <quic_pragalla@quicinc.com>; sartgarg@codeaurora.org; nitirawa@codeaurora.org; sayalil@codeaurora.org; Liangliang Lu <luliang@codeaurora.org>; Bao D . Nguyen <nguyenb@codeaurora.org>
Subject: Re: [PATCH V3 1/4] mmc: sdhci: Capture eMMC and SD card errors

WARNING: This email originated from outside of Qualcomm. Please be wary of any links or attachments, and do not enable macros.

On 08/02/2022 21:04, Sajida Bhanu (Temp) (QUIC) wrote:
> Hi,
>
> Thanks for the review.
>
> Please find the inline comments
>
> Thanks,
> Sajida
> -----Original Message-----
> From: Adrian Hunter <adrian.hunter@intel.com>
> Sent: Tuesday, February 1, 2022 7:28 PM
> To: Sajida Bhanu (Temp) <c_sbhanu@qti.qualcomm.com>; Sajida Bhanu 
> (Temp) (QUIC) <quic_c_sbhanu@quicinc.com>; Asutosh Das (QUIC) 
> <quic_asutoshd@quicinc.com>; ulf.hansson@linaro.org; 
> agross@kernel.org; bjorn.andersson@linaro.org; 
> linux-mmc@vger.kernel.org; linux-arm-msm@vger.kernel.org; 
> linux-kernel@vger.kernel.org
> Cc: stummala@codeaurora.org; vbadigan@codeaurora.org; Ram Prakash 
> Gupta (QUIC) <quic_rampraka@quicinc.com>; Pradeep Pragallapati (QUIC) 
> <quic_pragalla@quicinc.com>; sartgarg@codeaurora.org; 
> nitirawa@codeaurora.org; sayalil@codeaurora.org; Liangliang Lu 
> <luliang@codeaurora.org>; Bao D . Nguyen <nguyenb@codeaurora.org>
> Subject: Re: [PATCH V3 1/4] mmc: sdhci: Capture eMMC and SD card 
> errors
>
> On 25/01/2022 20:17, Sajida Bhanu (Temp) wrote:
>> Hi,
>>
>> Thanks for  the Review.
>>
>> Please find the inline comments.
>>
>> Thanks,
>> Sajida
>>
>> -----Original Message-----
>> From: Adrian Hunter <adrian.hunter@intel.com>
>> Sent: Friday, January 21, 2022 12:38 PM
>> To: Sajida Bhanu (Temp) (QUIC) <quic_c_sbhanu@quicinc.com>; Asutosh 
>> Das (QUIC) <quic_asutoshd@quicinc.com>; ulf.hansson@linaro.org; 
>> agross@kernel.org; bjorn.andersson@linaro.org; 
>> linux-mmc@vger.kernel.org; linux-arm-msm@vger.kernel.org; 
>> linux-kernel@vger.kernel.org
>> Cc: stummala@codeaurora.org; vbadigan@codeaurora.org; Ram Prakash 
>> Gupta (QUIC) <quic_rampraka@quicinc.com>; Pradeep Pragallapati (QUIC) 
>> <quic_pragalla@quicinc.com>; sartgarg@codeaurora.org; 
>> nitirawa@codeaurora.org; sayalil@codeaurora.org; Liangliang Lu 
>> <luliang@codeaurora.org>; Bao D . Nguyen <nguyenb@codeaurora.org>
>> Subject: Re: [PATCH V3 1/4] mmc: sdhci: Capture eMMC and SD card 
>> errors
>>
>> On 20/01/2022 19:26, Shaik Sajida Bhanu wrote:
>>> Add changes to capture eMMC and SD card errors.
>>> This is useful for debug and testing.
>>>
>>> Signed-off-by: Shaik Sajida Bhanu <quic_c_sbhanu@quicinc.com>
>>> Signed-off-by: Liangliang Lu <luliang@codeaurora.org>
>>> Signed-off-by: Sayali Lokhande <sayalil@codeaurora.org>
>>> Signed-off-by: Bao D. Nguyen <nguyenb@codeaurora.org>
>>> ---
>>>  drivers/mmc/host/sdhci-msm.c |  3 ++
>>>  drivers/mmc/host/sdhci.c     | 72 ++++++++++++++++++++++++++++++++++++--------
>>>  include/linux/mmc/host.h     | 31 +++++++++++++++++++
>>>  3 files changed, 94 insertions(+), 12 deletions(-)
>>>
>>> diff --git a/drivers/mmc/host/sdhci-msm.c 
>>> b/drivers/mmc/host/sdhci-msm.c index 50c71e0..309eb7b 100644
>>> --- a/drivers/mmc/host/sdhci-msm.c
>>> +++ b/drivers/mmc/host/sdhci-msm.c
>>> @@ -128,6 +128,8 @@
>>>
>>>  #define MSM_MMC_AUTOSUSPEND_DELAY_MS       50
>>>
>>> +#define MSM_MMC_ERR_STATS_ENABLE 1
>>> +
>>>  /* Timeout value to avoid infinite waiting for pwr_irq */  #define 
>>> MSM_PWR_IRQ_TIMEOUT_MS 5000
>>>
>>> @@ -2734,6 +2736,7 @@ static int sdhci_msm_probe(struct platform_device *pdev)
>>>     if (ret)
>>>             goto pm_runtime_disable;
>>>
>>> +   host->mmc->err_stats_enabled = MSM_MMC_ERR_STATS_ENABLE;
>>
>> Please remove this. SDHCI will enable error stats.
>>
>>>>>>>> Sure.
>>
>>>     pm_runtime_mark_last_busy(&pdev->dev);
>>>     pm_runtime_put_autosuspend(&pdev->dev);
>>>
>>> diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c 
>>> index 07c6da1..74b356e 100644
>>> --- a/drivers/mmc/host/sdhci.c
>>> +++ b/drivers/mmc/host/sdhci.c
>>> @@ -113,6 +113,8 @@ void sdhci_dumpregs(struct sdhci_host *host)
>>>     if (host->ops->dump_vendor_regs)
>>>             host->ops->dump_vendor_regs(host);
>>>
>>> +   if (host->mmc->err_stats_enabled)
>>> +           mmc_debugfs_err_stats_enable(host->mmc);
>>
>> Please move this to sdhci_setup_host() and call it unconditionally 
>> i.e. just
>>
>>      mmc_debugfs_err_stats_enable(host->mmc);
>>
>>
>>>>>>>>> mmc_debugfs_err_stats_enable() will set err_state , that means some errors occurred in driver level.
>> If we move this call to sdhci_setup_host(), then it will set if no errors also right?
>
> Then it seems like you want to set err_state = true in 
> mmc_debugfs_err_stats_inc()
>
>>>>>> No ..I have updated  err_state = true in sdhci_dumpregs() because if any errors (serious) in driver,  we are calling sdhci_dumpregs().

I see, but it is not OK to mix up the register dump with error logging.
Perhaps add another error type and increment that when needed.

>>>>> okay sure.
>
>>
>>
>>>     SDHCI_DUMP("============================================\n");
>>>  }
>>>  EXPORT_SYMBOL_GPL(sdhci_dumpregs);
>>> @@ -3159,6 +3161,8 @@ static void sdhci_timeout_timer(struct timer_list *t)
>>>     spin_lock_irqsave(&host->lock, flags);
>>>
>>>     if (host->cmd && !sdhci_data_line_cmd(host->cmd)) {
>>> +           if (host->mmc && host->mmc->err_stats_enabled)
>>> +                   mmc_debugfs_err_stats_inc(host->mmc, 
>>> + MMC_ERR_REQ_TIMEOUT);
>>
>> Please remove the 'if ()', i.e. just make it, unconditionally:
>>
>>              mmc_debugfs_err_stats_inc(host->mmc, 
>> MMC_ERR_REQ_TIMEOUT);
>>
>> Same for other calls to mmc_debugfs_err_stats_inc()
>>
>>>>>>>>>> Sure.
>>
>>>             pr_err("%s: Timeout waiting for hardware cmd interrupt.\n",
>>>                    mmc_hostname(host->mmc));
>>>             sdhci_dumpregs(host);
>>> @@ -3181,6 +3185,8 @@ static void sdhci_timeout_data_timer(struct 
>>> timer_list *t)
>>>
>>>     if (host->data || host->data_cmd ||
>>>         (host->cmd && sdhci_data_line_cmd(host->cmd))) {
>>> +           if (host->mmc && host->mmc->err_stats_enabled)
>>> +                   mmc_debugfs_err_stats_inc(host->mmc, 
>>> + MMC_ERR_REQ_TIMEOUT);
>>>             pr_err("%s: Timeout waiting for hardware interrupt.\n",
>>>                    mmc_hostname(host->mmc));
>>>             sdhci_dumpregs(host);
>>> @@ -3240,11 +3246,18 @@ static void sdhci_cmd_irq(struct sdhci_host 
>>> *host, u32 intmask, u32 *intmask_p)
>>>
>>>     if (intmask & (SDHCI_INT_TIMEOUT | SDHCI_INT_CRC |
>>>                    SDHCI_INT_END_BIT | SDHCI_INT_INDEX)) {
>>> -           if (intmask & SDHCI_INT_TIMEOUT)
>>> +           if (intmask & SDHCI_INT_TIMEOUT) {
>>>                     host->cmd->error = -ETIMEDOUT;
>>> -           else
>>> +                   if (host->mmc && host->mmc->err_stats_enabled)
>>> +                           mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_CMD_TIMEOUT);
>>> +           } else {
>>>                     host->cmd->error = -EILSEQ;
>>> -
>>> +                   if (host->cmd->opcode != MMC_SEND_TUNING_BLOCK ||
>>> +                                   host->cmd->opcode != MMC_SEND_TUNING_BLOCK_HS200) {
>>> +                           if (host->mmc && host->mmc->err_stats_enabled)
>>> +                                   mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_CMD_CRC);
>>> +                   }
>>> +           }
>>>             /* Treat data command CRC error the same as data CRC error */
>>>             if (host->cmd->data &&
>>>                 (intmask & (SDHCI_INT_CRC | SDHCI_INT_TIMEOUT)) == 
>>> @@ -3265,6
>>> +3278,8 @@ static void sdhci_cmd_irq(struct sdhci_host *host, u32 
>>> +intmask, u32 *intmask_p)
>>>             int err = (auto_cmd_status & SDHCI_AUTO_CMD_TIMEOUT) ?
>>>                       -ETIMEDOUT :
>>>                       -EILSEQ;
>>> +           if (host->mmc && host->mmc->err_stats_enabled)
>>> +                   mmc_debugfs_err_stats_inc(host->mmc, 
>>> + MMC_ERR_AUTO_CMD);
>>>
>>>             if (sdhci_auto_cmd23(host, mrq)) {
>>>                     mrq->sbc->error = err; @@ -3342,6 +3357,8 @@ 
>>> static void sdhci_data_irq(struct sdhci_host *host, u32 intmask)
>>>                     if (intmask & SDHCI_INT_DATA_TIMEOUT) {
>>>                             host->data_cmd = NULL;
>>>                             data_cmd->error = -ETIMEDOUT;
>>> +                           if (host->mmc && host->mmc->err_stats_enabled)
>>> +                                   
>>> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_CMD_TIMEOUT);
>>>                             __sdhci_finish_mrq(host, data_cmd->mrq);
>>>                             return;
>>>                     }
>>> @@ -3375,18 +3392,29 @@ static void sdhci_data_irq(struct sdhci_host *host, u32 intmask)
>>>             return;
>>>     }
>>>
>>> -   if (intmask & SDHCI_INT_DATA_TIMEOUT)
>>> +   if (intmask & SDHCI_INT_DATA_TIMEOUT) {
>>>             host->data->error = -ETIMEDOUT;
>>> +           if (host->mmc && host->mmc->err_stats_enabled)
>>> +                   mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_DAT_TIMEOUT);
>>> +   }
>>>     else if (intmask & SDHCI_INT_DATA_END_BIT)
>>>             host->data->error = -EILSEQ;
>>>     else if ((intmask & SDHCI_INT_DATA_CRC) &&
>>>             SDHCI_GET_CMD(sdhci_readw(host, SDHCI_COMMAND))
>>> -                   != MMC_BUS_TEST_R)
>>> +                   != MMC_BUS_TEST_R) {
>>>             host->data->error = -EILSEQ;
>>> +           if (host->cmd->opcode != MMC_SEND_TUNING_BLOCK ||
>>> +                           host->cmd->opcode != MMC_SEND_TUNING_BLOCK_HS200) {
>>> +                   if (host->mmc && host->mmc->err_stats_enabled)
>>> +                           mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_DAT_CRC);
>>> +           }
>>> +   }
>>>     else if (intmask & SDHCI_INT_ADMA_ERROR) {
>>>             pr_err("%s: ADMA error: 0x%08x\n", mmc_hostname(host->mmc),
>>>                    intmask);
>>>             sdhci_adma_show_error(host);
>>> +           if (host->mmc && host->mmc->err_stats_enabled)
>>> +                   mmc_debugfs_err_stats_inc(host->mmc, 
>>> + MMC_ERR_ADMA);
>>>             host->data->error = -EIO;
>>>             if (host->ops->adma_workaround)
>>>                     host->ops->adma_workaround(host, intmask); @@ 
>>> -3905,20 +3933,40 @@ bool sdhci_cqe_irq(struct sdhci_host *host, u32 intmask, int *cmd_error,
>>>     if (!host->cqe_on)
>>>             return false;
>>>
>>> -   if (intmask & (SDHCI_INT_INDEX | SDHCI_INT_END_BIT | SDHCI_INT_CRC))
>>> +   if (intmask & (SDHCI_INT_INDEX | SDHCI_INT_END_BIT |
>>> +SDHCI_INT_CRC)) {
>>>             *cmd_error = -EILSEQ;
>>> -   else if (intmask & SDHCI_INT_TIMEOUT)
>>> +           if (intmask & SDHCI_INT_CRC) {
>>> +                   if (host->cmd->opcode != MMC_SEND_TUNING_BLOCK ||
>>> +                                   host->cmd->opcode != MMC_SEND_TUNING_BLOCK_HS200) {
>>> +                           if (host->mmc && host->mmc->err_stats_enabled)
>>> +                                   mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_CMD_CRC);
>>> +                   }
>>> +           }
>>> +   } else if (intmask & SDHCI_INT_TIMEOUT) {
>>>             *cmd_error = -ETIMEDOUT;
>>> -   else
>>> +           if (host->mmc && host->mmc->err_stats_enabled)
>>> +                   mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_CMD_TIMEOUT);
>>> +   } else
>>>             *cmd_error = 0;
>>>
>>> -   if (intmask & (SDHCI_INT_DATA_END_BIT | SDHCI_INT_DATA_CRC))
>>> +   if (intmask & (SDHCI_INT_DATA_END_BIT | SDHCI_INT_DATA_CRC)) {
>>>             *data_error = -EILSEQ;
>>> -   else if (intmask & SDHCI_INT_DATA_TIMEOUT)
>>> +           if (intmask & SDHCI_INT_DATA_CRC) {
>>> +                   if (host->cmd->opcode != MMC_SEND_TUNING_BLOCK ||
>>> +                                   host->cmd->opcode != MMC_SEND_TUNING_BLOCK_HS200) {
>>> +                           if (host->mmc && host->mmc->err_stats_enabled)
>>> +                                   mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_DAT_CRC);
>>> +                   }
>>> +           }
>>> +   } else if (intmask & SDHCI_INT_DATA_TIMEOUT) {
>>>             *data_error = -ETIMEDOUT;
>>> -   else if (intmask & SDHCI_INT_ADMA_ERROR)
>>> +           if (host->mmc && host->mmc->err_stats_enabled)
>>> +                   mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_DAT_TIMEOUT);
>>> +   } else if (intmask & SDHCI_INT_ADMA_ERROR) {
>>>             *data_error = -EIO;
>>> -   else
>>> +           if (host->mmc && host->mmc->err_stats_enabled)
>>> +                   mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_ADMA);
>>> +   } else
>>>             *data_error = 0;
>>>
>>>     /* Clear selected interrupts. */ diff --git 
>>> a/include/linux/mmc/host.h b/include/linux/mmc/host.h
>>
>> Changes to host.h are core changes and belong in patch 3, which should be the first patch.
>>
>>>>>>> Sure.
>>
>>> index 7afb57c..883b50b 100644
>>> --- a/include/linux/mmc/host.h
>>> +++ b/include/linux/mmc/host.h
>>> @@ -93,6 +93,23 @@ struct mmc_clk_phase_map {
>>>
>>>  struct mmc_host;
>>>
>>> +enum mmc_err_stat {
>>> +   MMC_ERR_CMD_TIMEOUT,
>>> +   MMC_ERR_CMD_CRC,
>>> +   MMC_ERR_DAT_TIMEOUT,
>>> +   MMC_ERR_DAT_CRC,
>>> +   MMC_ERR_AUTO_CMD,
>>> +   MMC_ERR_ADMA,
>>> +   MMC_ERR_TUNING,
>>> +   MMC_ERR_CMDQ_RED,
>>> +   MMC_ERR_CMDQ_GCE,
>>> +   MMC_ERR_CMDQ_ICCE,
>>> +   MMC_ERR_REQ_TIMEOUT,
>>> +   MMC_ERR_CMDQ_REQ_TIMEOUT,
>>> +   MMC_ERR_ICE_CFG,
>>> +   MMC_ERR_MAX,
>>> +};
>>> +
>>>  struct mmc_host_ops {
>>>     /*
>>>      * It is optional for the host to implement pre_req and post_req 
>>> in @@ -500,6 +517,9 @@ struct mmc_host {
>>>
>>>     /* Host Software Queue support */
>>>     bool                    hsq_enabled;
>>> +   u32                     err_stats[MMC_ERR_MAX];
>>> +   bool                    err_stats_enabled;
>>> +   bool                    err_state;
>>
>> Please drop err_state for now
>>
>>>>>>>>> first we can check this variable right,  if it is set then we can go and check err_stats[] to know more on type of error (data /cmd timeout or CRC errors etc.).
>> Please let me know your opinion on this.
>
> As I wrote above, you could set err_state in mmc_debugfs_err_stats_inc().
> But maybe make the err_state addition a separate patch so it is easy to see how it works.
>
>>>>>> Sure will post separate patch for err_state settings.
>
>>
>>>
>>>     unsigned long           private[] ____cacheline_aligned;
>>>  };
>>> @@ -635,6 +655,17 @@ static inline enum dma_data_direction mmc_get_dma_dir(struct mmc_data *data)
>>>     return data->flags & MMC_DATA_WRITE ? DMA_TO_DEVICE :
>>> DMA_FROM_DEVICE;  }
>>>
>>> +static inline void mmc_debugfs_err_stats_enable(struct mmc_host
>>> +*mmc)
>>
>> Please use 'host' as the mmc_host parameter in this file.
>>
>>> +{
>>> +   mmc->err_state = true;
>>
>> Let's make this:
>>
>>      host->err_stats_enabled = true;
>>
>>>>>>>> Sure.
>>
>>> +}
>>> +
>>> +static inline void mmc_debugfs_err_stats_inc(struct mmc_host *mmc,
>>> +           enum mmc_err_stat stat) {
>>> +
>>
>> Please remove blank line here
>>
>>>>>>>> sure.
>>
>>> +   mmc->err_stats[stat] += 1;
>>> +}
>>> +
>>>  int mmc_send_tuning(struct mmc_host *host, u32 opcode, int 
>>> *cmd_error);  int mmc_send_abort_tuning(struct mmc_host *host, u32 
>>> opcode);  int mmc_get_ext_csd(struct mmc_card *card, u8 
>>> **new_ext_csd);
>>>
>>
>
Bjorn Andersson Feb. 15, 2022, 4:59 p.m. UTC | #7
On Thu 20 Jan 11:26 CST 2022, Shaik Sajida Bhanu wrote:

> Add changes to capture eMMC and SD card errors.
> This is useful for debug and testing.
> 
> Signed-off-by: Shaik Sajida Bhanu <quic_c_sbhanu@quicinc.com>
> Signed-off-by: Liangliang Lu <luliang@codeaurora.org>
> Signed-off-by: Sayali Lokhande <sayalil@codeaurora.org>
> Signed-off-by: Bao D. Nguyen <nguyenb@codeaurora.org>

Please read
https://docs.kernel.org/process/submitting-patches.html#sign-your-work-the-developer-s-certificate-of-origin
and the one section below on what your S-o-b actually means.

In particular this does not say "the four of us authored this patch", it
documents the path the patch took to this point. In which case Bao is
the last one stating that he _handled_ the patch - but then somehow it
came out of your mailbox.

You're probably looking for Co-developed-by, which is described just
below that.

Regards,
Bjorn

> ---
>  drivers/mmc/host/sdhci-msm.c |  3 ++
>  drivers/mmc/host/sdhci.c     | 72 ++++++++++++++++++++++++++++++++++++--------
>  include/linux/mmc/host.h     | 31 +++++++++++++++++++
>  3 files changed, 94 insertions(+), 12 deletions(-)
> 
> diff --git a/drivers/mmc/host/sdhci-msm.c b/drivers/mmc/host/sdhci-msm.c
> index 50c71e0..309eb7b 100644
> --- a/drivers/mmc/host/sdhci-msm.c
> +++ b/drivers/mmc/host/sdhci-msm.c
> @@ -128,6 +128,8 @@
>  
>  #define MSM_MMC_AUTOSUSPEND_DELAY_MS	50
>  
> +#define MSM_MMC_ERR_STATS_ENABLE 1
> +
>  /* Timeout value to avoid infinite waiting for pwr_irq */
>  #define MSM_PWR_IRQ_TIMEOUT_MS 5000
>  
> @@ -2734,6 +2736,7 @@ static int sdhci_msm_probe(struct platform_device *pdev)
>  	if (ret)
>  		goto pm_runtime_disable;
>  
> +	host->mmc->err_stats_enabled = MSM_MMC_ERR_STATS_ENABLE;
>  	pm_runtime_mark_last_busy(&pdev->dev);
>  	pm_runtime_put_autosuspend(&pdev->dev);
>  
> diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c
> index 07c6da1..74b356e 100644
> --- a/drivers/mmc/host/sdhci.c
> +++ b/drivers/mmc/host/sdhci.c
> @@ -113,6 +113,8 @@ void sdhci_dumpregs(struct sdhci_host *host)
>  	if (host->ops->dump_vendor_regs)
>  		host->ops->dump_vendor_regs(host);
>  
> +	if (host->mmc->err_stats_enabled)
> +		mmc_debugfs_err_stats_enable(host->mmc);
>  	SDHCI_DUMP("============================================\n");
>  }
>  EXPORT_SYMBOL_GPL(sdhci_dumpregs);
> @@ -3159,6 +3161,8 @@ static void sdhci_timeout_timer(struct timer_list *t)
>  	spin_lock_irqsave(&host->lock, flags);
>  
>  	if (host->cmd && !sdhci_data_line_cmd(host->cmd)) {
> +		if (host->mmc && host->mmc->err_stats_enabled)
> +			mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_REQ_TIMEOUT);
>  		pr_err("%s: Timeout waiting for hardware cmd interrupt.\n",
>  		       mmc_hostname(host->mmc));
>  		sdhci_dumpregs(host);
> @@ -3181,6 +3185,8 @@ static void sdhci_timeout_data_timer(struct timer_list *t)
>  
>  	if (host->data || host->data_cmd ||
>  	    (host->cmd && sdhci_data_line_cmd(host->cmd))) {
> +		if (host->mmc && host->mmc->err_stats_enabled)
> +			mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_REQ_TIMEOUT);
>  		pr_err("%s: Timeout waiting for hardware interrupt.\n",
>  		       mmc_hostname(host->mmc));
>  		sdhci_dumpregs(host);
> @@ -3240,11 +3246,18 @@ static void sdhci_cmd_irq(struct sdhci_host *host, u32 intmask, u32 *intmask_p)
>  
>  	if (intmask & (SDHCI_INT_TIMEOUT | SDHCI_INT_CRC |
>  		       SDHCI_INT_END_BIT | SDHCI_INT_INDEX)) {
> -		if (intmask & SDHCI_INT_TIMEOUT)
> +		if (intmask & SDHCI_INT_TIMEOUT) {
>  			host->cmd->error = -ETIMEDOUT;
> -		else
> +			if (host->mmc && host->mmc->err_stats_enabled)
> +				mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_CMD_TIMEOUT);
> +		} else {
>  			host->cmd->error = -EILSEQ;
> -
> +			if (host->cmd->opcode != MMC_SEND_TUNING_BLOCK ||
> +					host->cmd->opcode != MMC_SEND_TUNING_BLOCK_HS200) {
> +				if (host->mmc && host->mmc->err_stats_enabled)
> +					mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_CMD_CRC);
> +			}
> +		}
>  		/* Treat data command CRC error the same as data CRC error */
>  		if (host->cmd->data &&
>  		    (intmask & (SDHCI_INT_CRC | SDHCI_INT_TIMEOUT)) ==
> @@ -3265,6 +3278,8 @@ static void sdhci_cmd_irq(struct sdhci_host *host, u32 intmask, u32 *intmask_p)
>  		int err = (auto_cmd_status & SDHCI_AUTO_CMD_TIMEOUT) ?
>  			  -ETIMEDOUT :
>  			  -EILSEQ;
> +		if (host->mmc && host->mmc->err_stats_enabled)
> +			mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_AUTO_CMD);
>  
>  		if (sdhci_auto_cmd23(host, mrq)) {
>  			mrq->sbc->error = err;
> @@ -3342,6 +3357,8 @@ static void sdhci_data_irq(struct sdhci_host *host, u32 intmask)
>  			if (intmask & SDHCI_INT_DATA_TIMEOUT) {
>  				host->data_cmd = NULL;
>  				data_cmd->error = -ETIMEDOUT;
> +				if (host->mmc && host->mmc->err_stats_enabled)
> +					mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_CMD_TIMEOUT);
>  				__sdhci_finish_mrq(host, data_cmd->mrq);
>  				return;
>  			}
> @@ -3375,18 +3392,29 @@ static void sdhci_data_irq(struct sdhci_host *host, u32 intmask)
>  		return;
>  	}
>  
> -	if (intmask & SDHCI_INT_DATA_TIMEOUT)
> +	if (intmask & SDHCI_INT_DATA_TIMEOUT) {
>  		host->data->error = -ETIMEDOUT;
> +		if (host->mmc && host->mmc->err_stats_enabled)
> +			mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_DAT_TIMEOUT);
> +	}
>  	else if (intmask & SDHCI_INT_DATA_END_BIT)
>  		host->data->error = -EILSEQ;
>  	else if ((intmask & SDHCI_INT_DATA_CRC) &&
>  		SDHCI_GET_CMD(sdhci_readw(host, SDHCI_COMMAND))
> -			!= MMC_BUS_TEST_R)
> +			!= MMC_BUS_TEST_R) {
>  		host->data->error = -EILSEQ;
> +		if (host->cmd->opcode != MMC_SEND_TUNING_BLOCK ||
> +				host->cmd->opcode != MMC_SEND_TUNING_BLOCK_HS200) {
> +			if (host->mmc && host->mmc->err_stats_enabled)
> +				mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_DAT_CRC);
> +		}
> +	}
>  	else if (intmask & SDHCI_INT_ADMA_ERROR) {
>  		pr_err("%s: ADMA error: 0x%08x\n", mmc_hostname(host->mmc),
>  		       intmask);
>  		sdhci_adma_show_error(host);
> +		if (host->mmc && host->mmc->err_stats_enabled)
> +			mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_ADMA);
>  		host->data->error = -EIO;
>  		if (host->ops->adma_workaround)
>  			host->ops->adma_workaround(host, intmask);
> @@ -3905,20 +3933,40 @@ bool sdhci_cqe_irq(struct sdhci_host *host, u32 intmask, int *cmd_error,
>  	if (!host->cqe_on)
>  		return false;
>  
> -	if (intmask & (SDHCI_INT_INDEX | SDHCI_INT_END_BIT | SDHCI_INT_CRC))
> +	if (intmask & (SDHCI_INT_INDEX | SDHCI_INT_END_BIT | SDHCI_INT_CRC)) {
>  		*cmd_error = -EILSEQ;
> -	else if (intmask & SDHCI_INT_TIMEOUT)
> +		if (intmask & SDHCI_INT_CRC) {
> +			if (host->cmd->opcode != MMC_SEND_TUNING_BLOCK ||
> +					host->cmd->opcode != MMC_SEND_TUNING_BLOCK_HS200) {
> +				if (host->mmc && host->mmc->err_stats_enabled)
> +					mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_CMD_CRC);
> +			}
> +		}
> +	} else if (intmask & SDHCI_INT_TIMEOUT) {
>  		*cmd_error = -ETIMEDOUT;
> -	else
> +		if (host->mmc && host->mmc->err_stats_enabled)
> +			mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_CMD_TIMEOUT);
> +	} else
>  		*cmd_error = 0;
>  
> -	if (intmask & (SDHCI_INT_DATA_END_BIT | SDHCI_INT_DATA_CRC))
> +	if (intmask & (SDHCI_INT_DATA_END_BIT | SDHCI_INT_DATA_CRC)) {
>  		*data_error = -EILSEQ;
> -	else if (intmask & SDHCI_INT_DATA_TIMEOUT)
> +		if (intmask & SDHCI_INT_DATA_CRC) {
> +			if (host->cmd->opcode != MMC_SEND_TUNING_BLOCK ||
> +					host->cmd->opcode != MMC_SEND_TUNING_BLOCK_HS200) {
> +				if (host->mmc && host->mmc->err_stats_enabled)
> +					mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_DAT_CRC);
> +			}
> +		}
> +	} else if (intmask & SDHCI_INT_DATA_TIMEOUT) {
>  		*data_error = -ETIMEDOUT;
> -	else if (intmask & SDHCI_INT_ADMA_ERROR)
> +		if (host->mmc && host->mmc->err_stats_enabled)
> +			mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_DAT_TIMEOUT);
> +	} else if (intmask & SDHCI_INT_ADMA_ERROR) {
>  		*data_error = -EIO;
> -	else
> +		if (host->mmc && host->mmc->err_stats_enabled)
> +			mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_ADMA);
> +	} else
>  		*data_error = 0;
>  
>  	/* Clear selected interrupts. */
> diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h
> index 7afb57c..883b50b 100644
> --- a/include/linux/mmc/host.h
> +++ b/include/linux/mmc/host.h
> @@ -93,6 +93,23 @@ struct mmc_clk_phase_map {
>  
>  struct mmc_host;
>  
> +enum mmc_err_stat {
> +	MMC_ERR_CMD_TIMEOUT,
> +	MMC_ERR_CMD_CRC,
> +	MMC_ERR_DAT_TIMEOUT,
> +	MMC_ERR_DAT_CRC,
> +	MMC_ERR_AUTO_CMD,
> +	MMC_ERR_ADMA,
> +	MMC_ERR_TUNING,
> +	MMC_ERR_CMDQ_RED,
> +	MMC_ERR_CMDQ_GCE,
> +	MMC_ERR_CMDQ_ICCE,
> +	MMC_ERR_REQ_TIMEOUT,
> +	MMC_ERR_CMDQ_REQ_TIMEOUT,
> +	MMC_ERR_ICE_CFG,
> +	MMC_ERR_MAX,
> +};
> +
>  struct mmc_host_ops {
>  	/*
>  	 * It is optional for the host to implement pre_req and post_req in
> @@ -500,6 +517,9 @@ struct mmc_host {
>  
>  	/* Host Software Queue support */
>  	bool			hsq_enabled;
> +	u32                     err_stats[MMC_ERR_MAX];
> +	bool 			err_stats_enabled;
> +	bool			err_state;
>  
>  	unsigned long		private[] ____cacheline_aligned;
>  };
> @@ -635,6 +655,17 @@ static inline enum dma_data_direction mmc_get_dma_dir(struct mmc_data *data)
>  	return data->flags & MMC_DATA_WRITE ? DMA_TO_DEVICE : DMA_FROM_DEVICE;
>  }
>  
> +static inline void mmc_debugfs_err_stats_enable(struct mmc_host *mmc)
> +{
> +	mmc->err_state = true;
> +}
> +
> +static inline void mmc_debugfs_err_stats_inc(struct mmc_host *mmc,
> +		enum mmc_err_stat stat) {
> +
> +	mmc->err_stats[stat] += 1;
> +}
> +
>  int mmc_send_tuning(struct mmc_host *host, u32 opcode, int *cmd_error);
>  int mmc_send_abort_tuning(struct mmc_host *host, u32 opcode);
>  int mmc_get_ext_csd(struct mmc_card *card, u8 **new_ext_csd);
> -- 
> QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member 
> of Code Aurora Forum, hosted by The Linux Foundation
>
Shaik Sajida Bhanu Feb. 16, 2022, 7:34 a.m. UTC | #8
Hi Bjorn,

Thank you.

Sure will address this in patch set.

Thanks,
Sajida
-----Original Message-----
From: Bjorn Andersson <bjorn.andersson@linaro.org> 
Sent: Tuesday, February 15, 2022 10:29 PM
To: Sajida Bhanu (Temp) (QUIC) <quic_c_sbhanu@quicinc.com>
Cc: adrian.hunter@intel.com; Asutosh Das (QUIC) <quic_asutoshd@quicinc.com>; ulf.hansson@linaro.org; agross@kernel.org; linux-mmc@vger.kernel.org; linux-arm-msm@vger.kernel.org; linux-kernel@vger.kernel.org; stummala@codeaurora.org; vbadigan@codeaurora.org; Ram Prakash Gupta (QUIC) <quic_rampraka@quicinc.com>; Pradeep Pragallapati (QUIC) <quic_pragalla@quicinc.com>; sartgarg@codeaurora.org; nitirawa@codeaurora.org; sayalil@codeaurora.org; Liangliang Lu <luliang@codeaurora.org>; Bao D . Nguyen <nguyenb@codeaurora.org>
Subject: Re: [PATCH V3 1/4] mmc: sdhci: Capture eMMC and SD card errors

On Thu 20 Jan 11:26 CST 2022, Shaik Sajida Bhanu wrote:

> Add changes to capture eMMC and SD card errors.
> This is useful for debug and testing.
> 
> Signed-off-by: Shaik Sajida Bhanu <quic_c_sbhanu@quicinc.com>
> Signed-off-by: Liangliang Lu <luliang@codeaurora.org>
> Signed-off-by: Sayali Lokhande <sayalil@codeaurora.org>
> Signed-off-by: Bao D. Nguyen <nguyenb@codeaurora.org>

Please read
https://docs.kernel.org/process/submitting-patches.html#sign-your-work-the-developer-s-certificate-of-origin
and the one section below on what your S-o-b actually means.

In particular this does not say "the four of us authored this patch", it documents the path the patch took to this point. In which case Bao is the last one stating that he _handled_ the patch - but then somehow it came out of your mailbox.

You're probably looking for Co-developed-by, which is described just below that.

Regards,
Bjorn

> ---
>  drivers/mmc/host/sdhci-msm.c |  3 ++
>  drivers/mmc/host/sdhci.c     | 72 ++++++++++++++++++++++++++++++++++++--------
>  include/linux/mmc/host.h     | 31 +++++++++++++++++++
>  3 files changed, 94 insertions(+), 12 deletions(-)
> 
> diff --git a/drivers/mmc/host/sdhci-msm.c 
> b/drivers/mmc/host/sdhci-msm.c index 50c71e0..309eb7b 100644
> --- a/drivers/mmc/host/sdhci-msm.c
> +++ b/drivers/mmc/host/sdhci-msm.c
> @@ -128,6 +128,8 @@
>  
>  #define MSM_MMC_AUTOSUSPEND_DELAY_MS	50
>  
> +#define MSM_MMC_ERR_STATS_ENABLE 1
> +
>  /* Timeout value to avoid infinite waiting for pwr_irq */  #define 
> MSM_PWR_IRQ_TIMEOUT_MS 5000
>  
> @@ -2734,6 +2736,7 @@ static int sdhci_msm_probe(struct platform_device *pdev)
>  	if (ret)
>  		goto pm_runtime_disable;
>  
> +	host->mmc->err_stats_enabled = MSM_MMC_ERR_STATS_ENABLE;
>  	pm_runtime_mark_last_busy(&pdev->dev);
>  	pm_runtime_put_autosuspend(&pdev->dev);
>  
> diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c index 
> 07c6da1..74b356e 100644
> --- a/drivers/mmc/host/sdhci.c
> +++ b/drivers/mmc/host/sdhci.c
> @@ -113,6 +113,8 @@ void sdhci_dumpregs(struct sdhci_host *host)
>  	if (host->ops->dump_vendor_regs)
>  		host->ops->dump_vendor_regs(host);
>  
> +	if (host->mmc->err_stats_enabled)
> +		mmc_debugfs_err_stats_enable(host->mmc);
>  	SDHCI_DUMP("============================================\n");
>  }
>  EXPORT_SYMBOL_GPL(sdhci_dumpregs);
> @@ -3159,6 +3161,8 @@ static void sdhci_timeout_timer(struct timer_list *t)
>  	spin_lock_irqsave(&host->lock, flags);
>  
>  	if (host->cmd && !sdhci_data_line_cmd(host->cmd)) {
> +		if (host->mmc && host->mmc->err_stats_enabled)
> +			mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_REQ_TIMEOUT);
>  		pr_err("%s: Timeout waiting for hardware cmd interrupt.\n",
>  		       mmc_hostname(host->mmc));
>  		sdhci_dumpregs(host);
> @@ -3181,6 +3185,8 @@ static void sdhci_timeout_data_timer(struct 
> timer_list *t)
>  
>  	if (host->data || host->data_cmd ||
>  	    (host->cmd && sdhci_data_line_cmd(host->cmd))) {
> +		if (host->mmc && host->mmc->err_stats_enabled)
> +			mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_REQ_TIMEOUT);
>  		pr_err("%s: Timeout waiting for hardware interrupt.\n",
>  		       mmc_hostname(host->mmc));
>  		sdhci_dumpregs(host);
> @@ -3240,11 +3246,18 @@ static void sdhci_cmd_irq(struct sdhci_host 
> *host, u32 intmask, u32 *intmask_p)
>  
>  	if (intmask & (SDHCI_INT_TIMEOUT | SDHCI_INT_CRC |
>  		       SDHCI_INT_END_BIT | SDHCI_INT_INDEX)) {
> -		if (intmask & SDHCI_INT_TIMEOUT)
> +		if (intmask & SDHCI_INT_TIMEOUT) {
>  			host->cmd->error = -ETIMEDOUT;
> -		else
> +			if (host->mmc && host->mmc->err_stats_enabled)
> +				mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_CMD_TIMEOUT);
> +		} else {
>  			host->cmd->error = -EILSEQ;
> -
> +			if (host->cmd->opcode != MMC_SEND_TUNING_BLOCK ||
> +					host->cmd->opcode != MMC_SEND_TUNING_BLOCK_HS200) {
> +				if (host->mmc && host->mmc->err_stats_enabled)
> +					mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_CMD_CRC);
> +			}
> +		}
>  		/* Treat data command CRC error the same as data CRC error */
>  		if (host->cmd->data &&
>  		    (intmask & (SDHCI_INT_CRC | SDHCI_INT_TIMEOUT)) == @@ -3265,6 
> +3278,8 @@ static void sdhci_cmd_irq(struct sdhci_host *host, u32 intmask, u32 *intmask_p)
>  		int err = (auto_cmd_status & SDHCI_AUTO_CMD_TIMEOUT) ?
>  			  -ETIMEDOUT :
>  			  -EILSEQ;
> +		if (host->mmc && host->mmc->err_stats_enabled)
> +			mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_AUTO_CMD);
>  
>  		if (sdhci_auto_cmd23(host, mrq)) {
>  			mrq->sbc->error = err;
> @@ -3342,6 +3357,8 @@ static void sdhci_data_irq(struct sdhci_host *host, u32 intmask)
>  			if (intmask & SDHCI_INT_DATA_TIMEOUT) {
>  				host->data_cmd = NULL;
>  				data_cmd->error = -ETIMEDOUT;
> +				if (host->mmc && host->mmc->err_stats_enabled)
> +					mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_CMD_TIMEOUT);
>  				__sdhci_finish_mrq(host, data_cmd->mrq);
>  				return;
>  			}
> @@ -3375,18 +3392,29 @@ static void sdhci_data_irq(struct sdhci_host *host, u32 intmask)
>  		return;
>  	}
>  
> -	if (intmask & SDHCI_INT_DATA_TIMEOUT)
> +	if (intmask & SDHCI_INT_DATA_TIMEOUT) {
>  		host->data->error = -ETIMEDOUT;
> +		if (host->mmc && host->mmc->err_stats_enabled)
> +			mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_DAT_TIMEOUT);
> +	}
>  	else if (intmask & SDHCI_INT_DATA_END_BIT)
>  		host->data->error = -EILSEQ;
>  	else if ((intmask & SDHCI_INT_DATA_CRC) &&
>  		SDHCI_GET_CMD(sdhci_readw(host, SDHCI_COMMAND))
> -			!= MMC_BUS_TEST_R)
> +			!= MMC_BUS_TEST_R) {
>  		host->data->error = -EILSEQ;
> +		if (host->cmd->opcode != MMC_SEND_TUNING_BLOCK ||
> +				host->cmd->opcode != MMC_SEND_TUNING_BLOCK_HS200) {
> +			if (host->mmc && host->mmc->err_stats_enabled)
> +				mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_DAT_CRC);
> +		}
> +	}
>  	else if (intmask & SDHCI_INT_ADMA_ERROR) {
>  		pr_err("%s: ADMA error: 0x%08x\n", mmc_hostname(host->mmc),
>  		       intmask);
>  		sdhci_adma_show_error(host);
> +		if (host->mmc && host->mmc->err_stats_enabled)
> +			mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_ADMA);
>  		host->data->error = -EIO;
>  		if (host->ops->adma_workaround)
>  			host->ops->adma_workaround(host, intmask); @@ -3905,20 +3933,40 @@ 
> bool sdhci_cqe_irq(struct sdhci_host *host, u32 intmask, int *cmd_error,
>  	if (!host->cqe_on)
>  		return false;
>  
> -	if (intmask & (SDHCI_INT_INDEX | SDHCI_INT_END_BIT | SDHCI_INT_CRC))
> +	if (intmask & (SDHCI_INT_INDEX | SDHCI_INT_END_BIT | SDHCI_INT_CRC)) 
> +{
>  		*cmd_error = -EILSEQ;
> -	else if (intmask & SDHCI_INT_TIMEOUT)
> +		if (intmask & SDHCI_INT_CRC) {
> +			if (host->cmd->opcode != MMC_SEND_TUNING_BLOCK ||
> +					host->cmd->opcode != MMC_SEND_TUNING_BLOCK_HS200) {
> +				if (host->mmc && host->mmc->err_stats_enabled)
> +					mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_CMD_CRC);
> +			}
> +		}
> +	} else if (intmask & SDHCI_INT_TIMEOUT) {
>  		*cmd_error = -ETIMEDOUT;
> -	else
> +		if (host->mmc && host->mmc->err_stats_enabled)
> +			mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_CMD_TIMEOUT);
> +	} else
>  		*cmd_error = 0;
>  
> -	if (intmask & (SDHCI_INT_DATA_END_BIT | SDHCI_INT_DATA_CRC))
> +	if (intmask & (SDHCI_INT_DATA_END_BIT | SDHCI_INT_DATA_CRC)) {
>  		*data_error = -EILSEQ;
> -	else if (intmask & SDHCI_INT_DATA_TIMEOUT)
> +		if (intmask & SDHCI_INT_DATA_CRC) {
> +			if (host->cmd->opcode != MMC_SEND_TUNING_BLOCK ||
> +					host->cmd->opcode != MMC_SEND_TUNING_BLOCK_HS200) {
> +				if (host->mmc && host->mmc->err_stats_enabled)
> +					mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_DAT_CRC);
> +			}
> +		}
> +	} else if (intmask & SDHCI_INT_DATA_TIMEOUT) {
>  		*data_error = -ETIMEDOUT;
> -	else if (intmask & SDHCI_INT_ADMA_ERROR)
> +		if (host->mmc && host->mmc->err_stats_enabled)
> +			mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_DAT_TIMEOUT);
> +	} else if (intmask & SDHCI_INT_ADMA_ERROR) {
>  		*data_error = -EIO;
> -	else
> +		if (host->mmc && host->mmc->err_stats_enabled)
> +			mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_ADMA);
> +	} else
>  		*data_error = 0;
>  
>  	/* Clear selected interrupts. */
> diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index 
> 7afb57c..883b50b 100644
> --- a/include/linux/mmc/host.h
> +++ b/include/linux/mmc/host.h
> @@ -93,6 +93,23 @@ struct mmc_clk_phase_map {
>  
>  struct mmc_host;
>  
> +enum mmc_err_stat {
> +	MMC_ERR_CMD_TIMEOUT,
> +	MMC_ERR_CMD_CRC,
> +	MMC_ERR_DAT_TIMEOUT,
> +	MMC_ERR_DAT_CRC,
> +	MMC_ERR_AUTO_CMD,
> +	MMC_ERR_ADMA,
> +	MMC_ERR_TUNING,
> +	MMC_ERR_CMDQ_RED,
> +	MMC_ERR_CMDQ_GCE,
> +	MMC_ERR_CMDQ_ICCE,
> +	MMC_ERR_REQ_TIMEOUT,
> +	MMC_ERR_CMDQ_REQ_TIMEOUT,
> +	MMC_ERR_ICE_CFG,
> +	MMC_ERR_MAX,
> +};
> +
>  struct mmc_host_ops {
>  	/*
>  	 * It is optional for the host to implement pre_req and post_req in 
> @@ -500,6 +517,9 @@ struct mmc_host {
>  
>  	/* Host Software Queue support */
>  	bool			hsq_enabled;
> +	u32                     err_stats[MMC_ERR_MAX];
> +	bool 			err_stats_enabled;
> +	bool			err_state;
>  
>  	unsigned long		private[] ____cacheline_aligned;
>  };
> @@ -635,6 +655,17 @@ static inline enum dma_data_direction mmc_get_dma_dir(struct mmc_data *data)
>  	return data->flags & MMC_DATA_WRITE ? DMA_TO_DEVICE : 
> DMA_FROM_DEVICE;  }
>  
> +static inline void mmc_debugfs_err_stats_enable(struct mmc_host *mmc) 
> +{
> +	mmc->err_state = true;
> +}
> +
> +static inline void mmc_debugfs_err_stats_inc(struct mmc_host *mmc,
> +		enum mmc_err_stat stat) {
> +
> +	mmc->err_stats[stat] += 1;
> +}
> +
>  int mmc_send_tuning(struct mmc_host *host, u32 opcode, int 
> *cmd_error);  int mmc_send_abort_tuning(struct mmc_host *host, u32 
> opcode);  int mmc_get_ext_csd(struct mmc_card *card, u8 
> **new_ext_csd);
> --
> QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a 
> member of Code Aurora Forum, hosted by The Linux Foundation
>
diff mbox series

Patch

diff --git a/drivers/mmc/host/sdhci-msm.c b/drivers/mmc/host/sdhci-msm.c
index 50c71e0..309eb7b 100644
--- a/drivers/mmc/host/sdhci-msm.c
+++ b/drivers/mmc/host/sdhci-msm.c
@@ -128,6 +128,8 @@ 
 
 #define MSM_MMC_AUTOSUSPEND_DELAY_MS	50
 
+#define MSM_MMC_ERR_STATS_ENABLE 1
+
 /* Timeout value to avoid infinite waiting for pwr_irq */
 #define MSM_PWR_IRQ_TIMEOUT_MS 5000
 
@@ -2734,6 +2736,7 @@  static int sdhci_msm_probe(struct platform_device *pdev)
 	if (ret)
 		goto pm_runtime_disable;
 
+	host->mmc->err_stats_enabled = MSM_MMC_ERR_STATS_ENABLE;
 	pm_runtime_mark_last_busy(&pdev->dev);
 	pm_runtime_put_autosuspend(&pdev->dev);
 
diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c
index 07c6da1..74b356e 100644
--- a/drivers/mmc/host/sdhci.c
+++ b/drivers/mmc/host/sdhci.c
@@ -113,6 +113,8 @@  void sdhci_dumpregs(struct sdhci_host *host)
 	if (host->ops->dump_vendor_regs)
 		host->ops->dump_vendor_regs(host);
 
+	if (host->mmc->err_stats_enabled)
+		mmc_debugfs_err_stats_enable(host->mmc);
 	SDHCI_DUMP("============================================\n");
 }
 EXPORT_SYMBOL_GPL(sdhci_dumpregs);
@@ -3159,6 +3161,8 @@  static void sdhci_timeout_timer(struct timer_list *t)
 	spin_lock_irqsave(&host->lock, flags);
 
 	if (host->cmd && !sdhci_data_line_cmd(host->cmd)) {
+		if (host->mmc && host->mmc->err_stats_enabled)
+			mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_REQ_TIMEOUT);
 		pr_err("%s: Timeout waiting for hardware cmd interrupt.\n",
 		       mmc_hostname(host->mmc));
 		sdhci_dumpregs(host);
@@ -3181,6 +3185,8 @@  static void sdhci_timeout_data_timer(struct timer_list *t)
 
 	if (host->data || host->data_cmd ||
 	    (host->cmd && sdhci_data_line_cmd(host->cmd))) {
+		if (host->mmc && host->mmc->err_stats_enabled)
+			mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_REQ_TIMEOUT);
 		pr_err("%s: Timeout waiting for hardware interrupt.\n",
 		       mmc_hostname(host->mmc));
 		sdhci_dumpregs(host);
@@ -3240,11 +3246,18 @@  static void sdhci_cmd_irq(struct sdhci_host *host, u32 intmask, u32 *intmask_p)
 
 	if (intmask & (SDHCI_INT_TIMEOUT | SDHCI_INT_CRC |
 		       SDHCI_INT_END_BIT | SDHCI_INT_INDEX)) {
-		if (intmask & SDHCI_INT_TIMEOUT)
+		if (intmask & SDHCI_INT_TIMEOUT) {
 			host->cmd->error = -ETIMEDOUT;
-		else
+			if (host->mmc && host->mmc->err_stats_enabled)
+				mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_CMD_TIMEOUT);
+		} else {
 			host->cmd->error = -EILSEQ;
-
+			if (host->cmd->opcode != MMC_SEND_TUNING_BLOCK ||
+					host->cmd->opcode != MMC_SEND_TUNING_BLOCK_HS200) {
+				if (host->mmc && host->mmc->err_stats_enabled)
+					mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_CMD_CRC);
+			}
+		}
 		/* Treat data command CRC error the same as data CRC error */
 		if (host->cmd->data &&
 		    (intmask & (SDHCI_INT_CRC | SDHCI_INT_TIMEOUT)) ==
@@ -3265,6 +3278,8 @@  static void sdhci_cmd_irq(struct sdhci_host *host, u32 intmask, u32 *intmask_p)
 		int err = (auto_cmd_status & SDHCI_AUTO_CMD_TIMEOUT) ?
 			  -ETIMEDOUT :
 			  -EILSEQ;
+		if (host->mmc && host->mmc->err_stats_enabled)
+			mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_AUTO_CMD);
 
 		if (sdhci_auto_cmd23(host, mrq)) {
 			mrq->sbc->error = err;
@@ -3342,6 +3357,8 @@  static void sdhci_data_irq(struct sdhci_host *host, u32 intmask)
 			if (intmask & SDHCI_INT_DATA_TIMEOUT) {
 				host->data_cmd = NULL;
 				data_cmd->error = -ETIMEDOUT;
+				if (host->mmc && host->mmc->err_stats_enabled)
+					mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_CMD_TIMEOUT);
 				__sdhci_finish_mrq(host, data_cmd->mrq);
 				return;
 			}
@@ -3375,18 +3392,29 @@  static void sdhci_data_irq(struct sdhci_host *host, u32 intmask)
 		return;
 	}
 
-	if (intmask & SDHCI_INT_DATA_TIMEOUT)
+	if (intmask & SDHCI_INT_DATA_TIMEOUT) {
 		host->data->error = -ETIMEDOUT;
+		if (host->mmc && host->mmc->err_stats_enabled)
+			mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_DAT_TIMEOUT);
+	}
 	else if (intmask & SDHCI_INT_DATA_END_BIT)
 		host->data->error = -EILSEQ;
 	else if ((intmask & SDHCI_INT_DATA_CRC) &&
 		SDHCI_GET_CMD(sdhci_readw(host, SDHCI_COMMAND))
-			!= MMC_BUS_TEST_R)
+			!= MMC_BUS_TEST_R) {
 		host->data->error = -EILSEQ;
+		if (host->cmd->opcode != MMC_SEND_TUNING_BLOCK ||
+				host->cmd->opcode != MMC_SEND_TUNING_BLOCK_HS200) {
+			if (host->mmc && host->mmc->err_stats_enabled)
+				mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_DAT_CRC);
+		}
+	}
 	else if (intmask & SDHCI_INT_ADMA_ERROR) {
 		pr_err("%s: ADMA error: 0x%08x\n", mmc_hostname(host->mmc),
 		       intmask);
 		sdhci_adma_show_error(host);
+		if (host->mmc && host->mmc->err_stats_enabled)
+			mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_ADMA);
 		host->data->error = -EIO;
 		if (host->ops->adma_workaround)
 			host->ops->adma_workaround(host, intmask);
@@ -3905,20 +3933,40 @@  bool sdhci_cqe_irq(struct sdhci_host *host, u32 intmask, int *cmd_error,
 	if (!host->cqe_on)
 		return false;
 
-	if (intmask & (SDHCI_INT_INDEX | SDHCI_INT_END_BIT | SDHCI_INT_CRC))
+	if (intmask & (SDHCI_INT_INDEX | SDHCI_INT_END_BIT | SDHCI_INT_CRC)) {
 		*cmd_error = -EILSEQ;
-	else if (intmask & SDHCI_INT_TIMEOUT)
+		if (intmask & SDHCI_INT_CRC) {
+			if (host->cmd->opcode != MMC_SEND_TUNING_BLOCK ||
+					host->cmd->opcode != MMC_SEND_TUNING_BLOCK_HS200) {
+				if (host->mmc && host->mmc->err_stats_enabled)
+					mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_CMD_CRC);
+			}
+		}
+	} else if (intmask & SDHCI_INT_TIMEOUT) {
 		*cmd_error = -ETIMEDOUT;
-	else
+		if (host->mmc && host->mmc->err_stats_enabled)
+			mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_CMD_TIMEOUT);
+	} else
 		*cmd_error = 0;
 
-	if (intmask & (SDHCI_INT_DATA_END_BIT | SDHCI_INT_DATA_CRC))
+	if (intmask & (SDHCI_INT_DATA_END_BIT | SDHCI_INT_DATA_CRC)) {
 		*data_error = -EILSEQ;
-	else if (intmask & SDHCI_INT_DATA_TIMEOUT)
+		if (intmask & SDHCI_INT_DATA_CRC) {
+			if (host->cmd->opcode != MMC_SEND_TUNING_BLOCK ||
+					host->cmd->opcode != MMC_SEND_TUNING_BLOCK_HS200) {
+				if (host->mmc && host->mmc->err_stats_enabled)
+					mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_DAT_CRC);
+			}
+		}
+	} else if (intmask & SDHCI_INT_DATA_TIMEOUT) {
 		*data_error = -ETIMEDOUT;
-	else if (intmask & SDHCI_INT_ADMA_ERROR)
+		if (host->mmc && host->mmc->err_stats_enabled)
+			mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_DAT_TIMEOUT);
+	} else if (intmask & SDHCI_INT_ADMA_ERROR) {
 		*data_error = -EIO;
-	else
+		if (host->mmc && host->mmc->err_stats_enabled)
+			mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_ADMA);
+	} else
 		*data_error = 0;
 
 	/* Clear selected interrupts. */
diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h
index 7afb57c..883b50b 100644
--- a/include/linux/mmc/host.h
+++ b/include/linux/mmc/host.h
@@ -93,6 +93,23 @@  struct mmc_clk_phase_map {
 
 struct mmc_host;
 
+enum mmc_err_stat {
+	MMC_ERR_CMD_TIMEOUT,
+	MMC_ERR_CMD_CRC,
+	MMC_ERR_DAT_TIMEOUT,
+	MMC_ERR_DAT_CRC,
+	MMC_ERR_AUTO_CMD,
+	MMC_ERR_ADMA,
+	MMC_ERR_TUNING,
+	MMC_ERR_CMDQ_RED,
+	MMC_ERR_CMDQ_GCE,
+	MMC_ERR_CMDQ_ICCE,
+	MMC_ERR_REQ_TIMEOUT,
+	MMC_ERR_CMDQ_REQ_TIMEOUT,
+	MMC_ERR_ICE_CFG,
+	MMC_ERR_MAX,
+};
+
 struct mmc_host_ops {
 	/*
 	 * It is optional for the host to implement pre_req and post_req in
@@ -500,6 +517,9 @@  struct mmc_host {
 
 	/* Host Software Queue support */
 	bool			hsq_enabled;
+	u32                     err_stats[MMC_ERR_MAX];
+	bool 			err_stats_enabled;
+	bool			err_state;
 
 	unsigned long		private[] ____cacheline_aligned;
 };
@@ -635,6 +655,17 @@  static inline enum dma_data_direction mmc_get_dma_dir(struct mmc_data *data)
 	return data->flags & MMC_DATA_WRITE ? DMA_TO_DEVICE : DMA_FROM_DEVICE;
 }
 
+static inline void mmc_debugfs_err_stats_enable(struct mmc_host *mmc)
+{
+	mmc->err_state = true;
+}
+
+static inline void mmc_debugfs_err_stats_inc(struct mmc_host *mmc,
+		enum mmc_err_stat stat) {
+
+	mmc->err_stats[stat] += 1;
+}
+
 int mmc_send_tuning(struct mmc_host *host, u32 opcode, int *cmd_error);
 int mmc_send_abort_tuning(struct mmc_host *host, u32 opcode);
 int mmc_get_ext_csd(struct mmc_card *card, u8 **new_ext_csd);