mbox series

[V5,0/5] mmc: add error statistics for eMMC and SD card

Message ID 1650902443-26357-1-git-send-email-quic_c_sbhanu@quicinc.com
Headers show
Series mmc: add error statistics for eMMC and SD card | expand

Message

Shaik Sajida Bhanu April 25, 2022, 4 p.m. UTC
Changes since V4:
	- Defined new macro to increment err_stats members when error occured
	  as suggested by Adrain Hunter.
	- Called err_stats members increment function after printing the error
	  as suggested by Adrain Hunter.
	- Considered INDEX and END_BIT errors same as CRC errors as suggested
	  by Adrain Hunter.
	- Removed Null check for host in debug fs functions and Reordered
	  err_stats declarationas suggested by Adrain Hunter.
	- Removed err_state variable stuff and updated err_state debug fs entry
	  based on the err_stats members state as suggested by Adrain Hunter.

Changes since V3:
	- Dropped error stats feature flag as suggested by Adrain Hunter.
	- Separated error state related changes in separate patches as
	  suggested by Adrain Hunter.
	  [PATCH V4 4/7] : error state debug fs
	  [PATCH V4 5/7] : error state enable function
	  [PATCH V4 6/7] : error state enable in error case
	- Note: we are enabling error state before calling sdhci_dumpregs
	  we couldn't add the err state in error stats array as err state
	  is not error type.
	- Corrected Signed-off-by order as suggested by Bjron Andersson.
	- Moved error state enable code from sdhci_dumpregs to error
	  conditions as suggested by Adrain Hunter

Changes since V2:
	- Removed userspace error stats clear debug fs entry as suggested
	  by Adrain Hunter.
	- Split patch into 4 patches
	  [PATCH V3 1/4] : sdhci driver
	  [PATCH V3 2/4] : debug fs entries
	  [PATCH V3 3/4] : core driver
	  [PATCH V3 4/4] : cqhci driver
	- Used for loop to print error messages instead of using printf
	  statements for all error messages as suggested by Adrain Hunter.
	- Introduced one flag to enable error stats feature, if any other
	  client wants to use this feature, they need to enable that flag.
	- Moved reset command timeout error statement to card init flow
	  as suggested by Adrain Hunter.

Changes since V1:
	- Removed sysfs entry for eMMC and SD card error statistics and added
	  debugfs entry as suggested by Adrian Hunter and Ulf Hansson.

Shaik Sajida Bhanu (5):
  mmc: core: Capture eMMC and SD card errors
  mmc: sdhci: Capture eMMC and SD card errors
  mmc: debugfs: Add debug fs entry for mmc driver
  mmc: debugfs: Add debug fs error state entry for mmc driver
  mmc: cqhci: Capture eMMC and SD card errors

 drivers/mmc/core/core.c       |  6 ++++
 drivers/mmc/core/debugfs.c    | 81 +++++++++++++++++++++++++++++++++++++++++++
 drivers/mmc/host/cqhci-core.c |  9 ++++-
 drivers/mmc/host/sdhci.c      | 54 ++++++++++++++++++++++-------
 drivers/mmc/host/sdhci.h      |  3 ++
 include/linux/mmc/host.h      | 26 ++++++++++++++
 include/linux/mmc/mmc.h       |  6 ++++
 7 files changed, 171 insertions(+), 14 deletions(-)

Comments

Adrian Hunter April 26, 2022, 7:51 a.m. UTC | #1
On 25/04/22 19:00, Shaik Sajida Bhanu wrote:
> Add changes to capture eMMC and SD card errors.
> This is useful for debug and testing.
> 
> Signed-off-by: Liangliang Lu <quic_luliang@quicinc.com>
> Signed-off-by: Sayali Lokhande <quic_sayalil@quicinc.com>
> Signed-off-by: Bao D. Nguyen <quic_nguyenb@quicinc.com>
> Signed-off-by: Shaik Sajida Bhanu <quic_c_sbhanu@quicinc.com>

Looks good.  A couple of minor comments.

> ---
>  drivers/mmc/host/sdhci.c | 54 ++++++++++++++++++++++++++++++++++++------------
>  drivers/mmc/host/sdhci.h |  3 +++
>  include/linux/mmc/mmc.h  |  6 ++++++
>  3 files changed, 50 insertions(+), 13 deletions(-)
> 
> diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c
> index 2215202..1cda28ba 100644
> --- a/drivers/mmc/host/sdhci.c
> +++ b/drivers/mmc/host/sdhci.c
> @@ -224,6 +224,7 @@ void sdhci_reset(struct sdhci_host *host, u8 mask)
>  		if (timedout) {
>  			pr_err("%s: Reset 0x%x never completed.\n",
>  				mmc_hostname(host->mmc), (int)mask);
> +			sdhci_err_stats_inc(host, CTRL_TIMEOUT);
>  			sdhci_dumpregs(host);
>  			return;
>  		}
> @@ -1716,6 +1717,7 @@ static bool sdhci_send_command_retry(struct sdhci_host *host,
>  		if (!timeout--) {
>  			pr_err("%s: Controller never released inhibit bit(s).\n",
>  			       mmc_hostname(host->mmc));
> +			sdhci_err_stats_inc(host, CTRL_TIMEOUT);
>  			sdhci_dumpregs(host);
>  			cmd->error = -EIO;
>  			return false;
> @@ -1965,6 +1967,7 @@ void sdhci_enable_clk(struct sdhci_host *host, u16 clk)
>  		if (timedout) {
>  			pr_err("%s: Internal clock never stabilised.\n",
>  			       mmc_hostname(host->mmc));
> +			sdhci_err_stats_inc(host, CTRL_TIMEOUT);
>  			sdhci_dumpregs(host);
>  			return;
>  		}
> @@ -1987,6 +1990,7 @@ void sdhci_enable_clk(struct sdhci_host *host, u16 clk)
>  			if (timedout) {
>  				pr_err("%s: PLL clock never stabilised.\n",
>  				       mmc_hostname(host->mmc));
> +				sdhci_err_stats_inc(host, CTRL_TIMEOUT);
>  				sdhci_dumpregs(host);
>  				return;
>  			}
> @@ -3161,6 +3165,7 @@ static void sdhci_timeout_timer(struct timer_list *t)
>  	if (host->cmd && !sdhci_data_line_cmd(host->cmd)) {
>  		pr_err("%s: Timeout waiting for hardware cmd interrupt.\n",
>  		       mmc_hostname(host->mmc));
> +		sdhci_err_stats_inc(host, REQ_TIMEOUT);
>  		sdhci_dumpregs(host);
>  
>  		host->cmd->error = -ETIMEDOUT;
> @@ -3183,6 +3188,7 @@ static void sdhci_timeout_data_timer(struct timer_list *t)
>  	    (host->cmd && sdhci_data_line_cmd(host->cmd))) {
>  		pr_err("%s: Timeout waiting for hardware interrupt.\n",
>  		       mmc_hostname(host->mmc));
> +		sdhci_err_stats_inc(host, REQ_TIMEOUT);
>  		sdhci_dumpregs(host);
>  
>  		if (host->data) {
> @@ -3234,17 +3240,21 @@ static void sdhci_cmd_irq(struct sdhci_host *host, u32 intmask, u32 *intmask_p)
>  			return;
>  		pr_err("%s: Got command interrupt 0x%08x even though no command operation was in progress.\n",
>  		       mmc_hostname(host->mmc), (unsigned)intmask);
> +		sdhci_err_stats_inc(host, UNEXPECTED_IRQ);
>  		sdhci_dumpregs(host);
>  		return;
>  	}
>  
>  	if (intmask & (SDHCI_INT_TIMEOUT | SDHCI_INT_CRC |
>  		       SDHCI_INT_END_BIT | SDHCI_INT_INDEX)) {
> -		if (intmask & SDHCI_INT_TIMEOUT)
> +		if (intmask & SDHCI_INT_TIMEOUT) {
>  			host->cmd->error = -ETIMEDOUT;
> -		else
> +			sdhci_err_stats_inc(host, CMD_TIMEOUT);
> +		} else {
>  			host->cmd->error = -EILSEQ;
> -
> +			if (!mmc_op_tuning(host->cmd->opcode))
> +				sdhci_err_stats_inc(host, CMD_CRC);
> +		}
>  		/* Treat data command CRC error the same as data CRC error */
>  		if (host->cmd->data &&
>  		    (intmask & (SDHCI_INT_CRC | SDHCI_INT_TIMEOUT)) ==
> @@ -3266,6 +3276,8 @@ static void sdhci_cmd_irq(struct sdhci_host *host, u32 intmask, u32 *intmask_p)
>  			  -ETIMEDOUT :
>  			  -EILSEQ;
>  
> +		sdhci_err_stats_inc(host, AUTO_CMD);
> +
>  		if (sdhci_auto_cmd23(host, mrq)) {
>  			mrq->sbc->error = err;
>  			__sdhci_finish_mrq(host, mrq);
> @@ -3342,6 +3354,7 @@ static void sdhci_data_irq(struct sdhci_host *host, u32 intmask)
>  			if (intmask & SDHCI_INT_DATA_TIMEOUT) {
>  				host->data_cmd = NULL;
>  				data_cmd->error = -ETIMEDOUT;
> +				sdhci_err_stats_inc(host, CMD_TIMEOUT);
>  				__sdhci_finish_mrq(host, data_cmd->mrq);
>  				return;
>  			}
> @@ -3370,23 +3383,29 @@ static void sdhci_data_irq(struct sdhci_host *host, u32 intmask)
>  
>  		pr_err("%s: Got data interrupt 0x%08x even though no data operation was in progress.\n",
>  		       mmc_hostname(host->mmc), (unsigned)intmask);
> +		sdhci_err_stats_inc(host, UNEXPECTED_IRQ);
>  		sdhci_dumpregs(host);
>  
>  		return;
>  	}
>  
> -	if (intmask & SDHCI_INT_DATA_TIMEOUT)
> +	if (intmask & SDHCI_INT_DATA_TIMEOUT) {
>  		host->data->error = -ETIMEDOUT;
> -	else if (intmask & SDHCI_INT_DATA_END_BIT)
> +		sdhci_err_stats_inc(host, DAT_TIMEOUT);
> +	} else if (intmask & SDHCI_INT_DATA_END_BIT)
>  		host->data->error = -EILSEQ;

Seems to be missing here:

		sdhci_err_stats_inc(host, DAT_CRC);

Also it would be nice to have braces {} on all arms of if-else-if
Can use checkpatch.pl --strict to see where

>  	else if ((intmask & SDHCI_INT_DATA_CRC) &&
>  		SDHCI_GET_CMD(sdhci_readw(host, SDHCI_COMMAND))
> -			!= MMC_BUS_TEST_R)
> +			!= MMC_BUS_TEST_R) {
>  		host->data->error = -EILSEQ;
> +		if (!mmc_op_tuning(host->cmd->opcode))
> +			sdhci_err_stats_inc(host, DAT_CRC);
> +	}
>  	else if (intmask & SDHCI_INT_ADMA_ERROR) {
>  		pr_err("%s: ADMA error: 0x%08x\n", mmc_hostname(host->mmc),
>  		       intmask);
>  		sdhci_adma_show_error(host);
> +		sdhci_err_stats_inc(host, ADMA);
>  		host->data->error = -EIO;
>  		if (host->ops->adma_workaround)
>  			host->ops->adma_workaround(host, intmask);
> @@ -3584,6 +3603,7 @@ static irqreturn_t sdhci_irq(int irq, void *dev_id)
>  	if (unexpected) {
>  		pr_err("%s: Unexpected interrupt 0x%08x.\n",
>  			   mmc_hostname(host->mmc), unexpected);
> +		sdhci_err_stats_inc(host, UNEXPECTED_IRQ);
>  		sdhci_dumpregs(host);
>  	}
>  
> @@ -3905,20 +3925,27 @@ bool sdhci_cqe_irq(struct sdhci_host *host, u32 intmask, int *cmd_error,
>  	if (!host->cqe_on)
>  		return false;
>  
> -	if (intmask & (SDHCI_INT_INDEX | SDHCI_INT_END_BIT | SDHCI_INT_CRC))
> +	if (intmask & (SDHCI_INT_INDEX | SDHCI_INT_END_BIT | SDHCI_INT_CRC)) {
>  		*cmd_error = -EILSEQ;
> -	else if (intmask & SDHCI_INT_TIMEOUT)
> +		if (!mmc_op_tuning(host->cmd->opcode))
> +			sdhci_err_stats_inc(host, CMD_CRC);
> +	} else if (intmask & SDHCI_INT_TIMEOUT) {
>  		*cmd_error = -ETIMEDOUT;
> -	else
> +		sdhci_err_stats_inc(host, CMD_TIMEOUT);
> +	} else
>  		*cmd_error = 0;
>  
> -	if (intmask & (SDHCI_INT_DATA_END_BIT | SDHCI_INT_DATA_CRC))
> +	if (intmask & (SDHCI_INT_DATA_END_BIT | SDHCI_INT_DATA_CRC)) {
>  		*data_error = -EILSEQ;
> -	else if (intmask & SDHCI_INT_DATA_TIMEOUT)
> +		if (!mmc_op_tuning(host->cmd->opcode))
> +			sdhci_err_stats_inc(host, DAT_CRC);
> +	} else if (intmask & SDHCI_INT_DATA_TIMEOUT) {
>  		*data_error = -ETIMEDOUT;
> -	else if (intmask & SDHCI_INT_ADMA_ERROR)
> +		sdhci_err_stats_inc(host, DAT_TIMEOUT);
> +	} else if (intmask & SDHCI_INT_ADMA_ERROR) {
>  		*data_error = -EIO;
> -	else
> +		sdhci_err_stats_inc(host, ADMA);
> +	} else
>  		*data_error = 0;
>  
>  	/* Clear selected interrupts. */
> @@ -3934,6 +3961,7 @@ bool sdhci_cqe_irq(struct sdhci_host *host, u32 intmask, int *cmd_error,
>  		sdhci_writel(host, intmask, SDHCI_INT_STATUS);
>  		pr_err("%s: CQE: Unexpected interrupt 0x%08x.\n",
>  		       mmc_hostname(host->mmc), intmask);
> +		sdhci_err_stats_inc(host, UNEXPECTED_IRQ);
>  		sdhci_dumpregs(host);
>  	}
>  
> diff --git a/drivers/mmc/host/sdhci.h b/drivers/mmc/host/sdhci.h
> index d7929d7..95a08f0 100644
> --- a/drivers/mmc/host/sdhci.h
> +++ b/drivers/mmc/host/sdhci.h
> @@ -356,6 +356,9 @@ struct sdhci_adma2_64_desc {
>   */
>  #define MMC_CMD_TRANSFER_TIME	(10 * NSEC_PER_MSEC) /* max 10 ms */
>  
> +#define sdhci_err_stats_inc(host, err_name) \
> +	mmc_debugfs_err_stats_inc((host)->mmc, MMC_ERR_##err_name)
> +
>  enum sdhci_cookie {
>  	COOKIE_UNMAPPED,
>  	COOKIE_PRE_MAPPED,	/* mapped by sdhci_pre_req() */
> diff --git a/include/linux/mmc/mmc.h b/include/linux/mmc/mmc.h
> index d9a65c6..9c50bc4 100644
> --- a/include/linux/mmc/mmc.h
> +++ b/include/linux/mmc/mmc.h
> @@ -99,6 +99,12 @@ static inline bool mmc_op_multi(u32 opcode)
>  	       opcode == MMC_READ_MULTIPLE_BLOCK;
>  }
>  
> +static inline bool mmc_op_tuning(u32 opcode)
> +{
> +	return opcode == MMC_SEND_TUNING_BLOCK ||
> +			opcode == MMC_SEND_TUNING_BLOCK_HS200;
> +}
> +
>  /*
>   * MMC_SWITCH argument format:
>   *

There does not seem to be any:

	sdhci_err_stats_inc(host, TUNING);

MMC_ERR_TUNING does not seem to get used.