diff mbox series

[v2,1/3] ALSA: hda/tegra: Fix Tegra194 HDA reset failure

Message ID 1640021408-12824-2-git-send-email-spujar@nvidia.com
State New
Headers show
Series Fix Tegra194 HDA regression | expand

Commit Message

Sameer Pujar Dec. 20, 2021, 5:30 p.m. UTC
An HDA regression was recently reported on Tegra194-based platforms.
This happens because the "hda2codec_2x" reset does not really exist
on Tegra194, which causes a probe failure. All the HDA-based audio
tests fail at the moment. The underlying issue is exposed by
commit c045ceb5a145 ("reset: tegra-bpmp: Handle errors in BPMP
response"), which now checks the return code of the BPMP command response.
Fix this issue by skipping the unavailable reset on Tegra194.

Signed-off-by: Sameer Pujar <spujar@nvidia.com>
Cc: stable@vger.kernel.org
Depends-on: 87f0e46e7559 ("ALSA: hda/tegra: Reset hardware")
---
 sound/pci/hda/hda_tegra.c | 96 ++++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 86 insertions(+), 10 deletions(-)

Comments

Dmitry Osipenko Dec. 21, 2021, 1:21 a.m. UTC | #1
20.12.2021 20:30, Sameer Pujar пишет:
> HDA regression is recently reported on Tegra194 based platforms.
> This happens because "hda2codec_2x" reset does not really exist
> in Tegra194 and it causes probe failure. All the HDA based audio
> tests fail at the moment. This underlying issue is exposed by
> commit c045ceb5a145 ("reset: tegra-bpmp: Handle errors in BPMP
> response") which now checks return code of BPMP command response.
> Fix this issue by skipping unavailable reset on Tegra194.
> 
> Signed-off-by: Sameer Pujar <spujar@nvidia.com>
> Cc: stable@vger.kernel.org
> Depends-on: 87f0e46e7559 ("ALSA: hda/tegra: Reset hardware")
> ---
>  sound/pci/hda/hda_tegra.c | 96 ++++++++++++++++++++++++++++++++++++++++++-----
>  1 file changed, 86 insertions(+), 10 deletions(-)
> 
> diff --git a/sound/pci/hda/hda_tegra.c b/sound/pci/hda/hda_tegra.c
> index ea700395..be010cd 100644
> --- a/sound/pci/hda/hda_tegra.c
> +++ b/sound/pci/hda/hda_tegra.c
> @@ -68,14 +68,21 @@
>   */
>  #define TEGRA194_NUM_SDO_LINES	  4
>  
> +struct hda_tegra_soc {
> +	bool has_hda2codec_2x_reset;
> +};
> +
>  struct hda_tegra {
>  	struct azx chip;
>  	struct device *dev;
> -	struct reset_control *reset;
> +	struct reset_control *reset_hda;
> +	struct reset_control *reset_hda2hdmi;
> +	struct reset_control *reset_hda2codec_2x;
>  	struct clk_bulk_data clocks[3];
>  	unsigned int nclocks;
>  	void __iomem *regs;
>  	struct work_struct probe_work;
> +	const struct hda_tegra_soc *data;
>  };
>  
>  #ifdef CONFIG_PM
> @@ -170,9 +177,26 @@ static int __maybe_unused hda_tegra_runtime_resume(struct device *dev)
>  	int rc;
>  
>  	if (!chip->running) {
> -		rc = reset_control_assert(hda->reset);
> -		if (rc)
> +		rc = reset_control_assert(hda->reset_hda);
> +		if (rc) {
> +			dev_err(dev, "hda reset assert failed, err: %d\n", rc);
> +			return rc;
> +		}
> +
> +		rc = reset_control_assert(hda->reset_hda2hdmi);
> +		if (rc) {
> +			dev_err(dev, "hda2hdmi reset assert failed, err: %d\n",
> +				rc);
> +			return rc;
> +		}
> +
> +		rc = reset_control_assert(hda->reset_hda2codec_2x);
> +		if (rc) {
> +			dev_err(dev,
> +				"hda2codec_2x reset assert failed, err: %d\n",
> +				rc);
>  			return rc;
> +		}
>  	}
>  
>  	rc = clk_bulk_prepare_enable(hda->nclocks, hda->clocks);
> @@ -187,9 +211,27 @@ static int __maybe_unused hda_tegra_runtime_resume(struct device *dev)
>  	} else {
>  		usleep_range(10, 100);
>  
> -		rc = reset_control_deassert(hda->reset);
> -		if (rc)
> +		rc = reset_control_deassert(hda->reset_hda);
> +		if (rc) {
> +			dev_err(dev, "hda reset deassert failed, err: %d\n",
> +				rc);
>  			return rc;
> +		}
> +
> +		rc = reset_control_deassert(hda->reset_hda2hdmi);
> +		if (rc) {
> +			dev_err(dev, "hda2hdmi reset deassert failed, err: %d\n",
> +				rc);
> +			return rc;
> +		}
> +
> +		rc = reset_control_deassert(hda->reset_hda2codec_2x);
> +		if (rc) {
> +			dev_err(dev,
> +				"hda2codec_2x reset deassert failed, err: %d\n",
> +				rc);
> +			return rc;
> +		}
>  	}
>  
>  	return 0;
> @@ -427,9 +469,17 @@ static int hda_tegra_create(struct snd_card *card,
>  	return 0;
>  }
>  
> +static const struct hda_tegra_soc tegra30_data = {
> +	.has_hda2codec_2x_reset = true,
> +};
> +
> +static const struct hda_tegra_soc tegra194_data = {
> +	.has_hda2codec_2x_reset = false,
> +};
> +
>  static const struct of_device_id hda_tegra_match[] = {
> -	{ .compatible = "nvidia,tegra30-hda" },
> -	{ .compatible = "nvidia,tegra194-hda" },
> +	{ .compatible = "nvidia,tegra30-hda", .data = &tegra30_data },
> +	{ .compatible = "nvidia,tegra194-hda", .data = &tegra194_data },
>  	{},
>  };
>  MODULE_DEVICE_TABLE(of, hda_tegra_match);
> @@ -449,6 +499,10 @@ static int hda_tegra_probe(struct platform_device *pdev)
>  	hda->dev = &pdev->dev;
>  	chip = &hda->chip;
>  
> +	hda->data = of_device_get_match_data(&pdev->dev);
> +	if (!hda->data)
> +		return -EINVAL;
> +
>  	err = snd_card_new(&pdev->dev, SNDRV_DEFAULT_IDX1, SNDRV_DEFAULT_STR1,
>  			   THIS_MODULE, 0, &card);
>  	if (err < 0) {
> @@ -456,12 +510,34 @@ static int hda_tegra_probe(struct platform_device *pdev)
>  		return err;
>  	}
>  
> -	hda->reset = devm_reset_control_array_get_exclusive(&pdev->dev);
> -	if (IS_ERR(hda->reset)) {
> -		err = PTR_ERR(hda->reset);
> +	hda->reset_hda = devm_reset_control_get_exclusive(&pdev->dev, "hda");
> +	if (IS_ERR(hda->reset_hda)) {
> +		err = PTR_ERR(hda->reset_hda);
>  		goto out_free;
>  	}
>  
> +	hda->reset_hda2hdmi = devm_reset_control_get_exclusive(&pdev->dev,
> +							       "hda2hdmi");
> +	if (IS_ERR(hda->reset_hda2hdmi)) {
> +		err = PTR_ERR(hda->reset_hda2hdmi);
> +		goto out_free;
> +	}
> +
> +	/*
> +	 * "hda2codec_2x" reset is not present on Tegra194. Though DT would
> +	 * be updated to reflect this, but to have backward compatibility
> +	 * below is necessary.
> +	 */
> +	if (hda->data->has_hda2codec_2x_reset) {
> +		hda->reset_hda2codec_2x =
> +			devm_reset_control_get_exclusive(&pdev->dev,
> +							 "hda2codec_2x");
> +		if (IS_ERR(hda->reset_hda2codec_2x)) {
> +			err = PTR_ERR(hda->reset_hda2codec_2x);
> +			goto out_free;
> +		}
> +	}
> +
>  	hda->clocks[hda->nclocks++].id = "hda";
>  	hda->clocks[hda->nclocks++].id = "hda2hdmi";
>  	hda->clocks[hda->nclocks++].id = "hda2codec_2x";
> 

All stable kernels affected by this problem that don't support the bulk
reset API are EOL now. Please use the bulk reset API like I suggested in the
comment to v1; it will allow us to have cleaner and nicer code.

The bulk reset code will look similar to the bulk clk API already used
by the HDA driver, you'll only need to skip adding the hda2codec_2x to
resets[3] and switch to use reset_control_bulk_reset_*() variants of the
functions.
Sameer Pujar Dec. 21, 2021, 6:18 a.m. UTC | #2
On 12/21/2021 6:51 AM, Dmitry Osipenko wrote:
>
> All stable kernels affected by this problem that don't support the bulk
> reset API are EOL now. Please use bulk reset API like I suggested in the
> comment to v1, it will allow us to have a cleaner and nicer code.

Agreed that it would be more compact and cleaner, but any specific reset
failure in the group won't be obvious in the logs. In this case it
failed silently. If compactness is preferred, then maybe I can keep an
error print at the group level so that we see some failure context whenever
it happens.
Dmitry Osipenko Dec. 21, 2021, 3:20 p.m. UTC | #3
21.12.2021 09:18, Sameer Pujar пишет:
> 
> 
> On 12/21/2021 6:51 AM, Dmitry Osipenko wrote:
>>
>> All stable kernels affected by this problem that don't support the bulk
>> reset API are EOL now. Please use bulk reset API like I suggested in the
>> comment to v1, it will allow us to have a cleaner and nicer code.
> 
> Agree that it would be compact and cleaner, but any specific reset
> failure in the group won't be obvious in the logs. In this case it
> failed silently. If compactness is preferred, then may be I can keep an
> error print at group level so that we see some failure context whenever
> it happens.

The group shouldn't ever fail unless the device-tree is wrong. Why do you
think we should care about a case which realistically won't ever
happen? This is a rather impractical approach.

If we really care about those error messages, then it will be much more
reasonable to add them to the reset core, like the clk core does [1],
IMO. This will be a trivial change. Will you be happy with this variant?

[1]
https://elixir.bootlin.com/linux/v5.16-rc6/source/drivers/clk/clk-bulk.c#L100

diff --git a/drivers/reset/core.c b/drivers/reset/core.c
index 61e688882643..85ce0d6eeb34 100644
--- a/drivers/reset/core.c
+++ b/drivers/reset/core.c
@@ -962,6 +962,11 @@ int __reset_control_bulk_get(struct device *dev, int num_rstcs,
 						    shared, optional, acquired);
 		if (IS_ERR(rstcs[i].rstc)) {
 			ret = PTR_ERR(rstcs[i].rstc);
+
+			if (ret != -EPROBE_DEFER)
+				dev_err(dev, "Failed to get reset '%s': %d\n",
+					rstcs[i].id, ret);
+
 			goto err;
 		}
 	}
Sameer Pujar Dec. 21, 2021, 4:03 p.m. UTC | #4
On 12/21/2021 8:50 PM, Dmitry Osipenko wrote:
> 21.12.2021 09:18, Sameer Pujar пишет:
>>
>> On 12/21/2021 6:51 AM, Dmitry Osipenko wrote:
>>> All stable kernels affected by this problem that don't support the bulk
>>> reset API are EOL now. Please use bulk reset API like I suggested in the
>>> comment to v1, it will allow us to have a cleaner and nicer code.
>> Agree that it would be compact and cleaner, but any specific reset
>> failure in the group won't be obvious in the logs. In this case it
>> failed silently. If compactness is preferred, then may be I can keep an
>> error print at group level so that we see some failure context whenever
>> it happens.
> The group shouldn't fail ever unless device-tree is wrong. Why do you
> think we should care about the case which realistically won't ever
> happen? This is a bit unpractical approach.

Though it is very rare that something like this would happen, it can't
be ruled out completely.

> If we really care about those error messages, then will be much more
> reasonable to add them to the reset core, like clk core does it [1],
> IMO. This will be a trivial change. Will you be happy with this variant?

It would be nicer to know why exactly it failed. Yes, it makes sense to 
have this in the core. I will send v3 with bulk APIs for HDA driver. 
Thank you.

>
> [1]
> https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Felixir.bootlin.com%2Flinux%2Fv5.16-rc6%2Fsource%2Fdrivers%2Fclk%2Fclk-bulk.c%23L100&amp;data=04%7C01%7Cspujar%40nvidia.com%7C53e278c9a4804612f74b08d9c49564a0%7C43083d15727340c1b7db39efd9ccc17a%7C0%7C0%7C637756968218491760%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C3000&amp;sdata=QEe8SlSdAN1N8nOu3XqtAdbXP1JbtMBPlswqnBIhq5w%3D&amp;reserved=0
>
> diff --git a/drivers/reset/core.c b/drivers/reset/core.c
> index 61e688882643..85ce0d6eeb34 100644
> --- a/drivers/reset/core.c
> +++ b/drivers/reset/core.c
> @@ -962,6 +962,11 @@ int __reset_control_bulk_get(struct device *dev,
> int num_rstcs,
>                                                      shared, optional, acquired);
>                  if (IS_ERR(rstcs[i].rstc)) {
>                          ret = PTR_ERR(rstcs[i].rstc);
> +
> +                       if (ret != -EPROBE_DEFER)
> +                               dev_err(dev, "Failed to get reset '%s': %d\n",
> +                                       rstcs[i].id, ret);
> +
>                          goto err;
>                  }
>          }
diff mbox series

Patch

diff --git a/sound/pci/hda/hda_tegra.c b/sound/pci/hda/hda_tegra.c
index ea700395..be010cd 100644
--- a/sound/pci/hda/hda_tegra.c
+++ b/sound/pci/hda/hda_tegra.c
@@ -68,14 +68,21 @@ 
  */
 #define TEGRA194_NUM_SDO_LINES	  4
 
+struct hda_tegra_soc {
+	bool has_hda2codec_2x_reset;
+};
+
 struct hda_tegra {
 	struct azx chip;
 	struct device *dev;
-	struct reset_control *reset;
+	struct reset_control *reset_hda;
+	struct reset_control *reset_hda2hdmi;
+	struct reset_control *reset_hda2codec_2x;
 	struct clk_bulk_data clocks[3];
 	unsigned int nclocks;
 	void __iomem *regs;
 	struct work_struct probe_work;
+	const struct hda_tegra_soc *data;
 };
 
 #ifdef CONFIG_PM
@@ -170,9 +177,26 @@  static int __maybe_unused hda_tegra_runtime_resume(struct device *dev)
 	int rc;
 
 	if (!chip->running) {
-		rc = reset_control_assert(hda->reset);
-		if (rc)
+		rc = reset_control_assert(hda->reset_hda);
+		if (rc) {
+			dev_err(dev, "hda reset assert failed, err: %d\n", rc);
+			return rc;
+		}
+
+		rc = reset_control_assert(hda->reset_hda2hdmi);
+		if (rc) {
+			dev_err(dev, "hda2hdmi reset assert failed, err: %d\n",
+				rc);
+			return rc;
+		}
+
+		rc = reset_control_assert(hda->reset_hda2codec_2x);
+		if (rc) {
+			dev_err(dev,
+				"hda2codec_2x reset assert failed, err: %d\n",
+				rc);
 			return rc;
+		}
 	}
 
 	rc = clk_bulk_prepare_enable(hda->nclocks, hda->clocks);
@@ -187,9 +211,27 @@  static int __maybe_unused hda_tegra_runtime_resume(struct device *dev)
 	} else {
 		usleep_range(10, 100);
 
-		rc = reset_control_deassert(hda->reset);
-		if (rc)
+		rc = reset_control_deassert(hda->reset_hda);
+		if (rc) {
+			dev_err(dev, "hda reset deassert failed, err: %d\n",
+				rc);
 			return rc;
+		}
+
+		rc = reset_control_deassert(hda->reset_hda2hdmi);
+		if (rc) {
+			dev_err(dev, "hda2hdmi reset deassert failed, err: %d\n",
+				rc);
+			return rc;
+		}
+
+		rc = reset_control_deassert(hda->reset_hda2codec_2x);
+		if (rc) {
+			dev_err(dev,
+				"hda2codec_2x reset deassert failed, err: %d\n",
+				rc);
+			return rc;
+		}
 	}
 
 	return 0;
@@ -427,9 +469,17 @@  static int hda_tegra_create(struct snd_card *card,
 	return 0;
 }
 
+static const struct hda_tegra_soc tegra30_data = {
+	.has_hda2codec_2x_reset = true,
+};
+
+static const struct hda_tegra_soc tegra194_data = {
+	.has_hda2codec_2x_reset = false,
+};
+
 static const struct of_device_id hda_tegra_match[] = {
-	{ .compatible = "nvidia,tegra30-hda" },
-	{ .compatible = "nvidia,tegra194-hda" },
+	{ .compatible = "nvidia,tegra30-hda", .data = &tegra30_data },
+	{ .compatible = "nvidia,tegra194-hda", .data = &tegra194_data },
 	{},
 };
 MODULE_DEVICE_TABLE(of, hda_tegra_match);
@@ -449,6 +499,10 @@  static int hda_tegra_probe(struct platform_device *pdev)
 	hda->dev = &pdev->dev;
 	chip = &hda->chip;
 
+	hda->data = of_device_get_match_data(&pdev->dev);
+	if (!hda->data)
+		return -EINVAL;
+
 	err = snd_card_new(&pdev->dev, SNDRV_DEFAULT_IDX1, SNDRV_DEFAULT_STR1,
 			   THIS_MODULE, 0, &card);
 	if (err < 0) {
@@ -456,12 +510,34 @@  static int hda_tegra_probe(struct platform_device *pdev)
 		return err;
 	}
 
-	hda->reset = devm_reset_control_array_get_exclusive(&pdev->dev);
-	if (IS_ERR(hda->reset)) {
-		err = PTR_ERR(hda->reset);
+	hda->reset_hda = devm_reset_control_get_exclusive(&pdev->dev, "hda");
+	if (IS_ERR(hda->reset_hda)) {
+		err = PTR_ERR(hda->reset_hda);
 		goto out_free;
 	}
 
+	hda->reset_hda2hdmi = devm_reset_control_get_exclusive(&pdev->dev,
+							       "hda2hdmi");
+	if (IS_ERR(hda->reset_hda2hdmi)) {
+		err = PTR_ERR(hda->reset_hda2hdmi);
+		goto out_free;
+	}
+
+	/*
+	 * "hda2codec_2x" reset is not present on Tegra194. Though DT would
+	 * be updated to reflect this, but to have backward compatibility
+	 * below is necessary.
+	 */
+	if (hda->data->has_hda2codec_2x_reset) {
+		hda->reset_hda2codec_2x =
+			devm_reset_control_get_exclusive(&pdev->dev,
+							 "hda2codec_2x");
+		if (IS_ERR(hda->reset_hda2codec_2x)) {
+			err = PTR_ERR(hda->reset_hda2codec_2x);
+			goto out_free;
+		}
+	}
+
 	hda->clocks[hda->nclocks++].id = "hda";
 	hda->clocks[hda->nclocks++].id = "hda2hdmi";
 	hda->clocks[hda->nclocks++].id = "hda2codec_2x";