diff mbox series

[v6,07/10] mhi: pci_generic: Add PCI error handlers

Message ID 1609231431-10048-8-git-send-email-loic.poulain@linaro.org
State Superseded
Headers show
Series mhi: pci_generic: Misc improvements | expand

Commit Message

Loic Poulain Dec. 29, 2020, 8:43 a.m. UTC
In AER capable root complex, errors are reported to the host which
can then act accordingly and perform PCI recovering procedure.

This patch enables error reporting and implements error_detected,
slot_reset and resume callbacks.

Signed-off-by: Loic Poulain <loic.poulain@linaro.org>

Reviewed-by Hemant Kumar <hemantk@codeaurora.org>
---
 drivers/bus/mhi/pci_generic.c | 50 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 50 insertions(+)

-- 
2.7.4

Comments

Manivannan Sadhasivam Dec. 31, 2020, 7:18 a.m. UTC | #1
On Tue, Dec 29, 2020 at 09:43:48AM +0100, Loic Poulain wrote:
> In AER capable root complex, errors are reported to the host which

> can then act accordingly and perform PCI recovering procedure.

> 

> This patch enables error reporting and implements error_detected,

> slot_reset and resume callbacks.

> 

> Signed-off-by: Loic Poulain <loic.poulain@linaro.org>

> Reviewed-by Hemant Kumar <hemantk@codeaurora.org>

> ---

>  drivers/bus/mhi/pci_generic.c | 50 +++++++++++++++++++++++++++++++++++++++++++

>  1 file changed, 50 insertions(+)

> 

> diff --git a/drivers/bus/mhi/pci_generic.c b/drivers/bus/mhi/pci_generic.c

> index 3297d95..9fe1e30 100644

> --- a/drivers/bus/mhi/pci_generic.c

> +++ b/drivers/bus/mhi/pci_generic.c

> @@ -8,6 +8,7 @@

>   * Copyright (C) 2020 Linaro Ltd <loic.poulain@linaro.org>

>   */

>  

> +#include <linux/aer.h>

>  #include <linux/delay.h>

>  #include <linux/device.h>

>  #include <linux/mhi.h>

> @@ -405,6 +406,8 @@ static int mhi_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)

>  	pci_save_state(pdev);

>  	mhi_pdev->pci_state = pci_store_saved_state(pdev);

>  

> +	pci_enable_pcie_error_reporting(pdev);

> +

>  	err = mhi_register_controller(mhi_cntrl, mhi_cntrl_config);

>  	if (err)

>  		return err;

> @@ -501,7 +504,54 @@ static void mhi_pci_reset_done(struct pci_dev *pdev)

>  	set_bit(MHI_PCI_DEV_STARTED, &mhi_pdev->status);

>  }

>  

> +static pci_ers_result_t mhi_pci_error_detected(struct pci_dev *pdev,

> +					       pci_channel_state_t state)

> +{

> +	struct mhi_pci_device *mhi_pdev = pci_get_drvdata(pdev);

> +	struct mhi_controller *mhi_cntrl = &mhi_pdev->mhi_cntrl;

> +

> +	dev_err(&pdev->dev, "PCI error detected, state = %u\n", state);

> +

> +	if (state == pci_channel_io_perm_failure)

> +		return PCI_ERS_RESULT_DISCONNECT;

> +

> +	/* Clean up MHI state */

> +	if (test_and_clear_bit(MHI_PCI_DEV_STARTED, &mhi_pdev->status)) {

> +		mhi_power_down(mhi_cntrl, false);

> +		mhi_unprepare_after_power_down(mhi_cntrl);

> +	} else {

> +		/* Nothing to do */

> +		return PCI_ERS_RESULT_RECOVERED;

> +	}

> +

> +	pci_disable_device(pdev);

> +

> +	return PCI_ERS_RESULT_NEED_RESET;

> +}

> +

> +static pci_ers_result_t mhi_pci_slot_reset(struct pci_dev *pdev)

> +{

> +	if (pci_enable_device(pdev)) {

> +		dev_err(&pdev->dev, "Cannot re-enable PCI device after reset.\n");

> +		return PCI_ERS_RESULT_DISCONNECT;

> +	}

> +


This callback will be called after PCI slot reset, so we should also be resetting
the device after enabling it.

Thanks,
Mani

> +	return PCI_ERS_RESULT_RECOVERED;

> +}

> +

> +static void mhi_pci_io_resume(struct pci_dev *pdev)

> +{

> +	struct mhi_pci_device *mhi_pdev = pci_get_drvdata(pdev);

> +

> +	dev_err(&pdev->dev, "PCI slot reset done\n");

> +

> +	queue_work(system_long_wq, &mhi_pdev->recovery_work);

> +}

> +

>  static const struct pci_error_handlers mhi_pci_err_handler = {

> +	.error_detected = mhi_pci_error_detected,

> +	.slot_reset = mhi_pci_slot_reset,

> +	.resume = mhi_pci_io_resume,

>  	.reset_prepare = mhi_pci_reset_prepare,

>  	.reset_done = mhi_pci_reset_done,

>  };

> -- 

> 2.7.4

>
Loic Poulain Dec. 31, 2020, 9:27 a.m. UTC | #2
Hi Mani,

On Thu, 31 Dec 2020 at 08:18, Manivannan Sadhasivam
<manivannan.sadhasivam@linaro.org> wrote:
>

> On Tue, Dec 29, 2020 at 09:43:48AM +0100, Loic Poulain wrote:

> > In AER capable root complex, errors are reported to the host which

> > can then act accordingly and perform PCI recovering procedure.

> >

> > This patch enables error reporting and implements error_detected,

> > slot_reset and resume callbacks.

> >

> > Signed-off-by: Loic Poulain <loic.poulain@linaro.org>

> > Reviewed-by Hemant Kumar <hemantk@codeaurora.org>

> > ---

> >  drivers/bus/mhi/pci_generic.c | 50 +++++++++++++++++++++++++++++++++++++++++++

> >  1 file changed, 50 insertions(+)


> > +static pci_ers_result_t mhi_pci_slot_reset(struct pci_dev *pdev)

> > +{

> > +     if (pci_enable_device(pdev)) {

> > +             dev_err(&pdev->dev, "Cannot re-enable PCI device after reset.\n");

> > +             return PCI_ERS_RESULT_DISCONNECT;

> > +     }

> > +

>

> This callback will be called after PCI slot reset, so we should also be resetting

> the device after enabling it.


Yes, but that is done in mhi_pci_io_resume.

From the PCI error recovery documentation, "drivers should not restart
normal I/O processing operations at this point (in slot_reset) If all
device drivers report success on this callback, the platform will call
resume() to complete the sequence, and let the driver restart normal
I/O processing."
The actual MHI PCI/recovery is then done in resume (mhi_pci_io_resume).

Regards,
Loic
Manivannan Sadhasivam Dec. 31, 2020, 11:14 a.m. UTC | #3
On Thu, Dec 31, 2020 at 10:27:01AM +0100, Loic Poulain wrote:
> Hi Mani,

> 

> On Thu, 31 Dec 2020 at 08:18, Manivannan Sadhasivam

> <manivannan.sadhasivam@linaro.org> wrote:

> >

> > On Tue, Dec 29, 2020 at 09:43:48AM +0100, Loic Poulain wrote:

> > > In AER capable root complex, errors are reported to the host which

> > > can then act accordingly and perform PCI recovering procedure.

> > >

> > > This patch enables error reporting and implements error_detected,

> > > slot_reset and resume callbacks.

> > >

> > > Signed-off-by: Loic Poulain <loic.poulain@linaro.org>

> > > Reviewed-by Hemant Kumar <hemantk@codeaurora.org>

> > > ---

> > >  drivers/bus/mhi/pci_generic.c | 50 +++++++++++++++++++++++++++++++++++++++++++

> > >  1 file changed, 50 insertions(+)

> 

> > > +static pci_ers_result_t mhi_pci_slot_reset(struct pci_dev *pdev)

> > > +{

> > > +     if (pci_enable_device(pdev)) {

> > > +             dev_err(&pdev->dev, "Cannot re-enable PCI device after reset.\n");

> > > +             return PCI_ERS_RESULT_DISCONNECT;

> > > +     }

> > > +

> >

> > This callback will be called after PCI slot reset, so we should also be resetting

> > the device after enabling it.

> 

> Yes, but that is done in mhi_pci_io_resume.

> 

> From the PCI error recovery documentation, "drivers should not restart

> normal I/O processing operations at this point (in slot_reset) If all

> device drivers report success on this callback, the platform will call

> resume() to complete the sequence, and let the driver restart normal

> I/O processing."

> The actual MHI PCI/recovery is then done in resume (mhi_pci_io_resume).

> 


If you read one paragraph above,

"This call gives drivers the chance to re-initialize the hardware
(re-download firmware, etc.).  At this point, the driver may assume
that the card is in a fresh state and is fully functional. The slot
is unfrozen and the driver has full access to PCI config space,
memory mapped I/O space and DMA."

So at the end of this call, the device is assumed to be functional (then only
PCI_ERS_RESULT_RECOVERED makes sense).

IMO, you should call mhi_pci_reset_prepare() in this callback and
mhi_pci_reset_done() in resume(). No need to schedule the recovery work.

Thanks,
Mani

> Regards,

> Loic
diff mbox series

Patch

diff --git a/drivers/bus/mhi/pci_generic.c b/drivers/bus/mhi/pci_generic.c
index 3297d95..9fe1e30 100644
--- a/drivers/bus/mhi/pci_generic.c
+++ b/drivers/bus/mhi/pci_generic.c
@@ -8,6 +8,7 @@ 
  * Copyright (C) 2020 Linaro Ltd <loic.poulain@linaro.org>
  */
 
+#include <linux/aer.h>
 #include <linux/delay.h>
 #include <linux/device.h>
 #include <linux/mhi.h>
@@ -405,6 +406,8 @@  static int mhi_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 	pci_save_state(pdev);
 	mhi_pdev->pci_state = pci_store_saved_state(pdev);
 
+	pci_enable_pcie_error_reporting(pdev);
+
 	err = mhi_register_controller(mhi_cntrl, mhi_cntrl_config);
 	if (err)
 		return err;
@@ -501,7 +504,54 @@  static void mhi_pci_reset_done(struct pci_dev *pdev)
 	set_bit(MHI_PCI_DEV_STARTED, &mhi_pdev->status);
 }
 
+static pci_ers_result_t mhi_pci_error_detected(struct pci_dev *pdev,
+					       pci_channel_state_t state)
+{
+	struct mhi_pci_device *mhi_pdev = pci_get_drvdata(pdev);
+	struct mhi_controller *mhi_cntrl = &mhi_pdev->mhi_cntrl;
+
+	dev_err(&pdev->dev, "PCI error detected, state = %u\n", state);
+
+	if (state == pci_channel_io_perm_failure)
+		return PCI_ERS_RESULT_DISCONNECT;
+
+	/* Clean up MHI state */
+	if (test_and_clear_bit(MHI_PCI_DEV_STARTED, &mhi_pdev->status)) {
+		mhi_power_down(mhi_cntrl, false);
+		mhi_unprepare_after_power_down(mhi_cntrl);
+	} else {
+		/* Nothing to do */
+		return PCI_ERS_RESULT_RECOVERED;
+	}
+
+	pci_disable_device(pdev);
+
+	return PCI_ERS_RESULT_NEED_RESET;
+}
+
+static pci_ers_result_t mhi_pci_slot_reset(struct pci_dev *pdev)
+{
+	if (pci_enable_device(pdev)) {
+		dev_err(&pdev->dev, "Cannot re-enable PCI device after reset.\n");
+		return PCI_ERS_RESULT_DISCONNECT;
+	}
+
+	return PCI_ERS_RESULT_RECOVERED;
+}
+
+static void mhi_pci_io_resume(struct pci_dev *pdev)
+{
+	struct mhi_pci_device *mhi_pdev = pci_get_drvdata(pdev);
+
+	dev_err(&pdev->dev, "PCI slot reset done\n");
+
+	queue_work(system_long_wq, &mhi_pdev->recovery_work);
+}
+
 static const struct pci_error_handlers mhi_pci_err_handler = {
+	.error_detected = mhi_pci_error_detected,
+	.slot_reset = mhi_pci_slot_reset,
+	.resume = mhi_pci_io_resume,
 	.reset_prepare = mhi_pci_reset_prepare,
 	.reset_done = mhi_pci_reset_done,
 };