PCI: qcom: Disable power management for uPD720201 USB3 controller

Message ID 20200615182413.15649-1-georgi.djakov@linaro.org
State New
Headers show
Series
  • PCI: qcom: Disable power management for uPD720201 USB3 controller
Related show

Commit Message

Georgi Djakov June 15, 2020, 6:24 p.m.
The uPD720201 USB3 host controller (connected to PCIe) on the Dragonboard
845c is often failing during suspend and resume. The following messages
are seen over the console:

  PM: suspend entry (s2idle)
  Filesystems sync: 0.000 seconds
  Freezing user space processes ... (elapsed 0.001 seconds) done.
  OOM killer disabled.
  Freezing remaining freezable tasks ... (elapsed 0.001 seconds) done.
  printk: Suspending console(s) (use no_console_suspend to debug)
  dwc3-qcom a8f8800.usb: HS-PHY not in L2
  dwc3-qcom a6f8800.usb: HS-PHY not in L2
  xhci_hcd 0000:01:00.0: can't change power state from D3hot to D0 (config
  space inaccessible)
  xhci_hcd 0000:01:00.0: can't change power state from D3hot to D0 (config
  space inaccessible)
  xhci_hcd 0000:01:00.0: Controller not ready at resume -19
  xhci_hcd 0000:01:00.0: PCI post-resume error -19!
  xhci_hcd 0000:01:00.0: HC died; cleaning up

Then the USB devices are not functional anymore. Let's disable the PM of
the controller for now, as this will at least keep USB devices working
even after suspend and resume.

Signed-off-by: Georgi Djakov <georgi.djakov@linaro.org>

---
 drivers/pci/controller/dwc/pcie-qcom.c | 8 ++++++++
 1 file changed, 8 insertions(+)

Comments

Bjorn Helgaas June 16, 2020, 9:17 p.m. | #1
[+cc Rafael, linux-pm]

On Mon, Jun 15, 2020 at 09:24:13PM +0300, Georgi Djakov wrote:
> The uPD720201 USB3 host controller (connected to PCIe) on the Dragonboard

> 845c is often failing during suspend and resume. The following messages

> are seen over the console:

> 

>   PM: suspend entry (s2idle)

>   Filesystems sync: 0.000 seconds

>   Freezing user space processes ... (elapsed 0.001 seconds) done.

>   OOM killer disabled.

>   Freezing remaining freezable tasks ... (elapsed 0.001 seconds) done.

>   printk: Suspending console(s) (use no_console_suspend to debug)

>   dwc3-qcom a8f8800.usb: HS-PHY not in L2

>   dwc3-qcom a6f8800.usb: HS-PHY not in L2

>   xhci_hcd 0000:01:00.0: can't change power state from D3hot to D0 (config

>   space inaccessible)

>   xhci_hcd 0000:01:00.0: can't change power state from D3hot to D0 (config

>   space inaccessible)

>   xhci_hcd 0000:01:00.0: Controller not ready at resume -19

>   xhci_hcd 0000:01:00.0: PCI post-resume error -19!

>   xhci_hcd 0000:01:00.0: HC died; cleaning up

> 

> Then the USB devices are not functional anymore. Let's disable the PM of

> the controller for now, as this will at least keep USB devices working

> even after suspend and resume.


This seems like we're just covering up a deeper problem here.  I think
it would be better to fix the underlying problem.

The quirk you're adding is specific to the Renesas 0x0014 device.  Is
there some reason to think the problem is specific to that device, or
might other devices have the same problem?

Maybe we're missing something in pcie-qcom.c?  Is there any
suspend/resume support required in that driver?  It doesn't look like
it has anything except that it calls pm_runtime_enable().

> Signed-off-by: Georgi Djakov <georgi.djakov@linaro.org>

> ---

>  drivers/pci/controller/dwc/pcie-qcom.c | 8 ++++++++

>  1 file changed, 8 insertions(+)

> 

> diff --git a/drivers/pci/controller/dwc/pcie-qcom.c b/drivers/pci/controller/dwc/pcie-qcom.c

> index 138e1a2d21cc..c1f502682a19 100644

> --- a/drivers/pci/controller/dwc/pcie-qcom.c

> +++ b/drivers/pci/controller/dwc/pcie-qcom.c

> @@ -1439,6 +1439,13 @@ static void qcom_fixup_class(struct pci_dev *dev)

>  {

>  	dev->class = PCI_CLASS_BRIDGE_PCI << 8;

>  }

> +

> +static void qcom_fixup_nopm(struct pci_dev *dev)

> +{

> +	dev->pm_cap = 0;

> +	dev_info(&dev->dev, "Disabling PCI power management\n");

> +}

> +

>  DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_QCOM, 0x0101, qcom_fixup_class);

>  DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_QCOM, 0x0104, qcom_fixup_class);

>  DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_QCOM, 0x0106, qcom_fixup_class);

> @@ -1446,6 +1453,7 @@ DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_QCOM, 0x0107, qcom_fixup_class);

>  DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_QCOM, 0x0302, qcom_fixup_class);

>  DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_QCOM, 0x1000, qcom_fixup_class);

>  DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_QCOM, 0x1001, qcom_fixup_class);

> +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_RENESAS, 0x0014, qcom_fixup_nopm);


The convention is that DECLARE_PCI_FIXUP_*() comes immediately after
the quirk function itself, so the whole patch would be a single diff
hunk.  See drivers/pci/quirks.c for many examples.

>  static struct platform_driver qcom_pcie_driver = {

>  	.probe = qcom_pcie_probe,
Stanimir Varbanov June 16, 2020, 11:36 p.m. | #2
On 6/17/20 12:17 AM, Bjorn Helgaas wrote:
> [+cc Rafael, linux-pm]

> 

> On Mon, Jun 15, 2020 at 09:24:13PM +0300, Georgi Djakov wrote:

>> The uPD720201 USB3 host controller (connected to PCIe) on the Dragonboard

>> 845c is often failing during suspend and resume. The following messages

>> are seen over the console:

>>

>>   PM: suspend entry (s2idle)

>>   Filesystems sync: 0.000 seconds

>>   Freezing user space processes ... (elapsed 0.001 seconds) done.

>>   OOM killer disabled.

>>   Freezing remaining freezable tasks ... (elapsed 0.001 seconds) done.

>>   printk: Suspending console(s) (use no_console_suspend to debug)

>>   dwc3-qcom a8f8800.usb: HS-PHY not in L2

>>   dwc3-qcom a6f8800.usb: HS-PHY not in L2

>>   xhci_hcd 0000:01:00.0: can't change power state from D3hot to D0 (config

>>   space inaccessible)

>>   xhci_hcd 0000:01:00.0: can't change power state from D3hot to D0 (config

>>   space inaccessible)

>>   xhci_hcd 0000:01:00.0: Controller not ready at resume -19

>>   xhci_hcd 0000:01:00.0: PCI post-resume error -19!

>>   xhci_hcd 0000:01:00.0: HC died; cleaning up

>>

>> Then the USB devices are not functional anymore. Let's disable the PM of

>> the controller for now, as this will at least keep USB devices working

>> even after suspend and resume.

> 

> This seems like we're just covering up a deeper problem here.  I think

> it would be better to fix the underlying problem.

> 

> The quirk you're adding is specific to the Renesas 0x0014 device.  Is

> there some reason to think the problem is specific to that device, or

> might other devices have the same problem?


I also think that the USB controller might have some issue with .resume.
It is obvious that the qcom-pcie RC doesn't implement suspend/resume, which
means that its clocks are not disabled at the moment when USB is resuming.

Georgi, can you try to bypass suspend/resume in the USB driver itself?

> 

> Maybe we're missing something in pcie-qcom.c?  Is there any

> suspend/resume support required in that driver?  It doesn't look like

> it has anything except that it calls pm_runtime_enable().


Yes, we definitely did not implement suspend/resume callbacks, but
that means that the RC should be functional while the system is in suspend.

> 

>> Signed-off-by: Georgi Djakov <georgi.djakov@linaro.org>

>> ---

>>  drivers/pci/controller/dwc/pcie-qcom.c | 8 ++++++++

>>  1 file changed, 8 insertions(+)

>>

>> diff --git a/drivers/pci/controller/dwc/pcie-qcom.c b/drivers/pci/controller/dwc/pcie-qcom.c

>> index 138e1a2d21cc..c1f502682a19 100644

>> --- a/drivers/pci/controller/dwc/pcie-qcom.c

>> +++ b/drivers/pci/controller/dwc/pcie-qcom.c

>> @@ -1439,6 +1439,13 @@ static void qcom_fixup_class(struct pci_dev *dev)

>>  {

>>  	dev->class = PCI_CLASS_BRIDGE_PCI << 8;

>>  }

>> +

>> +static void qcom_fixup_nopm(struct pci_dev *dev)

>> +{

>> +	dev->pm_cap = 0;

>> +	dev_info(&dev->dev, "Disabling PCI power management\n");

>> +}

>> +

>>  DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_QCOM, 0x0101, qcom_fixup_class);

>>  DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_QCOM, 0x0104, qcom_fixup_class);

>>  DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_QCOM, 0x0106, qcom_fixup_class);

>> @@ -1446,6 +1453,7 @@ DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_QCOM, 0x0107, qcom_fixup_class);

>>  DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_QCOM, 0x0302, qcom_fixup_class);

>>  DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_QCOM, 0x1000, qcom_fixup_class);

>>  DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_QCOM, 0x1001, qcom_fixup_class);

>> +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_RENESAS, 0x0014, qcom_fixup_nopm);

> 

> The convention is that DECLARE_PCI_FIXUP_*() comes immediately after

> the quirk function itself, so the whole patch would be a single diff

> hunk.  See drivers/pci/quirks.c for many examples.

> 

>>  static struct platform_driver qcom_pcie_driver = {

>>  	.probe = qcom_pcie_probe,


-- 
regards,
Stan
Bjorn Helgaas June 18, 2020, 11:20 p.m. | #3
On Tue, Jun 16, 2020 at 04:17:11PM -0500, Bjorn Helgaas wrote:
> On Mon, Jun 15, 2020 at 09:24:13PM +0300, Georgi Djakov wrote:

> > The uPD720201 USB3 host controller (connected to PCIe) on the Dragonboard

> > 845c is often failing during suspend and resume. The following messages

> > are seen over the console:

> > 

> >   PM: suspend entry (s2idle)

> >   Filesystems sync: 0.000 seconds

> >   Freezing user space processes ... (elapsed 0.001 seconds) done.

> >   OOM killer disabled.

> >   Freezing remaining freezable tasks ... (elapsed 0.001 seconds) done.

> >   printk: Suspending console(s) (use no_console_suspend to debug)

> >   dwc3-qcom a8f8800.usb: HS-PHY not in L2

> >   dwc3-qcom a6f8800.usb: HS-PHY not in L2

> >   xhci_hcd 0000:01:00.0: can't change power state from D3hot to D0 (config

> >   space inaccessible)

> >   xhci_hcd 0000:01:00.0: can't change power state from D3hot to D0 (config

> >   space inaccessible)

> >   xhci_hcd 0000:01:00.0: Controller not ready at resume -19

> >   xhci_hcd 0000:01:00.0: PCI post-resume error -19!

> >   xhci_hcd 0000:01:00.0: HC died; cleaning up

> > 

> > Then the USB devices are not functional anymore. Let's disable the PM of

> > the controller for now, as this will at least keep USB devices working

> > even after suspend and resume.


Georgi, can you collect the complete dmesg log and "sudo lspci
-vvxxxx" output somewhere?  A new report at bugzilla.kernel.org would
be a good spot.

Maybe we're missing a delay here.  The "config space inaccessible"
message means we read 0xffff from PCI_PM_CTRL, which probably means
the device is still in D3cold.  If it were in any other power state,
PCI_PM_CTRL should be readable, and 0xffff is not a valid value.

Could you also insert a dump_stack() right after we print that "config
space inaccessible" message?  I don't know enough about power
management to understand why we see that message twice.

> This seems like we're just covering up a deeper problem here.  I think

> it would be better to fix the underlying problem.

> 

> The quirk you're adding is specific to the Renesas 0x0014 device.  Is

> there some reason to think the problem is specific to that device, or

> might other devices have the same problem?

> 

> Maybe we're missing something in pcie-qcom.c?  Is there any

> suspend/resume support required in that driver?  It doesn't look like

> it has anything except that it calls pm_runtime_enable().

> 

> > Signed-off-by: Georgi Djakov <georgi.djakov@linaro.org>

> > ---

> >  drivers/pci/controller/dwc/pcie-qcom.c | 8 ++++++++

> >  1 file changed, 8 insertions(+)

> > 

> > diff --git a/drivers/pci/controller/dwc/pcie-qcom.c b/drivers/pci/controller/dwc/pcie-qcom.c

> > index 138e1a2d21cc..c1f502682a19 100644

> > --- a/drivers/pci/controller/dwc/pcie-qcom.c

> > +++ b/drivers/pci/controller/dwc/pcie-qcom.c

> > @@ -1439,6 +1439,13 @@ static void qcom_fixup_class(struct pci_dev *dev)

> >  {

> >  	dev->class = PCI_CLASS_BRIDGE_PCI << 8;

> >  }

> > +

> > +static void qcom_fixup_nopm(struct pci_dev *dev)

> > +{

> > +	dev->pm_cap = 0;

> > +	dev_info(&dev->dev, "Disabling PCI power management\n");

> > +}

> > +

> >  DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_QCOM, 0x0101, qcom_fixup_class);

> >  DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_QCOM, 0x0104, qcom_fixup_class);

> >  DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_QCOM, 0x0106, qcom_fixup_class);

> > @@ -1446,6 +1453,7 @@ DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_QCOM, 0x0107, qcom_fixup_class);

> >  DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_QCOM, 0x0302, qcom_fixup_class);

> >  DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_QCOM, 0x1000, qcom_fixup_class);

> >  DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_QCOM, 0x1001, qcom_fixup_class);

> > +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_RENESAS, 0x0014, qcom_fixup_nopm);

> 

> The convention is that DECLARE_PCI_FIXUP_*() comes immediately after

> the quirk function itself, so the whole patch would be a single diff

> hunk.  See drivers/pci/quirks.c for many examples.

> 

> >  static struct platform_driver qcom_pcie_driver = {

> >  	.probe = qcom_pcie_probe,

Patch

diff --git a/drivers/pci/controller/dwc/pcie-qcom.c b/drivers/pci/controller/dwc/pcie-qcom.c
index 138e1a2d21cc..c1f502682a19 100644
--- a/drivers/pci/controller/dwc/pcie-qcom.c
+++ b/drivers/pci/controller/dwc/pcie-qcom.c
@@ -1439,6 +1439,13 @@  static void qcom_fixup_class(struct pci_dev *dev)
 {
 	dev->class = PCI_CLASS_BRIDGE_PCI << 8;
 }
+
+static void qcom_fixup_nopm(struct pci_dev *dev)
+{
+	dev->pm_cap = 0;
+	dev_info(&dev->dev, "Disabling PCI power management\n");
+}
+
 DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_QCOM, 0x0101, qcom_fixup_class);
 DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_QCOM, 0x0104, qcom_fixup_class);
 DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_QCOM, 0x0106, qcom_fixup_class);
@@ -1446,6 +1453,7 @@  DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_QCOM, 0x0107, qcom_fixup_class);
 DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_QCOM, 0x0302, qcom_fixup_class);
 DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_QCOM, 0x1000, qcom_fixup_class);
 DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_QCOM, 0x1001, qcom_fixup_class);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_RENESAS, 0x0014, qcom_fixup_nopm);
 
 static struct platform_driver qcom_pcie_driver = {
 	.probe = qcom_pcie_probe,