diff mbox series

[RFC,net-next,v4,2/2] r8169: Implement dynamic ASPM mechanism

Message ID 20210827171452.217123-3-kai.heng.feng@canonical.com
State New
Headers show
Series r8169: Implement dynamic ASPM mechanism for recent 1.0/2.5Gbps Realtek NICs | expand

Commit Message

Kai-Heng Feng Aug. 27, 2021, 5:14 p.m. UTC
r8169 NICs on some platforms have abysmal speed when ASPM is enabled.
Same issue can be observed with older vendor drivers.

The issue is, however, solved by the latest vendor driver. It has a new
mechanism which disables r8169's internal ASPM when the NIC traffic
exceeds 10 packets, and re-enables it otherwise. The likely reason is
that the buffer on the chip is too small for its ASPM exit latency.

Realtek confirmed that all their PCIe LAN NICs, r8106, r8168 and r8125
use dynamic ASPM under Windows. So implement the same mechanism here to
resolve the issue.

Because ASPM control may not be granted by BIOS while ASPM is enabled,
remove aspm_manageable and use pcie_aspm_capable() instead. If BIOS
enables ASPM for the device, we want to enable dynamic ASPM on it.

In addition, since PCIe ASPM can be switched via sysfs, enable/disable
dynamic ASPM accordingly by checking pcie_aspm_enabled().

Signed-off-by: Kai-Heng Feng <kai.heng.feng@canonical.com>
---
v4:
 - Squash two patches
 - Remove aspm_manageable and use pcie_aspm_capable() and
   pcie_aspm_enabled() accordingly

v3:
 - Use msecs_to_jiffies() for delay time
 - Use atomic_t instead of mutex for bh
 - Mention the buffer size and ASPM exit latency in commit message

v2: 
 - Use delayed_work instead of timer_list to avoid interrupt context
 - Use mutex to serialize packet counter read/write
 - Wording change
 drivers/net/ethernet/realtek/r8169_main.c | 77 ++++++++++++++++++++---
 1 file changed, 69 insertions(+), 8 deletions(-)

Comments

Bjorn Helgaas Aug. 30, 2021, 6:09 p.m. UTC | #1
On Sat, Aug 28, 2021 at 01:14:52AM +0800, Kai-Heng Feng wrote:
> r8169 NICs on some platforms have abysmal speed when ASPM is enabled.

> Same issue can be observed with older vendor drivers.

> 

> The issue is however solved by the latest vendor driver. There's a new

> mechanism, which disables r8169's internal ASPM when the NIC traffic has

> more than 10 packets, and vice versa. The possible reason for this is

> likely because the buffer on the chip is too small for its ASPM exit

> latency.


This sounds like good speculation, but of course, it would be better
to have the supporting data.

You say above that this problem affects r8169 on "some platforms."  I
infer that ASPM works fine on other platforms.  It would be extremely
interesting to have some data on both classes, e.g., "lspci -vv"
output for the entire system.

If r8169 ASPM works well on some systems, we *should* be able to make
it work well on *all* systems, because the device can't tell what
system it's in.  All the device can see are the latencies for entry
and exit for link states.

IIUC this patch makes the driver wake up every 1000ms.  If the NIC has
sent or received more than 10 packets in the last 1000ms, it disables
ASPM; otherwise it enables ASPM.

I asked these same questions earlier, but nothing changed, so I won't
raise them again if you don't think they're pertinent.  Some patch
splitting comments below.

> Realtek confirmed that all their PCIe LAN NICs, r8106, r8168 and r8125

> use dynamic ASPM under Windows. So implement the same mechanism here to

> resolve the issue.

> 

> Because ASPM control may not be granted by BIOS while ASPM is enabled,

> remove aspm_manageable and use pcie_aspm_capable() instead. If BIOS

> enables ASPM for the device, we want to enable dynamic ASPM on it.

> 

> In addition, since PCIe ASPM can be switched via sysfs, enable/disable

> dynamic ASPM accordingly by checking pcie_aspm_enabled().

> 

> Signed-off-by: Kai-Heng Feng <kai.heng.feng@canonical.com>

> ---

> v4:

>  - Squash two patches

>  - Remove aspm_manageable and use pcie_aspm_capable()

>    pcie_aspm_enabled() accordingly

> 

> v3:

>  - Use msecs_to_jiffies() for delay time

>  - Use atomic_t instead of mutex for bh

>  - Mention the buffer size and ASPM exit latency in commit message

> 

> v2: 

>  - Use delayed_work instead of timer_list to avoid interrupt context

>  - Use mutex to serialize packet counter read/write

>  - Wording change

>  drivers/net/ethernet/realtek/r8169_main.c | 77 ++++++++++++++++++++---

>  1 file changed, 69 insertions(+), 8 deletions(-)

> 

> diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c

> index 46a6ff9a782d7..97dba8f437b78 100644

> --- a/drivers/net/ethernet/realtek/r8169_main.c

> +++ b/drivers/net/ethernet/realtek/r8169_main.c

> @@ -623,7 +623,10 @@ struct rtl8169_private {

>  	} wk;

>  

>  	unsigned supports_gmii:1;

> -	unsigned aspm_manageable:1;

> +	unsigned rtl_aspm_enabled:1;

> +	struct delayed_work aspm_toggle;

> +	atomic_t aspm_packet_count;

> +

>  	dma_addr_t counters_phys_addr;

>  	struct rtl8169_counters *counters;

>  	struct rtl8169_tc_offsets tc_offset;

> @@ -698,6 +701,20 @@ static bool rtl_is_8168evl_up(struct rtl8169_private *tp)

>  	       tp->mac_version <= RTL_GIGA_MAC_VER_53;

>  }

>  

> +static int rtl_supports_aspm(struct rtl8169_private *tp)

> +{

> +	switch (tp->mac_version) {

> +	case RTL_GIGA_MAC_VER_02 ... RTL_GIGA_MAC_VER_31:

> +	case RTL_GIGA_MAC_VER_37:

> +	case RTL_GIGA_MAC_VER_39:

> +	case RTL_GIGA_MAC_VER_43:

> +	case RTL_GIGA_MAC_VER_47:

> +		return 0;

> +	default:

> +		return 1;

> +	}


This part looks like it should be a separate patch.  I would think
rtl_init_one() could call this once and set a bit in rtl8169_private.
Then rtl_hw_aspm_clkreq_enable() could just return without doing
anything if the bit is not set.

> +}

> +

>  static bool rtl_supports_eee(struct rtl8169_private *tp)

>  {

>  	return tp->mac_version >= RTL_GIGA_MAC_VER_34 &&

> @@ -2699,8 +2716,15 @@ static void rtl_enable_exit_l1(struct rtl8169_private *tp)

>  

>  static void rtl_hw_aspm_clkreq_enable(struct rtl8169_private *tp, bool enable)

>  {

> +	struct pci_dev *pdev = tp->pci_dev;

> +

> +	if (!pcie_aspm_enabled(pdev) && enable)

> +		return;

> +

> +	tp->rtl_aspm_enabled = enable;

> +

>  	/* Don't enable ASPM in the chip if OS can't control ASPM */

> -	if (enable && tp->aspm_manageable) {

> +	if (enable) {


This part also looks like it should be a separate patch, since it is
strictly concerned with whether the OS can control ASPM and doesn't
seem related to dynamic ASPM.

>  		RTL_W8(tp, Config5, RTL_R8(tp, Config5) | ASPM_en);

>  		RTL_W8(tp, Config2, RTL_R8(tp, Config2) | ClkReqEn);

>  	} else {

> @@ -4440,6 +4464,7 @@ static void rtl_tx(struct net_device *dev, struct rtl8169_private *tp,

>  

>  	dirty_tx = tp->dirty_tx;

>  

> +	atomic_add(tp->cur_tx - dirty_tx, &tp->aspm_packet_count);

>  	while (READ_ONCE(tp->cur_tx) != dirty_tx) {

>  		unsigned int entry = dirty_tx % NUM_TX_DESC;

>  		u32 status;

> @@ -4584,6 +4609,8 @@ static int rtl_rx(struct net_device *dev, struct rtl8169_private *tp, int budget

>  		rtl8169_mark_to_asic(desc);

>  	}

>  

> +	atomic_add(count, &tp->aspm_packet_count);

> +

>  	return count;

>  }

>  

> @@ -4691,8 +4718,39 @@ static int r8169_phy_connect(struct rtl8169_private *tp)

>  	return 0;

>  }

>  

> +#define ASPM_PACKET_THRESHOLD 10

> +#define ASPM_TOGGLE_INTERVAL 1000

> +

> +static void rtl8169_aspm_toggle(struct work_struct *work)

> +{

> +	struct rtl8169_private *tp = container_of(work, struct rtl8169_private,

> +						  aspm_toggle.work);

> +	int packet_count;

> +	bool enable;

> +

> +	packet_count = atomic_xchg(&tp->aspm_packet_count, 0);

> +

> +	if (pcie_aspm_enabled(tp->pci_dev)) {

> +		enable = packet_count <= ASPM_PACKET_THRESHOLD;

> +

> +		if (tp->rtl_aspm_enabled != enable) {

> +			rtl_unlock_config_regs(tp);

> +			rtl_hw_aspm_clkreq_enable(tp, enable);

> +			rtl_lock_config_regs(tp);

> +		}

> +	} else if (tp->rtl_aspm_enabled) {

> +		rtl_unlock_config_regs(tp);

> +		rtl_hw_aspm_clkreq_enable(tp, false);

> +		rtl_lock_config_regs(tp);

> +	}

> +

> +	schedule_delayed_work(&tp->aspm_toggle, msecs_to_jiffies(ASPM_TOGGLE_INTERVAL));

> +}

> +

>  static void rtl8169_down(struct rtl8169_private *tp)

>  {

> +	cancel_delayed_work_sync(&tp->aspm_toggle);

> +

>  	/* Clear all task flags */

>  	bitmap_zero(tp->wk.flags, RTL_FLAG_MAX);

>  

> @@ -4719,6 +4777,11 @@ static void rtl8169_up(struct rtl8169_private *tp)

>  	rtl_reset_work(tp);

>  

>  	phy_start(tp->phydev);

> +

> +	/* pcie_aspm_capable may change after system resume */

> +	if (pcie_aspm_support_enabled() && pcie_aspm_capable(tp->pci_dev) &&

> +	    rtl_supports_aspm(tp))

> +		schedule_delayed_work(&tp->aspm_toggle, 0);

>  }

>  

>  static int rtl8169_close(struct net_device *dev)

> @@ -5306,12 +5369,6 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)

>  	if (rc)

>  		return rc;

>  

> -	/* Disable ASPM L1 as that cause random device stop working

> -	 * problems as well as full system hangs for some PCIe devices users.

> -	 */

> -	rc = pci_disable_link_state(pdev, PCIE_LINK_STATE_L1);

> -	tp->aspm_manageable = !rc;

> -

>  	/* enable device (incl. PCI PM wakeup and hotplug setup) */

>  	rc = pcim_enable_device(pdev);

>  	if (rc < 0) {

> @@ -5378,6 +5435,10 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)

>  

>  	INIT_WORK(&tp->wk.work, rtl_task);

>  

> +	INIT_DELAYED_WORK(&tp->aspm_toggle, rtl8169_aspm_toggle);

> +

> +	atomic_set(&tp->aspm_packet_count, 0);

> +

>  	rtl_init_mac_address(tp);

>  

>  	dev->ethtool_ops = &rtl8169_ethtool_ops;

> -- 

> 2.32.0

>
Kai-Heng Feng Sept. 3, 2021, 3:56 p.m. UTC | #2
On Tue, Aug 31, 2021 at 2:09 AM Bjorn Helgaas <helgaas@kernel.org> wrote:
>

> On Sat, Aug 28, 2021 at 01:14:52AM +0800, Kai-Heng Feng wrote:

> > r8169 NICs on some platforms have abysmal speed when ASPM is enabled.

> > Same issue can be observed with older vendor drivers.

> >

> > The issue is however solved by the latest vendor driver. There's a new

> > mechanism, which disables r8169's internal ASPM when the NIC traffic has

> > more than 10 packets, and vice versa. The possible reason for this is

> > likely because the buffer on the chip is too small for its ASPM exit

> > latency.

>

> This sounds like good speculation, but of course, it would be better

> to have the supporting data.

>

> You say above that this problem affects r8169 on "some platforms."  I

> infer that ASPM works fine on other platforms.  It would be extremely

> interesting to have some data on both classes, e.g., "lspci -vv"

> output for the entire system.


lspci data collected from working and non-working systems can be found here:
https://bugzilla.kernel.org/show_bug.cgi?id=214307

>

> If r8169 ASPM works well on some systems, we *should* be able to make

> it work well on *all* systems, because the device can't tell what

> system it's in.  All the device can see are the latencies for entry

> and exit for link states.


It would definitely be better if we could make r8169 ASPM work on all platforms.

>

> IIUC this patch makes the driver wake up every 1000ms.  If the NIC has

> sent or received more than 10 packets in the last 1000ms, it disables

> ASPM; otherwise it enables ASPM.


Yes, that's correct.

>

> I asked these same questions earlier, but nothing changed, so I won't

> raise them again if you don't think they're pertinent.  Some patch

> splitting comments below.


Sorry about that. The lspci data is attached.

>

> > Realtek confirmed that all their PCIe LAN NICs, r8106, r8168 and r8125

> > use dynamic ASPM under Windows. So implement the same mechanism here to

> > resolve the issue.

> >

> > Because ASPM control may not be granted by BIOS while ASPM is enabled,

> > remove aspm_manageable and use pcie_aspm_capable() instead. If BIOS

> > enables ASPM for the device, we want to enable dynamic ASPM on it.

> >

> > In addition, since PCIe ASPM can be switched via sysfs, enable/disable

> > dynamic ASPM accordingly by checking pcie_aspm_enabled().

> >

> > Signed-off-by: Kai-Heng Feng <kai.heng.feng@canonical.com>

> > ---

> > v4:

> >  - Squash two patches

> >  - Remove aspm_manageable and use pcie_aspm_capable()

> >    pcie_aspm_enabled() accordingly

> >

> > v3:

> >  - Use msecs_to_jiffies() for delay time

> >  - Use atomic_t instead of mutex for bh

> >  - Mention the buffer size and ASPM exit latency in commit message

> >

> > v2:

> >  - Use delayed_work instead of timer_list to avoid interrupt context

> >  - Use mutex to serialize packet counter read/write

> >  - Wording change

> >  drivers/net/ethernet/realtek/r8169_main.c | 77 ++++++++++++++++++++---

> >  1 file changed, 69 insertions(+), 8 deletions(-)

> >

> > diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c

> > index 46a6ff9a782d7..97dba8f437b78 100644

> > --- a/drivers/net/ethernet/realtek/r8169_main.c

> > +++ b/drivers/net/ethernet/realtek/r8169_main.c

> > @@ -623,7 +623,10 @@ struct rtl8169_private {

> >       } wk;

> >

> >       unsigned supports_gmii:1;

> > -     unsigned aspm_manageable:1;

> > +     unsigned rtl_aspm_enabled:1;

> > +     struct delayed_work aspm_toggle;

> > +     atomic_t aspm_packet_count;

> > +

> >       dma_addr_t counters_phys_addr;

> >       struct rtl8169_counters *counters;

> >       struct rtl8169_tc_offsets tc_offset;

> > @@ -698,6 +701,20 @@ static bool rtl_is_8168evl_up(struct rtl8169_private *tp)

> >              tp->mac_version <= RTL_GIGA_MAC_VER_53;

> >  }

> >

> > +static int rtl_supports_aspm(struct rtl8169_private *tp)

> > +{

> > +     switch (tp->mac_version) {

> > +     case RTL_GIGA_MAC_VER_02 ... RTL_GIGA_MAC_VER_31:

> > +     case RTL_GIGA_MAC_VER_37:

> > +     case RTL_GIGA_MAC_VER_39:

> > +     case RTL_GIGA_MAC_VER_43:

> > +     case RTL_GIGA_MAC_VER_47:

> > +             return 0;

> > +     default:

> > +             return 1;

> > +     }

>

> This part looks like it should be a separate patch.  I would think

> rtl_init_one() could call this once and set a bit in rtl8169_private.

> Then rtl_hw_aspm_clkreq_enable() could just return without doing

> anything if the bit is not set.


OK, will do in next version.

>

> > +}

> > +

> >  static bool rtl_supports_eee(struct rtl8169_private *tp)

> >  {

> >       return tp->mac_version >= RTL_GIGA_MAC_VER_34 &&

> > @@ -2699,8 +2716,15 @@ static void rtl_enable_exit_l1(struct rtl8169_private *tp)

> >

> >  static void rtl_hw_aspm_clkreq_enable(struct rtl8169_private *tp, bool enable)

> >  {

> > +     struct pci_dev *pdev = tp->pci_dev;

> > +

> > +     if (!pcie_aspm_enabled(pdev) && enable)

> > +             return;

> > +

> > +     tp->rtl_aspm_enabled = enable;

> > +

> >       /* Don't enable ASPM in the chip if OS can't control ASPM */

> > -     if (enable && tp->aspm_manageable) {

> > +     if (enable) {

>

> This part also looks like it should be a separate patch, since it is

> strictly concerned with whether the OS can control ASPM and doesn't

> seem related to dynamic ASPM.


OK, will tackle this in next version.

Kai-Heng

>

> >               RTL_W8(tp, Config5, RTL_R8(tp, Config5) | ASPM_en);

> >               RTL_W8(tp, Config2, RTL_R8(tp, Config2) | ClkReqEn);

> >       } else {

> > @@ -4440,6 +4464,7 @@ static void rtl_tx(struct net_device *dev, struct rtl8169_private *tp,

> >

> >       dirty_tx = tp->dirty_tx;

> >

> > +     atomic_add(tp->cur_tx - dirty_tx, &tp->aspm_packet_count);

> >       while (READ_ONCE(tp->cur_tx) != dirty_tx) {

> >               unsigned int entry = dirty_tx % NUM_TX_DESC;

> >               u32 status;

> > @@ -4584,6 +4609,8 @@ static int rtl_rx(struct net_device *dev, struct rtl8169_private *tp, int budget

> >               rtl8169_mark_to_asic(desc);

> >       }

> >

> > +     atomic_add(count, &tp->aspm_packet_count);

> > +

> >       return count;

> >  }

> >

> > @@ -4691,8 +4718,39 @@ static int r8169_phy_connect(struct rtl8169_private *tp)

> >       return 0;

> >  }

> >

> > +#define ASPM_PACKET_THRESHOLD 10

> > +#define ASPM_TOGGLE_INTERVAL 1000

> > +

> > +static void rtl8169_aspm_toggle(struct work_struct *work)

> > +{

> > +     struct rtl8169_private *tp = container_of(work, struct rtl8169_private,

> > +                                               aspm_toggle.work);

> > +     int packet_count;

> > +     bool enable;

> > +

> > +     packet_count = atomic_xchg(&tp->aspm_packet_count, 0);

> > +

> > +     if (pcie_aspm_enabled(tp->pci_dev)) {

> > +             enable = packet_count <= ASPM_PACKET_THRESHOLD;

> > +

> > +             if (tp->rtl_aspm_enabled != enable) {

> > +                     rtl_unlock_config_regs(tp);

> > +                     rtl_hw_aspm_clkreq_enable(tp, enable);

> > +                     rtl_lock_config_regs(tp);

> > +             }

> > +     } else if (tp->rtl_aspm_enabled) {

> > +             rtl_unlock_config_regs(tp);

> > +             rtl_hw_aspm_clkreq_enable(tp, false);

> > +             rtl_lock_config_regs(tp);

> > +     }

> > +

> > +     schedule_delayed_work(&tp->aspm_toggle, msecs_to_jiffies(ASPM_TOGGLE_INTERVAL));

> > +}

> > +

> >  static void rtl8169_down(struct rtl8169_private *tp)

> >  {

> > +     cancel_delayed_work_sync(&tp->aspm_toggle);

> > +

> >       /* Clear all task flags */

> >       bitmap_zero(tp->wk.flags, RTL_FLAG_MAX);

> >

> > @@ -4719,6 +4777,11 @@ static void rtl8169_up(struct rtl8169_private *tp)

> >       rtl_reset_work(tp);

> >

> >       phy_start(tp->phydev);

> > +

> > +     /* pcie_aspm_capable may change after system resume */

> > +     if (pcie_aspm_support_enabled() && pcie_aspm_capable(tp->pci_dev) &&

> > +         rtl_supports_aspm(tp))

> > +             schedule_delayed_work(&tp->aspm_toggle, 0);

> >  }

> >

> >  static int rtl8169_close(struct net_device *dev)

> > @@ -5306,12 +5369,6 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)

> >       if (rc)

> >               return rc;

> >

> > -     /* Disable ASPM L1 as that cause random device stop working

> > -      * problems as well as full system hangs for some PCIe devices users.

> > -      */

> > -     rc = pci_disable_link_state(pdev, PCIE_LINK_STATE_L1);

> > -     tp->aspm_manageable = !rc;

> > -

> >       /* enable device (incl. PCI PM wakeup and hotplug setup) */

> >       rc = pcim_enable_device(pdev);

> >       if (rc < 0) {

> > @@ -5378,6 +5435,10 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)

> >

> >       INIT_WORK(&tp->wk.work, rtl_task);

> >

> > +     INIT_DELAYED_WORK(&tp->aspm_toggle, rtl8169_aspm_toggle);

> > +

> > +     atomic_set(&tp->aspm_packet_count, 0);

> > +

> >       rtl_init_mac_address(tp);

> >

> >       dev->ethtool_ops = &rtl8169_ethtool_ops;

> > --

> > 2.32.0

> >
Heiner Kallweit Sept. 3, 2021, 8 p.m. UTC | #3
On 03.09.2021 17:56, Kai-Heng Feng wrote:
> On Tue, Aug 31, 2021 at 2:09 AM Bjorn Helgaas <helgaas@kernel.org> wrote:

>>

>> On Sat, Aug 28, 2021 at 01:14:52AM +0800, Kai-Heng Feng wrote:

>>> r8169 NICs on some platforms have abysmal speed when ASPM is enabled.

>>> Same issue can be observed with older vendor drivers.

>>>

>>> The issue is however solved by the latest vendor driver. There's a new

>>> mechanism, which disables r8169's internal ASPM when the NIC traffic has

>>> more than 10 packets, and vice versa. The possible reason for this is

>>> likely because the buffer on the chip is too small for its ASPM exit

>>> latency.

>>

>> This sounds like good speculation, but of course, it would be better

>> to have the supporting data.

>>

>> You say above that this problem affects r8169 on "some platforms."  I

>> infer that ASPM works fine on other platforms.  It would be extremely

>> interesting to have some data on both classes, e.g., "lspci -vv"

>> output for the entire system.

> 

> lspci data collected from working and non-working system can be found here:

> https://bugzilla.kernel.org/show_bug.cgi?id=214307

> 

>>

>> If r8169 ASPM works well on some systems, we *should* be able to make

>> it work well on *all* systems, because the device can't tell what

>> system it's in.  All the device can see are the latencies for entry

>> and exit for link states.

> 

> That's definitely better if we can make r8169 ASPM work for all platforms.

> 

>>

>> IIUC this patch makes the driver wake up every 1000ms.  If the NIC has

>> sent or received more than 10 packets in the last 1000ms, it disables

>> ASPM; otherwise it enables ASPM.

> 

> Yes, that's correct.

> 

>>

>> I asked these same questions earlier, but nothing changed, so I won't

>> raise them again if you don't think they're pertinent.  Some patch

>> splitting comments below.

> 

> Sorry about that. The lspci data is attached.

> 


Thanks for the additional details. I see that both systems have the L1
sub-states active. Do you also face the issue if L1 is enabled but
L1.1 and L1.2 are not? Setting the ASPM policy from powersupersave
to powersave should be sufficient to disable them.
I have a test system Asus PRIME H310I-PLUS, BIOS 2603 10/21/2019 with
the same RTL8168h chip version. With L1 active and sub-states inactive
everything is fine. With the sub-states activated I get a few missed RX
errors when running iperf3.

One difference between your good and bad logs is the following.
(My test system shows the same LTR value as your bad system.)

Bad:
	Capabilities: [170 v1] Latency Tolerance Reporting
		Max snoop latency: 3145728ns
		Max no snoop latency: 3145728ns

Good:
	Capabilities: [170 v1] Latency Tolerance Reporting
		Max snoop latency: 1048576ns
		Max no snoop latency: 1048576ns

I have to admit that I'm not familiar with LTR and don't know whether
this difference could contribute to the differing behavior.
Kai-Heng Feng Sept. 6, 2021, 3:10 p.m. UTC | #4
On Sat, Sep 4, 2021 at 4:00 AM Heiner Kallweit <hkallweit1@gmail.com> wrote:
>

> On 03.09.2021 17:56, Kai-Heng Feng wrote:

> > On Tue, Aug 31, 2021 at 2:09 AM Bjorn Helgaas <helgaas@kernel.org> wrote:

> >>

> >> On Sat, Aug 28, 2021 at 01:14:52AM +0800, Kai-Heng Feng wrote:

> >>> r8169 NICs on some platforms have abysmal speed when ASPM is enabled.

> >>> Same issue can be observed with older vendor drivers.

> >>>

> >>> The issue is however solved by the latest vendor driver. There's a new

> >>> mechanism, which disables r8169's internal ASPM when the NIC traffic has

> >>> more than 10 packets, and vice versa. The possible reason for this is

> >>> likely because the buffer on the chip is too small for its ASPM exit

> >>> latency.

> >>

> >> This sounds like good speculation, but of course, it would be better

> >> to have the supporting data.

> >>

> >> You say above that this problem affects r8169 on "some platforms."  I

> >> infer that ASPM works fine on other platforms.  It would be extremely

> >> interesting to have some data on both classes, e.g., "lspci -vv"

> >> output for the entire system.

> >

> > lspci data collected from working and non-working system can be found here:

> > https://bugzilla.kernel.org/show_bug.cgi?id=214307

> >

> >>

> >> If r8169 ASPM works well on some systems, we *should* be able to make

> >> it work well on *all* systems, because the device can't tell what

> >> system it's in.  All the device can see are the latencies for entry

> >> and exit for link states.

> >

> > That's definitely better if we can make r8169 ASPM work for all platforms.

> >

> >>

> >> IIUC this patch makes the driver wake up every 1000ms.  If the NIC has

> >> sent or received more than 10 packets in the last 1000ms, it disables

> >> ASPM; otherwise it enables ASPM.

> >

> > Yes, that's correct.

> >

> >>

> >> I asked these same questions earlier, but nothing changed, so I won't

> >> raise them again if you don't think they're pertinent.  Some patch

> >> splitting comments below.

> >

> > Sorry about that. The lspci data is attached.

> >

>

> Thanks for the additional details. I see that both systems have the L1

> sub-states active. Do you also face the issue if L1 is enabled but

> L1.2 and L1.2 are not? Setting the ASPM policy from powersupersave

> to powersave should be sufficient to disable them.

> I have a test system Asus PRIME H310I-PLUS, BIOS 2603 10/21/2019 with

> the same RTL8168h chip version. With L1 active and sub-states inactive

> everything is fine. With the sub-states activated I get few missed RX

> errors when running iperf3.


Once L1.1 and L1.2 are disabled the TX speed can reach 710Mbps and RX
can reach 941 Mbps. So yes it seems to be the same issue.
With dynamic ASPM, TX can reach 750 Mbps while ASPM L1.1 and L1.2 are enabled.

> One difference between your good and bad logs is the following.

> (My test system shows the same LTR value like your bad system.)

>

> Bad:

>         Capabilities: [170 v1] Latency Tolerance Reporting

>                 Max snoop latency: 3145728ns

>                 Max no snoop latency: 3145728ns

>

> Good:

>         Capabilities: [170 v1] Latency Tolerance Reporting

>                 Max snoop latency: 1048576ns

>                 Max no snoop latency: 1048576ns

>

> I have to admit that I'm not familiar with LTR and don't know whether

> this difference could contribute to the differing behavior.


I am also unsure what role LTR plays here, so I tried changing the
LTR value to 1048576ns. It yielded the same result: TX and RX remain
very slow.

Kai-Heng
Heiner Kallweit Sept. 6, 2021, 3:34 p.m. UTC | #5
On 06.09.2021 17:10, Kai-Heng Feng wrote:
> On Sat, Sep 4, 2021 at 4:00 AM Heiner Kallweit <hkallweit1@gmail.com> wrote:

>>

>> On 03.09.2021 17:56, Kai-Heng Feng wrote:

>>> On Tue, Aug 31, 2021 at 2:09 AM Bjorn Helgaas <helgaas@kernel.org> wrote:

>>>>

>>>> On Sat, Aug 28, 2021 at 01:14:52AM +0800, Kai-Heng Feng wrote:

>>>>> r8169 NICs on some platforms have abysmal speed when ASPM is enabled.

>>>>> Same issue can be observed with older vendor drivers.

>>>>>

>>>>> The issue is however solved by the latest vendor driver. There's a new

>>>>> mechanism, which disables r8169's internal ASPM when the NIC traffic has

>>>>> more than 10 packets, and vice versa. The possible reason for this is

>>>>> likely because the buffer on the chip is too small for its ASPM exit

>>>>> latency.

>>>>

>>>> This sounds like good speculation, but of course, it would be better

>>>> to have the supporting data.

>>>>

>>>> You say above that this problem affects r8169 on "some platforms."  I

>>>> infer that ASPM works fine on other platforms.  It would be extremely

>>>> interesting to have some data on both classes, e.g., "lspci -vv"

>>>> output for the entire system.

>>>

>>> lspci data collected from working and non-working system can be found here:

>>> https://bugzilla.kernel.org/show_bug.cgi?id=214307

>>>

>>>>

>>>> If r8169 ASPM works well on some systems, we *should* be able to make

>>>> it work well on *all* systems, because the device can't tell what

>>>> system it's in.  All the device can see are the latencies for entry

>>>> and exit for link states.

>>>

>>> That's definitely better if we can make r8169 ASPM work for all platforms.

>>>

>>>>

>>>> IIUC this patch makes the driver wake up every 1000ms.  If the NIC has

>>>> sent or received more than 10 packets in the last 1000ms, it disables

>>>> ASPM; otherwise it enables ASPM.

>>>

>>> Yes, that's correct.

>>>

>>>>

>>>> I asked these same questions earlier, but nothing changed, so I won't

>>>> raise them again if you don't think they're pertinent.  Some patch

>>>> splitting comments below.

>>>

>>> Sorry about that. The lspci data is attached.

>>>

>>

>> Thanks for the additional details. I see that both systems have the L1

>> sub-states active. Do you also face the issue if L1 is enabled but

>> L1.2 and L1.2 are not? Setting the ASPM policy from powersupersave

>> to powersave should be sufficient to disable them.

>> I have a test system Asus PRIME H310I-PLUS, BIOS 2603 10/21/2019 with

>> the same RTL8168h chip version. With L1 active and sub-states inactive

>> everything is fine. With the sub-states activated I get few missed RX

>> errors when running iperf3.

> 

> Once L1.1 and L1.2 are disabled the TX speed can reach 710Mbps and RX

> can reach 941 Mbps. So yes it seems to be the same issue.


I reach 940-950Mbps in both directions, but this seems to be unrelated
to what we discuss here.

> With dynamic ASPM, TX can reach 750 Mbps while ASPM L1.1 and L1.2 are enabled.

> 

>> One difference between your good and bad logs is the following.

>> (My test system shows the same LTR value like your bad system.)

>>

>> Bad:

>>         Capabilities: [170 v1] Latency Tolerance Reporting

>>                 Max snoop latency: 3145728ns

>>                 Max no snoop latency: 3145728ns

>>

>> Good:

>>         Capabilities: [170 v1] Latency Tolerance Reporting

>>                 Max snoop latency: 1048576ns

>>                 Max no snoop latency: 1048576ns

>>

>> I have to admit that I'm not familiar with LTR and don't know whether

>> this difference could contribute to the differing behavior.

> 

> I am also unsure what role LTR plays here, so I tried to change the

> LTR value to 1048576ns and yield the same result, the TX and RX remain

> very slow.

> 

> Kai-Heng

>
Kai-Heng Feng Sept. 7, 2021, 4:58 a.m. UTC | #6
On Tue, Sep 7, 2021 at 12:11 AM Heiner Kallweit <hkallweit1@gmail.com> wrote:
>

> On 06.09.2021 17:10, Kai-Heng Feng wrote:

> > On Sat, Sep 4, 2021 at 4:00 AM Heiner Kallweit <hkallweit1@gmail.com> wrote:

> >>

> >> On 03.09.2021 17:56, Kai-Heng Feng wrote:

> >>> On Tue, Aug 31, 2021 at 2:09 AM Bjorn Helgaas <helgaas@kernel.org> wrote:

> >>>>

> >>>> On Sat, Aug 28, 2021 at 01:14:52AM +0800, Kai-Heng Feng wrote:

> >>>>> r8169 NICs on some platforms have abysmal speed when ASPM is enabled.

> >>>>> Same issue can be observed with older vendor drivers.

> >>>>>

> >>>>> The issue is however solved by the latest vendor driver. There's a new

> >>>>> mechanism, which disables r8169's internal ASPM when the NIC traffic has

> >>>>> more than 10 packets, and vice versa. The possible reason for this is

> >>>>> likely because the buffer on the chip is too small for its ASPM exit

> >>>>> latency.

> >>>>

> >>>> This sounds like good speculation, but of course, it would be better

> >>>> to have the supporting data.

> >>>>

> >>>> You say above that this problem affects r8169 on "some platforms."  I

> >>>> infer that ASPM works fine on other platforms.  It would be extremely

> >>>> interesting to have some data on both classes, e.g., "lspci -vv"

> >>>> output for the entire system.

> >>>

> >>> lspci data collected from working and non-working system can be found here:

> >>> https://bugzilla.kernel.org/show_bug.cgi?id=214307

> >>>

> >>>>

> >>>> If r8169 ASPM works well on some systems, we *should* be able to make

> >>>> it work well on *all* systems, because the device can't tell what

> >>>> system it's in.  All the device can see are the latencies for entry

> >>>> and exit for link states.

> >>>

> >>> That's definitely better if we can make r8169 ASPM work for all platforms.

> >>>

> >>>>

> >>>> IIUC this patch makes the driver wake up every 1000ms.  If the NIC has

> >>>> sent or received more than 10 packets in the last 1000ms, it disables

> >>>> ASPM; otherwise it enables ASPM.

> >>>

> >>> Yes, that's correct.

> >>>

> >>>>

> >>>> I asked these same questions earlier, but nothing changed, so I won't

> >>>> raise them again if you don't think they're pertinent.  Some patch

> >>>> splitting comments below.

> >>>

> >>> Sorry about that. The lspci data is attached.

> >>>

> >>

> >> Thanks for the additional details. I see that both systems have the L1

> >> sub-states active. Do you also face the issue if L1 is enabled but

> >> L1.1 and L1.2 are not? Setting the ASPM policy from powersupersave

> >> to powersave should be sufficient to disable them.

> >> I have a test system Asus PRIME H310I-PLUS, BIOS 2603 10/21/2019 with

> >> the same RTL8168h chip version. With L1 active and sub-states inactive

> >> everything is fine. With the sub-states activated I get a few missed RX

> >> errors when running iperf3.

> >

> > Once L1.1 and L1.2 are disabled the TX speed can reach 710Mbps and RX

> > can reach 941 Mbps. So yes it seems to be the same issue.

>

> I reach 940-950Mbps in both directions, but this seems to be unrelated

> to what we discuss here.


OK. Is there anything more I need to address in the next iteration?

Kai-Heng

>

> > With dynamic ASPM, TX can reach 750 Mbps while ASPM L1.1 and L1.2 are enabled.

> >

> >> One difference between your good and bad logs is the following.

> >> (My test system shows the same LTR value as your bad system.)

> >>

> >> Bad:

> >>         Capabilities: [170 v1] Latency Tolerance Reporting

> >>                 Max snoop latency: 3145728ns

> >>                 Max no snoop latency: 3145728ns

> >>

> >> Good:

> >>         Capabilities: [170 v1] Latency Tolerance Reporting

> >>                 Max snoop latency: 1048576ns

> >>                 Max no snoop latency: 1048576ns

> >>

> >> I have to admit that I'm not familiar with LTR and don't know whether

> >> this difference could contribute to the differing behavior.

> >

> > I am also unsure what role LTR plays here, so I tried to change the

> > LTR value to 1048576ns, but it yielded the same result: TX and RX remain

> > very slow.

> >

> > Kai-Heng

> >

>
Heiner Kallweit Sept. 7, 2021, 6:03 a.m. UTC | #7
On 27.08.2021 19:14, Kai-Heng Feng wrote:
> r8169 NICs on some platforms have abysmal speed when ASPM is enabled.

> Same issue can be observed with older vendor drivers.

> 

> The issue is however solved by the latest vendor driver. There's a new

> mechanism, which disables r8169's internal ASPM when the NIC traffic has

> more than 10 packets, and vice versa. The possible reason for this is

> likely because the buffer on the chip is too small for its ASPM exit

> latency.

> 

> Realtek confirmed that all their PCIe LAN NICs, r8106, r8168 and r8125

> use dynamic ASPM under Windows. So implement the same mechanism here to

> resolve the issue.

> 

> Because ASPM control may not be granted by BIOS while ASPM is enabled,

> remove aspm_manageable and use pcie_aspm_capable() instead. If BIOS

> enables ASPM for the device, we want to enable dynamic ASPM on it.

> 

> In addition, since PCIe ASPM can be switched via sysfs, enable/disable

> dynamic ASPM accordingly by checking pcie_aspm_enabled().

> 

> Signed-off-by: Kai-Heng Feng <kai.heng.feng@canonical.com>

> ---

> v4:

>  - Squash two patches

>  - Remove aspm_manageable and use pcie_aspm_capable()

>    pcie_aspm_enabled() accordingly

> 

> v3:

>  - Use msecs_to_jiffies() for delay time

>  - Use atomic_t instead of mutex for bh

>  - Mention the buffer size and ASPM exit latency in commit message

> 

> v2: 

>  - Use delayed_work instead of timer_list to avoid interrupt context

>  - Use mutex to serialize packet counter read/write

>  - Wording change

>  drivers/net/ethernet/realtek/r8169_main.c | 77 ++++++++++++++++++++---

>  1 file changed, 69 insertions(+), 8 deletions(-)

> 

> diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c

> index 46a6ff9a782d7..97dba8f437b78 100644

> --- a/drivers/net/ethernet/realtek/r8169_main.c

> +++ b/drivers/net/ethernet/realtek/r8169_main.c

> @@ -623,7 +623,10 @@ struct rtl8169_private {

>  	} wk;

>  

>  	unsigned supports_gmii:1;

> -	unsigned aspm_manageable:1;

> +	unsigned rtl_aspm_enabled:1;

> +	struct delayed_work aspm_toggle;

> +	atomic_t aspm_packet_count;

> +

>  	dma_addr_t counters_phys_addr;

>  	struct rtl8169_counters *counters;

>  	struct rtl8169_tc_offsets tc_offset;

> @@ -698,6 +701,20 @@ static bool rtl_is_8168evl_up(struct rtl8169_private *tp)

>  	       tp->mac_version <= RTL_GIGA_MAC_VER_53;

>  }

>  

> +static int rtl_supports_aspm(struct rtl8169_private *tp)

> +{

> +	switch (tp->mac_version) {

> +	case RTL_GIGA_MAC_VER_02 ... RTL_GIGA_MAC_VER_31:

> +	case RTL_GIGA_MAC_VER_37:

> +	case RTL_GIGA_MAC_VER_39:

> +	case RTL_GIGA_MAC_VER_43:

> +	case RTL_GIGA_MAC_VER_47:

> +		return 0;

> +	default:

> +		return 1;

> +	}


Why is this needed now that you have pcie_aspm_capable()?

> +}

> +

>  static bool rtl_supports_eee(struct rtl8169_private *tp)

>  {

>  	return tp->mac_version >= RTL_GIGA_MAC_VER_34 &&

> @@ -2699,8 +2716,15 @@ static void rtl_enable_exit_l1(struct rtl8169_private *tp)

>  

>  static void rtl_hw_aspm_clkreq_enable(struct rtl8169_private *tp, bool enable)

>  {

> +	struct pci_dev *pdev = tp->pci_dev;

> +

> +	if (!pcie_aspm_enabled(pdev) && enable)

> +		return;

> +

> +	tp->rtl_aspm_enabled = enable;

> +

>  	/* Don't enable ASPM in the chip if OS can't control ASPM */

> -	if (enable && tp->aspm_manageable) {

> +	if (enable) {

>  		RTL_W8(tp, Config5, RTL_R8(tp, Config5) | ASPM_en);

>  		RTL_W8(tp, Config2, RTL_R8(tp, Config2) | ClkReqEn);

>  	} else {

> @@ -4440,6 +4464,7 @@ static void rtl_tx(struct net_device *dev, struct rtl8169_private *tp,

>  

>  	dirty_tx = tp->dirty_tx;

>  

> +	atomic_add(tp->cur_tx - dirty_tx, &tp->aspm_packet_count);

>  	while (READ_ONCE(tp->cur_tx) != dirty_tx) {

>  		unsigned int entry = dirty_tx % NUM_TX_DESC;

>  		u32 status;

> @@ -4584,6 +4609,8 @@ static int rtl_rx(struct net_device *dev, struct rtl8169_private *tp, int budget

>  		rtl8169_mark_to_asic(desc);

>  	}

>  

> +	atomic_add(count, &tp->aspm_packet_count);

> +

>  	return count;

>  }

>  

> @@ -4691,8 +4718,39 @@ static int r8169_phy_connect(struct rtl8169_private *tp)

>  	return 0;

>  }

>  

> +#define ASPM_PACKET_THRESHOLD 10

> +#define ASPM_TOGGLE_INTERVAL 1000

> +

> +static void rtl8169_aspm_toggle(struct work_struct *work)

> +{

> +	struct rtl8169_private *tp = container_of(work, struct rtl8169_private,

> +						  aspm_toggle.work);

> +	int packet_count;

> +	bool enable;

> +

> +	packet_count = atomic_xchg(&tp->aspm_packet_count, 0);

> +

> +	if (pcie_aspm_enabled(tp->pci_dev)) {

> +		enable = packet_count <= ASPM_PACKET_THRESHOLD;

> +

> +		if (tp->rtl_aspm_enabled != enable) {

> +			rtl_unlock_config_regs(tp);


This looks racy. Another unlock_config_regs/do_something/lock_config_regs
can run in parallel. And if such a parallel lock_config_regs is executed
exactly here, then rtl_hw_aspm_clkreq_enable() may fail.

> +			rtl_hw_aspm_clkreq_enable(tp, enable);

> +			rtl_lock_config_regs(tp);

> +		}

> +	} else if (tp->rtl_aspm_enabled) {

> +		rtl_unlock_config_regs(tp);

> +		rtl_hw_aspm_clkreq_enable(tp, false);

> +		rtl_lock_config_regs(tp);

> +	}

> +

> +	schedule_delayed_work(&tp->aspm_toggle, msecs_to_jiffies(ASPM_TOGGLE_INTERVAL));

> +}

> +

>  static void rtl8169_down(struct rtl8169_private *tp)

>  {

> +	cancel_delayed_work_sync(&tp->aspm_toggle);

> +

>  	/* Clear all task flags */

>  	bitmap_zero(tp->wk.flags, RTL_FLAG_MAX);

>  

> @@ -4719,6 +4777,11 @@ static void rtl8169_up(struct rtl8169_private *tp)

>  	rtl_reset_work(tp);

>  

>  	phy_start(tp->phydev);

> +

> +	/* pcie_aspm_capable may change after system resume */

> +	if (pcie_aspm_support_enabled() && pcie_aspm_capable(tp->pci_dev) &&

> +	    rtl_supports_aspm(tp))

> +		schedule_delayed_work(&tp->aspm_toggle, 0);

>  }

>  

>  static int rtl8169_close(struct net_device *dev)

> @@ -5306,12 +5369,6 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)

>  	if (rc)

>  		return rc;

>  

> -	/* Disable ASPM L1 as that cause random device stop working

> -	 * problems as well as full system hangs for some PCIe devices users.

> -	 */

> -	rc = pci_disable_link_state(pdev, PCIE_LINK_STATE_L1);

> -	tp->aspm_manageable = !rc;

> -

>  	/* enable device (incl. PCI PM wakeup and hotplug setup) */

>  	rc = pcim_enable_device(pdev);

>  	if (rc < 0) {

> @@ -5378,6 +5435,10 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)

>  

>  	INIT_WORK(&tp->wk.work, rtl_task);

>  

> +	INIT_DELAYED_WORK(&tp->aspm_toggle, rtl8169_aspm_toggle);

> +

> +	atomic_set(&tp->aspm_packet_count, 0);

> +

>  	rtl_init_mac_address(tp);

>  

>  	dev->ethtool_ops = &rtl8169_ethtool_ops;

>
Kai-Heng Feng Sept. 15, 2021, 3:54 p.m. UTC | #8
On Tue, Sep 7, 2021 at 2:03 PM Heiner Kallweit <hkallweit1@gmail.com> wrote:
>

> On 27.08.2021 19:14, Kai-Heng Feng wrote:

> > r8169 NICs on some platforms have abysmal speed when ASPM is enabled.

> > Same issue can be observed with older vendor drivers.

> >

> > The issue is however solved by the latest vendor driver. There's a new

> > mechanism, which disables r8169's internal ASPM when the NIC traffic has

> > more than 10 packets, and vice versa. The possible reason for this is

> > likely because the buffer on the chip is too small for its ASPM exit

> > latency.

> >

> > Realtek confirmed that all their PCIe LAN NICs, r8106, r8168 and r8125

> > use dynamic ASPM under Windows. So implement the same mechanism here to

> > resolve the issue.

> >

> > Because ASPM control may not be granted by BIOS while ASPM is enabled,

> > remove aspm_manageable and use pcie_aspm_capable() instead. If BIOS

> > enables ASPM for the device, we want to enable dynamic ASPM on it.

> >

> > In addition, since PCIe ASPM can be switched via sysfs, enable/disable

> > dynamic ASPM accordingly by checking pcie_aspm_enabled().

> >

> > Signed-off-by: Kai-Heng Feng <kai.heng.feng@canonical.com>

> > ---

> > v4:

> >  - Squash two patches

> >  - Remove aspm_manageable and use pcie_aspm_capable()

> >    pcie_aspm_enabled() accordingly

> >

> > v3:

> >  - Use msecs_to_jiffies() for delay time

> >  - Use atomic_t instead of mutex for bh

> >  - Mention the buffer size and ASPM exit latency in commit message

> >

> > v2:

> >  - Use delayed_work instead of timer_list to avoid interrupt context

> >  - Use mutex to serialize packet counter read/write

> >  - Wording change

> >  drivers/net/ethernet/realtek/r8169_main.c | 77 ++++++++++++++++++++---

> >  1 file changed, 69 insertions(+), 8 deletions(-)

> >

> > diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c

> > index 46a6ff9a782d7..97dba8f437b78 100644

> > --- a/drivers/net/ethernet/realtek/r8169_main.c

> > +++ b/drivers/net/ethernet/realtek/r8169_main.c

> > @@ -623,7 +623,10 @@ struct rtl8169_private {

> >       } wk;

> >

> >       unsigned supports_gmii:1;

> > -     unsigned aspm_manageable:1;

> > +     unsigned rtl_aspm_enabled:1;

> > +     struct delayed_work aspm_toggle;

> > +     atomic_t aspm_packet_count;

> > +

> >       dma_addr_t counters_phys_addr;

> >       struct rtl8169_counters *counters;

> >       struct rtl8169_tc_offsets tc_offset;

> > @@ -698,6 +701,20 @@ static bool rtl_is_8168evl_up(struct rtl8169_private *tp)

> >              tp->mac_version <= RTL_GIGA_MAC_VER_53;

> >  }

> >

> > +static int rtl_supports_aspm(struct rtl8169_private *tp)

> > +{

> > +     switch (tp->mac_version) {

> > +     case RTL_GIGA_MAC_VER_02 ... RTL_GIGA_MAC_VER_31:

> > +     case RTL_GIGA_MAC_VER_37:

> > +     case RTL_GIGA_MAC_VER_39:

> > +     case RTL_GIGA_MAC_VER_43:

> > +     case RTL_GIGA_MAC_VER_47:

> > +             return 0;

> > +     default:

> > +             return 1;

> > +     }

>

> Why is this needed now that you have pcie_aspm_capable()?


The blacklist is copied from the vendor driver.
I will remove it in the next iteration; hopefully pcie_aspm_capable() is
sufficient.

>

> > +}

> > +

> >  static bool rtl_supports_eee(struct rtl8169_private *tp)

> >  {

> >       return tp->mac_version >= RTL_GIGA_MAC_VER_34 &&

> > @@ -2699,8 +2716,15 @@ static void rtl_enable_exit_l1(struct rtl8169_private *tp)

> >

> >  static void rtl_hw_aspm_clkreq_enable(struct rtl8169_private *tp, bool enable)

> >  {

> > +     struct pci_dev *pdev = tp->pci_dev;

> > +

> > +     if (!pcie_aspm_enabled(pdev) && enable)

> > +             return;

> > +

> > +     tp->rtl_aspm_enabled = enable;

> > +

> >       /* Don't enable ASPM in the chip if OS can't control ASPM */

> > -     if (enable && tp->aspm_manageable) {

> > +     if (enable) {

> >               RTL_W8(tp, Config5, RTL_R8(tp, Config5) | ASPM_en);

> >               RTL_W8(tp, Config2, RTL_R8(tp, Config2) | ClkReqEn);

> >       } else {

> > @@ -4440,6 +4464,7 @@ static void rtl_tx(struct net_device *dev, struct rtl8169_private *tp,

> >

> >       dirty_tx = tp->dirty_tx;

> >

> > +     atomic_add(tp->cur_tx - dirty_tx, &tp->aspm_packet_count);

> >       while (READ_ONCE(tp->cur_tx) != dirty_tx) {

> >               unsigned int entry = dirty_tx % NUM_TX_DESC;

> >               u32 status;

> > @@ -4584,6 +4609,8 @@ static int rtl_rx(struct net_device *dev, struct rtl8169_private *tp, int budget

> >               rtl8169_mark_to_asic(desc);

> >       }

> >

> > +     atomic_add(count, &tp->aspm_packet_count);

> > +

> >       return count;

> >  }

> >

> > @@ -4691,8 +4718,39 @@ static int r8169_phy_connect(struct rtl8169_private *tp)

> >       return 0;

> >  }

> >

> > +#define ASPM_PACKET_THRESHOLD 10

> > +#define ASPM_TOGGLE_INTERVAL 1000

> > +

> > +static void rtl8169_aspm_toggle(struct work_struct *work)

> > +{

> > +     struct rtl8169_private *tp = container_of(work, struct rtl8169_private,

> > +                                               aspm_toggle.work);

> > +     int packet_count;

> > +     bool enable;

> > +

> > +     packet_count = atomic_xchg(&tp->aspm_packet_count, 0);

> > +

> > +     if (pcie_aspm_enabled(tp->pci_dev)) {

> > +             enable = packet_count <= ASPM_PACKET_THRESHOLD;

> > +

> > +             if (tp->rtl_aspm_enabled != enable) {

> > +                     rtl_unlock_config_regs(tp);

>

> This looks racy. Another unlock_config_regs/do_something/lock_config_regs

> can run in parallel. And if such a parallel lock_config_regs is executed

> exactly here, then rtl_hw_aspm_clkreq_enable() may fail.


Yes this is racy.
Will add a lock to prevent the race.

Kai-Heng

>

> > +                     rtl_hw_aspm_clkreq_enable(tp, enable);

> > +                     rtl_lock_config_regs(tp);

> > +             }

> > +     } else if (tp->rtl_aspm_enabled) {

> > +             rtl_unlock_config_regs(tp);

> > +             rtl_hw_aspm_clkreq_enable(tp, false);

> > +             rtl_lock_config_regs(tp);

> > +     }

> > +

> > +     schedule_delayed_work(&tp->aspm_toggle, msecs_to_jiffies(ASPM_TOGGLE_INTERVAL));

> > +}

> > +

> >  static void rtl8169_down(struct rtl8169_private *tp)

> >  {

> > +     cancel_delayed_work_sync(&tp->aspm_toggle);

> > +

> >       /* Clear all task flags */

> >       bitmap_zero(tp->wk.flags, RTL_FLAG_MAX);

> >

> > @@ -4719,6 +4777,11 @@ static void rtl8169_up(struct rtl8169_private *tp)

> >       rtl_reset_work(tp);

> >

> >       phy_start(tp->phydev);

> > +

> > +     /* pcie_aspm_capable may change after system resume */

> > +     if (pcie_aspm_support_enabled() && pcie_aspm_capable(tp->pci_dev) &&

> > +         rtl_supports_aspm(tp))

> > +             schedule_delayed_work(&tp->aspm_toggle, 0);

> >  }

> >

> >  static int rtl8169_close(struct net_device *dev)

> > @@ -5306,12 +5369,6 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)

> >       if (rc)

> >               return rc;

> >

> > -     /* Disable ASPM L1 as that cause random device stop working

> > -      * problems as well as full system hangs for some PCIe devices users.

> > -      */

> > -     rc = pci_disable_link_state(pdev, PCIE_LINK_STATE_L1);

> > -     tp->aspm_manageable = !rc;

> > -

> >       /* enable device (incl. PCI PM wakeup and hotplug setup) */

> >       rc = pcim_enable_device(pdev);

> >       if (rc < 0) {

> > @@ -5378,6 +5435,10 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)

> >

> >       INIT_WORK(&tp->wk.work, rtl_task);

> >

> > +     INIT_DELAYED_WORK(&tp->aspm_toggle, rtl8169_aspm_toggle);

> > +

> > +     atomic_set(&tp->aspm_packet_count, 0);

> > +

> >       rtl_init_mac_address(tp);

> >

> >       dev->ethtool_ops = &rtl8169_ethtool_ops;

> >

>
diff mbox series

Patch

diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c
index 46a6ff9a782d7..97dba8f437b78 100644
--- a/drivers/net/ethernet/realtek/r8169_main.c
+++ b/drivers/net/ethernet/realtek/r8169_main.c
@@ -623,7 +623,10 @@  struct rtl8169_private {
 	} wk;
 
 	unsigned supports_gmii:1;
-	unsigned aspm_manageable:1;
+	unsigned rtl_aspm_enabled:1;
+	struct delayed_work aspm_toggle;
+	atomic_t aspm_packet_count;
+
 	dma_addr_t counters_phys_addr;
 	struct rtl8169_counters *counters;
 	struct rtl8169_tc_offsets tc_offset;
@@ -698,6 +701,20 @@  static bool rtl_is_8168evl_up(struct rtl8169_private *tp)
 	       tp->mac_version <= RTL_GIGA_MAC_VER_53;
 }
 
+static int rtl_supports_aspm(struct rtl8169_private *tp)
+{
+	switch (tp->mac_version) {
+	case RTL_GIGA_MAC_VER_02 ... RTL_GIGA_MAC_VER_31:
+	case RTL_GIGA_MAC_VER_37:
+	case RTL_GIGA_MAC_VER_39:
+	case RTL_GIGA_MAC_VER_43:
+	case RTL_GIGA_MAC_VER_47:
+		return 0;
+	default:
+		return 1;
+	}
+}
+
 static bool rtl_supports_eee(struct rtl8169_private *tp)
 {
 	return tp->mac_version >= RTL_GIGA_MAC_VER_34 &&
@@ -2699,8 +2716,15 @@  static void rtl_enable_exit_l1(struct rtl8169_private *tp)
 
 static void rtl_hw_aspm_clkreq_enable(struct rtl8169_private *tp, bool enable)
 {
+	struct pci_dev *pdev = tp->pci_dev;
+
+	if (!pcie_aspm_enabled(pdev) && enable)
+		return;
+
+	tp->rtl_aspm_enabled = enable;
+
 	/* Don't enable ASPM in the chip if OS can't control ASPM */
-	if (enable && tp->aspm_manageable) {
+	if (enable) {
 		RTL_W8(tp, Config5, RTL_R8(tp, Config5) | ASPM_en);
 		RTL_W8(tp, Config2, RTL_R8(tp, Config2) | ClkReqEn);
 	} else {
@@ -4440,6 +4464,7 @@  static void rtl_tx(struct net_device *dev, struct rtl8169_private *tp,
 
 	dirty_tx = tp->dirty_tx;
 
+	atomic_add(tp->cur_tx - dirty_tx, &tp->aspm_packet_count);
 	while (READ_ONCE(tp->cur_tx) != dirty_tx) {
 		unsigned int entry = dirty_tx % NUM_TX_DESC;
 		u32 status;
@@ -4584,6 +4609,8 @@  static int rtl_rx(struct net_device *dev, struct rtl8169_private *tp, int budget
 		rtl8169_mark_to_asic(desc);
 	}
 
+	atomic_add(count, &tp->aspm_packet_count);
+
 	return count;
 }
 
@@ -4691,8 +4718,39 @@  static int r8169_phy_connect(struct rtl8169_private *tp)
 	return 0;
 }
 
+#define ASPM_PACKET_THRESHOLD 10
+#define ASPM_TOGGLE_INTERVAL 1000
+
+static void rtl8169_aspm_toggle(struct work_struct *work)
+{
+	struct rtl8169_private *tp = container_of(work, struct rtl8169_private,
+						  aspm_toggle.work);
+	int packet_count;
+	bool enable;
+
+	packet_count = atomic_xchg(&tp->aspm_packet_count, 0);
+
+	if (pcie_aspm_enabled(tp->pci_dev)) {
+		enable = packet_count <= ASPM_PACKET_THRESHOLD;
+
+		if (tp->rtl_aspm_enabled != enable) {
+			rtl_unlock_config_regs(tp);
+			rtl_hw_aspm_clkreq_enable(tp, enable);
+			rtl_lock_config_regs(tp);
+		}
+	} else if (tp->rtl_aspm_enabled) {
+		rtl_unlock_config_regs(tp);
+		rtl_hw_aspm_clkreq_enable(tp, false);
+		rtl_lock_config_regs(tp);
+	}
+
+	schedule_delayed_work(&tp->aspm_toggle, msecs_to_jiffies(ASPM_TOGGLE_INTERVAL));
+}
+
 static void rtl8169_down(struct rtl8169_private *tp)
 {
+	cancel_delayed_work_sync(&tp->aspm_toggle);
+
 	/* Clear all task flags */
 	bitmap_zero(tp->wk.flags, RTL_FLAG_MAX);
 
@@ -4719,6 +4777,11 @@  static void rtl8169_up(struct rtl8169_private *tp)
 	rtl_reset_work(tp);
 
 	phy_start(tp->phydev);
+
+	/* pcie_aspm_capable may change after system resume */
+	if (pcie_aspm_support_enabled() && pcie_aspm_capable(tp->pci_dev) &&
+	    rtl_supports_aspm(tp))
+		schedule_delayed_work(&tp->aspm_toggle, 0);
 }
 
 static int rtl8169_close(struct net_device *dev)
@@ -5306,12 +5369,6 @@  static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 	if (rc)
 		return rc;
 
-	/* Disable ASPM L1 as that cause random device stop working
-	 * problems as well as full system hangs for some PCIe devices users.
-	 */
-	rc = pci_disable_link_state(pdev, PCIE_LINK_STATE_L1);
-	tp->aspm_manageable = !rc;
-
 	/* enable device (incl. PCI PM wakeup and hotplug setup) */
 	rc = pcim_enable_device(pdev);
 	if (rc < 0) {
@@ -5378,6 +5435,10 @@  static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 
 	INIT_WORK(&tp->wk.work, rtl_task);
 
+	INIT_DELAYED_WORK(&tp->aspm_toggle, rtl8169_aspm_toggle);
+
+	atomic_set(&tp->aspm_packet_count, 0);
+
 	rtl_init_mac_address(tp);
 
 	dev->ethtool_ops = &rtl8169_ethtool_ops;