diff mbox series

[bpf-next,2/3] net: bonding: Use per-cpu rr_tx_counter

Message ID 20210609135537.1460244-3-joamaki@gmail.com
State New
Headers show
Series XDP bonding support | expand

Commit Message

Jussi Maki June 9, 2021, 1:55 p.m. UTC
The round-robin rr_tx_counter was shared across CPUs leading
to significant cache trashing at high packet rates. This patch
switches the round-robin mechanism to use a per-cpu counter to
decide the destination device.

On a 100Gbit 64 byte packet test this reduces the CPU load from
50% to 10% on the test system.

Signed-off-by: Jussi Maki <joamaki@gmail.com>
---
 drivers/net/bonding/bond_main.c | 18 +++++++++++++++---
 include/net/bonding.h           |  2 +-
 2 files changed, 16 insertions(+), 4 deletions(-)

Comments

Jay Vosburgh June 10, 2021, 12:04 a.m. UTC | #1
Jussi Maki <joamaki@gmail.com> wrote:

>The round-robin rr_tx_counter was shared across CPUs leading
>to significant cache trashing at high packet rates. This patch

	"trashing" -> "thrashing" ?

>switches the round-robin mechanism to use a per-cpu counter to
>decide the destination device.
>
>On a 100Gbit 64 byte packet test this reduces the CPU load from
>50% to 10% on the test system.
>
>Signed-off-by: Jussi Maki <joamaki@gmail.com>
>---
> drivers/net/bonding/bond_main.c | 18 +++++++++++++++---
> include/net/bonding.h           |  2 +-
> 2 files changed, 16 insertions(+), 4 deletions(-)
>
>diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
>index 38eea7e096f3..917dd2cdcbf4 100644
>--- a/drivers/net/bonding/bond_main.c
>+++ b/drivers/net/bonding/bond_main.c
>@@ -4314,16 +4314,16 @@ static u32 bond_rr_gen_slave_id(struct bonding *bond)
> 		slave_id = prandom_u32();
> 		break;
> 	case 1:
>-		slave_id = bond->rr_tx_counter;
>+		slave_id = this_cpu_inc_return(*bond->rr_tx_counter);
> 		break;
> 	default:
> 		reciprocal_packets_per_slave =
> 			bond->params.reciprocal_packets_per_slave;
>-		slave_id = reciprocal_divide(bond->rr_tx_counter,
>+		slave_id = this_cpu_inc_return(*bond->rr_tx_counter);
>+		slave_id = reciprocal_divide(slave_id,
> 					     reciprocal_packets_per_slave);

	With the rr_tx_counter is per-cpu, each CPU is essentially doing
its own round-robin logic, independently of other CPUs, so the resulting
spread of transmitted packets may not be as evenly distributed (as
multiple CPUs could select the same interface to transmit on
approximately in lock-step).  I'm not sure if this could cause actual
problems in practice, though, as particular flows shouldn't skip between
CPUs (and thus rr_tx_counters) very often, and round-robin already
shouldn't be the first choice if no packet reordering is a hard
requirement.

	I think this patch could be submitted against net-next
independently of the rest of the series.

Acked-by: Jay Vosburgh <jay.vosburgh@canonical.com>

	-J

> 		break;
> 	}
>-	bond->rr_tx_counter++;
> 
> 	return slave_id;
> }
>@@ -5278,6 +5278,9 @@ static void bond_uninit(struct net_device *bond_dev)
> 
> 	list_del(&bond->bond_list);
> 
>+	if (BOND_MODE(bond) == BOND_MODE_ROUNDROBIN)
>+		free_percpu(bond->rr_tx_counter);
>+
> 	bond_debug_unregister(bond);
> }
> 
>@@ -5681,6 +5684,15 @@ static int bond_init(struct net_device *bond_dev)
> 	if (!bond->wq)
> 		return -ENOMEM;
> 
>+	if (BOND_MODE(bond) == BOND_MODE_ROUNDROBIN) {
>+		bond->rr_tx_counter = alloc_percpu(u32);
>+		if (!bond->rr_tx_counter) {
>+			destroy_workqueue(bond->wq);
>+			bond->wq = NULL;
>+			return -ENOMEM;
>+		}
>+	}
>+
> 	spin_lock_init(&bond->stats_lock);
> 	netdev_lockdep_set_classes(bond_dev);
> 
>diff --git a/include/net/bonding.h b/include/net/bonding.h
>index 34acb81b4234..8de8180f1be8 100644
>--- a/include/net/bonding.h
>+++ b/include/net/bonding.h
>@@ -232,7 +232,7 @@ struct bonding {
> 	char     proc_file_name[IFNAMSIZ];
> #endif /* CONFIG_PROC_FS */
> 	struct   list_head bond_list;
>-	u32      rr_tx_counter;
>+	u32 __percpu *rr_tx_counter;
> 	struct   ad_bond_info ad_info;
> 	struct   alb_bond_info alb_info;
> 	struct   bond_params params;
>-- 
>2.30.2
>
Jussi Maki June 14, 2021, 7:54 a.m. UTC | #2
On Thu, Jun 10, 2021 at 2:04 AM Jay Vosburgh <jay.vosburgh@canonical.com> wrote:
>

> Jussi Maki <joamaki@gmail.com> wrote:

>

>         With the rr_tx_counter is per-cpu, each CPU is essentially doing

> its own round-robin logic, independently of other CPUs, so the resulting

> spread of transmitted packets may not be as evenly distributed (as

> multiple CPUs could select the same interface to transmit on

> approximately in lock-step).  I'm not sure if this could cause actual

> problems in practice, though, as particular flows shouldn't skip between

> CPUs (and thus rr_tx_counters) very often, and round-robin already

> shouldn't be the first choice if no packet reordering is a hard

> requirement.

>

>         I think this patch could be submitted against net-next

> independently of the rest of the series.


Yes this makes sense. I'll submit it separately against net-next today
and drop it off from this patchset.
diff mbox series

Patch

diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 38eea7e096f3..917dd2cdcbf4 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -4314,16 +4314,16 @@  static u32 bond_rr_gen_slave_id(struct bonding *bond)
 		slave_id = prandom_u32();
 		break;
 	case 1:
-		slave_id = bond->rr_tx_counter;
+		slave_id = this_cpu_inc_return(*bond->rr_tx_counter);
 		break;
 	default:
 		reciprocal_packets_per_slave =
 			bond->params.reciprocal_packets_per_slave;
-		slave_id = reciprocal_divide(bond->rr_tx_counter,
+		slave_id = this_cpu_inc_return(*bond->rr_tx_counter);
+		slave_id = reciprocal_divide(slave_id,
 					     reciprocal_packets_per_slave);
 		break;
 	}
-	bond->rr_tx_counter++;
 
 	return slave_id;
 }
@@ -5278,6 +5278,9 @@  static void bond_uninit(struct net_device *bond_dev)
 
 	list_del(&bond->bond_list);
 
+	if (BOND_MODE(bond) == BOND_MODE_ROUNDROBIN)
+		free_percpu(bond->rr_tx_counter);
+
 	bond_debug_unregister(bond);
 }
 
@@ -5681,6 +5684,15 @@  static int bond_init(struct net_device *bond_dev)
 	if (!bond->wq)
 		return -ENOMEM;
 
+	if (BOND_MODE(bond) == BOND_MODE_ROUNDROBIN) {
+		bond->rr_tx_counter = alloc_percpu(u32);
+		if (!bond->rr_tx_counter) {
+			destroy_workqueue(bond->wq);
+			bond->wq = NULL;
+			return -ENOMEM;
+		}
+	}
+
 	spin_lock_init(&bond->stats_lock);
 	netdev_lockdep_set_classes(bond_dev);
 
diff --git a/include/net/bonding.h b/include/net/bonding.h
index 34acb81b4234..8de8180f1be8 100644
--- a/include/net/bonding.h
+++ b/include/net/bonding.h
@@ -232,7 +232,7 @@  struct bonding {
 	char     proc_file_name[IFNAMSIZ];
 #endif /* CONFIG_PROC_FS */
 	struct   list_head bond_list;
-	u32      rr_tx_counter;
+	u32 __percpu *rr_tx_counter;
 	struct   ad_bond_info ad_info;
 	struct   alb_bond_info alb_info;
 	struct   bond_params params;