mbox series

[RFC,net-next,00/16] LAG offload for Ocelot DSA switches

Message ID 20201208120802.1268708-1-vladimir.oltean@nxp.com
Headers show
Series LAG offload for Ocelot DSA switches | expand

Message

Vladimir Oltean Dec. 8, 2020, 12:07 p.m. UTC
This patch series comes as a continuation of the discussion started with
Tobias Waldekranz in his patch series to offload bonding/team from DSA:
https://patchwork.kernel.org/project/netdevbpf/patch/20201202091356.24075-3-tobias@waldekranz.com/

On one hand, it shows the rework that needs to be done to ocelot such
that a pure switchdev and a DSA driver could share the same implementation.

On the other hand, it tries to identify what data structures does DSA
really need to keep and pass along to drivers, and which structures are
best left for the drivers to deal privately with them.

Testing has been done in the following topology:

         +----------------------------------+
         | Board 1         br0              |
         |             +---------+          |
         |            /           \         |
         |            |           |         |
         |            |         bond0       |
         |            |        +-----+      |
         |            |       /       \     |
         |  eno0     swp0    swp1    swp2   |
         +---|--------|-------|-------|-----+
             |        |       |       |
             +--------+       |       |
               Cable          |       |
                         Cable|       |Cable
               Cable          |       |
             +--------+       |       |
             |        |       |       |
         +---|--------|-------|-------|-----+
         |  eno0     swp0    swp1    swp2   |
         |            |       \       /     |
         |            |        +-----+      |
         |            |         bond0       |
         |            |           |         |
         |            \           /         |
         |             +---------+          |
         | Board 2         br0              |
         +----------------------------------+

The same script can be run on both Board 1 and Board 2 to set this up:

#!/bin/bash

ip link del bond0
ip link add bond0 type bond mode 802.3ad
ip link set swp1 down && ip link set swp1 master bond0 && ip link set swp1 up
ip link set swp2 down && ip link set swp2 master bond0 && ip link set swp2 up
ip link del br0
ip link add br0 type bridge
ip link set bond0 master br0
ip link set swp0 master br0

Then traffic can be tested between eno0 of Board 1 and eno0 of Board 2.

Vladimir Oltean (16):
  net: mscc: ocelot: offload bridge port flags to device
  net: mscc: ocelot: allow offloading of bridge on top of LAG
  net: mscc: ocelot: rename ocelot_netdevice_port_event to
    ocelot_netdevice_changeupper
  net: mscc: ocelot: use a switch-case statement in
    ocelot_netdevice_event
  net: mscc: ocelot: don't refuse bonding interfaces we can't offload
  net: mscc: ocelot: use ipv6 in the aggregation code
  net: mscc: ocelot: set up the bonding mask in a way that avoids a
    net_device
  net: mscc: ocelot: avoid unneeded "lp" variable in LAG join
  net: mscc: ocelot: use "lag" variable name in
    ocelot_bridge_stp_state_set
  net: mscc: ocelot: reapply bridge forwarding mask on bonding
    join/leave
  net: mscc: ocelot: set up logical port IDs centrally
  net: mscc: ocelot: drop the use of the "lags" array
  net: mscc: ocelot: rename aggr_count to num_ports_in_lag
  net: mscc: ocelot: rebalance LAGs on link up/down events
  net: dsa: felix: propagate the LAG offload ops towards the ocelot lib
  net: dsa: ocelot: tell DSA that we can offload link aggregation

 drivers/net/dsa/ocelot/felix.c         |  28 +++
 drivers/net/ethernet/mscc/ocelot.c     | 276 +++++++++++++++----------
 drivers/net/ethernet/mscc/ocelot.h     |   7 +-
 drivers/net/ethernet/mscc/ocelot_net.c | 139 ++++++++-----
 include/soc/mscc/ocelot.h              |  13 +-
 5 files changed, 298 insertions(+), 165 deletions(-)

Comments

Alexandre Belloni Dec. 15, 2020, 2:37 p.m. UTC | #1
On 08/12/2020 14:07:47+0200, Vladimir Oltean wrote:
> We should not be unconditionally enabling address learning, since doing

> that is actively detrimential when a port is standalone and not offloading

> a bridge. Namely, if a port in the switch is standalone and others are

> offloading the bridge, then we could enter a situation where we learn an

> address towards the standalone port, but the bridged ports could not

> forward the packet there, because the CPU is the only path between the

> standalone and the bridged ports. The solution of course is to not

> enable address learning unless the bridge asks for it. Currently this is

> the only bridge port flag we are looking at. The others (flooding etc)

> are TBD.

> 

> Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>

Reviewed-by: Alexandre Belloni <alexandre.belloni@bootlin.com>


> ---

>  drivers/net/ethernet/mscc/ocelot.c     | 21 ++++++++++++++++++++-

>  drivers/net/ethernet/mscc/ocelot.h     |  3 +++

>  drivers/net/ethernet/mscc/ocelot_net.c |  4 ++++

>  include/soc/mscc/ocelot.h              |  2 ++

>  4 files changed, 29 insertions(+), 1 deletion(-)

> 

> diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c

> index b9626eec8db6..7a5c534099d3 100644

> --- a/drivers/net/ethernet/mscc/ocelot.c

> +++ b/drivers/net/ethernet/mscc/ocelot.c

> @@ -883,6 +883,7 @@ EXPORT_SYMBOL(ocelot_get_ts_info);

>  

>  void ocelot_bridge_stp_state_set(struct ocelot *ocelot, int port, u8 state)

>  {

> +	struct ocelot_port *ocelot_port = ocelot->ports[port];

>  	u32 port_cfg;

>  	int p, i;

>  

> @@ -896,7 +897,8 @@ void ocelot_bridge_stp_state_set(struct ocelot *ocelot, int port, u8 state)

>  		ocelot->bridge_fwd_mask |= BIT(port);

>  		fallthrough;

>  	case BR_STATE_LEARNING:

> -		port_cfg |= ANA_PORT_PORT_CFG_LEARN_ENA;

> +		if (ocelot_port->brport_flags & BR_LEARNING)

> +			port_cfg |= ANA_PORT_PORT_CFG_LEARN_ENA;

>  		break;

>  

>  	default:

> @@ -1178,6 +1180,7 @@ EXPORT_SYMBOL(ocelot_port_bridge_join);

>  int ocelot_port_bridge_leave(struct ocelot *ocelot, int port,

>  			     struct net_device *bridge)

>  {

> +	struct ocelot_port *ocelot_port = ocelot->ports[port];

>  	struct ocelot_vlan pvid = {0}, native_vlan = {0};

>  	struct switchdev_trans trans;

>  	int ret;

> @@ -1200,6 +1203,10 @@ int ocelot_port_bridge_leave(struct ocelot *ocelot, int port,

>  	ocelot_port_set_pvid(ocelot, port, pvid);

>  	ocelot_port_set_native_vlan(ocelot, port, native_vlan);

>  

> +	ocelot_port->brport_flags = 0;

> +	ocelot_rmw_gix(ocelot, 0, ANA_PORT_PORT_CFG_LEARN_ENA,

> +		       ANA_PORT_PORT_CFG, port);

> +

>  	return 0;

>  }

>  EXPORT_SYMBOL(ocelot_port_bridge_leave);

> @@ -1391,6 +1398,18 @@ int ocelot_get_max_mtu(struct ocelot *ocelot, int port)

>  }

>  EXPORT_SYMBOL(ocelot_get_max_mtu);

>  

> +void ocelot_port_bridge_flags(struct ocelot *ocelot, int port,

> +			      unsigned long flags,

> +			      struct switchdev_trans *trans)

> +{

> +	struct ocelot_port *ocelot_port = ocelot->ports[port];

> +

> +	if (switchdev_trans_ph_prepare(trans))

> +		return;

> +

> +	ocelot_port->brport_flags = flags;

> +}

> +

>  void ocelot_init_port(struct ocelot *ocelot, int port)

>  {

>  	struct ocelot_port *ocelot_port = ocelot->ports[port];

> diff --git a/drivers/net/ethernet/mscc/ocelot.h b/drivers/net/ethernet/mscc/ocelot.h

> index 291d39d49c4e..739bd201d951 100644

> --- a/drivers/net/ethernet/mscc/ocelot.h

> +++ b/drivers/net/ethernet/mscc/ocelot.h

> @@ -102,6 +102,9 @@ struct ocelot_multicast {

>  	struct ocelot_pgid *pgid;

>  };

>  

> +void ocelot_port_bridge_flags(struct ocelot *ocelot, int port,

> +			      unsigned long flags,

> +			      struct switchdev_trans *trans);

>  int ocelot_port_fdb_do_dump(const unsigned char *addr, u16 vid,

>  			    bool is_static, void *data);

>  int ocelot_mact_learn(struct ocelot *ocelot, int port,

> diff --git a/drivers/net/ethernet/mscc/ocelot_net.c b/drivers/net/ethernet/mscc/ocelot_net.c

> index 9ba7e2b166e9..93ecd5274156 100644

> --- a/drivers/net/ethernet/mscc/ocelot_net.c

> +++ b/drivers/net/ethernet/mscc/ocelot_net.c

> @@ -882,6 +882,10 @@ static int ocelot_port_attr_set(struct net_device *dev,

>  	case SWITCHDEV_ATTR_ID_BRIDGE_MC_DISABLED:

>  		ocelot_port_attr_mc_set(ocelot, port, !attr->u.mc_disabled);

>  		break;

> +	case SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS:

> +		ocelot_port_bridge_flags(ocelot, port, attr->u.brport_flags,

> +					 trans);

> +		break;

>  	default:

>  		err = -EOPNOTSUPP;

>  		break;

> diff --git a/include/soc/mscc/ocelot.h b/include/soc/mscc/ocelot.h

> index 2f4cd3288bcc..50514c087231 100644

> --- a/include/soc/mscc/ocelot.h

> +++ b/include/soc/mscc/ocelot.h

> @@ -581,6 +581,8 @@ struct ocelot_port {

>  

>  	struct regmap			*target;

>  

> +	unsigned long			brport_flags;

> +

>  	bool				vlan_aware;

>  	/* VLAN that untagged frames are classified to, on ingress */

>  	struct ocelot_vlan		pvid_vlan;

> -- 

> 2.25.1

> 


-- 
Alexandre Belloni, Bootlin
Embedded Linux and Kernel engineering
https://bootlin.com
Alexandre Belloni Dec. 15, 2020, 3:56 p.m. UTC | #2
On 08/12/2020 14:07:51+0200, Vladimir Oltean wrote:
> Since switchdev/DSA exposes network interfaces that fulfill many of the

> same user space expectations that dedicated NICs do, it makes sense to

> not deny bonding interfaces with a bonding policy that we cannot offload,

> but instead allow the bonding driver to select the egress interface in

> software.

> 

> Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>

Reviewed-by: Alexandre Belloni <alexandre.belloni@bootlin.com>


> ---

>  drivers/net/ethernet/mscc/ocelot_net.c | 38 ++++++++++----------------

>  1 file changed, 15 insertions(+), 23 deletions(-)

> 

> diff --git a/drivers/net/ethernet/mscc/ocelot_net.c b/drivers/net/ethernet/mscc/ocelot_net.c

> index 47b620967156..77957328722a 100644

> --- a/drivers/net/ethernet/mscc/ocelot_net.c

> +++ b/drivers/net/ethernet/mscc/ocelot_net.c

> @@ -1022,6 +1022,15 @@ static int ocelot_netdevice_changeupper(struct net_device *dev,

>  		}

>  	}

>  	if (netif_is_lag_master(info->upper_dev)) {

> +		struct netdev_lag_upper_info *lag_upper_info;

> +

> +		lag_upper_info = info->upper_info;

> +

> +		/* Only offload what we can */

> +		if (lag_upper_info &&

> +		    lag_upper_info->tx_type != NETDEV_LAG_TX_TYPE_HASH)

> +			return NOTIFY_DONE;

> +

>  		if (info->linking)

>  			err = ocelot_port_lag_join(ocelot, port,

>  						   info->upper_dev);

> @@ -1037,10 +1046,16 @@ static int

>  ocelot_netdevice_lag_changeupper(struct net_device *dev,

>  				 struct netdev_notifier_changeupper_info *info)

>  {

> +	struct netdev_lag_upper_info *lag_upper_info = info->upper_info;

>  	struct net_device *lower;

>  	struct list_head *iter;

>  	int err = NOTIFY_DONE;

>  

> +	/* Can't offload LAG => also do bridging in software */

> +	if (lag_upper_info &&

> +	    lag_upper_info->tx_type != NETDEV_LAG_TX_TYPE_HASH)

> +		return NOTIFY_DONE;

> +

>  	netdev_for_each_lower_dev(dev, lower, iter) {

>  		err = ocelot_netdevice_changeupper(lower, info);

>  		if (err)

> @@ -1056,29 +1071,6 @@ static int ocelot_netdevice_event(struct notifier_block *unused,

>  	struct net_device *dev = netdev_notifier_info_to_dev(ptr);

>  

>  	switch (event) {

> -	case NETDEV_PRECHANGEUPPER: {

> -		struct netdev_notifier_changeupper_info *info = ptr;

> -		struct netdev_lag_upper_info *lag_upper_info;

> -		struct netlink_ext_ack *extack;

> -

> -		if (!ocelot_netdevice_dev_check(dev))

> -			break;

> -

> -		if (!netif_is_lag_master(info->upper_dev))

> -			break;

> -

> -		lag_upper_info = info->upper_info;

> -

> -		if (lag_upper_info &&

> -		    lag_upper_info->tx_type != NETDEV_LAG_TX_TYPE_HASH) {

> -			extack = netdev_notifier_info_to_extack(&info->info);

> -			NL_SET_ERR_MSG_MOD(extack, "LAG device using unsupported Tx type");

> -

> -			return NOTIFY_BAD;

> -		}

> -

> -		break;

> -	}

>  	case NETDEV_CHANGEUPPER: {

>  		struct netdev_notifier_changeupper_info *info = ptr;

>  

> -- 

> 2.25.1

> 


-- 
Alexandre Belloni, Bootlin
Embedded Linux and Kernel engineering
https://bootlin.com
Alexandre Belloni Dec. 15, 2020, 4:03 p.m. UTC | #3
On 08/12/2020 14:07:52+0200, Vladimir Oltean wrote:
> IPv6 header information is not currently part of the entropy source for

> the 4-bit aggregation code used for LAG offload, even though it could be.

> The hardware reference manual says about these fields:

> 

> ANA::AGGR_CFG.AC_IP6_TCPUDP_PORT_ENA

> Use IPv6 TCP/UDP port when calculating aggregation code. Configure

> identically for all ports. Recommended value is 1.

> 

> ANA::AGGR_CFG.AC_IP6_FLOW_LBL_ENA

> Use IPv6 flow label when calculating AC. Configure identically for all

> ports. Recommended value is 1.

> 

> Integration with the xmit_hash_policy of the bonding interface is TBD.

> 

> Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>

Reviewed-by: Alexandre Belloni <alexandre.belloni@bootlin.com>


> ---

>  drivers/net/ethernet/mscc/ocelot.c | 5 ++++-

>  1 file changed, 4 insertions(+), 1 deletion(-)

> 

> diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c

> index 7a5c534099d3..13e86dd71e5a 100644

> --- a/drivers/net/ethernet/mscc/ocelot.c

> +++ b/drivers/net/ethernet/mscc/ocelot.c

> @@ -1557,7 +1557,10 @@ int ocelot_init(struct ocelot *ocelot)

>  	ocelot_write(ocelot, ANA_AGGR_CFG_AC_SMAC_ENA |

>  			     ANA_AGGR_CFG_AC_DMAC_ENA |

>  			     ANA_AGGR_CFG_AC_IP4_SIPDIP_ENA |

> -			     ANA_AGGR_CFG_AC_IP4_TCPUDP_ENA, ANA_AGGR_CFG);

> +			     ANA_AGGR_CFG_AC_IP4_TCPUDP_ENA |

> +			     ANA_AGGR_CFG_AC_IP6_FLOW_LBL_ENA |

> +			     ANA_AGGR_CFG_AC_IP6_TCPUDP_ENA,

> +			     ANA_AGGR_CFG);

>  

>  	/* Set MAC age time to default value. The entry is aged after

>  	 * 2*AGE_PERIOD

> -- 

> 2.25.1

> 


-- 
Alexandre Belloni, Bootlin
Embedded Linux and Kernel engineering
https://bootlin.com
Alexandre Belloni Dec. 15, 2020, 4:47 p.m. UTC | #4
On 08/12/2020 14:07:54+0200, Vladimir Oltean wrote:
> The index of the LAG is equal to the logical port ID that all the

> physical port members have, which is further equal to the index of the

> first physical port that is a member of the LAG.

> 

> The code gets a bit carried away with logic like this:

> 

> 	if (a == b)

> 		c = a;

> 	else

> 		c = b;

> 

> which can be simplified, of course, into:

> 

> 	c = b;

> 

> (with a being port, b being lp, c being lag)

> 

> This further makes the "lp" variable redundant, since we can use "lag"

> everywhere where "lp" (logical port) was used. So instead of a "c = b"

> assignment, we can do a complete deletion of b. Only one comment here:

> 

> 		if (bond_mask) {

> 			lp = __ffs(bond_mask);

> 			ocelot->lags[lp] = 0;

> 		}

> 

> lp was clobbered before, because it was used as a temporary variable to

> hold the new smallest port ID from the bond. Now that we don't have "lp"

> any longer, we'll just avoid the temporary variable and zeroize the

> bonding mask directly.

> 

> Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>

Reviewed-by: Alexandre Belloni <alexandre.belloni@bootlin.com>


> ---

>  drivers/net/ethernet/mscc/ocelot.c | 16 ++++++----------

>  1 file changed, 6 insertions(+), 10 deletions(-)

> 

> diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c

> index 30dee1f957d1..080fd4ce37ea 100644

> --- a/drivers/net/ethernet/mscc/ocelot.c

> +++ b/drivers/net/ethernet/mscc/ocelot.c

> @@ -1291,28 +1291,24 @@ int ocelot_port_lag_join(struct ocelot *ocelot, int port,

>  			 struct net_device *bond)

>  {

>  	u32 bond_mask = 0;

> -	int lag, lp;

> +	int lag;

>  

>  	ocelot->ports[port]->bond = bond;

>  

>  	bond_mask = ocelot_get_bond_mask(ocelot, bond);

>  

> -	lp = __ffs(bond_mask);

> +	lag = __ffs(bond_mask);

>  

>  	/* If the new port is the lowest one, use it as the logical port from

>  	 * now on

>  	 */

> -	if (port == lp) {

> -		lag = port;

> +	if (port == lag) {

>  		ocelot->lags[port] = bond_mask;

>  		bond_mask &= ~BIT(port);

> -		if (bond_mask) {

> -			lp = __ffs(bond_mask);

> -			ocelot->lags[lp] = 0;

> -		}

> +		if (bond_mask)

> +			ocelot->lags[__ffs(bond_mask)] = 0;

>  	} else {

> -		lag = lp;

> -		ocelot->lags[lp] |= BIT(port);

> +		ocelot->lags[lag] |= BIT(port);

>  	}

>  

>  	ocelot_setup_lag(ocelot, lag);

> -- 

> 2.25.1

> 


-- 
Alexandre Belloni, Bootlin
Embedded Linux and Kernel engineering
https://bootlin.com
Alexandre Belloni Dec. 15, 2020, 4:49 p.m. UTC | #5
On 08/12/2020 14:07:55+0200, Vladimir Oltean wrote:
> In anticipation of further simplification, make it more clear what we're

> iterating over.

> 

> Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>

Reviewed-by: Alexandre Belloni <alexandre.belloni@bootlin.com>


> ---

>  drivers/net/ethernet/mscc/ocelot.c | 11 +++++++----

>  1 file changed, 7 insertions(+), 4 deletions(-)

> 

> diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c

> index 080fd4ce37ea..c3c6682e6e79 100644

> --- a/drivers/net/ethernet/mscc/ocelot.c

> +++ b/drivers/net/ethernet/mscc/ocelot.c

> @@ -903,7 +903,7 @@ void ocelot_bridge_stp_state_set(struct ocelot *ocelot, int port, u8 state)

>  {

>  	struct ocelot_port *ocelot_port = ocelot->ports[port];

>  	u32 port_cfg;

> -	int p, i;

> +	int p;

>  

>  	if (!(BIT(port) & ocelot->bridge_mask))

>  		return;

> @@ -928,14 +928,17 @@ void ocelot_bridge_stp_state_set(struct ocelot *ocelot, int port, u8 state)

>  	ocelot_write_gix(ocelot, port_cfg, ANA_PORT_PORT_CFG, port);

>  

>  	/* Apply FWD mask. The loop is needed to add/remove the current port as

> -	 * a source for the other ports.

> +	 * a source for the other ports. If the source port is in a bond, then

> +	 * all the other ports from that bond need to be removed from this

> +	 * source port's forwarding mask.

>  	 */

>  	for (p = 0; p < ocelot->num_phys_ports; p++) {

>  		if (ocelot->bridge_fwd_mask & BIT(p)) {

>  			unsigned long mask = ocelot->bridge_fwd_mask & ~BIT(p);

> +			int lag;

>  

> -			for (i = 0; i < ocelot->num_phys_ports; i++) {

> -				unsigned long bond_mask = ocelot->lags[i];

> +			for (lag = 0; lag < ocelot->num_phys_ports; lag++) {

> +				unsigned long bond_mask = ocelot->lags[lag];

>  

>  				if (!bond_mask)

>  					continue;

> -- 

> 2.25.1

> 


-- 
Alexandre Belloni, Bootlin
Embedded Linux and Kernel engineering
https://bootlin.com
Alexandre Belloni Dec. 16, 2020, 8:15 p.m. UTC | #6
On 08/12/2020 14:07:57+0200, Vladimir Oltean wrote:
> The setup of logical port IDs is done in two places: from the inconclusively

> named ocelot_setup_lag and from ocelot_port_lag_leave, a function that

> also calls ocelot_setup_lag (which apparently does an incomplete setup

> of the LAG).

> 

> To improve this situation, we can rename ocelot_setup_lag into

> ocelot_setup_logical_port_ids, and drop the "lag" argument. It will now

> set up the logical port IDs of all switch ports, which may be just

> slightly more inefficient but more maintainable.

> 

> Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>

Reviewed-by: Alexandre Belloni <alexandre.belloni@bootlin.com>


> ---

>  drivers/net/ethernet/mscc/ocelot.c | 47 ++++++++++++++++++------------

>  1 file changed, 28 insertions(+), 19 deletions(-)

> 

> diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c

> index ee0fcee8e09a..1a98c24af056 100644

> --- a/drivers/net/ethernet/mscc/ocelot.c

> +++ b/drivers/net/ethernet/mscc/ocelot.c

> @@ -1279,20 +1279,36 @@ static void ocelot_set_aggr_pgids(struct ocelot *ocelot)

>  	}

>  }

>  

> -static void ocelot_setup_lag(struct ocelot *ocelot, int lag)

> +/* When offloading a bonding interface, the switch ports configured under the

> + * same bond must have the same logical port ID, equal to the physical port ID

> + * of the lowest numbered physical port in that bond. Otherwise, in standalone/

> + * bridged mode, each port has a logical port ID equal to its physical port ID.

> + */

> +static void ocelot_setup_logical_port_ids(struct ocelot *ocelot)

>  {

> -	unsigned long bond_mask = ocelot->lags[lag];

> -	unsigned int p;

> +	int port;

>  

> -	for_each_set_bit(p, &bond_mask, ocelot->num_phys_ports) {

> -		u32 port_cfg = ocelot_read_gix(ocelot, ANA_PORT_PORT_CFG, p);

> +	for (port = 0; port < ocelot->num_phys_ports; port++) {

> +		struct ocelot_port *ocelot_port = ocelot->ports[port];

> +		struct net_device *bond;

> +

> +		if (!ocelot_port)

> +			continue;

>  

> -		port_cfg &= ~ANA_PORT_PORT_CFG_PORTID_VAL_M;

> +		bond = ocelot_port->bond;

> +		if (bond) {

> +			int lag = __ffs(ocelot_get_bond_mask(ocelot, bond));

>  

> -		/* Use lag port as logical port for port i */

> -		ocelot_write_gix(ocelot, port_cfg |

> -				 ANA_PORT_PORT_CFG_PORTID_VAL(lag),

> -				 ANA_PORT_PORT_CFG, p);

> +			ocelot_rmw_gix(ocelot,

> +				       ANA_PORT_PORT_CFG_PORTID_VAL(lag),

> +				       ANA_PORT_PORT_CFG_PORTID_VAL_M,

> +				       ANA_PORT_PORT_CFG, port);

> +		} else {

> +			ocelot_rmw_gix(ocelot,

> +				       ANA_PORT_PORT_CFG_PORTID_VAL(port),

> +				       ANA_PORT_PORT_CFG_PORTID_VAL_M,

> +				       ANA_PORT_PORT_CFG, port);

> +		}

>  	}

>  }

>  

> @@ -1320,7 +1336,7 @@ int ocelot_port_lag_join(struct ocelot *ocelot, int port,

>  		ocelot->lags[lag] |= BIT(port);

>  	}

>  

> -	ocelot_setup_lag(ocelot, lag);

> +	ocelot_setup_logical_port_ids(ocelot);

>  	ocelot_apply_bridge_fwd_mask(ocelot);

>  	ocelot_set_aggr_pgids(ocelot);

>  

> @@ -1331,7 +1347,6 @@ EXPORT_SYMBOL(ocelot_port_lag_join);

>  void ocelot_port_lag_leave(struct ocelot *ocelot, int port,

>  			   struct net_device *bond)

>  {

> -	u32 port_cfg;

>  	int i;

>  

>  	ocelot->ports[port]->bond = NULL;

> @@ -1348,15 +1363,9 @@ void ocelot_port_lag_leave(struct ocelot *ocelot, int port,

>  

>  		ocelot->lags[n] = ocelot->lags[port];

>  		ocelot->lags[port] = 0;

> -

> -		ocelot_setup_lag(ocelot, n);

>  	}

>  

> -	port_cfg = ocelot_read_gix(ocelot, ANA_PORT_PORT_CFG, port);

> -	port_cfg &= ~ANA_PORT_PORT_CFG_PORTID_VAL_M;

> -	ocelot_write_gix(ocelot, port_cfg | ANA_PORT_PORT_CFG_PORTID_VAL(port),

> -			 ANA_PORT_PORT_CFG, port);

> -

> +	ocelot_setup_logical_port_ids(ocelot);

>  	ocelot_apply_bridge_fwd_mask(ocelot);

>  	ocelot_set_aggr_pgids(ocelot);

>  }

> -- 

> 2.25.1

> 


-- 
Alexandre Belloni, Bootlin
Embedded Linux and Kernel engineering
https://bootlin.com
Alexandre Belloni Dec. 16, 2020, 9:31 p.m. UTC | #7
On 08/12/2020 14:07:59+0200, Vladimir Oltean wrote:
> It makes it a bit easier to read and understand the code that deals with

> balancing the 16 aggregation codes among the ports in a certain LAG.

> 

> Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>

Reviewed-by: Alexandre Belloni <alexandre.belloni@bootlin.com>


> ---

>  drivers/net/ethernet/mscc/ocelot.c | 7 +++----

>  1 file changed, 3 insertions(+), 4 deletions(-)

> 

> diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c

> index d4dbba66aa65..d87e80a15ca5 100644

> --- a/drivers/net/ethernet/mscc/ocelot.c

> +++ b/drivers/net/ethernet/mscc/ocelot.c

> @@ -1263,8 +1263,8 @@ static int ocelot_set_aggr_pgids(struct ocelot *ocelot)

>  

>  	/* Now, set PGIDs for each LAG */

>  	for (lag = 0; lag < ocelot->num_phys_ports; lag++) {

> +		int num_ports_in_lag = 0;

>  		unsigned long bond_mask;

> -		int aggr_count = 0;

>  		u8 aggr_idx[16];

>  

>  		if (!bonds[lag])

> @@ -1276,8 +1276,7 @@ static int ocelot_set_aggr_pgids(struct ocelot *ocelot)

>  			// Destination mask

>  			ocelot_write_rix(ocelot, bond_mask,

>  					 ANA_PGID_PGID, port);

> -			aggr_idx[aggr_count] = port;

> -			aggr_count++;

> +			aggr_idx[num_ports_in_lag++] = port;

>  		}

>  

>  		for_each_aggr_pgid(ocelot, i) {

> @@ -1285,7 +1284,7 @@ static int ocelot_set_aggr_pgids(struct ocelot *ocelot)

>  

>  			ac = ocelot_read_rix(ocelot, ANA_PGID_PGID, i);

>  			ac &= ~bond_mask;

> -			ac |= BIT(aggr_idx[i % aggr_count]);

> +			ac |= BIT(aggr_idx[i % num_ports_in_lag]);

>  			ocelot_write_rix(ocelot, ac, ANA_PGID_PGID, i);

>  		}

>  

> -- 

> 2.25.1

> 


-- 
Alexandre Belloni, Bootlin
Embedded Linux and Kernel engineering
https://bootlin.com
Alexandre Belloni Dec. 16, 2020, 9:32 p.m. UTC | #8
On 08/12/2020 14:08:00+0200, Vladimir Oltean wrote:
> At present there is an issue when ocelot is offloading a bonding

> interface, but one of the links of the physical ports goes down. Traffic

> keeps being hashed towards that destination, and of course gets dropped

> on egress.

> 

> Monitor the netdev notifier events emitted by the bonding driver for

> changes in the physical state of lower interfaces, to determine which

> ports are active and which ones are no longer.

> 

> Then extend ocelot_get_bond_mask to return either the configured bonding

> interfaces, or the active ones, depending on a boolean argument. The

> code that does rebalancing only needs to do so among the active ports,

> whereas the bridge forwarding mask and the logical port IDs still need

> to look at the permanently bonded ports.

> 

> Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>

> ---

>  drivers/net/ethernet/mscc/ocelot.c     | 43 ++++++++++++++++++++------

>  drivers/net/ethernet/mscc/ocelot.h     |  2 ++

>  drivers/net/ethernet/mscc/ocelot_net.c | 26 ++++++++++++++++

>  include/soc/mscc/ocelot.h              |  1 +

>  4 files changed, 63 insertions(+), 9 deletions(-)

> 

> diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c

> index d87e80a15ca5..5c71d121048d 100644

> --- a/drivers/net/ethernet/mscc/ocelot.c

> +++ b/drivers/net/ethernet/mscc/ocelot.c

> @@ -881,7 +881,8 @@ int ocelot_get_ts_info(struct ocelot *ocelot, int port,

>  }

>  EXPORT_SYMBOL(ocelot_get_ts_info);

>  

> -static u32 ocelot_get_bond_mask(struct ocelot *ocelot, struct net_device *bond)

> +static u32 ocelot_get_bond_mask(struct ocelot *ocelot, struct net_device *bond,

> +				bool just_active_ports)


only_active_ports maybe ?

>  {

>  	u32 bond_mask = 0;

>  	int port;

> @@ -892,8 +893,12 @@ static u32 ocelot_get_bond_mask(struct ocelot *ocelot, struct net_device *bond)

>  		if (!ocelot_port)

>  			continue;

>  

> -		if (ocelot_port->bond == bond)

> +		if (ocelot_port->bond == bond) {

> +			if (just_active_ports && !ocelot_port->lag_tx_active)

> +				continue;

> +

>  			bond_mask |= BIT(port);

> +		}

>  	}

>  

>  	return bond_mask;

> @@ -919,7 +924,7 @@ static void ocelot_apply_bridge_fwd_mask(struct ocelot *ocelot)

>  			struct net_device *bond = ocelot_port->bond;

>  

>  			if (bond)

> -				mask &= ~ocelot_get_bond_mask(ocelot, bond);

> +				mask &= ~ocelot_get_bond_mask(ocelot, bond, false);

>  

>  			ocelot_write_rix(ocelot, mask,

>  					 ANA_PGID_PGID, PGID_SRC + port);

> @@ -1261,22 +1266,22 @@ static int ocelot_set_aggr_pgids(struct ocelot *ocelot)

>  		bonds[port] = ocelot_port->bond;

>  	}

>  

> -	/* Now, set PGIDs for each LAG */

> +	/* Now, set PGIDs for each active LAG */

>  	for (lag = 0; lag < ocelot->num_phys_ports; lag++) {

> -		int num_ports_in_lag = 0;

> +		int num_active_ports = 0;

>  		unsigned long bond_mask;

>  		u8 aggr_idx[16];

>  

>  		if (!bonds[lag])

>  			continue;

>  

> -		bond_mask = ocelot_get_bond_mask(ocelot, bonds[lag]);

> +		bond_mask = ocelot_get_bond_mask(ocelot, bonds[lag], true);

>  

>  		for_each_set_bit(port, &bond_mask, ocelot->num_phys_ports) {

>  			// Destination mask

>  			ocelot_write_rix(ocelot, bond_mask,

>  					 ANA_PGID_PGID, port);

> -			aggr_idx[num_ports_in_lag++] = port;

> +			aggr_idx[num_active_ports++] = port;

>  		}

>  

>  		for_each_aggr_pgid(ocelot, i) {

> @@ -1284,7 +1289,11 @@ static int ocelot_set_aggr_pgids(struct ocelot *ocelot)

>  

>  			ac = ocelot_read_rix(ocelot, ANA_PGID_PGID, i);

>  			ac &= ~bond_mask;

> -			ac |= BIT(aggr_idx[i % num_ports_in_lag]);

> +			/* Don't do division by zero if there was no active

> +			 * port. Just make all aggregation codes zero.

> +			 */

> +			if (num_active_ports)

> +				ac |= BIT(aggr_idx[i % num_active_ports]);

>  			ocelot_write_rix(ocelot, ac, ANA_PGID_PGID, i);

>  		}

>  

> @@ -1320,7 +1329,8 @@ static void ocelot_setup_logical_port_ids(struct ocelot *ocelot)

>  

>  		bond = ocelot_port->bond;

>  		if (bond) {

> -			int lag = __ffs(ocelot_get_bond_mask(ocelot, bond));

> +			int lag = __ffs(ocelot_get_bond_mask(ocelot, bond,

> +							     false));

>  

>  			ocelot_rmw_gix(ocelot,

>  				       ANA_PORT_PORT_CFG_PORTID_VAL(lag),

> @@ -1357,6 +1367,21 @@ int ocelot_port_lag_leave(struct ocelot *ocelot, int port,

>  }

>  EXPORT_SYMBOL(ocelot_port_lag_leave);

>  

> +int ocelot_port_lag_change(struct ocelot *ocelot, int port,

> +			   struct netdev_lag_lower_state_info *info)

> +{

> +	struct ocelot_port *ocelot_port = ocelot->ports[port];

> +	bool is_active = info->link_up && info->tx_enabled;

> +

> +	if (ocelot_port->lag_tx_active == is_active)

> +		return 0;

> +

> +	ocelot_port->lag_tx_active = is_active;

> +

> +	/* Rebalance the LAGs */

> +	return ocelot_set_aggr_pgids(ocelot);

> +}

> +

>  /* Configure the maximum SDU (L2 payload) on RX to the value specified in @sdu.

>   * The length of VLAN tags is accounted for automatically via DEV_MAC_TAGS_CFG.

>   * In the special case that it's the NPI port that we're configuring, the

> diff --git a/drivers/net/ethernet/mscc/ocelot.h b/drivers/net/ethernet/mscc/ocelot.h

> index bef8d5f8e6e5..0860125b623c 100644

> --- a/drivers/net/ethernet/mscc/ocelot.h

> +++ b/drivers/net/ethernet/mscc/ocelot.h

> @@ -116,6 +116,8 @@ int ocelot_port_lag_join(struct ocelot *ocelot, int port,

>  			 struct net_device *bond);

>  int ocelot_port_lag_leave(struct ocelot *ocelot, int port,

>  			  struct net_device *bond);

> +int ocelot_port_lag_change(struct ocelot *ocelot, int port,

> +			   struct netdev_lag_lower_state_info *info);

>  struct net_device *ocelot_port_to_netdev(struct ocelot *ocelot, int port);

>  int ocelot_netdev_to_port(struct net_device *dev);

>  

> diff --git a/drivers/net/ethernet/mscc/ocelot_net.c b/drivers/net/ethernet/mscc/ocelot_net.c

> index 93aaa631e347..714fc99f8339 100644

> --- a/drivers/net/ethernet/mscc/ocelot_net.c

> +++ b/drivers/net/ethernet/mscc/ocelot_net.c

> @@ -1065,6 +1065,23 @@ ocelot_netdevice_lag_changeupper(struct net_device *dev,

>  	return NOTIFY_DONE;

>  }

>  

> +static int

> +ocelot_netdevice_changelowerstate(struct net_device *dev,

> +				  struct netdev_lag_lower_state_info *linfo)

> +{

> +	struct ocelot_port_private *priv = netdev_priv(dev);

> +	struct ocelot_port *ocelot_port = &priv->port;

> +	struct ocelot *ocelot = ocelot_port->ocelot;

> +	int port = priv->chip_port;

> +	int err;

> +

> +	if (!ocelot_port->bond)

> +		return NOTIFY_DONE;

> +

> +	err = ocelot_port_lag_change(ocelot, port, linfo);

> +	return notifier_from_errno(err);

> +}

> +

>  static int ocelot_netdevice_event(struct notifier_block *unused,

>  				  unsigned long event, void *ptr)

>  {

> @@ -1082,6 +1099,15 @@ static int ocelot_netdevice_event(struct notifier_block *unused,

>  

>  		break;

>  	}

> +	case NETDEV_CHANGELOWERSTATE: {

> +		struct netdev_notifier_changelowerstate_info *info = ptr;

> +

> +		if (!ocelot_netdevice_dev_check(dev))

> +			break;

> +

> +		return ocelot_netdevice_changelowerstate(dev,

> +							 info->lower_state_info);

> +	}

>  	default:

>  		break;

>  	}

> diff --git a/include/soc/mscc/ocelot.h b/include/soc/mscc/ocelot.h

> index 0cd45659430f..8a44b9064789 100644

> --- a/include/soc/mscc/ocelot.h

> +++ b/include/soc/mscc/ocelot.h

> @@ -599,6 +599,7 @@ struct ocelot_port {

>  	u8				*xmit_template;

>  

>  	struct net_device		*bond;

> +	bool				lag_tx_active;

>  };

>  

>  struct ocelot {

> -- 

> 2.25.1

> 


-- 
Alexandre Belloni, Bootlin
Embedded Linux and Kernel engineering
https://bootlin.com