[net-next,2/4] net: dsa: Link aggregation support

Message ID	20201119144508.29468-3-tobias@waldekranz.com
State	New
Headers
Return-Path: <netdev-owner@kernel.org>
From: Tobias Waldekranz <tobias@waldekranz.com>
To: davem@davemloft.net, kuba@kernel.org
Cc: andrew@lunn.ch, vivien.didelot@gmail.com, f.fainelli@gmail.com, olteanv@gmail.com, j.vosburgh@gmail.com, vfalico@gmail.com, andy@greyhouse.net, netdev@vger.kernel.org
Subject: [PATCH net-next 2/4] net: dsa: Link aggregation support
Date: Thu, 19 Nov 2020 15:45:06 +0100
Message-Id: <20201119144508.29468-3-tobias@waldekranz.com>
In-Reply-To: <20201119144508.29468-1-tobias@waldekranz.com>
References: <20201119144508.29468-1-tobias@waldekranz.com>
Organization: Westermo
Precedence: bulk
Series	[net-next,1/4] net: bonding: Notify ports about their initial state
Patches in series: [net-next,1/4] net: bonding: Notify ports about their initial state; [net-next,2/4] net: dsa: Link aggregation support

diff --git a/include/net/dsa.h b/include/net/dsa.h index 4e60d2610f20..3fd8f041ddbe 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -147,6 +147,9 @@ struct dsa_switch_tree { /* List of switch ports */ struct list_head ports; + /* List of configured LAGs */ + struct list_head lags; + /* List of DSA links composing the routing table */ struct list_head rtable; }; @@ -180,6 +183,49 @@ struct dsa_mall_tc_entry { }; }; +struct dsa_lag { + struct net_device *dev; + int id; + + struct list_head ports; + + /* For multichip systems, we must ensure that each hash bucket + * is only enabled on a single egress port throughout the + * whole tree, lest we send duplicates. Therefore we must + * maintain a global list of active tx ports, so that each + * switch can figure out which buckets to enable on which + * ports. + */ + struct list_head tx_ports; + int num_tx; + + struct kref refcount; + struct list_head list; +}; + +static inline struct dsa_lag *dsa_lag_by_dev(struct dsa_switch_tree *dst, + struct net_device *dev) +{ + struct dsa_lag *lag; + + list_for_each_entry(lag, &dst->lags, list) + if (lag->dev == dev) + return lag; + + return NULL; +} + +static inline struct net_device *dsa_lag_dev_by_id(struct dsa_switch_tree *dst, + int id) +{ + struct dsa_lag *lag; + + list_for_each_entry_rcu(lag, &dst->lags, list) + if (lag->id == id) + return lag->dev; + + return NULL; +} struct dsa_port { /* A CPU port is physically connected to a master device. 
@@ -220,6 +266,9 @@ struct dsa_port { bool devlink_port_setup; struct phylink *pl; struct phylink_config pl_config; + struct dsa_lag *lag; + struct list_head lag_list; + struct list_head lag_tx_list; struct list_head list; @@ -624,6 +673,13 @@ struct dsa_switch_ops { void (*crosschip_bridge_leave)(struct dsa_switch *ds, int tree_index, int sw_index, int port, struct net_device *br); + int (*crosschip_lag_change)(struct dsa_switch *ds, int sw_index, + int port, struct net_device *lag_dev, + struct netdev_lag_lower_state_info *info); + int (*crosschip_lag_join)(struct dsa_switch *ds, int sw_index, + int port, struct net_device *lag_dev); + void (*crosschip_lag_leave)(struct dsa_switch *ds, int sw_index, + int port, struct net_device *lag_dev); /* * PTP functionality @@ -655,6 +711,16 @@ struct dsa_switch_ops { int (*port_change_mtu)(struct dsa_switch *ds, int port, int new_mtu); int (*port_max_mtu)(struct dsa_switch *ds, int port); + + /* + * LAG integration + */ + int (*port_lag_change)(struct dsa_switch *ds, int port, + struct netdev_lag_lower_state_info *info); + int (*port_lag_join)(struct dsa_switch *ds, int port, + struct net_device *lag_dev); + void (*port_lag_leave)(struct dsa_switch *ds, int port, + struct net_device *lag_dev); }; #define DSA_DEVLINK_PARAM_DRIVER(_id, _name, _type, _cmodes) \ diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c index 183003e45762..708d5a34e150 100644 --- a/net/dsa/dsa2.c +++ b/net/dsa/dsa2.c @@ -66,6 +66,7 @@ static struct dsa_switch_tree *dsa_tree_alloc(int index) INIT_LIST_HEAD(&dst->rtable); INIT_LIST_HEAD(&dst->ports); + INIT_LIST_HEAD(&dst->lags); INIT_LIST_HEAD(&dst->list); list_add_tail(&dst->list, &dsa_tree_list); @@ -659,6 +660,8 @@ static struct dsa_port *dsa_port_touch(struct dsa_switch *ds, int index) dp->index = index; INIT_LIST_HEAD(&dp->list); + INIT_LIST_HEAD(&dp->lag_list); + INIT_LIST_HEAD(&dp->lag_tx_list); list_add_tail(&dp->list, &dst->ports); return dp; diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h index 
7c96aae9062c..214051f3ced0 100644 --- a/net/dsa/dsa_priv.h +++ b/net/dsa/dsa_priv.h @@ -20,6 +20,9 @@ enum { DSA_NOTIFIER_BRIDGE_LEAVE, DSA_NOTIFIER_FDB_ADD, DSA_NOTIFIER_FDB_DEL, + DSA_NOTIFIER_LAG_CHANGE, + DSA_NOTIFIER_LAG_JOIN, + DSA_NOTIFIER_LAG_LEAVE, DSA_NOTIFIER_MDB_ADD, DSA_NOTIFIER_MDB_DEL, DSA_NOTIFIER_VLAN_ADD, @@ -57,6 +60,14 @@ struct dsa_notifier_mdb_info { int port; }; +/* DSA_NOTIFIER_LAG_* */ +struct dsa_notifier_lag_info { + struct netdev_lag_lower_state_info *info; + struct net_device *lag; + int sw_index; + int port; +}; + /* DSA_NOTIFIER_VLAN_* */ struct dsa_notifier_vlan_info { const struct switchdev_obj_port_vlan *vlan; @@ -135,6 +146,10 @@ void dsa_port_disable_rt(struct dsa_port *dp); void dsa_port_disable(struct dsa_port *dp); int dsa_port_bridge_join(struct dsa_port *dp, struct net_device *br); void dsa_port_bridge_leave(struct dsa_port *dp, struct net_device *br); +int dsa_port_lag_change(struct dsa_port *dp, + struct netdev_lag_lower_state_info *linfo); +int dsa_port_lag_join(struct dsa_port *dp, struct net_device *lag_dev); +void dsa_port_lag_leave(struct dsa_port *dp, struct net_device *lag_dev); int dsa_port_vlan_filtering(struct dsa_port *dp, bool vlan_filtering, struct switchdev_trans *trans); bool dsa_port_skip_vlan_configuration(struct dsa_port *dp); @@ -167,6 +182,22 @@ int dsa_port_link_register_of(struct dsa_port *dp); void dsa_port_link_unregister_of(struct dsa_port *dp); extern const struct phylink_mac_ops dsa_port_phylink_mac_ops; +static inline bool dsa_port_can_offload(struct dsa_port *dp, + struct net_device *dev) +{ + /* Switchdev offloading can be configured on: */ + + if (dev == dp->slave) + /* DSA ports directly connected to a bridge. 
*/ + return true; + + if (dp->lag && dev == dp->lag->dev) + /* DSA ports connected to a bridge via a LAG */ + return true; + + return false; +} + /* slave.c */ extern const struct dsa_device_ops notag_netdev_ops; void dsa_slave_mii_bus_init(struct dsa_switch *ds); diff --git a/net/dsa/port.c b/net/dsa/port.c index 73569c9af3cc..4bb8a69d7ec2 100644 --- a/net/dsa/port.c +++ b/net/dsa/port.c @@ -193,6 +193,149 @@ void dsa_port_bridge_leave(struct dsa_port *dp, struct net_device *br) dsa_port_set_state_now(dp, BR_STATE_FORWARDING); } +static struct dsa_lag *dsa_lag_get(struct dsa_switch_tree *dst, + struct net_device *dev) +{ + unsigned long busy = 0; + struct dsa_lag *lag; + int id; + + list_for_each_entry(lag, &dst->lags, list) { + set_bit(lag->id, &busy); + + if (lag->dev == dev) { + kref_get(&lag->refcount); + return lag; + } + } + + id = find_first_zero_bit(&busy, BITS_PER_LONG); + if (id >= BITS_PER_LONG) + return ERR_PTR(-ENOSPC); + + lag = kzalloc(sizeof(*lag), GFP_KERNEL); + if (!lag) + return ERR_PTR(-ENOMEM); + + kref_init(&lag->refcount); + lag->id = id; + lag->dev = dev; + INIT_LIST_HEAD(&lag->ports); + INIT_LIST_HEAD(&lag->tx_ports); + + INIT_LIST_HEAD(&lag->list); + list_add_tail_rcu(&lag->list, &dst->lags); + return lag; +} + +static void dsa_lag_release(struct kref *refcount) +{ + struct dsa_lag *lag = container_of(refcount, struct dsa_lag, refcount); + + list_del_rcu(&lag->list); + synchronize_rcu(); + kfree(lag); +} + +static void dsa_lag_put(struct dsa_lag *lag) +{ + kref_put(&lag->refcount, dsa_lag_release); +} + +int dsa_port_lag_change(struct dsa_port *dp, + struct netdev_lag_lower_state_info *linfo) +{ + struct dsa_notifier_lag_info info = { + .sw_index = dp->ds->index, + .port = dp->index, + .info = linfo, + }; + bool old, new; + + if (!dp->lag) + return 0; + + info.lag = dp->lag->dev; + + /* If this port is on the tx list, it is already enabled. */ + old = !list_empty(&dp->lag_tx_list); + + /* On statically configured aggregates (e.g. 
loadbalance + * without LACP) ports will always be tx_enabled, even if the + * link is down. Thus we require both link_up and tx_enabled + * in order to include it in the tx set. + */ + new = linfo->link_up && linfo->tx_enabled; + + if (new == old) + return 0; + + if (new) { + dp->lag->num_tx++; + list_add_tail(&dp->lag_tx_list, &dp->lag->tx_ports); + } else { + list_del_init(&dp->lag_tx_list); + dp->lag->num_tx--; + } + + return dsa_port_notify(dp, DSA_NOTIFIER_LAG_CHANGE, &info); +} + +int dsa_port_lag_join(struct dsa_port *dp, struct net_device *lag_dev) +{ + struct dsa_notifier_lag_info info = { + .sw_index = dp->ds->index, + .port = dp->index, + .lag = lag_dev, + }; + struct dsa_lag *lag; + int err; + + lag = dsa_lag_get(dp->ds->dst, lag_dev); + if (IS_ERR(lag)) + return PTR_ERR(lag); + + dp->lag = lag; + list_add_tail(&dp->lag_list, &lag->ports); + + err = dsa_port_notify(dp, DSA_NOTIFIER_LAG_JOIN, &info); + if (err) { + dp->lag = NULL; + list_del_init(&dp->lag_list); + dsa_lag_put(lag); + } + + return err; +} + +void dsa_port_lag_leave(struct dsa_port *dp, struct net_device *lag_dev) +{ + struct dsa_notifier_lag_info info = { + .sw_index = dp->ds->index, + .port = dp->index, + .lag = lag_dev, + }; + int err; + + /* Port might have been part of a LAG that in turn was + * attached to a bridge. 
+ */ + if (dp->bridge_dev) + dsa_port_bridge_leave(dp, dp->bridge_dev); + + list_del_init(&dp->lag_list); + list_del_init(&dp->lag_tx_list); + + err = dsa_port_notify(dp, DSA_NOTIFIER_LAG_LEAVE, &info); + if (err) + pr_err("DSA: failed to notify DSA_NOTIFIER_LAG_LEAVE: %d\n", + err); + + dsa_lag_put(dp->lag); + + dp->lag = NULL; +} + /* Must be called under rcu_read_lock() */ static bool dsa_port_can_apply_vlan_filtering(struct dsa_port *dp, bool vlan_filtering) diff --git a/net/dsa/slave.c b/net/dsa/slave.c index ff2266d2b998..ca61349886a4 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -334,7 +334,7 @@ static int dsa_slave_vlan_add(struct net_device *dev, struct switchdev_obj_port_vlan vlan; int vid, err; - if (obj->orig_dev != dev) + if (!dsa_port_can_offload(dp, obj->orig_dev)) return -EOPNOTSUPP; if (dsa_port_skip_vlan_configuration(dp)) @@ -391,7 +391,7 @@ static int dsa_slave_port_obj_add(struct net_device *dev, switch (obj->id) { case SWITCHDEV_OBJ_ID_PORT_MDB: - if (obj->orig_dev != dev) + if (!dsa_port_can_offload(dp, obj->orig_dev)) return -EOPNOTSUPP; err = dsa_port_mdb_add(dp, SWITCHDEV_OBJ_PORT_MDB(obj), trans); break; @@ -421,7 +421,7 @@ static int dsa_slave_vlan_del(struct net_device *dev, struct switchdev_obj_port_vlan *vlan; int vid, err; - if (obj->orig_dev != dev) + if (!dsa_port_can_offload(dp, obj->orig_dev)) return -EOPNOTSUPP; if (dsa_port_skip_vlan_configuration(dp)) @@ -450,7 +450,7 @@ static int dsa_slave_port_obj_del(struct net_device *dev, switch (obj->id) { case SWITCHDEV_OBJ_ID_PORT_MDB: - if (obj->orig_dev != dev) + if (!dsa_port_can_offload(dp, obj->orig_dev)) return -EOPNOTSUPP; err = dsa_port_mdb_del(dp, SWITCHDEV_OBJ_PORT_MDB(obj)); break; @@ -1941,6 +1941,33 @@ static int dsa_slave_changeupper(struct net_device *dev, dsa_port_bridge_leave(dp, info->upper_dev); err = NOTIFY_OK; } + } else if (netif_is_lag_master(info->upper_dev)) { + if (info->linking) { + err = dsa_port_lag_join(dp, info->upper_dev); + err = 
notifier_from_errno(err); + } else { + dsa_port_lag_leave(dp, info->upper_dev); + err = NOTIFY_OK; + } + } + + return err; +} + +static int dsa_slave_lag_changeupper(struct net_device *dev, + struct netdev_notifier_changeupper_info *info) +{ + struct net_device *lower; + struct list_head *iter; + int err = NOTIFY_DONE; + + netdev_for_each_lower_dev(dev, lower, iter) { + if (!dsa_slave_dev_check(lower)) + continue; + + err = dsa_slave_changeupper(lower, info); + if (notifier_to_errno(err)) + break; } return err; @@ -2038,10 +2065,26 @@ static int dsa_slave_netdevice_event(struct notifier_block *nb, break; } case NETDEV_CHANGEUPPER: + if (dsa_slave_dev_check(dev)) + return dsa_slave_changeupper(dev, ptr); + + if (netif_is_lag_master(dev)) + return dsa_slave_lag_changeupper(dev, ptr); + + break; + case NETDEV_CHANGELOWERSTATE: { + struct netdev_notifier_changelowerstate_info *info = ptr; + struct dsa_port *dp; + int err; + if (!dsa_slave_dev_check(dev)) - return NOTIFY_DONE; + break; - return dsa_slave_changeupper(dev, ptr); + dp = dsa_slave_to_port(dev); + + err = dsa_port_lag_change(dp, info->lower_state_info); + return notifier_from_errno(err); + } } return NOTIFY_DONE; diff --git a/net/dsa/switch.c b/net/dsa/switch.c index 3fb362b6874e..3e518df7cd1f 100644 --- a/net/dsa/switch.c +++ b/net/dsa/switch.c @@ -178,6 +178,46 @@ static int dsa_switch_fdb_del(struct dsa_switch *ds, return ds->ops->port_fdb_del(ds, port, info->addr, info->vid); } +static int dsa_switch_lag_change(struct dsa_switch *ds, + struct dsa_notifier_lag_info *info) +{ + if (ds->index == info->sw_index && ds->ops->port_lag_change) + return ds->ops->port_lag_change(ds, info->port, info->info); + + if (ds->index != info->sw_index && ds->ops->crosschip_lag_change) + return ds->ops->crosschip_lag_change(ds, info->sw_index, + info->port, info->lag, + info->info); + + return 0; +} + +static int dsa_switch_lag_join(struct dsa_switch *ds, + struct dsa_notifier_lag_info *info) +{ + if (ds->index == 
info->sw_index && ds->ops->port_lag_join) + return ds->ops->port_lag_join(ds, info->port, info->lag); + + if (ds->index != info->sw_index && ds->ops->crosschip_lag_join) + return ds->ops->crosschip_lag_join(ds, info->sw_index, + info->port, info->lag); + + return 0; +} + +static int dsa_switch_lag_leave(struct dsa_switch *ds, + struct dsa_notifier_lag_info *info) +{ + if (ds->index == info->sw_index && ds->ops->port_lag_leave) + ds->ops->port_lag_leave(ds, info->port, info->lag); + + if (ds->index != info->sw_index && ds->ops->crosschip_lag_leave) + ds->ops->crosschip_lag_leave(ds, info->sw_index, + info->port, info->lag); + + return 0; +} + static bool dsa_switch_mdb_match(struct dsa_switch *ds, int port, struct dsa_notifier_mdb_info *info) { @@ -325,6 +365,15 @@ static int dsa_switch_event(struct notifier_block *nb, case DSA_NOTIFIER_FDB_DEL: err = dsa_switch_fdb_del(ds, info); break; + case DSA_NOTIFIER_LAG_CHANGE: + err = dsa_switch_lag_change(ds, info); + break; + case DSA_NOTIFIER_LAG_JOIN: + err = dsa_switch_lag_join(ds, info); + break; + case DSA_NOTIFIER_LAG_LEAVE: + err = dsa_switch_lag_leave(ds, info); + break; case DSA_NOTIFIER_MDB_ADD: err = dsa_switch_mdb_add(ds, info); break;

[net-next,2/4] net: dsa: Link aggregation support

Commit Message

Comments

Patch