diff mbox series

[RFC,2/3] net: Provide switchdev driver for NXP's More Than IP L2 switch

Message ID 20210622144111.19647-3-lukma@denx.de
State New
Headers show
Series net: imx: Provide support for L2 switch as switchdev accelerator | expand

Commit Message

Lukasz Majewski June 22, 2021, 2:41 p.m. UTC
This change provides driver for More Than IP L2 switch IP block to hook
into switchdev subsystem to provide bridging offloading for i.MX28
SoC (imx287 to be precise).

This code is responsible for configuring this device as L2 bridge when
one decides to bridge eth[01] interfaces (so no vlan, filtering table
aging supported).

This driver shall be regarded as a complementary one for NXP's FEC
(fec_main.c). It reuses some code from it.

Signed-off-by: Lukasz Majewski <lukma@denx.de>
---
 drivers/net/ethernet/freescale/Kconfig        |   1 +
 drivers/net/ethernet/freescale/Makefile       |   1 +
 drivers/net/ethernet/freescale/mtipsw/Kconfig |  11 +
 .../net/ethernet/freescale/mtipsw/Makefile    |   3 +
 .../net/ethernet/freescale/mtipsw/fec_mtip.c  | 438 ++++++++++++++++++
 .../net/ethernet/freescale/mtipsw/fec_mtip.h  | 213 +++++++++
 6 files changed, 667 insertions(+)
 create mode 100644 drivers/net/ethernet/freescale/mtipsw/Kconfig
 create mode 100644 drivers/net/ethernet/freescale/mtipsw/Makefile
 create mode 100644 drivers/net/ethernet/freescale/mtipsw/fec_mtip.c
 create mode 100644 drivers/net/ethernet/freescale/mtipsw/fec_mtip.h

Comments

Andrew Lunn June 22, 2021, 3:03 p.m. UTC | #1
> +static void write_atable(struct mtipl2sw_priv *priv, int index,
> +	unsigned long write_lo, unsigned long write_hi)
> +{
> +	unsigned long atable_base = (unsigned long)priv->hwentry;
> +
> +	writel(write_lo, (volatile void *)atable_base + (index << 3));
> +	writel(write_hi, (volatile void *)atable_base + (index << 3) + 4);

Using volatile is generally wrong. Why do you need it?

 > +}
> +
> +/*
> + * Clear complete MAC Look Up Table
> + */
> +static void esw_clear_atable(struct mtipl2sw_priv *priv)
> +{
> +	int index;
> +	for (index = 0; index < 2048; index++)
> +		write_atable(priv, index, 0, 0);
> +}
> +
> +static int mtipl2_sw_enable(struct mtipl2sw_priv *priv)
> +{
> +	/*
> +	 * L2 switch - reset
> +	 */
> +	writel(MCF_ESW_MODE_SW_RST, &priv->fecp->ESW_MODE);
> +	udelay(10);
> +
> +	/* Configure switch*/
> +	writel(MCF_ESW_MODE_STATRST, &priv->fecp->ESW_MODE);
> +	writel(MCF_ESW_MODE_SW_EN, &priv->fecp->ESW_MODE);
> +
> +	/* Management port configuration, make port 0 as management port */
> +	writel(0, &priv->fecp->ESW_BMPC);
> +
> +	/*
> +	 * Set backpressure threshold to minimize discarded frames
> +	 * during due to congestion.
> +	 */
> +	writel(P0BC_THRESHOLD, &priv->fecp->ESW_P0BCT);
> +
> +	/* Set the max rx buffer size */
> +	writel(L2SW_PKT_MAXBLR_SIZE, priv->hwpsw + MCF_ESW_R_BUFF_SIZE);
> +	/* Enable broadcast on all ports */
> +	writel(0x7, &priv->fecp->ESW_DBCR);
> +
> +	/* Enable multicast on all ports */
> +	writel(0x7, &priv->fecp->ESW_DMCR);
> +
> +	esw_clear_atable(priv);
> +
> +	/* Clear all pending interrupts */
> +	writel(0xffffffff, priv->hwpsw + FEC_IEVENT);
> +
> +	/* Enable interrupts we wish to service */
> +	writel(FEC_MTIP_DEFAULT_IMASK, priv->hwpsw + FEC_IMASK);
> +	priv->l2sw_on = true;
> +
> +	return 0;
> +}
> +
> +static void mtipl2_sw_disable(struct mtipl2sw_priv *priv)
> +{
> +	writel(0, &priv->fecp->ESW_MODE);
> +}
> +
> +static int mtipl2_port_enable (struct mtipl2sw_priv *priv, int port)
> +{
> +	u32 l2_ports_en;
> +
> +	pr_err("%s: PORT ENABLE %d\n", __func__, port);
> +
> +	/* Enable tx/rx on L2 switch ports */
> +	l2_ports_en = readl(&priv->fecp->ESW_PER);
> +	if (!(l2_ports_en & MCF_ESW_ENA_PORT_0))
> +		l2_ports_en = MCF_ESW_ENA_PORT_0;
> +
> +	if (port == 0 && !(l2_ports_en & MCF_ESW_ENA_PORT_1))
> +		l2_ports_en |= MCF_ESW_ENA_PORT_1;
> +
> +	if (port == 1 && !(l2_ports_en & MCF_ESW_ENA_PORT_2))
> +		l2_ports_en |= MCF_ESW_ENA_PORT_2;
> +
> +	writel(l2_ports_en, &priv->fecp->ESW_PER);
> +
> +	/*
> +	 * Check if MAC IP block is already enabled (after switch initializtion)
> +	 * or if we need to enable it after mtipl2_port_disable was called.
> +	 */
> +
> +	return 0;
> +}
> +
> +static void mtipl2_port_disable (struct mtipl2sw_priv *priv, int port)
> +{
> +	u32 l2_ports_en;
> +
> +	pr_err(" %s: PORT DISABLE %d\n", __func__, port);

Please clean up debug code this this.

> +
> +	l2_ports_en = readl(&priv->fecp->ESW_PER);
> +	if (port == 0)
> +		l2_ports_en &= ~MCF_ESW_ENA_PORT_1;
> +
> +	if (port == 1)
> +		l2_ports_en &= ~MCF_ESW_ENA_PORT_2;
> +
> +	/* Finally disable tx/rx on port 0 */
> +	if (!(l2_ports_en & MCF_ESW_ENA_PORT_1) &&
> +	    !(l2_ports_en & MCF_ESW_ENA_PORT_2))
> +		l2_ports_en &= ~MCF_ESW_ENA_PORT_0;
> +
> +	writel(l2_ports_en, &priv->fecp->ESW_PER);
> +}
> +
> +irqreturn_t
> +mtip_l2sw_interrupt_handler(int irq, void *dev_id)
> +{
> +	struct mtipl2sw_priv *priv = dev_id;
> +	struct fec_enet_private *fep = priv->fep[0];
> +	irqreturn_t ret = IRQ_NONE;
> +	u32 int_events, int_imask;

Reverse christmas tree.

> +
> +	int_events = readl(fec_hwp(fep) + FEC_IEVENT);
> +	writel(int_events, fec_hwp(fep) + FEC_IEVENT);
> +
> +	if ((int_events & FEC_MTIP_DEFAULT_IMASK) && fep->link) {
> +		ret = IRQ_HANDLED;
> +
> +		if (napi_schedule_prep(&fep->napi)) {
> +			/* Disable RX interrupts */
> +			int_imask = readl(fec_hwp(fep) + FEC_IMASK);
> +			int_imask &= ~FEC_MTIP_ENET_RXF;
> +			writel(int_imask, fec_hwp(fep) + FEC_IMASK);
> +			__napi_schedule(&fep->napi);
> +		}
> +	}
> +
> +	return ret;
> +}
> +
> +static int mtipl2_parse_of(struct mtipl2sw_priv *priv, struct device_node *np)
> +{
> +	struct device_node *port, *p, *fep_np;
> +	struct platform_device *pdev;
> +	struct net_device *ndev;
> +	unsigned int port_num;
> +
> +	p = of_find_node_by_name(np, "ports");
> +
> +	for_each_available_child_of_node(p, port) {
> +		if (of_property_read_u32(port, "reg", &port_num))
> +			continue;
> +
> +		priv->n_ports = port_num;
> +
> +		fep_np = of_parse_phandle(port, "phy-handle", 0);

As i said, phy-handle points to a phy. It minimum, you need to call
this mac-handle. But that then makes this switch driver very different
to every other switch driver.

> +		pdev = of_find_device_by_node(fep_np);
> +		ndev = platform_get_drvdata(pdev);
> +		priv->fep[port_num - 1] = netdev_priv(ndev);

What happens when somebody puts reg=<42>; in DT?

I would say, your basic structure needs to change, to make it more
like other switchdev drivers. You need to replace the two FEC device
instances with one switchdev driver. The switchdev driver will then
instantiate the two netdevs for the two external MACs. You can
hopefully reuse some of the FEC code for that, but i expect you are
going to have to refactor the FEC driver and turn part of it into a
library, which the switchdev driver can then use.

	 Andrew
Lukasz Majewski June 23, 2021, 11:37 a.m. UTC | #2
Hi Andrew,

> > +static void write_atable(struct mtipl2sw_priv *priv, int index,

> > +	unsigned long write_lo, unsigned long write_hi)

> > +{

> > +	unsigned long atable_base = (unsigned long)priv->hwentry;

> > +

> > +	writel(write_lo, (volatile void *)atable_base + (index <<

> > 3));

> > +	writel(write_hi, (volatile void *)atable_base + (index <<

> > 3) + 4);  

> 

> Using volatile is generally wrong. Why do you need it?


This was the code, which I took from the legacy driver. I will adjust
it.

> 

>  > +}

> > +

> > +/*

> > + * Clear complete MAC Look Up Table

> > + */

> > +static void esw_clear_atable(struct mtipl2sw_priv *priv)

> > +{

> > +	int index;

> > +	for (index = 0; index < 2048; index++)

> > +		write_atable(priv, index, 0, 0);

> > +}

> > +

> > +static int mtipl2_sw_enable(struct mtipl2sw_priv *priv)

> > +{

> > +	/*

> > +	 * L2 switch - reset

> > +	 */

> > +	writel(MCF_ESW_MODE_SW_RST, &priv->fecp->ESW_MODE);

> > +	udelay(10);

> > +

> > +	/* Configure switch*/

> > +	writel(MCF_ESW_MODE_STATRST, &priv->fecp->ESW_MODE);

> > +	writel(MCF_ESW_MODE_SW_EN, &priv->fecp->ESW_MODE);

> > +

> > +	/* Management port configuration, make port 0 as

> > management port */

> > +	writel(0, &priv->fecp->ESW_BMPC);

> > +

> > +	/*

> > +	 * Set backpressure threshold to minimize discarded frames

> > +	 * during due to congestion.

> > +	 */

> > +	writel(P0BC_THRESHOLD, &priv->fecp->ESW_P0BCT);

> > +

> > +	/* Set the max rx buffer size */

> > +	writel(L2SW_PKT_MAXBLR_SIZE, priv->hwpsw +

> > MCF_ESW_R_BUFF_SIZE);

> > +	/* Enable broadcast on all ports */

> > +	writel(0x7, &priv->fecp->ESW_DBCR);

> > +

> > +	/* Enable multicast on all ports */

> > +	writel(0x7, &priv->fecp->ESW_DMCR);

> > +

> > +	esw_clear_atable(priv);

> > +

> > +	/* Clear all pending interrupts */

> > +	writel(0xffffffff, priv->hwpsw + FEC_IEVENT);

> > +

> > +	/* Enable interrupts we wish to service */

> > +	writel(FEC_MTIP_DEFAULT_IMASK, priv->hwpsw + FEC_IMASK);

> > +	priv->l2sw_on = true;

> > +

> > +	return 0;

> > +}

> > +

> > +static void mtipl2_sw_disable(struct mtipl2sw_priv *priv)

> > +{

> > +	writel(0, &priv->fecp->ESW_MODE);

> > +}

> > +

> > +static int mtipl2_port_enable (struct mtipl2sw_priv *priv, int

> > port) +{

> > +	u32 l2_ports_en;

> > +

> > +	pr_err("%s: PORT ENABLE %d\n", __func__, port);

> > +

> > +	/* Enable tx/rx on L2 switch ports */

> > +	l2_ports_en = readl(&priv->fecp->ESW_PER);

> > +	if (!(l2_ports_en & MCF_ESW_ENA_PORT_0))

> > +		l2_ports_en = MCF_ESW_ENA_PORT_0;

> > +

> > +	if (port == 0 && !(l2_ports_en & MCF_ESW_ENA_PORT_1))

> > +		l2_ports_en |= MCF_ESW_ENA_PORT_1;

> > +

> > +	if (port == 1 && !(l2_ports_en & MCF_ESW_ENA_PORT_2))

> > +		l2_ports_en |= MCF_ESW_ENA_PORT_2;

> > +

> > +	writel(l2_ports_en, &priv->fecp->ESW_PER);

> > +

> > +	/*

> > +	 * Check if MAC IP block is already enabled (after switch

> > initializtion)

> > +	 * or if we need to enable it after mtipl2_port_disable

> > was called.

> > +	 */

> > +

> > +	return 0;

> > +}

> > +

> > +static void mtipl2_port_disable (struct mtipl2sw_priv *priv, int

> > port) +{

> > +	u32 l2_ports_en;

> > +

> > +	pr_err(" %s: PORT DISABLE %d\n", __func__, port);  

> 

> Please clean up debug code this this.

> 


Ok.

> > +

> > +	l2_ports_en = readl(&priv->fecp->ESW_PER);

> > +	if (port == 0)

> > +		l2_ports_en &= ~MCF_ESW_ENA_PORT_1;

> > +

> > +	if (port == 1)

> > +		l2_ports_en &= ~MCF_ESW_ENA_PORT_2;

> > +

> > +	/* Finally disable tx/rx on port 0 */

> > +	if (!(l2_ports_en & MCF_ESW_ENA_PORT_1) &&

> > +	    !(l2_ports_en & MCF_ESW_ENA_PORT_2))

> > +		l2_ports_en &= ~MCF_ESW_ENA_PORT_0;

> > +

> > +	writel(l2_ports_en, &priv->fecp->ESW_PER);

> > +}

> > +

> > +irqreturn_t

> > +mtip_l2sw_interrupt_handler(int irq, void *dev_id)

> > +{

> > +	struct mtipl2sw_priv *priv = dev_id;

> > +	struct fec_enet_private *fep = priv->fep[0];

> > +	irqreturn_t ret = IRQ_NONE;

> > +	u32 int_events, int_imask;  

> 

> Reverse christmas tree.


Ok.

> 

> > +

> > +	int_events = readl(fec_hwp(fep) + FEC_IEVENT);

> > +	writel(int_events, fec_hwp(fep) + FEC_IEVENT);

> > +

> > +	if ((int_events & FEC_MTIP_DEFAULT_IMASK) && fep->link) {

> > +		ret = IRQ_HANDLED;

> > +

> > +		if (napi_schedule_prep(&fep->napi)) {

> > +			/* Disable RX interrupts */

> > +			int_imask = readl(fec_hwp(fep) +

> > FEC_IMASK);

> > +			int_imask &= ~FEC_MTIP_ENET_RXF;

> > +			writel(int_imask, fec_hwp(fep) +

> > FEC_IMASK);

> > +			__napi_schedule(&fep->napi);

> > +		}

> > +	}

> > +

> > +	return ret;

> > +}

> > +

> > +static int mtipl2_parse_of(struct mtipl2sw_priv *priv, struct

> > device_node *np) +{

> > +	struct device_node *port, *p, *fep_np;

> > +	struct platform_device *pdev;

> > +	struct net_device *ndev;

> > +	unsigned int port_num;

> > +

> > +	p = of_find_node_by_name(np, "ports");

> > +

> > +	for_each_available_child_of_node(p, port) {

> > +		if (of_property_read_u32(port, "reg", &port_num))

> > +			continue;

> > +

> > +		priv->n_ports = port_num;

> > +

> > +		fep_np = of_parse_phandle(port, "phy-handle", 0);  

> 

> As i said, phy-handle points to a phy. It minimum, you need to call

> this mac-handle. But that then makes this switch driver very different

> to every other switch driver.


Other drivers (DSA for example) use "ethernet" or "link" properties.
Maybe those can be reused?

> 

> > +		pdev = of_find_device_by_node(fep_np);

> > +		ndev = platform_get_drvdata(pdev);

> > +		priv->fep[port_num - 1] = netdev_priv(ndev);  

> 

> What happens when somebody puts reg=<42>; in DT?


I do guess that this will break the code.

However, DSA DT descriptions also rely on the exact numbering [1] (via
e.g. reg property) of the ports. I've followed this paradigm.

> 

> I would say, your basic structure needs to change, to make it more

> like other switchdev drivers. You need to replace the two FEC device

> instances with one switchdev driver.


I've used the cpsw_new.c as the example.

> The switchdev driver will then

> instantiate the two netdevs for the two external MACs.


Then there is a question - what about eth[01], which already exists?

Shall I close them and then reuse (create as a new one?) eth0 to be
connected to switch port0 (via DMA0)?

Then, I do need two net_device ports, which would only control PHY
device and setup ENET-MAC for rmii, as the L2 switch will provide data
for transmission. Those two ports are connected to switch's port[12]
and look very similar to ports created by DSA driver (but shall not
transmit and receive data).

Maybe I've overlooked something, but the rocker switchdev driver
(rocker_main.c) sets netdev_ops (with .ndo_start_xmit) for ports which
it creates. The prestera's prestera_sdma_xmit() (in prestera_rxtx.c)
also setups the SDMA for those.

In i.MX L2 switch case - one just needs to setup DMA0 to send data to
engress ports. IMHO, those ports just need to handle PHY link (up/down
10/100 change) and statistics.

> You can

> hopefully reuse some of the FEC code for that, but i expect you are

> going to have to refactor the FEC driver and turn part of it into a

> library, which the switchdev driver can then use.


To be honest - such driver for L2 switch already has been forward
ported by me [2] to v4.19.

It has the L2 switch enabled after the boot and there is no way to
disable it.

When you look on the code - it is a copy-paste of the FEC driver, with
some necessary adjustments. 

The FEC driver itself is large and used by almost _ALL_ i.MX SoCs.
Turning it into library will move the already working code around. I
wanted to avoid it.

The idea behind this patch series is as follows (to offload bridging to
MTIP L2 switch IP block):
-------------------------
1. When bridge is created disable eth0 and eth1 (fec_close)

2. Set a flag for fec driver, so DMA descriptors registers and ones to
initiate transfer are adjusted for DMA0 (eth0) device. Also L2 switch
IP block has different bits positions for interrupts.

3. DMA1 - which with normal setup corresponds to eth1 - is not used.

4. FEC driver also monitors PHY changes (link up/down speed change
10/100) (for both eth0, eth1).

5. When br0 is deleted - the mtip support flag is cleared and FEC
network interfaces (eth[01]) are opened again (via fec_open).

With above approach many operations are already performed - like
ENET-MAC setup, buffers allocation, etc.
To make L2 switch working with this setup we need to re-map 4 registers
and two interrupt bits in fec_main driver.

I'm just wondering if is it worth to refactor already working driver to
library, instantiate new interfaces and re-init all the already
initialized stuff ?


> 

> 	 Andrew



Links:

[1] -
https://elixir.bootlin.com/linux/latest/source/arch/arm/boot/dts/armada-388-clearfog.dts#L93

[2] - https://github.com/lmajewski/linux-imx28-l2switch/commits/master

Best regards,

Lukasz Majewski

--

DENX Software Engineering GmbH,      Managing Director: Wolfgang Denk
HRB 165235 Munich, Office: Kirchenstr.5, D-82194 Groebenzell, Germany
Phone: (+49)-8142-66989-59 Fax: (+49)-8142-66989-80 Email: lukma@denx.de
Andrew Lunn June 23, 2021, 8:01 p.m. UTC | #3
> > Using volatile is generally wrong. Why do you need it?

> 

> This was the code, which I took from the legacy driver. I will adjust

> it.


It is called 'vendor crap' for a reason.

> > > +	for_each_available_child_of_node(p, port) {

> > > +		if (of_property_read_u32(port, "reg", &port_num))

> > > +			continue;

> > > +

> > > +		priv->n_ports = port_num;

> > > +

> > > +		fep_np = of_parse_phandle(port, "phy-handle", 0);  

> > 

> > As i said, phy-handle points to a phy. It minimum, you need to call

> > this mac-handle. But that then makes this switch driver very different

> > to every other switch driver.

> 

> Other drivers (DSA for example) use "ethernet" or "link" properties.

> Maybe those can be reused?


Not really. They have well known meanings and they are nothing like
what you are trying to do. You need a new name. Maybe 'mac-handle'?


> > > +		pdev = of_find_device_by_node(fep_np);

> > > +		ndev = platform_get_drvdata(pdev);

> > > +		priv->fep[port_num - 1] = netdev_priv(ndev);  

> > 

> > What happens when somebody puts reg=<42>; in DT?

> 

> I do guess that this will break the code.

> 

> However, DSA DT descriptions also rely on the exact numbering [1] (via

> e.g. reg property) of the ports. I've followed this paradigm.


DSA does a range check:

        for_each_available_child_of_node(ports, port) {
                err = of_property_read_u32(port, "reg", &reg);
                if (err)
                        goto out_put_node;

                if (reg >= ds->num_ports) {
                        dev_err(ds->dev, "port %pOF index %u exceeds num_ports (%zu)\n",
                                port, reg, ds->num_ports);
                        err = -EINVAL;
                        goto out_put_node;
                }

> > I would say, your basic structure needs to change, to make it more

> > like other switchdev drivers. You need to replace the two FEC device

> > instances with one switchdev driver.

> 

> I've used the cpsw_new.c as the example.

> 

> > The switchdev driver will then

> > instantiate the two netdevs for the two external MACs.

> 

> Then there is a question - what about eth[01], which already exists?


They don't exist. cpsw_new is not used at the same time as cpsw, it
replaces it. This new driver would replace the FEC driver. cpsw_new
makes use of some of the code in the cpsw driver to implement two
netdevs. This new FEC switch driver would do the same, make use of
some of the low level code, e.g. for DMA access, MDIO bus, etc.

> To be honest - such driver for L2 switch already has been forward

> ported by me [2] to v4.19.


Which is fine, you can do whatever you want in your own fork. But for
mainline, we need a clean architecture. I'm not convinced your code is
that clean, and how well future features can be added. Do you have
support for VLANS? Adding and removing entries to the lookup tables?
How will IGMP snooping work? How will STP work?

    Andrew
Lukasz Majewski June 24, 2021, 10:53 a.m. UTC | #4
Hi Andrew,

> > > Using volatile is generally wrong. Why do you need it?  

> > 

> > This was the code, which I took from the legacy driver. I will

> > adjust it.  

> 

> It is called 'vendor crap' for a reason.


:-)

> 

> > > > +	for_each_available_child_of_node(p, port) {

> > > > +		if (of_property_read_u32(port, "reg",

> > > > &port_num))

> > > > +			continue;

> > > > +

> > > > +		priv->n_ports = port_num;

> > > > +

> > > > +		fep_np = of_parse_phandle(port, "phy-handle",

> > > > 0);    

> > > 

> > > As i said, phy-handle points to a phy. It minimum, you need to

> > > call this mac-handle. But that then makes this switch driver very

> > > different to every other switch driver.  

> > 

> > Other drivers (DSA for example) use "ethernet" or "link" properties.

> > Maybe those can be reused?  

> 

> Not really. They have well known meanings and they are nothing like

> what you are trying to do. You need a new name. Maybe 'mac-handle'?


Ok.

> 

> 

> > > > +		pdev = of_find_device_by_node(fep_np);

> > > > +		ndev = platform_get_drvdata(pdev);

> > > > +		priv->fep[port_num - 1] = netdev_priv(ndev);

> > > >  

> > > 

> > > What happens when somebody puts reg=<42>; in DT?  

> > 

> > I do guess that this will break the code.

> > 

> > However, DSA DT descriptions also rely on the exact numbering [1]

> > (via e.g. reg property) of the ports. I've followed this paradigm.  

> 

> DSA does a range check:

> 

>         for_each_available_child_of_node(ports, port) {

>                 err = of_property_read_u32(port, "reg", &reg);

>                 if (err)

>                         goto out_put_node;

> 

>                 if (reg >= ds->num_ports) {

>                         dev_err(ds->dev, "port %pOF index %u exceeds

> num_ports (%zu)\n", port, reg, ds->num_ports);

>                         err = -EINVAL;

>                         goto out_put_node;

>                 }

> 


Ok.

> > > I would say, your basic structure needs to change, to make it more

> > > like other switchdev drivers. You need to replace the two FEC

> > > device instances with one switchdev driver.  

> > 

> > I've used the cpsw_new.c as the example.

> >   

> > > The switchdev driver will then

> > > instantiate the two netdevs for the two external MACs.  

> > 

> > Then there is a question - what about eth[01], which already

> > exists?  

> 

> They don't exist. cpsw_new is not used at the same time as cpsw, it

> replaces it. This new driver would replace the FEC driver.


I see.

> cpsw_new

> makes use of some of the code in the cpsw driver to implement two

> netdevs. This new FEC switch driver would do the same, make use of

> some of the low level code, e.g. for DMA access, MDIO bus, etc.


I'm not sure if the imx28 switch is similar to one from TI (cpsw-3g)
- it looks to me that the bypass mode for both seems to be very
different. For example, on NXP when switch is disabled we need to
handle two DMA[01]. When it is enabled, only one is used. The approach
with two DMAs is best handled with FEC driver instantiation.

> 

> > To be honest - such driver for L2 switch already has been forward

> > ported by me [2] to v4.19.  

> 

> Which is fine, you can do whatever you want in your own fork. But for

> mainline, we need a clean architecture.


This code is a forward port of vendor's (Freescale) old driver. It uses
the _wrong_ approach, but it can (still) be used in production after
some adjustments.

> I'm not convinced your code is

> that clean,


The code from [2] needs some vendor ioctl based tool (or hardcode) to
configure the switch. 

> and how well future features can be added. Do you have

> support for VLANS? Adding and removing entries to the lookup tables?

> How will IGMP snooping work? How will STP work?


This can be easily added with serving netstack hooks (as it is already
done with cpsw_new) in the new switchdev based version [3] (based on
v5.12).

> 

>     Andrew


Links:

[3] -
https://source.denx.de/linux/linux-imx28-l2switch/-/commits/imx28-v5.12-L2-upstream-switchdev-RFC_v1


Best regards,

Lukasz Majewski

--

DENX Software Engineering GmbH,      Managing Director: Wolfgang Denk
HRB 165235 Munich, Office: Kirchenstr.5, D-82194 Groebenzell, Germany
Phone: (+49)-8142-66989-59 Fax: (+49)-8142-66989-80 Email: lukma@denx.de
Andrew Lunn June 24, 2021, 1:34 p.m. UTC | #5
> I'm not sure if the imx28 switch is similar to one from TI (cpsw-3g)

> - it looks to me that the bypass mode for both seems to be very

> different. For example, on NXP when switch is disabled we need to

> handle two DMA[01]. When it is enabled, only one is used. The approach

> with two DMAs is best handled with FEC driver instantiation.


I don't know if it applies to the FEC, but switches often have
registers which control which egress port an ingress port can send
packets to. So by default, you allow CPU to port0, CPU to port1, but
block between port0 to port1. This would give you two independent
interface, the switch enabled, and using one DMA. When the bridge is
configured, you simply allow port0 and send/receive packets to/from
port1. No change to the DMA setup, etc.

> The code from [2] needs some vendor ioctl based tool (or hardcode) to

> configure the switch. 


This would not be allowed. You configure switches in Linux using the
existing user space tools. No vendor tools are used.

> > and how well future features can be added. Do you have

> > support for VLANS? Adding and removing entries to the lookup tables?

> > How will IGMP snooping work? How will STP work?

> 

> This can be easily added with serving netstack hooks (as it is already

> done with cpsw_new) in the new switchdev based version [3] (based on

> v5.12).


Here i'm less convinced. I expect a fully functioning switch driver is
going to need switch specific versions of some of the netdev ops
functions, maybe the ethtool ops as well. It is going to want to add
devlink ops. By hacking around with the FEC driver in the way you are,
you might get very basic switch operation working. But as we have seen
with cpsw, going from very basic to a fully functioning switchdev
driver required a new driver, cpsw_new. It was getting more and more
difficult to add features because its structure was just wrong. We
don't want to add code to the kernel which is probably a dead end.

      Andrew
Lukasz Majewski June 24, 2021, 2:35 p.m. UTC | #6
Hi Andrew,

> > I'm not sure if the imx28 switch is similar to one from TI (cpsw-3g)

> > - it looks to me that the bypass mode for both seems to be very

> > different. For example, on NXP when switch is disabled we need to

> > handle two DMA[01]. When it is enabled, only one is used. The

> > approach with two DMAs is best handled with FEC driver

> > instantiation.  

> 

> I don't know if it applies to the FEC, but switches often have

> registers which control which egress port an ingress port can send

> packets to. So by default, you allow CPU to port0, CPU to port1, but

> block between port0 to port1. This would give you two independent

> interface, the switch enabled, and using one DMA. When the bridge is

> configured, you simply allow port0 and send/receive packets to/from

> port1. No change to the DMA setup, etc.


Please correct me if I misunderstood this concept - but it seems like
you refer to the use case where the switch is enabled, and by changing
it's "allowed internal port's" mapping it decides if frames are passed
between engress ports (port1 and port2).

	----------
DMA0 ->	|P0    P1| -> ENET-MAC (PHY control) -> eth0 (lan1)
	|L2 SW	 |
	|      P2| -> ENET-MAC (PHY control) -> eth1 (lan2)
	----------

DMA1 (not used)

We can use this approach when we keep always enabled L2 switch.

However now in FEC we use the "bypass" mode, where:
DMA0 -> ENET-MAC (FEC instance driver 1) -> eth0
DMA1 -> ENET-MAC (FEC instance driver 2) -> eth1

And the "bypass" mode is the default one.


I'm just concerned how we are going to gracefully "switch" between L2
switch and bypass configuration? In this patch series - I used the
"hook" corresponding to 'ip link set eth[01] master br0' command.

In other words - how we want to manage DMA0 and DMA1 when switch is
enabled and disabled (in "bypass mode").

> 

> > The code from [2] needs some vendor ioctl based tool (or hardcode)

> > to configure the switch.   

> 

> This would not be allowed. You configure switches in Linux using the

> existing user space tools. No vendor tools are used.


Exactly - that was the rationale to bring support for L2 switch to
mainline kernel.

> 

> > > and how well future features can be added. Do you have

> > > support for VLANS? Adding and removing entries to the lookup

> > > tables? How will IGMP snooping work? How will STP work?  

> > 

> > This can be easily added with serving netstack hooks (as it is

> > already done with cpsw_new) in the new switchdev based version [3]

> > (based on v5.12).  

> 

> Here i'm less convinced. I expect a fully functioning switch driver is

> going to need switch specific versions of some of the netdev ops

> functions, maybe the ethtool ops as well. 


Definately, the current L2 switch driver would need more work.

> It is going to want to add

> devlink ops. By hacking around with the FEC driver 


I believe that I will not touch fec_main.[hc] files more than I did in
the 
"[RFC 3/3] net: imx: Adjust fec_main.c to provide support for L2
switch"

as the switch management (and hooks) are going to be added solely to 
drivers/net/ethernet/freescale/mtipsw/fec_mtip.[hc]. [*]

This would separate L2 switch driver from the current FEC driver.

> in the way you are,

> you might get very basic switch operation working. 


Yes, this is the current status - only simple L2 switching works.

> But as we have seen

> with cpsw, going from very basic to a fully functioning switchdev

> driver required a new driver, cpsw_new.


The new driver for L2 switch has been introduced in [*]. The legacy FEC
driver will also work without it.

> It was getting more and more

> difficult to add features because its structure was just wrong. We

> don't want to add code to the kernel which is probably a dead end.

> 


I cannot say for sure, but all the switch/bridge related hooks can be
added to [*], so fec_main will not bloat.

>       Andrew





Best regards,

Lukasz Majewski

--

DENX Software Engineering GmbH,      Managing Director: Wolfgang Denk
HRB 165235 Munich, Office: Kirchenstr.5, D-82194 Groebenzell, Germany
Phone: (+49)-8142-66989-59 Fax: (+49)-8142-66989-80 Email: lukma@denx.de
Andrew Lunn June 24, 2021, 4:11 p.m. UTC | #7
On Thu, Jun 24, 2021 at 04:35:42PM +0200, Lukasz Majewski wrote:
> Hi Andrew,

> 

> > > I'm not sure if the imx28 switch is similar to one from TI (cpsw-3g)

> > > - it looks to me that the bypass mode for both seems to be very

> > > different. For example, on NXP when switch is disabled we need to

> > > handle two DMA[01]. When it is enabled, only one is used. The

> > > approach with two DMAs is best handled with FEC driver

> > > instantiation.  

> > 

> > I don't know if it applies to the FEC, but switches often have

> > registers which control which egress port an ingress port can send

> > packets to. So by default, you allow CPU to port0, CPU to port1, but

> > block between port0 to port1. This would give you two independent

> > interface, the switch enabled, and using one DMA. When the bridge is

> > configured, you simply allow port0 and send/receive packets to/from

> > port1. No change to the DMA setup, etc.

> 

> Please correct me if I misunderstood this concept - but it seems like

> you refer to the use case where the switch is enabled, and by changing

> it's "allowed internal port's" mapping it decides if frames are passed

> between engress ports (port1 and port2).


Correct.


> 	----------

> DMA0 ->	|P0    P1| -> ENET-MAC (PHY control) -> eth0 (lan1)

> 	|L2 SW	 |

> 	|      P2| -> ENET-MAC (PHY control) -> eth1 (lan2)

> 	----------

> 

> DMA1 (not used)

> 

> We can use this approach when we keep always enabled L2 switch.

> 

> However now in FEC we use the "bypass" mode, where:

> DMA0 -> ENET-MAC (FEC instance driver 1) -> eth0

> DMA1 -> ENET-MAC (FEC instance driver 2) -> eth1

> 

> And the "bypass" mode is the default one.


Which is not a problem, when you refactor the FEC into a library and a
driver, plus add a new switch driver. When the FEC loads, it uses
bypass mode, the switch disabled. When the new switch driver loads, it
always enables the switch, but disables communication between the two
ports until they both join the same bridge.

But i doubt we are actually getting anywhere. You say you don't have
time to write a new driver. I'm not convinced you can hack the FEC
like you are suggesting and not end up in the mess the cpsw had,
before they wrote a new driver.

       Andrew
Lukasz Majewski June 25, 2021, 9:59 a.m. UTC | #8
Hi Andrew,

> On Thu, Jun 24, 2021 at 04:35:42PM +0200, Lukasz Majewski wrote:

> > Hi Andrew,

> >   

> > > > I'm not sure if the imx28 switch is similar to one from TI

> > > > (cpsw-3g)

> > > > - it looks to me that the bypass mode for both seems to be very

> > > > different. For example, on NXP when switch is disabled we need

> > > > to handle two DMA[01]. When it is enabled, only one is used. The

> > > > approach with two DMAs is best handled with FEC driver

> > > > instantiation.    

> > > 

> > > I don't know if it applies to the FEC, but switches often have

> > > registers which control which egress port an ingress port can send

> > > packets to. So by default, you allow CPU to port0, CPU to port1,

> > > but block between port0 to port1. This would give you two

> > > independent interface, the switch enabled, and using one DMA.

> > > When the bridge is configured, you simply allow port0 and

> > > send/receive packets to/from port1. No change to the DMA setup,

> > > etc.  

> > 

> > Please correct me if I misunderstood this concept - but it seems

> > like you refer to the use case where the switch is enabled, and by

> > changing it's "allowed internal port's" mapping it decides if

> > frames are passed between engress ports (port1 and port2).  

> 

> Correct.

> 

> 

> > 	----------

> > DMA0 ->	|P0    P1| -> ENET-MAC (PHY control) -> eth0 (lan1)

> > 	|L2 SW	 |

> > 	|      P2| -> ENET-MAC (PHY control) -> eth1 (lan2)

> > 	----------

> > 

> > DMA1 (not used)

> > 

> > We can use this approach when we keep always enabled L2 switch.

> > 

> > However now in FEC we use the "bypass" mode, where:

> > DMA0 -> ENET-MAC (FEC instance driver 1) -> eth0

> > DMA1 -> ENET-MAC (FEC instance driver 2) -> eth1

> > 

> > And the "bypass" mode is the default one.  

> 

> Which is not a problem, when you refactor the FEC into a library and a

> driver, plus add a new switch driver. When the FEC loads, it uses

> bypass mode, the switch disabled. When the new switch driver loads, it

> always enables the switch, but disables communication between the two

> ports until they both join the same bridge.


Ok, the proposed idea would be to use FEC (refactored) on devices which
are not equipped with the switch.

On devices, which have this IP block (like vf610, imx287) we would use
the driver with switch enabled and then in switch either bridge or
separate the traffic?

> 

> But i doubt we are actually getting anywhere. You say you don't have

> time to write a new driver.


Yes, I believe that this would be a very time consuming task. Joakim
also pointed out that the rewrite from NXP will not happen anytime soon.

> I'm not convinced you can hack the FEC

> like you are suggesting 


I do believe that I can just extend the L2 switch driver (fec_mtip.c
file to be precise) to provide full blown L2 switch functionality
without touching the legacy FEC more than in this patch set.

Would you consider applying this patch series then?

> and not end up in the mess the cpsw had,

> before they wrote a new driver.


I do see some conceptual differences between those two drivers.

> 

>        Andrew



Best regards,

Lukasz Majewski

--

DENX Software Engineering GmbH,      Managing Director: Wolfgang Denk
HRB 165235 Munich, Office: Kirchenstr.5, D-82194 Groebenzell, Germany
Phone: (+49)-8142-66989-59 Fax: (+49)-8142-66989-80 Email: lukma@denx.de
Andrew Lunn June 25, 2021, 2:40 p.m. UTC | #9
> I do believe that I can just extend the L2 switch driver (fec_mtip.c

> file to be precise) to provide full blown L2 switch functionality

> without touching the legacy FEC more than in this patch set.

> 

> Would you consider applying this patch series then?


What is most important is the ABI. If something is merged now, we need
to ensure it does not block later refactoring to a clean new
driver. The DT binding is considered ABI. So the DT binding needs to
be like a traditional switchdev driver. Florian already pointed out,
you can use a binding very similar to DSA. ti,cpsw-switch.yaml is
another good example.

So before considering merging your changes, i would like to see a
usable binding.

I also don't remember seeing support for STP. Without that, your
network has broadcast storm problems when there are loops. So i would
like to see the code needed to put ports into blocking, listening,
learning, and forwarding states.

	  Andrew
Lukasz Majewski June 28, 2021, 12:05 p.m. UTC | #10
Hi Andrew,

> > I do believe that I can just extend the L2 switch driver (fec_mtip.c

> > file to be precise) to provide full blown L2 switch functionality

> > without touching the legacy FEC more than in this patch set.

> > 

> > Would you consider applying this patch series then?  

> 

> What is most important is the ABI. If something is merged now, we need

> to ensure it does not block later refactoring to a clean new

> driver. The DT binding is considered ABI. So the DT binding needs to

> be like a traditional switchdev driver. Florian already pointed out,

> you can use a binding very similar to DSA. ti,cpsw-switch.yaml is

> another good example.


The best I could get would be:

&eth_switch {
	compatible = "imx,mtip-l2switch";
	reg = <0x800f8000 0x400>, <0x800fC000 0x4000>;

	interrupts = <100>;
	status = "okay";

	ethernet-ports {
		port1@1 {
			reg = <1>;
			label = "eth0";
			phys = <&mac0 0>;
		};

		port2@2 {
			reg = <2>;
			label = "eth1";
			phys = <&mac1 1>;
		};
	};
};

Which would abuse the "phys" properties usages - as 'mac[01]' are
referring to ethernet controllers.

On TI SoCs (e.g. am33xx-l4.dtsi) phys refer to some separate driver
responsible for PHY management. On NXP this is integrated with FEC
driver itself.

> 

> So before considering merging your changes, i would like to see a

> usable binding.

> 

> I also don't remember seeing support for STP. Without that, your

> network has broadcast storm problems when there are loops. So i would

> like to see the code needed to put ports into blocking, listening,

> learning, and forwarding states.

> 

> 	  Andrew





Best regards,

Lukasz Majewski

--

DENX Software Engineering GmbH,      Managing Director: Wolfgang Denk
HRB 165235 Munich, Office: Kirchenstr.5, D-82194 Groebenzell, Germany
Phone: (+49)-8142-66989-59 Fax: (+49)-8142-66989-80 Email: lukma@denx.de
Vladimir Oltean June 28, 2021, 12:48 p.m. UTC | #11
On Mon, Jun 28, 2021 at 02:05:26PM +0200, Lukasz Majewski wrote:
> Hi Andrew,

>

> > > I do believe that I can just extend the L2 switch driver (fec_mtip.c

> > > file to be precise) to provide full blown L2 switch functionality

> > > without touching the legacy FEC more than in this patch set.

> > >

> > > Would you consider applying this patch series then?

> >

> > What is most important is the ABI. If something is merged now, we need

> > to ensure it does not block later refactoring to a clean new

> > driver. The DT binding is considered ABI. So the DT binding needs to

> > be like a traditional switchdev driver. Florian already pointed out,

> > you can use a binding very similar to DSA. ti,cpsw-switch.yaml is

> > another good example.

>

> The best I could get would be:

>

> &eth_switch {

> 	compatible = "imx,mtip-l2switch";

> 	reg = <0x800f8000 0x400>, <0x800fC000 0x4000>;

>

> 	interrupts = <100>;

> 	status = "okay";

>

> 	ethernet-ports {

> 		port1@1 {

> 			reg = <1>;

> 			label = "eth0";

> 			phys = <&mac0 0>;

> 		};

>

> 		port2@2 {

> 			reg = <2>;

> 			label = "eth1";

> 			phys = <&mac1 1>;

> 		};

> 	};

> };

>

> Which would abuse the "phys" properties usages - as 'mac[01]' are

> referring to ethernet controllers.

>

> On TI SoCs (e.g. am33xx-l4.dtsi) phys refer to some separate driver

> responsible for PHY management. On NXP this is integrated with FEC

> driver itself.


If we were really honest, the binding would need to be called

port@0 {
	puppet = <&mac0>;
};

port@1 {
	puppet = <&mac1>;
};

which speaks for itself as to why accepting "puppet master" drivers is
not really very compelling. I concur with the recommendation given by
Andrew and Florian to refactor FEC as a multi-port single driver.

> >

> > So before considering merging your changes, i would like to see a

> > usable binding.

> >

> > I also don't remember seeing support for STP. Without that, your

> > network has broadcast storm problems when there are loops. So i would

> > like to see the code needed to put ports into blocking, listening,

> > learning, and forwarding states.

> >

> > 	  Andrew


I cannot stress enough how important it is for us to see STP support and
consequently the ndo_start_xmit procedure for switch ports.
Let me see if I understand correctly. When the switch is enabled, eth0
sends packets towards both physical switch ports, and eth1 sends packets
towards none, but eth0 handles the link state of switch port 0, and eth1
handles the link state of switch port 1?
Andrew Lunn June 28, 2021, 1:23 p.m. UTC | #12
> The best I could get would be:

> 

> &eth_switch {

> 	compatible = "imx,mtip-l2switch";

> 	reg = <0x800f8000 0x400>, <0x800fC000 0x4000>;

> 

> 	interrupts = <100>;

> 	status = "okay";

> 

> 	ethernet-ports {

> 		port1@1 {

> 			reg = <1>;

> 			label = "eth0";

> 			phys = <&mac0 0>;

> 		};

> 

> 		port2@2 {

> 			reg = <2>;

> 			label = "eth1";

> 			phys = <&mac1 1>;

> 		};

> 	};

> };

> 

> Which would abuse the "phys" properties usages - as 'mac[01]' are

> referring to ethernet controllers.


This is not how a dedicated driver would have its binding. We should
not establish this as ABI.

So, sorry, but no.

    Andrew
Lukasz Majewski June 28, 2021, 2:13 p.m. UTC | #13
Hi Vladimir,

> On Mon, Jun 28, 2021 at 02:05:26PM +0200, Lukasz Majewski wrote:

> > Hi Andrew,

> >  

> > > > I do believe that I can just extend the L2 switch driver

> > > > (fec_mtip.c file to be precise) to provide full blown L2 switch

> > > > functionality without touching the legacy FEC more than in this

> > > > patch set.

> > > >

> > > > Would you consider applying this patch series then?  

> > >

> > > What is most important is the ABI. If something is merged now, we

> > > need to ensure it does not block later refactoring to a clean new

> > > driver. The DT binding is considered ABI. So the DT binding needs

> > > to be like a traditional switchdev driver. Florian already

> > > pointed out, you can use a binding very similar to DSA.

> > > ti,cpsw-switch.yaml is another good example.  

> >

> > The best I could get would be:

> >

> > &eth_switch {

> > 	compatible = "imx,mtip-l2switch";

> > 	reg = <0x800f8000 0x400>, <0x800fC000 0x4000>;

> >

> > 	interrupts = <100>;

> > 	status = "okay";

> >

> > 	ethernet-ports {

> > 		port1@1 {

> > 			reg = <1>;

> > 			label = "eth0";

> > 			phys = <&mac0 0>;

> > 		};

> >

> > 		port2@2 {

> > 			reg = <2>;

> > 			label = "eth1";

> > 			phys = <&mac1 1>;

> > 		};

> > 	};

> > };

> >

> > Which would abuse the "phys" properties usages - as 'mac[01]' are

> > referring to ethernet controllers.

> >

> > On TI SoCs (e.g. am33xx-l4.dtsi) phys refer to some separate driver

> > responsible for PHY management. On NXP this is integrated with FEC

> > driver itself.  

> 

> If we were really honest, the binding would need to be called

> 

> port@0 {

> 	puppet = <&mac0>;

> };

> 

> port@1 {

> 	puppet = <&mac1>;

> };

> 

> which speaks for itself as to why accepting "puppet master" drivers is

> not really very compelling. I concur with the recommendation given by

> Andrew and Florian to refactor FEC as a multi-port single driver.


Ok.

> 

> > >

> > > So before considering merging your changes, i would like to see a

> > > usable binding.

> > >

> > > I also don't remember seeing support for STP. Without that, your

> > > network has broadcast storm problems when there are loops. So i

> > > would like to see the code needed to put ports into blocking,

> > > listening, learning, and forwarding states.

> > >

> > > 	  Andrew  

> 

> I cannot stress enough how important it is for us to see STP support

> and consequently the ndo_start_xmit procedure for switch ports.


Ok.

> Let me see if I understand correctly. When the switch is enabled, eth0

> sends packets towards both physical switch ports, and eth1 sends

> packets towards none, but eth0 handles the link state of switch port

> 0, and eth1 handles the link state of switch port 1?


Exactly, this is how FEC driver is utilized for this switch. 


Best regards,

Lukasz Majewski

--

DENX Software Engineering GmbH,      Managing Director: Wolfgang Denk
HRB 165235 Munich, Office: Kirchenstr.5, D-82194 Groebenzell, Germany
Phone: (+49)-8142-66989-59 Fax: (+49)-8142-66989-80 Email: lukma@denx.de
Lukasz Majewski June 28, 2021, 2:14 p.m. UTC | #14
Hi Andrew,

> > The best I could get would be:

> > 

> > &eth_switch {

> > 	compatible = "imx,mtip-l2switch";

> > 	reg = <0x800f8000 0x400>, <0x800fC000 0x4000>;

> > 

> > 	interrupts = <100>;

> > 	status = "okay";

> > 

> > 	ethernet-ports {

> > 		port1@1 {

> > 			reg = <1>;

> > 			label = "eth0";

> > 			phys = <&mac0 0>;

> > 		};

> > 

> > 		port2@2 {

> > 			reg = <2>;

> > 			label = "eth1";

> > 			phys = <&mac1 1>;

> > 		};

> > 	};

> > };

> > 

> > Which would abuse the "phys" properties usages - as 'mac[01]' are

> > referring to ethernet controllers.  

> 

> This is not how a dedicated driver would have its binding. We should

> not establish this as ABI.

> 

> So, sorry, but no.


Thanks for the clear statement about upstream requirements.

> 

>     Andrew





Best regards,

Lukasz Majewski

--

DENX Software Engineering GmbH,      Managing Director: Wolfgang Denk
HRB 165235 Munich, Office: Kirchenstr.5, D-82194 Groebenzell, Germany
Phone: (+49)-8142-66989-59 Fax: (+49)-8142-66989-80 Email: lukma@denx.de
Vladimir Oltean June 28, 2021, 2:23 p.m. UTC | #15
On Mon, Jun 28, 2021 at 04:13:14PM +0200, Lukasz Majewski wrote:
> > > > So before considering merging your changes, i would like to see a

> > > > usable binding.

> > > >

> > > > I also don't remember seeing support for STP. Without that, your

> > > > network has broadcast storm problems when there are loops. So i

> > > > would like to see the code needed to put ports into blocking,

> > > > listening, learning, and forwarding states.

> > > >

> > > > 	  Andrew

> >

> > I cannot stress enough how important it is for us to see STP support

> > and consequently the ndo_start_xmit procedure for switch ports.

>

> Ok.

>

> > Let me see if I understand correctly. When the switch is enabled, eth0

> > sends packets towards both physical switch ports, and eth1 sends

> > packets towards none, but eth0 handles the link state of switch port

> > 0, and eth1 handles the link state of switch port 1?

>

> Exactly, this is how FEC driver is utilized for this switch.


This is a much bigger problem than anything which has to do with code
organization. Linux does not have any sort of support for unmanaged
switches. Please try to find out if your switch is supposed to be able
to be managed (run control protocols on the CPU). If not, well, I don't
know what to suggest.
Lukasz Majewski June 29, 2021, 8:09 a.m. UTC | #16
Hi Vladimir,

> On Mon, Jun 28, 2021 at 04:13:14PM +0200, Lukasz Majewski wrote:

> > > > > So before considering merging your changes, i would like to

> > > > > see a usable binding.

> > > > >

> > > > > I also don't remember seeing support for STP. Without that,

> > > > > your network has broadcast storm problems when there are

> > > > > loops. So i would like to see the code needed to put ports

> > > > > into blocking, listening, learning, and forwarding states.

> > > > >

> > > > > 	  Andrew  

> > >

> > > I cannot stress enough how important it is for us to see STP

> > > support and consequently the ndo_start_xmit procedure for switch

> > > ports.  

> >

> > Ok.

> >  

> > > Let me see if I understand correctly. When the switch is enabled,

> > > eth0 sends packets towards both physical switch ports, and eth1

> > > sends packets towards none, but eth0 handles the link state of

> > > switch port 0, and eth1 handles the link state of switch port 1?  

> >

> > Exactly, this is how FEC driver is utilized for this switch.  

> 

> This is a much bigger problem than anything which has to do with code

> organization. Linux does not have any sort of support for unmanaged

> switches.


My impression is similar. This switch cannot easily fit into DSA (lack
of appending tags) nor to switchdev.

The latter is caused by two modes of operation:

- Bypass mode (no switch) -> DMA1 and DMA0 are used
- Switch mode -> only DMA0 is used


Moreover, from my understanding of the CPSW - looks like it uses always
just a single DMA, and the switching seems to be the default operation
for two ethernet ports.

The "bypass mode" from NXP's L2 switch seems to be achieved inside the
CPSW switch, by configuring it to not pass packets between those ports.

> Please try to find out if your switch is supposed to be able

> to be managed (run control protocols on the CPU).


It can support all the "normal" set of L2 switch features:

- VLANs, lookup table (with learning), filtering and forwarding
  (Multicast, Broadcast, Unicast), priority queues, IP snooping, etc.

Frames for BPDU are recognized by the switch and can be used to
implement support for RSTP. However, this switch has a separate address
space (not covered and accessed by FEC address).

> If not, well, I

> don't know what to suggest.


For me it looks like the NXP's L2 switch shall be treated _just_ as
offloading IP block to accelerate switching (NXP already support
dpaa[2] for example).

The idea with having it configured on demand, when:
ip link add name br0 type bridge; ip link set br0 up;
ip link set eth0 master br0;
ip link set eth1 master br0;

Seems to be a reasonable one. In the above scenario it would work hand
by hand with FEC drivers (as those would handle PHY communication
setup and link up/down events).

It would be welcome if the community could come up with some rough idea
how to proceed with this IP block support (especially that for example
imx287 is used in many embedded devices and is going to be in active
production for next 10+ years).


Best regards,

Lukasz Majewski

--

DENX Software Engineering GmbH,      Managing Director: Wolfgang Denk
HRB 165235 Munich, Office: Kirchenstr.5, D-82194 Groebenzell, Germany
Phone: (+49)-8142-66989-59 Fax: (+49)-8142-66989-80 Email: lukma@denx.de
Vladimir Oltean June 29, 2021, 9:30 a.m. UTC | #17
On Tue, Jun 29, 2021 at 10:09:37AM +0200, Lukasz Majewski wrote:
> Hi Vladimir,

>

> > On Mon, Jun 28, 2021 at 04:13:14PM +0200, Lukasz Majewski wrote:

> > > > > > So before considering merging your changes, i would like to

> > > > > > see a usable binding.

> > > > > >

> > > > > > I also don't remember seeing support for STP. Without that,

> > > > > > your network has broadcast storm problems when there are

> > > > > > loops. So i would like to see the code needed to put ports

> > > > > > into blocking, listening, learning, and forwarding states.

> > > > > >

> > > > > > 	  Andrew

> > > >

> > > > I cannot stress enough how important it is for us to see STP

> > > > support and consequently the ndo_start_xmit procedure for switch

> > > > ports.

> > >

> > > Ok.

> > >

> > > > Let me see if I understand correctly. When the switch is enabled,

> > > > eth0 sends packets towards both physical switch ports, and eth1

> > > > sends packets towards none, but eth0 handles the link state of

> > > > switch port 0, and eth1 handles the link state of switch port 1?

> > >

> > > Exactly, this is how FEC driver is utilized for this switch.

> >

> > This is a much bigger problem than anything which has to do with code

> > organization. Linux does not have any sort of support for unmanaged

> > switches.

>

> My impression is similar. This switch cannot easily fit into DSA (lack

> of appending tags)


No, this is not why the switch does not fit the DSA model.
DSA assumes that the master interface and the switch are two completely
separate devices which manage themselves independently. Their boundary
is typically at the level of a MAC-to-MAC connection, although vendors
have sometimes blurred this line a bit in the case of integrated
switches. But the key point is that if there are 2 external ports going
to the switch, these should be managed by the switch driver. But when
the switch is sandwiched between the Ethernet controller of the "DSA
master" (the DMA engine of fec0) and the DSA master's MAC (still owned
by fec), the separation isn't quite what DSA expects, is it? Remember
that in the case of the MTIP switch, the fec driver needs to put the
MACs going to the switch in promiscuous mode such that the switch
behaves as a switch and actually forwards packets by MAC DA instead of
dropping them. So the system is much more tightly coupled.

 +---------------------------------------------------------------------------+
 |                                                                           |
 | +--------------+        +--------------------+--------+      +------------+
 | |              |        | MTIP switch        |        |      |            |
 | |   fec 1 DMA  |---x    |                    | Port 2 |------| fec 1 MAC  |
 | |              |        |            \  /    |        |      |            |
 | +--------------+        |             \/     +--------+      +------------+
 |                         |             /\              |                   |
 | +--------------+        +--------+   /  \    +--------+      +------------+
 | |              |        |        |           |        |      |            |
 | |   fec 0 DMA  |--------| Port 0 |           | Port 1 |------| fec 0 MAC  |
 | |              |        |        |           |        |      |            |
 | +--------------+        +--------+-----------+--------+      +------------+
 |                                                                           |
 +---------------------------------------------------------------------------+

Is this DSA? I don't really think so, but you could still try to argue
otherwise.

The opposite is also true. DSA supports switches that don't append tags
to packets (see sja1105). This doesn't make them "less DSA", just more
of a pain to work with.

> nor to switchdev.

>

> The latter is caused by two modes of operation:

>

> - Bypass mode (no switch) -> DMA1 and DMA0 are used

> - Switch mode -> only DMA0 is used

>

>

> Moreover, from my understanding of the CPSW - looks like it uses always

> just a single DMA, and the switching seems to be the default operation

> for two ethernet ports.

>

> The "bypass mode" from NXP's L2 switch seems to be achieved inside the

> CPSW switch, by configuring it to not pass packets between those ports.


I don't exactly see the point you're trying to make here. At the end of
the day, the only thing that matters is what you expose to the user.
With no way (when the switch is enabled) for a socket opened on eth0 to
send/receive packets coming only from the first port, and a socket
opened on eth1 to send/receive packets coming only from the second port,
I think this driver attempt is a pretty far cry from what a switch
driver in Linux is expected to offer, be it modeled as switchdev or DSA.

> > Please try to find out if your switch is supposed to be able

> > to be managed (run control protocols on the CPU).

>

> It can support all the "normal" set of L2 switch features:

>

> - VLANs, lookup table (with learning), filtering and forwarding

>   (Multicast, Broadcast, Unicast), priority queues, IP snooping, etc.

>

> Frames for BPDU are recognized by the switch and can be used to

> implement support for RSTP. However, this switch has a separate address

> space (not covered and accessed by FEC address).

>

> > If not, well, I

> > don't know what to suggest.

>

> For me it looks like the NXP's L2 switch shall be treated _just_ as

> offloading IP block to accelerate switching (NXP already support

> dpaa[2] for example).

>

> The idea with having it configured on demand, when:

> ip link add name br0 type bridge; ip link set br0 up;

> ip link set eth0 master br0;

> ip link set eth1 master br0;

>

> Seems to be a reasonable one. In the above scenario it would work hand

> by hand with FEC drivers (as those would handle PHY communication

> setup and link up/down events).


You seem to imply that we are suggesting something different.

> It would be welcome if the community could come up with some rough idea

> how to proceed with this IP block support


Ok, so what I would do if I really cared that much about mainline
support is I would refactor the FEC driver to offer its core
functionality to a new multi-port driver that is able to handle the FEC
DMA interfaces, the MACs and the switch. EXPORT_SYMBOL_GPL is your
friend.

This driver would probe on a device tree binding with 3 "reg" values: 1
for the fec@800f0000, 1 for the fec@800f4000 and 1 for the switch@800f8000.
No puppet master driver which coordinates other drivers, just a single
driver that, depending on the operating state, manages all the SoC
resources in a way that will offer a sane and consistent view of the
Ethernet ports.

So it will have a different .ndo_start_xmit implementation depending on
whether the switch is bypassed or not (if you need to send a packet on
eth1 and the switch is bypassed, you send it through the DMA interface
of eth1, otherwise you send it through the DMA interface of eth0 in a
way in which the switch will actually route it to the eth1 physical
port).

Then I would implement support for BPDU RX/TX (I haven't looked at the
documentation, but I expect that what this switch offers for control
traffic doesn't scale at high speeds (if it does, great, then send and
receive all your packets as control packets, to have precise port
identification). If it doesn't, you'll need a way to treat your data
plane packets differently from the control plane packets. For the data
plane, you can perhaps borrow some ideas from net/dsa/tag_8021q.c, or
even from Tobias Waldekranz's proposal to just let data plane packets
coming from the bridge slide into the switch with no precise control of
the destination port at all, just let the switch perform FDB lookups for
those packets because the switch hardware FDB is supposed to be more or
less in sync with the bridge software FDB:
https://patchwork.kernel.org/project/netdevbpf/cover/20210426170411.1789186-1-tobias@waldekranz.com/

> (especially that for example imx287 is used in many embedded devices

> and is going to be in active production for next 10+ years).


Well, I guess you have a plan then. There are still 10+ years left to
enjoy the benefits of a proper driver design...
Lukasz Majewski June 29, 2021, 12:01 p.m. UTC | #18
Hi Vladimir,

> On Tue, Jun 29, 2021 at 10:09:37AM +0200, Lukasz Majewski wrote:

> > Hi Vladimir,

> >  

> > > On Mon, Jun 28, 2021 at 04:13:14PM +0200, Lukasz Majewski wrote:  

> > > > > > > So before considering merging your changes, i would like

> > > > > > > to see a usable binding.

> > > > > > >

> > > > > > > I also don't remember seeing support for STP. Without

> > > > > > > that, your network has broadcast storm problems when

> > > > > > > there are loops. So i would like to see the code needed

> > > > > > > to put ports into blocking, listening, learning, and

> > > > > > > forwarding states.

> > > > > > >

> > > > > > > 	  Andrew  

> > > > >

> > > > > I cannot stress enough how important it is for us to see STP

> > > > > support and consequently the ndo_start_xmit procedure for

> > > > > switch ports.  

> > > >

> > > > Ok.

> > > >  

> > > > > Let me see if I understand correctly. When the switch is

> > > > > enabled, eth0 sends packets towards both physical switch

> > > > > ports, and eth1 sends packets towards none, but eth0 handles

> > > > > the link state of switch port 0, and eth1 handles the link

> > > > > state of switch port 1?  

> > > >

> > > > Exactly, this is how FEC driver is utilized for this switch.  

> > >

> > > This is a much bigger problem than anything which has to do with

> > > code organization. Linux does not have any sort of support for

> > > unmanaged switches.  

> >

> > My impression is similar. This switch cannot easily fit into DSA

> > (lack of appending tags)  

> 

> No, this is not why the switch does not fit the DSA model.

> DSA assumes that the master interface and the switch are two

> completely separate devices which manage themselves independently.

> Their boundary is typically at the level of a MAC-to-MAC connection,

> although vendors have sometimes blurred this line a bit in the case

> of integrated switches. But the key point is that if there are 2

> external ports going to the switch, these should be managed by the

> switch driver. But when the switch is sandwiched between the Ethernet

> controller of the "DSA master" (the DMA engine of fec0) and the DSA

> master's MAC (still owned by fec), the separation isn't quite what

> DSA expects, is it? Remember that in the case of the MTIP switch, the

> fec driver needs to put the MACs going to the switch in promiscuous

> mode such that the switch behaves as a switch and actually forwards

> packets by MAC DA instead of dropping them. So the system is much

> more tightly coupled.

> 

>  +---------------------------------------------------------------------------+

>  |

>        | | +--------------+        +--------------------+--------+

>   +------------+ | |              |        | MTIP switch        |

>    |      |            | | |   fec 1 DMA  |---x    |

>   | Port 2 |------| fec 1 MAC  | | |              |        |

>   \  /    |        |      |            | | +--------------+        |

>            \/     +--------+      +------------+ |

>      |             /\              |                   | |

> +--------------+        +--------+   /  \    +--------+

> +------------+ | |              |        |        |           |

>  |      |            | | |   fec 0 DMA  |--------| Port 0 |

> | Port 1 |------| fec 0 MAC  | | |              |        |        |

>         |        |      |            | | +--------------+

> +--------+-----------+--------+      +------------+ |

>                                                           |

> +---------------------------------------------------------------------------+

> 

> Is this DSA? I don't really think so, but you could still try to argue

> otherwise.

> 

> The opposite is also true. DSA supports switches that don't append

> tags to packets (see sja1105). This doesn't make them "less DSA",

> just more of a pain to work with.

> 

> > nor to switchdev.

> >

> > The latter is caused by two modes of operation:

> >

> > - Bypass mode (no switch) -> DMA1 and DMA0 are used

> > - Switch mode -> only DMA0 is used

> >

> >

> > Moreover, from my understanding of the CPSW - looks like it uses

> > always just a single DMA, and the switching seems to be the default

> > operation for two ethernet ports.

> >

> > The "bypass mode" from NXP's L2 switch seems to be achieved inside

> > the CPSW switch, by configuring it to not pass packets between

> > those ports.  

> 

> I don't exactly see the point you're trying to make here. At the end

> of the day, the only thing that matters is what you expose to the

> user. With no way (when the switch is enabled) for a socket opened on

> eth0 to send/receive packets coming only from the first port, and a

> socket opened on eth1 to send/receive packets coming only from the

> second port, I think this driver attempt is a pretty far cry from

> what a switch driver in Linux is expected to offer, be it modeled as

> switchdev or DSA.

> 

> > > Please try to find out if your switch is supposed to be able

> > > to be managed (run control protocols on the CPU).  

> >

> > It can support all the "normal" set of L2 switch features:

> >

> > - VLANs, lookup table (with learning), filtering and forwarding

> >   (Multicast, Broadcast, Unicast), priority queues, IP snooping,

> > etc.

> >

> > Frames for BPDU are recognized by the switch and can be used to

> > implement support for RSTP. However, this switch has a separate

> > address space (not covered and accessed by FEC address).

> >  

> > > If not, well, I

> > > don't know what to suggest.  

> >

> > For me it looks like the NXP's L2 switch shall be treated _just_ as

> > offloading IP block to accelerate switching (NXP already support

> > dpaa[2] for example).

> >

> > The idea with having it configured on demand, when:

> > ip link add name br0 type bridge; ip link set br0 up;

> > ip link set eth0 master br0;

> > ip link set eth1 master br0;

> >

> > Seems to be a reasonable one. In the above scenario it would work

> > hand by hand with FEC drivers (as those would handle PHY

> > communication setup and link up/down events).  

> 

> You seem to imply that we are suggesting something different.

> 

> > It would be welcome if the community could come up with some rough

> > idea how to proceed with this IP block support  

> 

> Ok, so what I would do if I really cared that much about mainline

> support is I would refactor the FEC driver to offer its core

> functionality to a new multi-port driver that is able to handle the

> FEC DMA interfaces, the MACs and the switch. EXPORT_SYMBOL_GPL is your

> friend.

> 

> This driver would probe on a device tree binding with 3 "reg" values:

> 1 for the fec@800f0000, 1 for the fec@800f4000 and 1 for the

> switch@800f8000. No puppet master driver which coordinates other

> drivers, just a single driver that, depending on the operating state,

> manages all the SoC resources in a way that will offer a sane and

> consistent view of the Ethernet ports.

> 

> So it will have a different .ndo_start_xmit implementation depending

> on whether the switch is bypassed or not (if you need to send a

> packet on eth1 and the switch is bypassed, you send it through the

> DMA interface of eth1, otherwise you send it through the DMA

> interface of eth0 in a way in which the switch will actually route it

> to the eth1 physical port).

> 

> Then I would implement support for BPDU RX/TX (I haven't looked at the

> documentation, but I expect that what this switch offers for control

> traffic doesn't scale at high speeds (if it does, great, then send and

> receive all your packets as control packets, to have precise port

> identification). If it doesn't, you'll need a way to treat your data

> plane packets differently from the control plane packets. For the data

> plane, you can perhaps borrow some ideas from net/dsa/tag_8021q.c, or

> even from Tobias Waldekranz's proposal to just let data plane packets

> coming from the bridge slide into the switch with no precise control

> of the destination port at all, just let the switch perform FDB

> lookups for those packets because the switch hardware FDB is supposed

> to be more or less in sync with the bridge software FDB:

> https://patchwork.kernel.org/project/netdevbpf/cover/20210426170411.1789186-1-tobias@waldekranz.com/

> 


Thanks for sketching and sharing such detailed plan. 

> > (especially that for example imx287 is used in many embedded devices

> > and is going to be in active production for next 10+ years).  

> 

> Well, I guess you have a plan then. There are still 10+ years left to

> enjoy the benefits of a proper driver design...


:-)


Best regards,

Lukasz Majewski

--

DENX Software Engineering GmbH,      Managing Director: Wolfgang Denk
HRB 165235 Munich, Office: Kirchenstr.5, D-82194 Groebenzell, Germany
Phone: (+49)-8142-66989-59 Fax: (+49)-8142-66989-80 Email: lukma@denx.de
diff mbox series

Patch

diff --git a/drivers/net/ethernet/freescale/Kconfig b/drivers/net/ethernet/freescale/Kconfig
index 3f9175bdce77..3cf703aa2a00 100644
--- a/drivers/net/ethernet/freescale/Kconfig
+++ b/drivers/net/ethernet/freescale/Kconfig
@@ -102,5 +102,6 @@  config GIANFAR
 source "drivers/net/ethernet/freescale/dpaa/Kconfig"
 source "drivers/net/ethernet/freescale/dpaa2/Kconfig"
 source "drivers/net/ethernet/freescale/enetc/Kconfig"
+source "drivers/net/ethernet/freescale/mtipsw/Kconfig"
 
 endif # NET_VENDOR_FREESCALE
diff --git a/drivers/net/ethernet/freescale/Makefile b/drivers/net/ethernet/freescale/Makefile
index 67c436400352..12ed0c13f739 100644
--- a/drivers/net/ethernet/freescale/Makefile
+++ b/drivers/net/ethernet/freescale/Makefile
@@ -27,3 +27,4 @@  obj-$(CONFIG_FSL_DPAA2_ETH) += dpaa2/
 obj-$(CONFIG_FSL_ENETC) += enetc/
 obj-$(CONFIG_FSL_ENETC_MDIO) += enetc/
 obj-$(CONFIG_FSL_ENETC_VF) += enetc/
+obj-$(CONFIG_FEC_MTIP_L2SW) += mtipsw/
diff --git a/drivers/net/ethernet/freescale/mtipsw/Kconfig b/drivers/net/ethernet/freescale/mtipsw/Kconfig
new file mode 100644
index 000000000000..e7f40dcad0d0
--- /dev/null
+++ b/drivers/net/ethernet/freescale/mtipsw/Kconfig
@@ -0,0 +1,11 @@ 
+# SPDX-License-Identifier: GPL-2.0-only
+config FEC_MTIP_L2SW
+	tristate "MoreThanIP L2 switch support to FEC driver"
+	depends on OF
+	depends on NET_SWITCHDEV
+	depends on BRIDGE
+	depends on ARCH_MXS
+	select FIXED_PHY
+	help
+	  This enables support for the MoreThan IP on i.MX SoCs (e.g. iMX28)
+	  L2 switch.
diff --git a/drivers/net/ethernet/freescale/mtipsw/Makefile b/drivers/net/ethernet/freescale/mtipsw/Makefile
new file mode 100644
index 000000000000..1aac85f92750
--- /dev/null
+++ b/drivers/net/ethernet/freescale/mtipsw/Makefile
@@ -0,0 +1,3 @@ 
+# SPDX-License-Identifier: GPL-2.0
+
+obj-$(CONFIG_FEC_MTIP_L2SW) += fec_mtip.o
diff --git a/drivers/net/ethernet/freescale/mtipsw/fec_mtip.c b/drivers/net/ethernet/freescale/mtipsw/fec_mtip.c
new file mode 100644
index 000000000000..fe4e3fb34295
--- /dev/null
+++ b/drivers/net/ethernet/freescale/mtipsw/fec_mtip.c
@@ -0,0 +1,438 @@ 
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2021 Lukasz Majewski <lukma@denx.de>
+ * Lukasz Majewski <lukma@denx.de>
+ */
+
+#include <linux/module.h>
+#include <linux/phy.h>
+#include <linux/netdevice.h>
+#include <linux/of_net.h>
+#include <linux/of_platform.h>
+#include <linux/if_bridge.h>
+#include <linux/mdio.h>
+#include <linux/of_mdio.h>
+#include <linux/etherdevice.h>
+
+#include "fec_mtip.h"
+#include "../fec.h"
+
+static void mtipl2_setup_desc_active(struct mtipl2sw_priv *priv, int id)
+{
+	struct fec_enet_private *fec = priv->fep[id];
+
+	fec->rx_queue[0]->bd.reg_desc_active =
+		fec_hwp(fec) + fec_offset_des_active_rxq(fec, 0);
+
+	fec->tx_queue[0]->bd.reg_desc_active =
+		fec_hwp(fec) + fec_offset_des_active_txq(fec, 0);
+}
+
+static int mtipl2_en_rx(struct mtipl2sw_priv *priv)
+{
+	writel(MCF_ESW_RDAR_R_DES_ACTIVE, priv->hwpsw + MCF_ESW_R_RDAR);
+
+	return 0;
+}
+
+static void read_atable(struct mtipl2sw_priv *priv, int index,
+	unsigned long *read_lo, unsigned long *read_hi)
+{
+	unsigned long atable_base = (unsigned long)priv->hwentry;
+
+	*read_lo = readl((const volatile void*)atable_base + (index << 3));
+	*read_hi = readl((const volatile void*)atable_base + (index << 3) + 4);
+}
+
+static void write_atable(struct mtipl2sw_priv *priv, int index,
+	unsigned long write_lo, unsigned long write_hi)
+{
+	unsigned long atable_base = (unsigned long)priv->hwentry;
+
+	writel(write_lo, (volatile void *)atable_base + (index << 3));
+	writel(write_hi, (volatile void *)atable_base + (index << 3) + 4);
+}
+
+/*
+ * Clear complete MAC Look Up Table
+ */
+static void esw_clear_atable(struct mtipl2sw_priv *priv)
+{
+	int index;
+	for (index = 0; index < 2048; index++)
+		write_atable(priv, index, 0, 0);
+}
+
+static int mtipl2_sw_enable(struct mtipl2sw_priv *priv)
+{
+	/*
+	 * L2 switch - reset
+	 */
+	writel(MCF_ESW_MODE_SW_RST, &priv->fecp->ESW_MODE);
+	udelay(10);
+
+	/* Configure switch*/
+	writel(MCF_ESW_MODE_STATRST, &priv->fecp->ESW_MODE);
+	writel(MCF_ESW_MODE_SW_EN, &priv->fecp->ESW_MODE);
+
+	/* Management port configuration, make port 0 as management port */
+	writel(0, &priv->fecp->ESW_BMPC);
+
+	/*
+	 * Set backpressure threshold to minimize discarded frames
+	 * during due to congestion.
+	 */
+	writel(P0BC_THRESHOLD, &priv->fecp->ESW_P0BCT);
+
+	/* Set the max rx buffer size */
+	writel(L2SW_PKT_MAXBLR_SIZE, priv->hwpsw + MCF_ESW_R_BUFF_SIZE);
+	/* Enable broadcast on all ports */
+	writel(0x7, &priv->fecp->ESW_DBCR);
+
+	/* Enable multicast on all ports */
+	writel(0x7, &priv->fecp->ESW_DMCR);
+
+	esw_clear_atable(priv);
+
+	/* Clear all pending interrupts */
+	writel(0xffffffff, priv->hwpsw + FEC_IEVENT);
+
+	/* Enable interrupts we wish to service */
+	writel(FEC_MTIP_DEFAULT_IMASK, priv->hwpsw + FEC_IMASK);
+	priv->l2sw_on = true;
+
+	return 0;
+}
+
+static void mtipl2_sw_disable(struct mtipl2sw_priv *priv)
+{
+	writel(0, &priv->fecp->ESW_MODE);
+}
+
+static int mtipl2_port_enable (struct mtipl2sw_priv *priv, int port)
+{
+	u32 l2_ports_en;
+
+	pr_err("%s: PORT ENABLE %d\n", __func__, port);
+
+	/* Enable tx/rx on L2 switch ports */
+	l2_ports_en = readl(&priv->fecp->ESW_PER);
+	if (!(l2_ports_en & MCF_ESW_ENA_PORT_0))
+		l2_ports_en = MCF_ESW_ENA_PORT_0;
+
+	if (port == 0 && !(l2_ports_en & MCF_ESW_ENA_PORT_1))
+		l2_ports_en |= MCF_ESW_ENA_PORT_1;
+
+	if (port == 1 && !(l2_ports_en & MCF_ESW_ENA_PORT_2))
+		l2_ports_en |= MCF_ESW_ENA_PORT_2;
+
+	writel(l2_ports_en, &priv->fecp->ESW_PER);
+
+	/*
+	 * Check if MAC IP block is already enabled (after switch initializtion)
+	 * or if we need to enable it after mtipl2_port_disable was called.
+	 */
+
+	return 0;
+}
+
+static void mtipl2_port_disable (struct mtipl2sw_priv *priv, int port)
+{
+	u32 l2_ports_en;
+
+	pr_err(" %s: PORT DISABLE %d\n", __func__, port);
+
+	l2_ports_en = readl(&priv->fecp->ESW_PER);
+	if (port == 0)
+		l2_ports_en &= ~MCF_ESW_ENA_PORT_1;
+
+	if (port == 1)
+		l2_ports_en &= ~MCF_ESW_ENA_PORT_2;
+
+	/* Finally disable tx/rx on port 0 */
+	if (!(l2_ports_en & MCF_ESW_ENA_PORT_1) &&
+	    !(l2_ports_en & MCF_ESW_ENA_PORT_2))
+		l2_ports_en &= ~MCF_ESW_ENA_PORT_0;
+
+	writel(l2_ports_en, &priv->fecp->ESW_PER);
+}
+
+irqreturn_t
+mtip_l2sw_interrupt_handler(int irq, void *dev_id)
+{
+	struct mtipl2sw_priv *priv = dev_id;
+	struct fec_enet_private *fep = priv->fep[0];
+	irqreturn_t ret = IRQ_NONE;
+	u32 int_events, int_imask;
+
+	int_events = readl(fec_hwp(fep) + FEC_IEVENT);
+	writel(int_events, fec_hwp(fep) + FEC_IEVENT);
+
+	if ((int_events & FEC_MTIP_DEFAULT_IMASK) && fep->link) {
+		ret = IRQ_HANDLED;
+
+		if (napi_schedule_prep(&fep->napi)) {
+			/* Disable RX interrupts */
+			int_imask = readl(fec_hwp(fep) + FEC_IMASK);
+			int_imask &= ~FEC_MTIP_ENET_RXF;
+			writel(int_imask, fec_hwp(fep) + FEC_IMASK);
+			__napi_schedule(&fep->napi);
+		}
+	}
+
+	return ret;
+}
+
+static int mtipl2_parse_of(struct mtipl2sw_priv *priv, struct device_node *np)
+{
+	struct device_node *port, *p, *fep_np;
+	struct platform_device *pdev;
+	struct net_device *ndev;
+	unsigned int port_num;
+
+	p = of_find_node_by_name(np, "ports");
+
+	for_each_available_child_of_node(p, port) {
+		if (of_property_read_u32(port, "reg", &port_num))
+			continue;
+
+		priv->n_ports = port_num;
+
+		fep_np = of_parse_phandle(port, "phy-handle", 0);
+		pdev = of_find_device_by_node(fep_np);
+		ndev = platform_get_drvdata(pdev);
+		priv->fep[port_num - 1] = netdev_priv(ndev);
+		put_device(&pdev->dev);
+	}
+
+	of_node_put(p);
+
+	return 0;
+}
+
+int mtipl2_ndev_port_link(struct net_device *ndev,
+				 struct net_device *br_ndev)
+{
+	struct fec_enet_private *fec = netdev_priv(ndev);
+	struct mtipl2sw_priv *priv = fec->mtipl2;
+
+	pr_err("%s: ndev: %s br: %s fec: 0x%x 0x%x\n", __func__, ndev->name,
+	       br_ndev->name, (unsigned int) fec, fec->dev_id);
+
+	/* Check if MTIP switch is already enabled */
+	if(!priv->l2sw_on) {
+		/*
+		 * Close running network connections - to safely enable
+		 * support for mtip L2 switching.
+		 */
+		if (netif_oper_up(priv->fep[0]->netdev))
+			fec_enet_close(priv->fep[0]->netdev);
+
+		if (netif_oper_up(priv->fep[1]->netdev))
+			fec_enet_close(priv->fep[1]->netdev);
+
+		/* Configure and enable the L2 switch IP block */
+		mtipl2_sw_enable(priv);
+
+		if(!priv->br_ndev)
+			priv->br_ndev = br_ndev;
+	}
+
+	priv->br_members |= BIT(fec->dev_id);
+
+	/* Enable internal switch port */
+	mtipl2_port_enable(fec->mtipl2, fec->dev_id);
+
+	if (fec->dev_id == 1)
+		return NOTIFY_DONE;
+
+	/*
+	 * Set addresses for DMA0 proper operation to point to L2 switch
+	 * IP block.
+	 *
+	 * The eth1 FEC driver is now only used for controlling the PHY device.
+	 */
+	fec->mtip_l2sw = true;
+
+	mtipl2_setup_desc_active(priv, fec->dev_id);
+	fec_enet_open(priv->fep[fec->dev_id]->netdev);
+
+	mtipl2_en_rx(fec->mtipl2);
+
+	return NOTIFY_DONE;
+}
+
+static void mtipl2_netdevice_port_unlink(struct net_device *ndev)
+{
+	struct fec_enet_private *fec = netdev_priv(ndev);
+	struct mtipl2sw_priv *priv = fec->mtipl2;
+
+	pr_err("%s: ndev: %s id: %d\n", __func__, ndev->name, fec->dev_id);
+
+	/* Disable internal switch port */
+	mtipl2_port_disable(fec->mtipl2, fec->dev_id);
+
+	if (netif_oper_up(priv->fep[fec->dev_id]->netdev))
+		fec_enet_close(priv->fep[fec->dev_id]->netdev);
+
+	priv->br_members &= ~BIT(fec->dev_id);
+
+	fec->mtip_l2sw = false;
+	priv->br_ndev = NULL;
+
+	mtipl2_setup_desc_active(priv, fec->dev_id);
+	fec_enet_open(priv->fep[fec->dev_id]->netdev);
+
+	if (!priv->br_members) {
+		mtipl2_sw_disable(priv);
+		priv->l2sw_on = false;
+	}
+}
+
+bool mtipl2_port_dev_check(const struct net_device *ndev)
+{
+	if(!fec_get_priv(ndev))
+		return false;
+
+	return true;
+}
+
+/* netdev notifier */
+static int mtipl2_netdevice_event(struct notifier_block *unused,
+				  unsigned long event, void *ptr)
+{
+	struct net_device *ndev = netdev_notifier_info_to_dev(ptr);
+	struct netdev_notifier_changeupper_info *info;
+	int ret = NOTIFY_DONE;
+
+	if (!mtipl2_port_dev_check(ndev))
+		return NOTIFY_DONE;
+
+	pr_err("%s: ndev: %s event: 0x%lx\n", __func__, ndev->name, event);
+
+	switch (event) {
+	case NETDEV_CHANGEUPPER:
+		info = ptr;
+
+		if (netif_is_bridge_master(info->upper_dev)) {
+			if (info->linking)
+				ret = mtipl2_ndev_port_link(ndev,
+							    info->upper_dev);
+			else
+				mtipl2_netdevice_port_unlink(ndev);
+		}
+		break;
+	default:
+		return NOTIFY_DONE;
+	}
+
+	return notifier_from_errno(ret);
+}
+
+static struct notifier_block mtipl2_netdevice_nb __read_mostly = {
+	.notifier_call = mtipl2_netdevice_event,
+};
+
+static int mtipl2_register_notifiers(struct mtipl2sw_priv *priv)
+{
+	int ret = 0;
+
+	ret = register_netdevice_notifier(&mtipl2_netdevice_nb);
+	if (ret) {
+		dev_err(priv->dev, "can't register netdevice notifier\n");
+		return ret;
+	}
+
+	return ret;
+}
+
+static void mtipl2_unregister_notifiers(struct mtipl2sw_priv *priv)
+{
+	unregister_netdevice_notifier(&mtipl2_netdevice_nb);
+}
+
+static int mtipl2_sw_probe(struct platform_device *pdev)
+{
+	struct device_node *np = pdev->dev.of_node;
+	struct mtipl2sw_priv *priv;
+	struct resource *r;
+	int ret;
+
+	pr_err("fec: %s\n", __func__);
+
+	priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_KERNEL);
+	if (!priv)
+		return -ENOMEM;
+
+	mtipl2_parse_of(priv, np);
+
+	/* Wait till eth[01] interfaces are up and running */
+	if (!priv->fep[0] || !priv->fep[1] ||
+	    !netif_device_present(priv->fep[0]->netdev) ||
+	    !netif_device_present(priv->fep[1]->netdev))
+		return -EPROBE_DEFER;
+
+	r = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	priv->fecp = devm_ioremap_resource(&pdev->dev, r);
+
+	r = platform_get_resource(pdev, IORESOURCE_MEM, 1);
+	priv->hwentry = devm_ioremap_resource(&pdev->dev, r);
+
+	r = platform_get_resource(pdev, IORESOURCE_MEM, 2);
+	priv->hwpsw = devm_ioremap_resource(&pdev->dev, r);
+
+	/*
+	 * MAC{01} interrupt and descriptors registers have 4 bytes
+	 * offset when compared to L2 switch IP block.
+	 *
+	 * When L2 switch is added "between" ENET (eth0) and MAC{01}
+	 * the code for interrupts and setting up descriptors is
+	 * reused.
+	 *
+	 * As for example FEC_IMASK are used also on MAC{01} to
+	 * perform MII transmission it is better to subtract the
+	 * offset from the outset and reuse defines.
+	 */
+	priv->hwpsw -= L2SW_CTRL_REG_OFFSET;
+
+	priv->fep[0]->hwpsw = priv->hwpsw;
+	priv->fep[1]->hwpsw = priv->hwpsw;
+
+	priv->fep[0]->mtipl2 = priv;
+	priv->fep[1]->mtipl2 = priv;
+
+	ret = devm_request_irq(&pdev->dev, platform_get_irq(pdev, 0),
+			       mtip_l2sw_interrupt_handler,
+			       0, "mtip_l2sw", priv);
+
+	ret = mtipl2_register_notifiers(priv);
+	if (ret)
+		goto clean_unregister_netdev;
+
+	return 0;
+
+ clean_unregister_netdev:
+	mtipl2_unregister_notifiers(priv);
+
+	return ret;
+}
+
+static const struct of_device_id mtipl2_of_match[] = {
+	{ .compatible = "imx,mtip-l2switch" },
+	{ /* sentinel */ },
+};
+
+static struct platform_driver mtipl2plat_driver = {
+	.probe  = mtipl2_sw_probe,
+	.driver = {
+		.name = "mtipl2sw",
+		.of_match_table = mtipl2_of_match,
+	},
+};
+
+module_platform_driver(mtipl2plat_driver);
+
+MODULE_AUTHOR("Lukasz Majewski <lukma@denx.de>");
+MODULE_DESCRIPTION("Driver for MTIP L2 on SOC switch");
+MODULE_LICENSE("GPL v2");
+MODULE_ALIAS("platform:mtipl2sw");
diff --git a/drivers/net/ethernet/freescale/mtipsw/fec_mtip.h b/drivers/net/ethernet/freescale/mtipsw/fec_mtip.h
new file mode 100644
index 000000000000..6f989cde0093
--- /dev/null
+++ b/drivers/net/ethernet/freescale/mtipsw/fec_mtip.h
@@ -0,0 +1,213 @@ 
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2021 DENX Software Engineering GmbH
+ * Lukasz Majewski <lukma@denx.de>
+ */
+
+#ifndef __MTIP_L2SWITCH_H_
+#define __MTIP_L2SWITCH_H_
+
+/* Bit definitions and macros for MCF_ESW_MODE */
+#define MCF_ESW_MODE_SW_RST BIT(0)
+#define MCF_ESW_MODE_SW_EN  BIT(1)
+#define MCF_ESW_MODE_STATRST BIT(31)
+
+/* Port 0 backpressure congestion threshold */
+#define P0BC_THRESHOLD		0x40
+
+struct esw_output_queue_status {
+	unsigned long ESW_MMSR;
+	unsigned long ESW_LMT;
+	unsigned long ESW_LFC;
+	unsigned long ESW_PCSR;
+	unsigned long ESW_IOSR;
+	unsigned long ESW_QWT;
+	unsigned long esw_reserved;
+	unsigned long ESW_P0BCT;
+};
+struct esw_statistics_status {
+	/*
+	 * Total number of incoming frames processed
+	 * but discarded in switch
+	 */
+	unsigned long ESW_DISCN;
+	/*Sum of bytes of frames counted in ESW_DISCN*/
+	unsigned long ESW_DISCB;
+	/*
+	 * Total number of incoming frames processed
+	 * but not discarded in switch
+	 */
+	unsigned long ESW_NDISCN;
+	/*Sum of bytes of frames counted in ESW_NDISCN*/
+	unsigned long ESW_NDISCB;
+};
+
+struct esw_port_statistics_status {
+	/*outgoing frames discarded due to transmit queue congestion*/
+	unsigned long MCF_ESW_POQC;
+	/*incoming frames discarded due to VLAN domain mismatch*/
+	unsigned long MCF_ESW_PMVID;
+	/*incoming frames discarded due to untagged discard*/
+	unsigned long MCF_ESW_PMVTAG;
+	/*incoming frames discarded due port is in blocking state*/
+	unsigned long MCF_ESW_PBL;
+};
+
+struct l2switch_t {
+	unsigned long ESW_REVISION;
+	unsigned long ESW_SCRATCH;
+	unsigned long ESW_PER;
+	unsigned long reserved0[1];
+	unsigned long ESW_VLANV;
+	unsigned long ESW_DBCR;
+	unsigned long ESW_DMCR;
+	unsigned long ESW_BKLR;
+	unsigned long ESW_BMPC;
+	unsigned long ESW_MODE;
+	unsigned long ESW_VIMSEL;
+	unsigned long ESW_VOMSEL;
+	unsigned long ESW_VIMEN;
+	unsigned long ESW_VID;/*0x34*/
+	/*from 0x38 0x3C*/
+	unsigned long esw_reserved0[2];
+	unsigned long ESW_MCR;/*0x40*/
+	unsigned long ESW_EGMAP;
+	unsigned long ESW_INGMAP;
+	unsigned long ESW_INGSAL;
+	unsigned long ESW_INGSAH;
+	unsigned long ESW_INGDAL;
+	unsigned long ESW_INGDAH;
+	unsigned long ESW_ENGSAL;
+	unsigned long ESW_ENGSAH;
+	unsigned long ESW_ENGDAL;
+	unsigned long ESW_ENGDAH;
+	unsigned long ESW_MCVAL;/*0x6C*/
+	/*from 0x70--0x7C*/
+	unsigned long esw_reserved1[4];
+	unsigned long ESW_MMSR;/*0x80*/
+	unsigned long ESW_LMT;
+	unsigned long ESW_LFC;
+	unsigned long ESW_PCSR;
+	unsigned long ESW_IOSR;
+	unsigned long ESW_QWT;/*0x94*/
+	unsigned long esw_reserved2[1];/*0x98*/
+	unsigned long ESW_P0BCT;/*0x9C*/
+	/*from 0xA0-0xB8*/
+	unsigned long esw_reserved3[7];
+	unsigned long ESW_P0FFEN;/*0xBC*/
+	unsigned long ESW_PSNP[8];
+	unsigned long ESW_IPSNP[8];
+	unsigned long ESW_PVRES[3];
+	/*from 0x10C-0x13C*/
+	unsigned long esw_reserved4[13];
+	unsigned long ESW_IPRES;/*0x140*/
+	/*from 0x144-0x17C*/
+	unsigned long esw_reserved5[15];
+	unsigned long ESW_PRES[3];
+	/*from 0x18C-0x1FC*/
+	unsigned long esw_reserved6[29];
+	unsigned long ESW_PID[3];
+	/*from 0x20C-0x27C*/
+	unsigned long esw_reserved7[29];
+	unsigned long ESW_VRES[32];
+	unsigned long ESW_DISCN;/*0x300*/
+	unsigned long ESW_DISCB;
+	unsigned long ESW_NDISCN;
+	unsigned long ESW_NDISCB;/*0xFC0DC30C*/
+	struct esw_port_statistics_status port_statistics_status[3];
+	/*from 0x340-0x400*/
+	unsigned long esw_reserved8[48];
+
+	/*0xFC0DC400---0xFC0DC418*/
+	/*unsigned long MCF_ESW_ISR;*/
+	unsigned long   switch_ievent;             /* Interrupt event reg */
+	/*unsigned long MCF_ESW_IMR;*/
+	unsigned long   switch_imask;              /* Interrupt mask reg */
+	/*unsigned long MCF_ESW_RDSR;*/
+	unsigned long   fec_r_des_start;        /* Receive descriptor ring */
+	/*unsigned long MCF_ESW_TDSR;*/
+	unsigned long   fec_x_des_start;        /* Transmit descriptor ring */
+	/*unsigned long MCF_ESW_MRBR;*/
+	unsigned long   fec_r_buff_size;        /* Maximum receive buff size */
+	/*unsigned long MCF_ESW_RDAR;*/
+	unsigned long   fec_r_des_active;       /* Receive descriptor reg */
+	/*unsigned long MCF_ESW_TDAR;*/
+	unsigned long   fec_x_des_active;       /* Transmit descriptor reg */
+	/*from 0x420-0x4FC*/
+	unsigned long esw_reserved9[57];
+
+	/*0xFC0DC500---0xFC0DC508*/
+	unsigned long ESW_LREC0;
+	unsigned long ESW_LREC1;
+	unsigned long ESW_LSR;
+};
+
+struct  AddrTable64bEntry {
+	unsigned int lo;  /* lower 32 bits */
+	unsigned int hi;  /* upper 32 bits */
+};
+
+struct  eswAddrTable_t {
+	struct AddrTable64bEntry  eswTable64bEntry[2048];
+};
+
+struct mtipl2sw_priv {
+	struct fec_enet_private *fep[2];
+	void __iomem *hwpsw;
+	struct device *dev;
+	struct net_device *br_ndev;
+
+	/*
+	 * Flag to indicate if L2 switch IP block is initialized and
+	 * running.
+	 */
+	bool l2sw_on;
+
+	/* Number of ports */
+	int n_ports;
+
+	/* Bit field with active members */
+	u8 br_members;
+
+	/* Registers to configure/setup L2 switch IP block */
+	struct l2switch_t *fecp;
+
+	/* Look-up MAC address table start from 0x800FC000 */
+	struct eswAddrTable_t *hwentry;
+};
+
+#define MCF_ESW_RDAR_R_DES_ACTIVE BIT(24)
+
+/* HW_ENET_SWI_PORT_ENA */
+#define MCF_ESW_ENA_TRANSMIT_0 BIT(0)
+#define MCF_ESW_ENA_TRANSMIT_1 BIT(1)
+#define MCF_ESW_ENA_TRANSMIT_2 BIT(2)
+
+#define MCF_ESW_ENA_RECEIVE_0 BIT(16)
+#define MCF_ESW_ENA_RECEIVE_1 BIT(17)
+#define MCF_ESW_ENA_RECEIVE_2 BIT(18)
+
+#define MCF_ESW_ENA_PORT_0 (MCF_ESW_ENA_TRANSMIT_0 | MCF_ESW_ENA_RECEIVE_0)
+#define MCF_ESW_ENA_PORT_1 (MCF_ESW_ENA_TRANSMIT_1 | MCF_ESW_ENA_RECEIVE_1)
+#define MCF_ESW_ENA_PORT_2 (MCF_ESW_ENA_TRANSMIT_2 | MCF_ESW_ENA_RECEIVE_2)
+
+/* L2 switch Maximum receive buff size */
+#define MCF_ESW_R_BUFF_SIZE	0x14
+/* L2 switch Receive Descriptor Active Register */
+#define MCF_ESW_R_RDAR          0x18
+
+/*
+ * MCF_FEC_EMRBR corresponds to FEC_R_BUFF_SIZE_0 in fec main driver.
+ * It is duplicated as for L2 switch FEC_R_BUFF_SIZE_0 is aliased
+ * to different offset in switch IC. Hence the need to introduce new
+ * #define.
+ */
+#define MCF_FEC_EMRBR          (0x188)
+
+#define MCF_FEC_ERDSR(x)       ((x) << 2)
+
+#define L2SW_PKT_MAXBLR_SIZE         1520
+
+#define L2SW_CTRL_REG_OFFSET         0x4
+
+#endif /* __MTIP_L2SWITCH_H_ */