diff mbox series

[RESENT,net--stat,1/1] net: ethernet: fec: Revert "net: ethernet: fec: Replace interrupt driven MDIO with polled IO"

Message ID 1587996484-3504-1-git-send-email-fugang.duan@nxp.com
State New
Headers show
Series [RESENT,net--stat,1/1] net: ethernet: fec: Revert "net: ethernet: fec: Replace interrupt driven MDIO with polled IO" | expand

Commit Message

Andy Duan April 27, 2020, 2:08 p.m. UTC
This reverts commit 29ae6bd1b0d8a57d7c00ab12cbb949fc41986eef.

The commit breaks ethernet function on i.MX6SX, i.MX7D, i.MX8MM,
i.MX8MQ, and i.MX8QXP platforms. Booting a Yocto system with an
NFS-mounted rootfs fails with the commit.

Signed-off-by: Fugang Duan <fugang.duan@nxp.com>

Comments

Laurent Pinchart July 27, 2020, 1:23 a.m. UTC | #1
Hi Fugang,

On Mon, Apr 27, 2020 at 10:08:04PM +0800, Fugang Duan wrote:
> This reverts commit 29ae6bd1b0d8a57d7c00ab12cbb949fc41986eef.

> 

> The commit breaks ethernet function on i.MX6SX, i.MX7D, i.MX8MM,

> i.MX8MQ, and i.MX8QXP platforms. Boot yocto system by NFS mounting

> rootfs will be failed with the commit.


I'm afraid this commit breaks networking on i.MX7D for me :-( My board
is configured to boot over NFS root with IP autoconfiguration through
DHCP. The DHCP request goes out, the reply is sent back by the server,
but never noticed by the fec driver.

v5.7 works fine. As 29ae6bd1b0d8a57d7c00ab12cbb949fc41986eef was merged
during the v5.8 merge window, I suspect something else crept in
between 29ae6bd1b0d8a57d7c00ab12cbb949fc41986eef and this patch that
needs to be reverted too. We're close to v5.8 and it would be annoying
to see this regression ending up in the released kernel. I can test
patches, but I'm not familiar enough with the driver (or the networking
subsystem) to fix the issue myself.

> Signed-off-by: Fugang Duan <fugang.duan@nxp.com>

> 

> diff --git a/drivers/net/ethernet/freescale/fec.h b/drivers/net/ethernet/freescale/fec.h

> index a6cdd5b..e74dd1f 100644

> --- a/drivers/net/ethernet/freescale/fec.h

> +++ b/drivers/net/ethernet/freescale/fec.h

> @@ -376,7 +376,8 @@ struct bufdesc_ex {

>  #define FEC_ENET_TS_AVAIL       ((uint)0x00010000)

>  #define FEC_ENET_TS_TIMER       ((uint)0x00008000)

>  

> -#define FEC_DEFAULT_IMASK (FEC_ENET_TXF | FEC_ENET_RXF)

> +#define FEC_DEFAULT_IMASK (FEC_ENET_TXF | FEC_ENET_RXF | FEC_ENET_MII)

> +#define FEC_NAPI_IMASK	FEC_ENET_MII

>  #define FEC_RX_DISABLED_IMASK (FEC_DEFAULT_IMASK & (~FEC_ENET_RXF))

>  

>  /* ENET interrupt coalescing macro define */

> @@ -542,6 +543,7 @@ struct fec_enet_private {

>  	int	link;

>  	int	full_duplex;

>  	int	speed;

> +	struct	completion mdio_done;

>  	int	irq[FEC_IRQ_NUM];

>  	bool	bufdesc_ex;

>  	int	pause_flag;

> diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c

> index 1ae075a..c7b84bb 100644

> --- a/drivers/net/ethernet/freescale/fec_main.c

> +++ b/drivers/net/ethernet/freescale/fec_main.c

> @@ -976,8 +976,8 @@ fec_restart(struct net_device *ndev)

>  	writel((__force u32)cpu_to_be32(temp_mac[1]),

>  	       fep->hwp + FEC_ADDR_HIGH);

>  

> -	/* Clear any outstanding interrupt, except MDIO. */

> -	writel((0xffffffff & ~FEC_ENET_MII), fep->hwp + FEC_IEVENT);

> +	/* Clear any outstanding interrupt. */

> +	writel(0xffffffff, fep->hwp + FEC_IEVENT);

>  

>  	fec_enet_bd_init(ndev);

>  

> @@ -1123,7 +1123,7 @@ fec_restart(struct net_device *ndev)

>  	if (fep->link)

>  		writel(FEC_DEFAULT_IMASK, fep->hwp + FEC_IMASK);

>  	else

> -		writel(0, fep->hwp + FEC_IMASK);

> +		writel(FEC_ENET_MII, fep->hwp + FEC_IMASK);

>  

>  	/* Init the interrupt coalescing */

>  	fec_enet_itr_coal_init(ndev);

> @@ -1652,10 +1652,6 @@ fec_enet_interrupt(int irq, void *dev_id)

>  	irqreturn_t ret = IRQ_NONE;

>  

>  	int_events = readl(fep->hwp + FEC_IEVENT);

> -

> -	/* Don't clear MDIO events, we poll for those */

> -	int_events &= ~FEC_ENET_MII;

> -

>  	writel(int_events, fep->hwp + FEC_IEVENT);

>  	fec_enet_collect_events(fep, int_events);

>  

> @@ -1663,12 +1659,16 @@ fec_enet_interrupt(int irq, void *dev_id)

>  		ret = IRQ_HANDLED;

>  

>  		if (napi_schedule_prep(&fep->napi)) {

> -			/* Disable interrupts */

> -			writel(0, fep->hwp + FEC_IMASK);

> +			/* Disable the NAPI interrupts */

> +			writel(FEC_NAPI_IMASK, fep->hwp + FEC_IMASK);

>  			__napi_schedule(&fep->napi);

>  		}

>  	}

>  

> +	if (int_events & FEC_ENET_MII) {

> +		ret = IRQ_HANDLED;

> +		complete(&fep->mdio_done);

> +	}

>  	return ret;

>  }

>  

> @@ -1818,24 +1818,11 @@ static void fec_enet_adjust_link(struct net_device *ndev)

>  		phy_print_status(phy_dev);

>  }

>  

> -static int fec_enet_mdio_wait(struct fec_enet_private *fep)

> -{

> -	uint ievent;

> -	int ret;

> -

> -	ret = readl_poll_timeout_atomic(fep->hwp + FEC_IEVENT, ievent,

> -					ievent & FEC_ENET_MII, 2, 30000);

> -

> -	if (!ret)

> -		writel(FEC_ENET_MII, fep->hwp + FEC_IEVENT);

> -

> -	return ret;

> -}

> -

>  static int fec_enet_mdio_read(struct mii_bus *bus, int mii_id, int regnum)

>  {

>  	struct fec_enet_private *fep = bus->priv;

>  	struct device *dev = &fep->pdev->dev;

> +	unsigned long time_left;

>  	int ret = 0, frame_start, frame_addr, frame_op;

>  	bool is_c45 = !!(regnum & MII_ADDR_C45);

>  

> @@ -1843,6 +1830,8 @@ static int fec_enet_mdio_read(struct mii_bus *bus, int mii_id, int regnum)

>  	if (ret < 0)

>  		return ret;

>  

> +	reinit_completion(&fep->mdio_done);

> +

>  	if (is_c45) {

>  		frame_start = FEC_MMFR_ST_C45;

>  

> @@ -1854,9 +1843,11 @@ static int fec_enet_mdio_read(struct mii_bus *bus, int mii_id, int regnum)

>  		       fep->hwp + FEC_MII_DATA);

>  

>  		/* wait for end of transfer */

> -		ret = fec_enet_mdio_wait(fep);

> -		if (ret) {

> +		time_left = wait_for_completion_timeout(&fep->mdio_done,

> +				usecs_to_jiffies(FEC_MII_TIMEOUT));

> +		if (time_left == 0) {

>  			netdev_err(fep->netdev, "MDIO address write timeout\n");

> +			ret = -ETIMEDOUT;

>  			goto out;

>  		}

>  

> @@ -1875,9 +1866,11 @@ static int fec_enet_mdio_read(struct mii_bus *bus, int mii_id, int regnum)

>  		FEC_MMFR_TA, fep->hwp + FEC_MII_DATA);

>  

>  	/* wait for end of transfer */

> -	ret = fec_enet_mdio_wait(fep);

> -	if (ret) {

> +	time_left = wait_for_completion_timeout(&fep->mdio_done,

> +			usecs_to_jiffies(FEC_MII_TIMEOUT));

> +	if (time_left == 0) {

>  		netdev_err(fep->netdev, "MDIO read timeout\n");

> +		ret = -ETIMEDOUT;

>  		goto out;

>  	}

>  

> @@ -1895,6 +1888,7 @@ static int fec_enet_mdio_write(struct mii_bus *bus, int mii_id, int regnum,

>  {

>  	struct fec_enet_private *fep = bus->priv;

>  	struct device *dev = &fep->pdev->dev;

> +	unsigned long time_left;

>  	int ret, frame_start, frame_addr;

>  	bool is_c45 = !!(regnum & MII_ADDR_C45);

>  

> @@ -1904,6 +1898,8 @@ static int fec_enet_mdio_write(struct mii_bus *bus, int mii_id, int regnum,

>  	else

>  		ret = 0;

>  

> +	reinit_completion(&fep->mdio_done);

> +

>  	if (is_c45) {

>  		frame_start = FEC_MMFR_ST_C45;

>  

> @@ -1915,9 +1911,11 @@ static int fec_enet_mdio_write(struct mii_bus *bus, int mii_id, int regnum,

>  		       fep->hwp + FEC_MII_DATA);

>  

>  		/* wait for end of transfer */

> -		ret = fec_enet_mdio_wait(fep);

> -		if (ret) {

> +		time_left = wait_for_completion_timeout(&fep->mdio_done,

> +			usecs_to_jiffies(FEC_MII_TIMEOUT));

> +		if (time_left == 0) {

>  			netdev_err(fep->netdev, "MDIO address write timeout\n");

> +			ret = -ETIMEDOUT;

>  			goto out;

>  		}

>  	} else {

> @@ -1933,9 +1931,12 @@ static int fec_enet_mdio_write(struct mii_bus *bus, int mii_id, int regnum,

>  		fep->hwp + FEC_MII_DATA);

>  

>  	/* wait for end of transfer */

> -	ret = fec_enet_mdio_wait(fep);

> -	if (ret)

> +	time_left = wait_for_completion_timeout(&fep->mdio_done,

> +			usecs_to_jiffies(FEC_MII_TIMEOUT));

> +	if (time_left == 0) {

>  		netdev_err(fep->netdev, "MDIO write timeout\n");

> +		ret  = -ETIMEDOUT;

> +	}

>  

>  out:

>  	pm_runtime_mark_last_busy(dev);

> @@ -2144,9 +2145,6 @@ static int fec_enet_mii_init(struct platform_device *pdev)

>  

>  	writel(fep->phy_speed, fep->hwp + FEC_MII_SPEED);

>  

> -	/* Clear any pending transaction complete indication */

> -	writel(FEC_ENET_MII, fep->hwp + FEC_IEVENT);

> -

>  	fep->mii_bus = mdiobus_alloc();

>  	if (fep->mii_bus == NULL) {

>  		err = -ENOMEM;

> @@ -3688,6 +3686,7 @@ fec_probe(struct platform_device *pdev)

>  		fep->irq[i] = irq;

>  	}

>  

> +	init_completion(&fep->mdio_done);

>  	ret = fec_enet_mii_init(pdev);

>  	if (ret)

>  		goto failed_mii_init;


-- 
Regards,

Laurent Pinchart
Andrew Lunn July 27, 2020, 1:38 a.m. UTC | #2
On Mon, Jul 27, 2020 at 04:23:54AM +0300, Laurent Pinchart wrote:
> Hi Fugang,

> 

> On Mon, Apr 27, 2020 at 10:08:04PM +0800, Fugang Duan wrote:

> > This reverts commit 29ae6bd1b0d8a57d7c00ab12cbb949fc41986eef.

> > 

> > The commit breaks ethernet function on i.MX6SX, i.MX7D, i.MX8MM,

> > i.MX8MQ, and i.MX8QXP platforms. Boot yocto system by NFS mounting

> > rootfs will be failed with the commit.

> 

> I'm afraid this commit breaks networking on i.MX7D for me :-( My board

> is configured to boot over NFS root with IP autoconfiguration through

> DHCP. The DHCP request goes out, the reply it sent back by the server,

> but never noticed by the fec driver.

> 

> v5.7 works fine. As 29ae6bd1b0d8a57d7c00ab12cbb949fc41986eef was merged

> during the v5.8 merge window, I suspect something else cropped in

> between 29ae6bd1b0d8a57d7c00ab12cbb949fc41986eef and this patch that

> needs to be reverted too. We're close to v5.8 and it would be annoying

> to see this regression ending up in the released kernel. I can test

> patches, but I'm not familiar enough with the driver (or the networking

> subsystem) to fix the issue myself.


Hi Laurent

We had a few reverts and reverts of reverts etc. But in the end it
seemed to work fine for a range of boards/SoCs.

What exactly are you testing here? v5.8-rc7?

Thanks
	Andrew
Laurent Pinchart July 27, 2020, 2:06 a.m. UTC | #3
On Mon, Jul 27, 2020 at 04:24:02AM +0300, Laurent Pinchart wrote:
> On Mon, Apr 27, 2020 at 10:08:04PM +0800, Fugang Duan wrote:

> > This reverts commit 29ae6bd1b0d8a57d7c00ab12cbb949fc41986eef.

> > 

> > The commit breaks ethernet function on i.MX6SX, i.MX7D, i.MX8MM,

> > i.MX8MQ, and i.MX8QXP platforms. Boot yocto system by NFS mounting

> > rootfs will be failed with the commit.

> 

> I'm afraid this commit breaks networking on i.MX7D for me :-( My board

> is configured to boot over NFS root with IP autoconfiguration through

> DHCP. The DHCP request goes out, the reply it sent back by the server,

> but never noticed by the fec driver.

> 

> v5.7 works fine. As 29ae6bd1b0d8a57d7c00ab12cbb949fc41986eef was merged

> during the v5.8 merge window, I suspect something else cropped in

> between 29ae6bd1b0d8a57d7c00ab12cbb949fc41986eef and this patch that

> needs to be reverted too. We're close to v5.8 and it would be annoying

> to see this regression ending up in the released kernel. I can test

> patches, but I'm not familiar enough with the driver (or the networking

> subsystem) to fix the issue myself.


If it can be of any help, I've confirmed that, to get the network back
to usable state from v5.8-rc6, I have to revert all patches up to this
one. This is the top of my branch, on top of v5.8-rc6:

5bbe80c9efea Revert "net: ethernet: fec: Revert "net: ethernet: fec: Replace interrupt driven MDIO with polled IO""
5462896a08c1 Revert "net: ethernet: fec: Replace interrupt driven MDIO with polled IO"
824a82e2bdfa Revert "net: ethernet: fec: move GPR register offset and bit into DT"
bfe330591cab Revert "net: fec: disable correct clk in the err path of fec_enet_clk_enable"
109958cad578 Revert "net: ethernet: fec: prevent tx starvation under high rx load"

> > Signed-off-by: Fugang Duan <fugang.duan@nxp.com>

> > 

> > diff --git a/drivers/net/ethernet/freescale/fec.h b/drivers/net/ethernet/freescale/fec.h

> > index a6cdd5b..e74dd1f 100644

> > --- a/drivers/net/ethernet/freescale/fec.h

> > +++ b/drivers/net/ethernet/freescale/fec.h

> > @@ -376,7 +376,8 @@ struct bufdesc_ex {

> >  #define FEC_ENET_TS_AVAIL       ((uint)0x00010000)

> >  #define FEC_ENET_TS_TIMER       ((uint)0x00008000)

> >  

> > -#define FEC_DEFAULT_IMASK (FEC_ENET_TXF | FEC_ENET_RXF)

> > +#define FEC_DEFAULT_IMASK (FEC_ENET_TXF | FEC_ENET_RXF | FEC_ENET_MII)

> > +#define FEC_NAPI_IMASK	FEC_ENET_MII

> >  #define FEC_RX_DISABLED_IMASK (FEC_DEFAULT_IMASK & (~FEC_ENET_RXF))

> >  

> >  /* ENET interrupt coalescing macro define */

> > @@ -542,6 +543,7 @@ struct fec_enet_private {

> >  	int	link;

> >  	int	full_duplex;

> >  	int	speed;

> > +	struct	completion mdio_done;

> >  	int	irq[FEC_IRQ_NUM];

> >  	bool	bufdesc_ex;

> >  	int	pause_flag;

> > diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c

> > index 1ae075a..c7b84bb 100644

> > --- a/drivers/net/ethernet/freescale/fec_main.c

> > +++ b/drivers/net/ethernet/freescale/fec_main.c

> > @@ -976,8 +976,8 @@ fec_restart(struct net_device *ndev)

> >  	writel((__force u32)cpu_to_be32(temp_mac[1]),

> >  	       fep->hwp + FEC_ADDR_HIGH);

> >  

> > -	/* Clear any outstanding interrupt, except MDIO. */

> > -	writel((0xffffffff & ~FEC_ENET_MII), fep->hwp + FEC_IEVENT);

> > +	/* Clear any outstanding interrupt. */

> > +	writel(0xffffffff, fep->hwp + FEC_IEVENT);

> >  

> >  	fec_enet_bd_init(ndev);

> >  

> > @@ -1123,7 +1123,7 @@ fec_restart(struct net_device *ndev)

> >  	if (fep->link)

> >  		writel(FEC_DEFAULT_IMASK, fep->hwp + FEC_IMASK);

> >  	else

> > -		writel(0, fep->hwp + FEC_IMASK);

> > +		writel(FEC_ENET_MII, fep->hwp + FEC_IMASK);

> >  

> >  	/* Init the interrupt coalescing */

> >  	fec_enet_itr_coal_init(ndev);

> > @@ -1652,10 +1652,6 @@ fec_enet_interrupt(int irq, void *dev_id)

> >  	irqreturn_t ret = IRQ_NONE;

> >  

> >  	int_events = readl(fep->hwp + FEC_IEVENT);

> > -

> > -	/* Don't clear MDIO events, we poll for those */

> > -	int_events &= ~FEC_ENET_MII;

> > -

> >  	writel(int_events, fep->hwp + FEC_IEVENT);

> >  	fec_enet_collect_events(fep, int_events);

> >  

> > @@ -1663,12 +1659,16 @@ fec_enet_interrupt(int irq, void *dev_id)

> >  		ret = IRQ_HANDLED;

> >  

> >  		if (napi_schedule_prep(&fep->napi)) {

> > -			/* Disable interrupts */

> > -			writel(0, fep->hwp + FEC_IMASK);

> > +			/* Disable the NAPI interrupts */

> > +			writel(FEC_NAPI_IMASK, fep->hwp + FEC_IMASK);

> >  			__napi_schedule(&fep->napi);

> >  		}

> >  	}

> >  

> > +	if (int_events & FEC_ENET_MII) {

> > +		ret = IRQ_HANDLED;

> > +		complete(&fep->mdio_done);

> > +	}

> >  	return ret;

> >  }

> >  

> > @@ -1818,24 +1818,11 @@ static void fec_enet_adjust_link(struct net_device *ndev)

> >  		phy_print_status(phy_dev);

> >  }

> >  

> > -static int fec_enet_mdio_wait(struct fec_enet_private *fep)

> > -{

> > -	uint ievent;

> > -	int ret;

> > -

> > -	ret = readl_poll_timeout_atomic(fep->hwp + FEC_IEVENT, ievent,

> > -					ievent & FEC_ENET_MII, 2, 30000);

> > -

> > -	if (!ret)

> > -		writel(FEC_ENET_MII, fep->hwp + FEC_IEVENT);

> > -

> > -	return ret;

> > -}

> > -

> >  static int fec_enet_mdio_read(struct mii_bus *bus, int mii_id, int regnum)

> >  {

> >  	struct fec_enet_private *fep = bus->priv;

> >  	struct device *dev = &fep->pdev->dev;

> > +	unsigned long time_left;

> >  	int ret = 0, frame_start, frame_addr, frame_op;

> >  	bool is_c45 = !!(regnum & MII_ADDR_C45);

> >  

> > @@ -1843,6 +1830,8 @@ static int fec_enet_mdio_read(struct mii_bus *bus, int mii_id, int regnum)

> >  	if (ret < 0)

> >  		return ret;

> >  

> > +	reinit_completion(&fep->mdio_done);

> > +

> >  	if (is_c45) {

> >  		frame_start = FEC_MMFR_ST_C45;

> >  

> > @@ -1854,9 +1843,11 @@ static int fec_enet_mdio_read(struct mii_bus *bus, int mii_id, int regnum)

> >  		       fep->hwp + FEC_MII_DATA);

> >  

> >  		/* wait for end of transfer */

> > -		ret = fec_enet_mdio_wait(fep);

> > -		if (ret) {

> > +		time_left = wait_for_completion_timeout(&fep->mdio_done,

> > +				usecs_to_jiffies(FEC_MII_TIMEOUT));

> > +		if (time_left == 0) {

> >  			netdev_err(fep->netdev, "MDIO address write timeout\n");

> > +			ret = -ETIMEDOUT;

> >  			goto out;

> >  		}

> >  

> > @@ -1875,9 +1866,11 @@ static int fec_enet_mdio_read(struct mii_bus *bus, int mii_id, int regnum)

> >  		FEC_MMFR_TA, fep->hwp + FEC_MII_DATA);

> >  

> >  	/* wait for end of transfer */

> > -	ret = fec_enet_mdio_wait(fep);

> > -	if (ret) {

> > +	time_left = wait_for_completion_timeout(&fep->mdio_done,

> > +			usecs_to_jiffies(FEC_MII_TIMEOUT));

> > +	if (time_left == 0) {

> >  		netdev_err(fep->netdev, "MDIO read timeout\n");

> > +		ret = -ETIMEDOUT;

> >  		goto out;

> >  	}

> >  

> > @@ -1895,6 +1888,7 @@ static int fec_enet_mdio_write(struct mii_bus *bus, int mii_id, int regnum,

> >  {

> >  	struct fec_enet_private *fep = bus->priv;

> >  	struct device *dev = &fep->pdev->dev;

> > +	unsigned long time_left;

> >  	int ret, frame_start, frame_addr;

> >  	bool is_c45 = !!(regnum & MII_ADDR_C45);

> >  

> > @@ -1904,6 +1898,8 @@ static int fec_enet_mdio_write(struct mii_bus *bus, int mii_id, int regnum,

> >  	else

> >  		ret = 0;

> >  

> > +	reinit_completion(&fep->mdio_done);

> > +

> >  	if (is_c45) {

> >  		frame_start = FEC_MMFR_ST_C45;

> >  

> > @@ -1915,9 +1911,11 @@ static int fec_enet_mdio_write(struct mii_bus *bus, int mii_id, int regnum,

> >  		       fep->hwp + FEC_MII_DATA);

> >  

> >  		/* wait for end of transfer */

> > -		ret = fec_enet_mdio_wait(fep);

> > -		if (ret) {

> > +		time_left = wait_for_completion_timeout(&fep->mdio_done,

> > +			usecs_to_jiffies(FEC_MII_TIMEOUT));

> > +		if (time_left == 0) {

> >  			netdev_err(fep->netdev, "MDIO address write timeout\n");

> > +			ret = -ETIMEDOUT;

> >  			goto out;

> >  		}

> >  	} else {

> > @@ -1933,9 +1931,12 @@ static int fec_enet_mdio_write(struct mii_bus *bus, int mii_id, int regnum,

> >  		fep->hwp + FEC_MII_DATA);

> >  

> >  	/* wait for end of transfer */

> > -	ret = fec_enet_mdio_wait(fep);

> > -	if (ret)

> > +	time_left = wait_for_completion_timeout(&fep->mdio_done,

> > +			usecs_to_jiffies(FEC_MII_TIMEOUT));

> > +	if (time_left == 0) {

> >  		netdev_err(fep->netdev, "MDIO write timeout\n");

> > +		ret  = -ETIMEDOUT;

> > +	}

> >  

> >  out:

> >  	pm_runtime_mark_last_busy(dev);

> > @@ -2144,9 +2145,6 @@ static int fec_enet_mii_init(struct platform_device *pdev)

> >  

> >  	writel(fep->phy_speed, fep->hwp + FEC_MII_SPEED);

> >  

> > -	/* Clear any pending transaction complete indication */

> > -	writel(FEC_ENET_MII, fep->hwp + FEC_IEVENT);

> > -

> >  	fep->mii_bus = mdiobus_alloc();

> >  	if (fep->mii_bus == NULL) {

> >  		err = -ENOMEM;

> > @@ -3688,6 +3686,7 @@ fec_probe(struct platform_device *pdev)

> >  		fep->irq[i] = irq;

> >  	}

> >  

> > +	init_completion(&fep->mdio_done);

> >  	ret = fec_enet_mii_init(pdev);

> >  	if (ret)

> >  		goto failed_mii_init;


-- 
Regards,

Laurent Pinchart
Chris Healy July 27, 2020, 2:13 a.m. UTC | #4
On Sun, Jul 26, 2020 at 7:06 PM Laurent Pinchart
<laurent.pinchart@ideasonboard.com> wrote:
>

> On Mon, Jul 27, 2020 at 04:24:02AM +0300, Laurent Pinchart wrote:

> > On Mon, Apr 27, 2020 at 10:08:04PM +0800, Fugang Duan wrote:

> > > This reverts commit 29ae6bd1b0d8a57d7c00ab12cbb949fc41986eef.

> > >

> > > The commit breaks ethernet function on i.MX6SX, i.MX7D, i.MX8MM,

> > > i.MX8MQ, and i.MX8QXP platforms. Boot yocto system by NFS mounting

> > > rootfs will be failed with the commit.

> >

> > I'm afraid this commit breaks networking on i.MX7D for me :-( My board

> > is configured to boot over NFS root with IP autoconfiguration through

> > DHCP. The DHCP request goes out, the reply it sent back by the server,

> > but never noticed by the fec driver.

> >

> > v5.7 works fine. As 29ae6bd1b0d8a57d7c00ab12cbb949fc41986eef was merged

> > during the v5.8 merge window, I suspect something else cropped in

> > between 29ae6bd1b0d8a57d7c00ab12cbb949fc41986eef and this patch that

> > needs to be reverted too. We're close to v5.8 and it would be annoying

> > to see this regression ending up in the released kernel. I can test

> > patches, but I'm not familiar enough with the driver (or the networking

> > subsystem) to fix the issue myself.

>

> If it can be of any help, I've confirmed that, to get the network back

> to usable state from v5.8-rc6, I have to revert all patches up to this

> one. This is the top of my branch, on top of v5.8-rc6:

>

> 5bbe80c9efea Revert "net: ethernet: fec: Revert "net: ethernet: fec: Replace interrupt driven MDIO with polled IO""

> 5462896a08c1 Revert "net: ethernet: fec: Replace interrupt driven MDIO with polled IO"

> 824a82e2bdfa Revert "net: ethernet: fec: move GPR register offset and bit into DT"

> bfe330591cab Revert "net: fec: disable correct clk in the err path of fec_enet_clk_enable"

> 109958cad578 Revert "net: ethernet: fec: prevent tx starvation under high rx load"


I just fired up net-next on my i.MX7d based design (not NFS root
though).  I can bring up the network interface with a gigabit
connection but ALL RX traffic is failing with CRC errors.  Now, my
design is using a Micrel KSZ9031 which might be part of the problem
for me as there were some recent KSZ9031 changes made so take what I'm
seeing with a grain of salt.

Laurent, couple questions:

1) Are you able to boot without NFS root and communicate correctly or
is this issue just when doing an NFS root?
2) If you are able to boot up without NFS root, can you check the
ethtool statistics and see the same RX CRC errors I'm seeing?


>

> > > Signed-off-by: Fugang Duan <fugang.duan@nxp.com>

> > >

> > > diff --git a/drivers/net/ethernet/freescale/fec.h b/drivers/net/ethernet/freescale/fec.h

> > > index a6cdd5b..e74dd1f 100644

> > > --- a/drivers/net/ethernet/freescale/fec.h

> > > +++ b/drivers/net/ethernet/freescale/fec.h

> > > @@ -376,7 +376,8 @@ struct bufdesc_ex {

> > >  #define FEC_ENET_TS_AVAIL       ((uint)0x00010000)

> > >  #define FEC_ENET_TS_TIMER       ((uint)0x00008000)

> > >

> > > -#define FEC_DEFAULT_IMASK (FEC_ENET_TXF | FEC_ENET_RXF)

> > > +#define FEC_DEFAULT_IMASK (FEC_ENET_TXF | FEC_ENET_RXF | FEC_ENET_MII)

> > > +#define FEC_NAPI_IMASK     FEC_ENET_MII

> > >  #define FEC_RX_DISABLED_IMASK (FEC_DEFAULT_IMASK & (~FEC_ENET_RXF))

> > >

> > >  /* ENET interrupt coalescing macro define */

> > > @@ -542,6 +543,7 @@ struct fec_enet_private {

> > >     int     link;

> > >     int     full_duplex;

> > >     int     speed;

> > > +   struct  completion mdio_done;

> > >     int     irq[FEC_IRQ_NUM];

> > >     bool    bufdesc_ex;

> > >     int     pause_flag;

> > > diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c

> > > index 1ae075a..c7b84bb 100644

> > > --- a/drivers/net/ethernet/freescale/fec_main.c

> > > +++ b/drivers/net/ethernet/freescale/fec_main.c

> > > @@ -976,8 +976,8 @@ fec_restart(struct net_device *ndev)

> > >     writel((__force u32)cpu_to_be32(temp_mac[1]),

> > >            fep->hwp + FEC_ADDR_HIGH);

> > >

> > > -   /* Clear any outstanding interrupt, except MDIO. */

> > > -   writel((0xffffffff & ~FEC_ENET_MII), fep->hwp + FEC_IEVENT);

> > > +   /* Clear any outstanding interrupt. */

> > > +   writel(0xffffffff, fep->hwp + FEC_IEVENT);

> > >

> > >     fec_enet_bd_init(ndev);

> > >

> > > @@ -1123,7 +1123,7 @@ fec_restart(struct net_device *ndev)

> > >     if (fep->link)

> > >             writel(FEC_DEFAULT_IMASK, fep->hwp + FEC_IMASK);

> > >     else

> > > -           writel(0, fep->hwp + FEC_IMASK);

> > > +           writel(FEC_ENET_MII, fep->hwp + FEC_IMASK);

> > >

> > >     /* Init the interrupt coalescing */

> > >     fec_enet_itr_coal_init(ndev);

> > > @@ -1652,10 +1652,6 @@ fec_enet_interrupt(int irq, void *dev_id)

> > >     irqreturn_t ret = IRQ_NONE;

> > >

> > >     int_events = readl(fep->hwp + FEC_IEVENT);

> > > -

> > > -   /* Don't clear MDIO events, we poll for those */

> > > -   int_events &= ~FEC_ENET_MII;

> > > -

> > >     writel(int_events, fep->hwp + FEC_IEVENT);

> > >     fec_enet_collect_events(fep, int_events);

> > >

> > > @@ -1663,12 +1659,16 @@ fec_enet_interrupt(int irq, void *dev_id)

> > >             ret = IRQ_HANDLED;

> > >

> > >             if (napi_schedule_prep(&fep->napi)) {

> > > -                   /* Disable interrupts */

> > > -                   writel(0, fep->hwp + FEC_IMASK);

> > > +                   /* Disable the NAPI interrupts */

> > > +                   writel(FEC_NAPI_IMASK, fep->hwp + FEC_IMASK);

> > >                     __napi_schedule(&fep->napi);

> > >             }

> > >     }

> > >

> > > +   if (int_events & FEC_ENET_MII) {

> > > +           ret = IRQ_HANDLED;

> > > +           complete(&fep->mdio_done);

> > > +   }

> > >     return ret;

> > >  }

> > >

> > > @@ -1818,24 +1818,11 @@ static void fec_enet_adjust_link(struct net_device *ndev)

> > >             phy_print_status(phy_dev);

> > >  }

> > >

> > > -static int fec_enet_mdio_wait(struct fec_enet_private *fep)

> > > -{

> > > -   uint ievent;

> > > -   int ret;

> > > -

> > > -   ret = readl_poll_timeout_atomic(fep->hwp + FEC_IEVENT, ievent,

> > > -                                   ievent & FEC_ENET_MII, 2, 30000);

> > > -

> > > -   if (!ret)

> > > -           writel(FEC_ENET_MII, fep->hwp + FEC_IEVENT);

> > > -

> > > -   return ret;

> > > -}

> > > -

> > >  static int fec_enet_mdio_read(struct mii_bus *bus, int mii_id, int regnum)

> > >  {

> > >     struct fec_enet_private *fep = bus->priv;

> > >     struct device *dev = &fep->pdev->dev;

> > > +   unsigned long time_left;

> > >     int ret = 0, frame_start, frame_addr, frame_op;

> > >     bool is_c45 = !!(regnum & MII_ADDR_C45);

> > >

> > > @@ -1843,6 +1830,8 @@ static int fec_enet_mdio_read(struct mii_bus *bus, int mii_id, int regnum)

> > >     if (ret < 0)

> > >             return ret;

> > >

> > > +   reinit_completion(&fep->mdio_done);

> > > +

> > >     if (is_c45) {

> > >             frame_start = FEC_MMFR_ST_C45;

> > >

> > > @@ -1854,9 +1843,11 @@ static int fec_enet_mdio_read(struct mii_bus *bus, int mii_id, int regnum)

> > >                    fep->hwp + FEC_MII_DATA);

> > >

> > >             /* wait for end of transfer */

> > > -           ret = fec_enet_mdio_wait(fep);

> > > -           if (ret) {

> > > +           time_left = wait_for_completion_timeout(&fep->mdio_done,

> > > +                           usecs_to_jiffies(FEC_MII_TIMEOUT));

> > > +           if (time_left == 0) {

> > >                     netdev_err(fep->netdev, "MDIO address write timeout\n");

> > > +                   ret = -ETIMEDOUT;

> > >                     goto out;

> > >             }

> > >

> > > @@ -1875,9 +1866,11 @@ static int fec_enet_mdio_read(struct mii_bus *bus, int mii_id, int regnum)

> > >             FEC_MMFR_TA, fep->hwp + FEC_MII_DATA);

> > >

> > >     /* wait for end of transfer */

> > > -   ret = fec_enet_mdio_wait(fep);

> > > -   if (ret) {

> > > +   time_left = wait_for_completion_timeout(&fep->mdio_done,

> > > +                   usecs_to_jiffies(FEC_MII_TIMEOUT));

> > > +   if (time_left == 0) {

> > >             netdev_err(fep->netdev, "MDIO read timeout\n");

> > > +           ret = -ETIMEDOUT;

> > >             goto out;

> > >     }

> > >

> > > @@ -1895,6 +1888,7 @@ static int fec_enet_mdio_write(struct mii_bus *bus, int mii_id, int regnum,

> > >  {

> > >     struct fec_enet_private *fep = bus->priv;

> > >     struct device *dev = &fep->pdev->dev;

> > > +   unsigned long time_left;

> > >     int ret, frame_start, frame_addr;

> > >     bool is_c45 = !!(regnum & MII_ADDR_C45);

> > >

> > > @@ -1904,6 +1898,8 @@ static int fec_enet_mdio_write(struct mii_bus *bus, int mii_id, int regnum,

> > >     else

> > >             ret = 0;

> > >

> > > +   reinit_completion(&fep->mdio_done);

> > > +

> > >     if (is_c45) {

> > >             frame_start = FEC_MMFR_ST_C45;

> > >

> > > @@ -1915,9 +1911,11 @@ static int fec_enet_mdio_write(struct mii_bus *bus, int mii_id, int regnum,

> > >                    fep->hwp + FEC_MII_DATA);

> > >

> > >             /* wait for end of transfer */

> > > -           ret = fec_enet_mdio_wait(fep);

> > > -           if (ret) {

> > > +           time_left = wait_for_completion_timeout(&fep->mdio_done,

> > > +                   usecs_to_jiffies(FEC_MII_TIMEOUT));

> > > +           if (time_left == 0) {

> > >                     netdev_err(fep->netdev, "MDIO address write timeout\n");

> > > +                   ret = -ETIMEDOUT;

> > >                     goto out;

> > >             }

> > >     } else {

> > > @@ -1933,9 +1931,12 @@ static int fec_enet_mdio_write(struct mii_bus *bus, int mii_id, int regnum,

> > >             fep->hwp + FEC_MII_DATA);

> > >

> > >     /* wait for end of transfer */

> > > -   ret = fec_enet_mdio_wait(fep);

> > > -   if (ret)

> > > +   time_left = wait_for_completion_timeout(&fep->mdio_done,

> > > +                   usecs_to_jiffies(FEC_MII_TIMEOUT));

> > > +   if (time_left == 0) {

> > >             netdev_err(fep->netdev, "MDIO write timeout\n");

> > > +           ret  = -ETIMEDOUT;

> > > +   }

> > >

> > >  out:

> > >     pm_runtime_mark_last_busy(dev);

> > > @@ -2144,9 +2145,6 @@ static int fec_enet_mii_init(struct platform_device *pdev)

> > >

> > >     writel(fep->phy_speed, fep->hwp + FEC_MII_SPEED);

> > >

> > > -   /* Clear any pending transaction complete indication */

> > > -   writel(FEC_ENET_MII, fep->hwp + FEC_IEVENT);

> > > -

> > >     fep->mii_bus = mdiobus_alloc();

> > >     if (fep->mii_bus == NULL) {

> > >             err = -ENOMEM;

> > > @@ -3688,6 +3686,7 @@ fec_probe(struct platform_device *pdev)

> > >             fep->irq[i] = irq;

> > >     }

> > >

> > > +   init_completion(&fep->mdio_done);

> > >     ret = fec_enet_mii_init(pdev);

> > >     if (ret)

> > >             goto failed_mii_init;

>

> --

> Regards,

>

> Laurent Pinchart
Andrew Lunn July 27, 2020, 2:14 a.m. UTC | #5
On Mon, Jul 27, 2020 at 05:06:31AM +0300, Laurent Pinchart wrote:
> On Mon, Jul 27, 2020 at 04:24:02AM +0300, Laurent Pinchart wrote:

> > On Mon, Apr 27, 2020 at 10:08:04PM +0800, Fugang Duan wrote:

> > > This reverts commit 29ae6bd1b0d8a57d7c00ab12cbb949fc41986eef.

> > > 

> > > The commit breaks ethernet function on i.MX6SX, i.MX7D, i.MX8MM,

> > > i.MX8MQ, and i.MX8QXP platforms. Boot yocto system by NFS mounting

> > > rootfs will be failed with the commit.

> > 

> > I'm afraid this commit breaks networking on i.MX7D for me :-( My board

> > is configured to boot over NFS root with IP autoconfiguration through

> > DHCP. The DHCP request goes out, the reply it sent back by the server,

> > but never noticed by the fec driver.

> > 

> > v5.7 works fine. As 29ae6bd1b0d8a57d7c00ab12cbb949fc41986eef was merged

> > during the v5.8 merge window, I suspect something else cropped in

> > between 29ae6bd1b0d8a57d7c00ab12cbb949fc41986eef and this patch that

> > needs to be reverted too. We're close to v5.8 and it would be annoying

> > to see this regression ending up in the released kernel. I can test

> > patches, but I'm not familiar enough with the driver (or the networking

> > subsystem) to fix the issue myself.

> 

> If it can be of any help, I've confirmed that, to get the network back

> to usable state from v5.8-rc6, I have to revert all patches up to this

> one. This is the top of my branch, on top of v5.8-rc6:

> 

> 5bbe80c9efea Revert "net: ethernet: fec: Revert "net: ethernet: fec: Replace interrupt driven MDIO with polled IO""

> 5462896a08c1 Revert "net: ethernet: fec: Replace interrupt driven MDIO with polled IO"

> 824a82e2bdfa Revert "net: ethernet: fec: move GPR register offset and bit into DT"

> bfe330591cab Revert "net: fec: disable correct clk in the err path of fec_enet_clk_enable"

> 109958cad578 Revert "net: ethernet: fec: prevent tx starvation under high rx load"


OK.

What PHY are you using? A Micrel?

And which DT file?

Thanks
	Andrew
Andy Duan July 27, 2020, 2:21 a.m. UTC | #6
From: Chris Healy <cphealy@gmail.com> Sent: Monday, July 27, 2020 10:13 AM

> On Sun, Jul 26, 2020 at 7:06 PM Laurent Pinchart

> <laurent.pinchart@ideasonboard.com> wrote:

> >

> > On Mon, Jul 27, 2020 at 04:24:02AM +0300, Laurent Pinchart wrote:

> > > On Mon, Apr 27, 2020 at 10:08:04PM +0800, Fugang Duan wrote:

> > > > This reverts commit 29ae6bd1b0d8a57d7c00ab12cbb949fc41986eef.

> > > >

> > > > The commit breaks ethernet function on i.MX6SX, i.MX7D, i.MX8MM,

> > > > i.MX8MQ, and i.MX8QXP platforms. Boot yocto system by NFS mounting

> > > > rootfs will be failed with the commit.

> > >

> > > I'm afraid this commit breaks networking on i.MX7D for me :-( My

> > > board is configured to boot over NFS root with IP autoconfiguration

> > > through DHCP. The DHCP request goes out, the reply is sent back by

> > > the server, but never noticed by the fec driver.

> > >

> > > v5.7 works fine. As 29ae6bd1b0d8a57d7c00ab12cbb949fc41986eef was

> > > merged during the v5.8 merge window, I suspect something else

> > > > cropped in between 29ae6bd1b0d8a57d7c00ab12cbb949fc41986eef and this

> > > > patch that needs to be reverted too. We're close to v5.8 and it

> > > would be annoying to see this regression ending up in the released

> > > kernel. I can test patches, but I'm not familiar enough with the

> > > driver (or the networking

> > > subsystem) to fix the issue myself.

> >

> > If it can be of any help, I've confirmed that, to get the network back

> > to usable state from v5.8-rc6, I have to revert all patches up to this

> > one. This is the top of my branch, on top of v5.8-rc6:

> >

> > 5bbe80c9efea Revert "net: ethernet: fec: Revert "net: ethernet: fec: Replace interrupt driven MDIO with polled IO""

> > 5462896a08c1 Revert "net: ethernet: fec: Replace interrupt driven MDIO with polled IO"

> > 824a82e2bdfa Revert "net: ethernet: fec: move GPR register offset and bit into DT"

> > bfe330591cab Revert "net: fec: disable correct clk in the err path of fec_enet_clk_enable"

> > 109958cad578 Revert "net: ethernet: fec: prevent tx starvation under high rx load"

> 

> I just fired up net-next on my i.MX7d based design (not NFS root though).  I can

> bring up the network interface with a gigabit connection but ALL RX traffic is

> failing with CRC errors.  Now, my design is using a Micrel KSZ9031 which might

> be part of the problem for me as there were some recent KSZ9031 changes

> made so take what I'm seeing with a grain of salt.

> 

> Laurent, couple questions:

> 

> 1) Are you able to boot without NFS root and communicate correctly or is this

> issue just when doing an NFS root?

> 2) If you are able to boot up without NFS root, can you check the ethtool

> statistics and see the same RX CRC errors I'm seeing?

> 

> 

Laurent, Chris, 

I am testing the imx7d sdb board with the BCM54220 switch PHY on net-next now,
and will update with my test results ASAP.

Regards,
Fugang
> >

> > > > Signed-off-by: Fugang Duan <fugang.duan@nxp.com>

> > > >

> > > > diff --git a/drivers/net/ethernet/freescale/fec.h

> > > > b/drivers/net/ethernet/freescale/fec.h

> > > > index a6cdd5b..e74dd1f 100644

> > > > --- a/drivers/net/ethernet/freescale/fec.h

> > > > +++ b/drivers/net/ethernet/freescale/fec.h

> > > > @@ -376,7 +376,8 @@ struct bufdesc_ex {

> > > >  #define FEC_ENET_TS_AVAIL       ((uint)0x00010000)

> > > >  #define FEC_ENET_TS_TIMER       ((uint)0x00008000)

> > > >

> > > > -#define FEC_DEFAULT_IMASK (FEC_ENET_TXF | FEC_ENET_RXF)

> > > > +#define FEC_DEFAULT_IMASK (FEC_ENET_TXF | FEC_ENET_RXF |

> FEC_ENET_MII)

> > > > +#define FEC_NAPI_IMASK     FEC_ENET_MII

> > > >  #define FEC_RX_DISABLED_IMASK (FEC_DEFAULT_IMASK &

> > > > (~FEC_ENET_RXF))

> > > >

> > > >  /* ENET interrupt coalescing macro define */ @@ -542,6 +543,7 @@

> > > > struct fec_enet_private {

> > > >     int     link;

> > > >     int     full_duplex;

> > > >     int     speed;

> > > > +   struct  completion mdio_done;

> > > >     int     irq[FEC_IRQ_NUM];

> > > >     bool    bufdesc_ex;

> > > >     int     pause_flag;

> > > > diff --git a/drivers/net/ethernet/freescale/fec_main.c

> > > > b/drivers/net/ethernet/freescale/fec_main.c

> > > > index 1ae075a..c7b84bb 100644

> > > > --- a/drivers/net/ethernet/freescale/fec_main.c

> > > > +++ b/drivers/net/ethernet/freescale/fec_main.c

> > > > @@ -976,8 +976,8 @@ fec_restart(struct net_device *ndev)

> > > >     writel((__force u32)cpu_to_be32(temp_mac[1]),

> > > >            fep->hwp + FEC_ADDR_HIGH);

> > > >

> > > > -   /* Clear any outstanding interrupt, except MDIO. */

> > > > -   writel((0xffffffff & ~FEC_ENET_MII), fep->hwp + FEC_IEVENT);

> > > > +   /* Clear any outstanding interrupt. */

> > > > +   writel(0xffffffff, fep->hwp + FEC_IEVENT);

> > > >

> > > >     fec_enet_bd_init(ndev);

> > > >

> > > > @@ -1123,7 +1123,7 @@ fec_restart(struct net_device *ndev)

> > > >     if (fep->link)

> > > >             writel(FEC_DEFAULT_IMASK, fep->hwp + FEC_IMASK);

> > > >     else

> > > > -           writel(0, fep->hwp + FEC_IMASK);

> > > > +           writel(FEC_ENET_MII, fep->hwp + FEC_IMASK);

> > > >

> > > >     /* Init the interrupt coalescing */

> > > >     fec_enet_itr_coal_init(ndev);

> > > > @@ -1652,10 +1652,6 @@ fec_enet_interrupt(int irq, void *dev_id)

> > > >     irqreturn_t ret = IRQ_NONE;

> > > >

> > > >     int_events = readl(fep->hwp + FEC_IEVENT);

> > > > -

> > > > -   /* Don't clear MDIO events, we poll for those */

> > > > -   int_events &= ~FEC_ENET_MII;

> > > > -

> > > >     writel(int_events, fep->hwp + FEC_IEVENT);

> > > >     fec_enet_collect_events(fep, int_events);

> > > >

> > > > @@ -1663,12 +1659,16 @@ fec_enet_interrupt(int irq, void *dev_id)

> > > >             ret = IRQ_HANDLED;

> > > >

> > > >             if (napi_schedule_prep(&fep->napi)) {

> > > > -                   /* Disable interrupts */

> > > > -                   writel(0, fep->hwp + FEC_IMASK);

> > > > +                   /* Disable the NAPI interrupts */

> > > > +                   writel(FEC_NAPI_IMASK, fep->hwp +

> FEC_IMASK);

> > > >                     __napi_schedule(&fep->napi);

> > > >             }

> > > >     }

> > > >

> > > > +   if (int_events & FEC_ENET_MII) {

> > > > +           ret = IRQ_HANDLED;

> > > > +           complete(&fep->mdio_done);

> > > > +   }

> > > >     return ret;

> > > >  }

> > > >

> > > > @@ -1818,24 +1818,11 @@ static void fec_enet_adjust_link(struct

> net_device *ndev)

> > > >             phy_print_status(phy_dev);  }

> > > >

> > > > -static int fec_enet_mdio_wait(struct fec_enet_private *fep) -{

> > > > -   uint ievent;

> > > > -   int ret;

> > > > -

> > > > -   ret = readl_poll_timeout_atomic(fep->hwp + FEC_IEVENT, ievent,

> > > > -                                   ievent & FEC_ENET_MII, 2,

> 30000);

> > > > -

> > > > -   if (!ret)

> > > > -           writel(FEC_ENET_MII, fep->hwp + FEC_IEVENT);

> > > > -

> > > > -   return ret;

> > > > -}

> > > > -

> > > >  static int fec_enet_mdio_read(struct mii_bus *bus, int mii_id,

> > > > int regnum)  {

> > > >     struct fec_enet_private *fep = bus->priv;

> > > >     struct device *dev = &fep->pdev->dev;

> > > > +   unsigned long time_left;

> > > >     int ret = 0, frame_start, frame_addr, frame_op;

> > > >     bool is_c45 = !!(regnum & MII_ADDR_C45);

> > > >

> > > > @@ -1843,6 +1830,8 @@ static int fec_enet_mdio_read(struct mii_bus

> *bus, int mii_id, int regnum)

> > > >     if (ret < 0)

> > > >             return ret;

> > > >

> > > > +   reinit_completion(&fep->mdio_done);

> > > > +

> > > >     if (is_c45) {

> > > >             frame_start = FEC_MMFR_ST_C45;

> > > >

> > > > @@ -1854,9 +1843,11 @@ static int fec_enet_mdio_read(struct mii_bus

> *bus, int mii_id, int regnum)

> > > >                    fep->hwp + FEC_MII_DATA);

> > > >

> > > >             /* wait for end of transfer */

> > > > -           ret = fec_enet_mdio_wait(fep);

> > > > -           if (ret) {

> > > > +           time_left =

> wait_for_completion_timeout(&fep->mdio_done,

> > > > +                           usecs_to_jiffies(FEC_MII_TIMEOUT));

> > > > +           if (time_left == 0) {

> > > >                     netdev_err(fep->netdev, "MDIO address write

> > > > timeout\n");

> > > > +                   ret = -ETIMEDOUT;

> > > >                     goto out;

> > > >             }

> > > >

> > > > @@ -1875,9 +1866,11 @@ static int fec_enet_mdio_read(struct mii_bus

> *bus, int mii_id, int regnum)

> > > >             FEC_MMFR_TA, fep->hwp + FEC_MII_DATA);

> > > >

> > > >     /* wait for end of transfer */

> > > > -   ret = fec_enet_mdio_wait(fep);

> > > > -   if (ret) {

> > > > +   time_left = wait_for_completion_timeout(&fep->mdio_done,

> > > > +                   usecs_to_jiffies(FEC_MII_TIMEOUT));

> > > > +   if (time_left == 0) {

> > > >             netdev_err(fep->netdev, "MDIO read timeout\n");

> > > > +           ret = -ETIMEDOUT;

> > > >             goto out;

> > > >     }

> > > >

> > > > @@ -1895,6 +1888,7 @@ static int fec_enet_mdio_write(struct

> > > > mii_bus *bus, int mii_id, int regnum,  {

> > > >     struct fec_enet_private *fep = bus->priv;

> > > >     struct device *dev = &fep->pdev->dev;

> > > > +   unsigned long time_left;

> > > >     int ret, frame_start, frame_addr;

> > > >     bool is_c45 = !!(regnum & MII_ADDR_C45);

> > > >

> > > > @@ -1904,6 +1898,8 @@ static int fec_enet_mdio_write(struct mii_bus

> *bus, int mii_id, int regnum,

> > > >     else

> > > >             ret = 0;

> > > >

> > > > +   reinit_completion(&fep->mdio_done);

> > > > +

> > > >     if (is_c45) {

> > > >             frame_start = FEC_MMFR_ST_C45;

> > > >

> > > > @@ -1915,9 +1911,11 @@ static int fec_enet_mdio_write(struct mii_bus

> *bus, int mii_id, int regnum,

> > > >                    fep->hwp + FEC_MII_DATA);

> > > >

> > > >             /* wait for end of transfer */

> > > > -           ret = fec_enet_mdio_wait(fep);

> > > > -           if (ret) {

> > > > +           time_left =

> wait_for_completion_timeout(&fep->mdio_done,

> > > > +                   usecs_to_jiffies(FEC_MII_TIMEOUT));

> > > > +           if (time_left == 0) {

> > > >                     netdev_err(fep->netdev, "MDIO address write

> > > > timeout\n");

> > > > +                   ret = -ETIMEDOUT;

> > > >                     goto out;

> > > >             }

> > > >     } else {

> > > > @@ -1933,9 +1931,12 @@ static int fec_enet_mdio_write(struct mii_bus

> *bus, int mii_id, int regnum,

> > > >             fep->hwp + FEC_MII_DATA);

> > > >

> > > >     /* wait for end of transfer */

> > > > -   ret = fec_enet_mdio_wait(fep);

> > > > -   if (ret)

> > > > +   time_left = wait_for_completion_timeout(&fep->mdio_done,

> > > > +                   usecs_to_jiffies(FEC_MII_TIMEOUT));

> > > > +   if (time_left == 0) {

> > > >             netdev_err(fep->netdev, "MDIO write timeout\n");

> > > > +           ret  = -ETIMEDOUT;

> > > > +   }

> > > >

> > > >  out:

> > > >     pm_runtime_mark_last_busy(dev); @@ -2144,9 +2145,6 @@ static

> > > > int fec_enet_mii_init(struct platform_device *pdev)

> > > >

> > > >     writel(fep->phy_speed, fep->hwp + FEC_MII_SPEED);

> > > >

> > > > -   /* Clear any pending transaction complete indication */

> > > > -   writel(FEC_ENET_MII, fep->hwp + FEC_IEVENT);

> > > > -

> > > >     fep->mii_bus = mdiobus_alloc();

> > > >     if (fep->mii_bus == NULL) {

> > > >             err = -ENOMEM;

> > > > @@ -3688,6 +3686,7 @@ fec_probe(struct platform_device *pdev)

> > > >             fep->irq[i] = irq;

> > > >     }

> > > >

> > > > +   init_completion(&fep->mdio_done);

> > > >     ret = fec_enet_mii_init(pdev);

> > > >     if (ret)

> > > >             goto failed_mii_init;

> >

> > --

> > Regards,

> >

> > Laurent Pinchart
Laurent Pinchart July 27, 2020, 2:33 a.m. UTC | #7
Hi Andrew,

On Mon, Jul 27, 2020 at 04:14:32AM +0200, Andrew Lunn wrote:
> On Mon, Jul 27, 2020 at 05:06:31AM +0300, Laurent Pinchart wrote:

> > On Mon, Jul 27, 2020 at 04:24:02AM +0300, Laurent Pinchart wrote:

> > > On Mon, Apr 27, 2020 at 10:08:04PM +0800, Fugang Duan wrote:

> > > > This reverts commit 29ae6bd1b0d8a57d7c00ab12cbb949fc41986eef.

> > > > 

> > > > The commit breaks ethernet function on i.MX6SX, i.MX7D, i.MX8MM,

> > > > i.MX8MQ, and i.MX8QXP platforms. Boot yocto system by NFS mounting

> > > > rootfs will be failed with the commit.

> > > 

> > > I'm afraid this commit breaks networking on i.MX7D for me :-( My board

> > > is configured to boot over NFS root with IP autoconfiguration through

> > > DHCP. The DHCP request goes out, the reply is sent back by the server,

> > > but never noticed by the fec driver.

> > > 

> > > v5.7 works fine. As 29ae6bd1b0d8a57d7c00ab12cbb949fc41986eef was merged

> > > during the v5.8 merge window, I suspect something else cropped in

> > > between 29ae6bd1b0d8a57d7c00ab12cbb949fc41986eef and this patch that

> > > needs to be reverted too. We're close to v5.8 and it would be annoying

> > > to see this regression ending up in the released kernel. I can test

> > > patches, but I'm not familiar enough with the driver (or the networking

> > > subsystem) to fix the issue myself.

> > 

> > If it can be of any help, I've confirmed that, to get the network back

> > to usable state from v5.8-rc6, I have to revert all patches up to this

> > one. This is the top of my branch, on top of v5.8-rc6:

> > 

> > 5bbe80c9efea Revert "net: ethernet: fec: Revert "net: ethernet: fec: Replace interrupt driven MDIO with polled IO""

> > 5462896a08c1 Revert "net: ethernet: fec: Replace interrupt driven MDIO with polled IO"

> > 824a82e2bdfa Revert "net: ethernet: fec: move GPR register offset and bit into DT"

> > bfe330591cab Revert "net: fec: disable correct clk in the err path of fec_enet_clk_enable"

> > 109958cad578 Revert "net: ethernet: fec: prevent tx starvation under high rx load"

> 

> OK.

> 

> What PHY are you using? A Micrel?


KSZ9031RNXIA

> And which DT file?


It's out of tree.

&fec1 {
        pinctrl-names = "default";
        pinctrl-0 = <&pinctrl_enet1>;
        assigned-clocks = <&clks IMX7D_ENET1_TIME_ROOT_SRC>,
                          <&clks IMX7D_ENET1_TIME_ROOT_CLK>;
        assigned-clock-parents = <&clks IMX7D_PLL_ENET_MAIN_100M_CLK>;
        assigned-clock-rates = <0>, <100000000>;
        phy-mode = "rgmii";
        phy-handle = <&ethphy0>;
        phy-reset-gpios = <&gpio1 13 GPIO_ACTIVE_LOW>;
        phy-supply = <&reg_3v3_sw>;
        fsl,magic-packet;
        status = "okay";

        mdio {
                #address-cells = <1>;
                #size-cells = <0>;

                ethphy0: ethernet-phy@0 {
                        reg = <1>;
                };

                ethphy1: ethernet-phy@1 {
                        reg = <2>;
                };
        };
};

I can provide the full DT if needed. 

-- 
Regards,

Laurent Pinchart
Chris Healy July 27, 2020, 2:35 a.m. UTC | #8
Hi Laurent,

I have the exact same copper PHY.  I just reverted a patch specific to
this PHY and went from broken to working.  Give this a try:

git revert bcf3440c6dd78bfe5836ec0990fe36d7b4bb7d20

Regards,

Chris

On Sun, Jul 26, 2020 at 7:33 PM Laurent Pinchart
<laurent.pinchart@ideasonboard.com> wrote:
>

> Hi Andrew,

>

> On Mon, Jul 27, 2020 at 04:14:32AM +0200, Andrew Lunn wrote:

> > On Mon, Jul 27, 2020 at 05:06:31AM +0300, Laurent Pinchart wrote:

> > > On Mon, Jul 27, 2020 at 04:24:02AM +0300, Laurent Pinchart wrote:

> > > > On Mon, Apr 27, 2020 at 10:08:04PM +0800, Fugang Duan wrote:

> > > > > This reverts commit 29ae6bd1b0d8a57d7c00ab12cbb949fc41986eef.

> > > > >

> > > > > The commit breaks ethernet function on i.MX6SX, i.MX7D, i.MX8MM,

> > > > > i.MX8MQ, and i.MX8QXP platforms. Boot yocto system by NFS mounting

> > > > > rootfs will be failed with the commit.

> > > >

> > > > I'm afraid this commit breaks networking on i.MX7D for me :-( My board

> > > > is configured to boot over NFS root with IP autoconfiguration through

> > > > DHCP. The DHCP request goes out, the reply is sent back by the server,

> > > > but never noticed by the fec driver.

> > > >

> > > > v5.7 works fine. As 29ae6bd1b0d8a57d7c00ab12cbb949fc41986eef was merged

> > > > during the v5.8 merge window, I suspect something else cropped in

> > > > between 29ae6bd1b0d8a57d7c00ab12cbb949fc41986eef and this patch that

> > > > needs to be reverted too. We're close to v5.8 and it would be annoying

> > > > to see this regression ending up in the released kernel. I can test

> > > > patches, but I'm not familiar enough with the driver (or the networking

> > > > subsystem) to fix the issue myself.

> > >

> > > If it can be of any help, I've confirmed that, to get the network back

> > > to usable state from v5.8-rc6, I have to revert all patches up to this

> > > one. This is the top of my branch, on top of v5.8-rc6:

> > >

> > > 5bbe80c9efea Revert "net: ethernet: fec: Revert "net: ethernet: fec: Replace interrupt driven MDIO with polled IO""

> > > 5462896a08c1 Revert "net: ethernet: fec: Replace interrupt driven MDIO with polled IO"

> > > 824a82e2bdfa Revert "net: ethernet: fec: move GPR register offset and bit into DT"

> > > bfe330591cab Revert "net: fec: disable correct clk in the err path of fec_enet_clk_enable"

> > > 109958cad578 Revert "net: ethernet: fec: prevent tx starvation under high rx load"

> >

> > OK.

> >

> > What PHY are you using? A Micrel?

>

> KSZ9031RNXIA

>

> > And which DT file?

>

> It's out of tree.

>

> &fec1 {

>         pinctrl-names = "default";

>         pinctrl-0 = <&pinctrl_enet1>;

>         assigned-clocks = <&clks IMX7D_ENET1_TIME_ROOT_SRC>,

>                           <&clks IMX7D_ENET1_TIME_ROOT_CLK>;

>         assigned-clock-parents = <&clks IMX7D_PLL_ENET_MAIN_100M_CLK>;

>         assigned-clock-rates = <0>, <100000000>;

>         phy-mode = "rgmii";

>         phy-handle = <&ethphy0>;

>         phy-reset-gpios = <&gpio1 13 GPIO_ACTIVE_LOW>;

>         phy-supply = <&reg_3v3_sw>;

>         fsl,magic-packet;

>         status = "okay";

>

>         mdio {

>                 #address-cells = <1>;

>                 #size-cells = <0>;

>

>                 ethphy0: ethernet-phy@0 {

>                         reg = <1>;

>                 };

>

>                 ethphy1: ethernet-phy@1 {

>                         reg = <2>;

>                 };

>         };

> };

>

> I can provide the full DT if needed.

>

> --

> Regards,

>

> Laurent Pinchart
Laurent Pinchart July 27, 2020, 2:36 a.m. UTC | #9
Hi Chris,

On Sun, Jul 26, 2020 at 07:13:20PM -0700, Chris Healy wrote:
> On Sun, Jul 26, 2020 at 7:06 PM Laurent Pinchart wrote:

> > On Mon, Jul 27, 2020 at 04:24:02AM +0300, Laurent Pinchart wrote:

> >> On Mon, Apr 27, 2020 at 10:08:04PM +0800, Fugang Duan wrote:

> >>> This reverts commit 29ae6bd1b0d8a57d7c00ab12cbb949fc41986eef.

> >>>

> >>> The commit breaks ethernet function on i.MX6SX, i.MX7D, i.MX8MM,

> >>> i.MX8MQ, and i.MX8QXP platforms. Boot yocto system by NFS mounting

> >>> rootfs will be failed with the commit.

> >>

> >> I'm afraid this commit breaks networking on i.MX7D for me :-( My board

> >> is configured to boot over NFS root with IP autoconfiguration through

> >> DHCP. The DHCP request goes out, the reply is sent back by the server,

> >> but never noticed by the fec driver.

> >>

> >> v5.7 works fine. As 29ae6bd1b0d8a57d7c00ab12cbb949fc41986eef was merged

> >> during the v5.8 merge window, I suspect something else cropped in

> >> between 29ae6bd1b0d8a57d7c00ab12cbb949fc41986eef and this patch that

> >> needs to be reverted too. We're close to v5.8 and it would be annoying

> >> to see this regression ending up in the released kernel. I can test

> >> patches, but I'm not familiar enough with the driver (or the networking

> >> subsystem) to fix the issue myself.

> >

> > If it can be of any help, I've confirmed that, to get the network back

> > to usable state from v5.8-rc6, I have to revert all patches up to this

> > one. This is the top of my branch, on top of v5.8-rc6:

> >

> > 5bbe80c9efea Revert "net: ethernet: fec: Revert "net: ethernet: fec: Replace interrupt driven MDIO with polled IO""

> > 5462896a08c1 Revert "net: ethernet: fec: Replace interrupt driven MDIO with polled IO"

> > 824a82e2bdfa Revert "net: ethernet: fec: move GPR register offset and bit into DT"

> > bfe330591cab Revert "net: fec: disable correct clk in the err path of fec_enet_clk_enable"

> > 109958cad578 Revert "net: ethernet: fec: prevent tx starvation under high rx load"

> 

> I just fired up net-next on my i.MX7d based design (not NFS root

> though).  I can bring up the network interface with a gigabit

> connection but ALL RX traffic is failing with CRC errors.  Now, my

> design is using a Micrel KSZ9031 which might be part of the problem

> for me as there were some recent KSZ9031 changes made so take what I'm

> seeing with a grain of salt.


I'm using the same PHY (KSZ9031RNXIA to be precise).

> Laurent, couple questions:

> 

> 1) Are you able to boot without NFS root and communicate correctly or

> is this issue just when doing an NFS root?

> 2) If you are able to boot up without NFS root, can you check the

> ethtool statistics and see the same RX CRC errors I'm seeing?


I'll try this next. This particular board isn't flashed with a root FS,
but it shouldn't be hard to do so. Just need to add ethtool to my
buildroot FS.

> >>> Signed-off-by: Fugang Duan <fugang.duan@nxp.com>

> >>>

> >>> diff --git a/drivers/net/ethernet/freescale/fec.h b/drivers/net/ethernet/freescale/fec.h

> >>> index a6cdd5b..e74dd1f 100644

> >>> --- a/drivers/net/ethernet/freescale/fec.h

> >>> +++ b/drivers/net/ethernet/freescale/fec.h

> >>> @@ -376,7 +376,8 @@ struct bufdesc_ex {

> >>>  #define FEC_ENET_TS_AVAIL       ((uint)0x00010000)

> >>>  #define FEC_ENET_TS_TIMER       ((uint)0x00008000)

> >>>

> >>> -#define FEC_DEFAULT_IMASK (FEC_ENET_TXF | FEC_ENET_RXF)

> >>> +#define FEC_DEFAULT_IMASK (FEC_ENET_TXF | FEC_ENET_RXF | FEC_ENET_MII)

> >>> +#define FEC_NAPI_IMASK     FEC_ENET_MII

> >>>  #define FEC_RX_DISABLED_IMASK (FEC_DEFAULT_IMASK & (~FEC_ENET_RXF))

> >>>

> >>>  /* ENET interrupt coalescing macro define */

> >>> @@ -542,6 +543,7 @@ struct fec_enet_private {

> >>>     int     link;

> >>>     int     full_duplex;

> >>>     int     speed;

> >>> +   struct  completion mdio_done;

> >>>     int     irq[FEC_IRQ_NUM];

> >>>     bool    bufdesc_ex;

> >>>     int     pause_flag;

> >>> diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c

> >>> index 1ae075a..c7b84bb 100644

> >>> --- a/drivers/net/ethernet/freescale/fec_main.c

> >>> +++ b/drivers/net/ethernet/freescale/fec_main.c

> >>> @@ -976,8 +976,8 @@ fec_restart(struct net_device *ndev)

> >>>     writel((__force u32)cpu_to_be32(temp_mac[1]),

> >>>            fep->hwp + FEC_ADDR_HIGH);

> >>>

> >>> -   /* Clear any outstanding interrupt, except MDIO. */

> >>> -   writel((0xffffffff & ~FEC_ENET_MII), fep->hwp + FEC_IEVENT);

> >>> +   /* Clear any outstanding interrupt. */

> >>> +   writel(0xffffffff, fep->hwp + FEC_IEVENT);

> >>>

> >>>     fec_enet_bd_init(ndev);

> >>>

> >>> @@ -1123,7 +1123,7 @@ fec_restart(struct net_device *ndev)

> >>>     if (fep->link)

> >>>             writel(FEC_DEFAULT_IMASK, fep->hwp + FEC_IMASK);

> >>>     else

> >>> -           writel(0, fep->hwp + FEC_IMASK);

> >>> +           writel(FEC_ENET_MII, fep->hwp + FEC_IMASK);

> >>>

> >>>     /* Init the interrupt coalescing */

> >>>     fec_enet_itr_coal_init(ndev);

> >>> @@ -1652,10 +1652,6 @@ fec_enet_interrupt(int irq, void *dev_id)

> >>>     irqreturn_t ret = IRQ_NONE;

> >>>

> >>>     int_events = readl(fep->hwp + FEC_IEVENT);

> >>> -

> >>> -   /* Don't clear MDIO events, we poll for those */

> >>> -   int_events &= ~FEC_ENET_MII;

> >>> -

> >>>     writel(int_events, fep->hwp + FEC_IEVENT);

> >>>     fec_enet_collect_events(fep, int_events);

> >>>

> >>> @@ -1663,12 +1659,16 @@ fec_enet_interrupt(int irq, void *dev_id)

> >>>             ret = IRQ_HANDLED;

> >>>

> >>>             if (napi_schedule_prep(&fep->napi)) {

> >>> -                   /* Disable interrupts */

> >>> -                   writel(0, fep->hwp + FEC_IMASK);

> >>> +                   /* Disable the NAPI interrupts */

> >>> +                   writel(FEC_NAPI_IMASK, fep->hwp + FEC_IMASK);

> >>>                     __napi_schedule(&fep->napi);

> >>>             }

> >>>     }

> >>>

> >>> +   if (int_events & FEC_ENET_MII) {

> >>> +           ret = IRQ_HANDLED;

> >>> +           complete(&fep->mdio_done);

> >>> +   }

> >>>     return ret;

> >>>  }

> >>>

> >>> @@ -1818,24 +1818,11 @@ static void fec_enet_adjust_link(struct net_device *ndev)

> >>>             phy_print_status(phy_dev);

> >>>  }

> >>>

> >>> -static int fec_enet_mdio_wait(struct fec_enet_private *fep)

> >>> -{

> >>> -   uint ievent;

> >>> -   int ret;

> >>> -

> >>> -   ret = readl_poll_timeout_atomic(fep->hwp + FEC_IEVENT, ievent,

> >>> -                                   ievent & FEC_ENET_MII, 2, 30000);

> >>> -

> >>> -   if (!ret)

> >>> -           writel(FEC_ENET_MII, fep->hwp + FEC_IEVENT);

> >>> -

> >>> -   return ret;

> >>> -}

> >>> -

> >>>  static int fec_enet_mdio_read(struct mii_bus *bus, int mii_id, int regnum)

> >>>  {

> >>>     struct fec_enet_private *fep = bus->priv;

> >>>     struct device *dev = &fep->pdev->dev;

> >>> +   unsigned long time_left;

> >>>     int ret = 0, frame_start, frame_addr, frame_op;

> >>>     bool is_c45 = !!(regnum & MII_ADDR_C45);

> >>>

> >>> @@ -1843,6 +1830,8 @@ static int fec_enet_mdio_read(struct mii_bus *bus, int mii_id, int regnum)

> >>>     if (ret < 0)

> >>>             return ret;

> >>>

> >>> +   reinit_completion(&fep->mdio_done);

> >>> +

> >>>     if (is_c45) {

> >>>             frame_start = FEC_MMFR_ST_C45;

> >>>

> >>> @@ -1854,9 +1843,11 @@ static int fec_enet_mdio_read(struct mii_bus *bus, int mii_id, int regnum)

> >>>                    fep->hwp + FEC_MII_DATA);

> >>>

> >>>             /* wait for end of transfer */

> >>> -           ret = fec_enet_mdio_wait(fep);

> >>> -           if (ret) {

> >>> +           time_left = wait_for_completion_timeout(&fep->mdio_done,

> >>> +                           usecs_to_jiffies(FEC_MII_TIMEOUT));

> >>> +           if (time_left == 0) {

> >>>                     netdev_err(fep->netdev, "MDIO address write timeout\n");

> >>> +                   ret = -ETIMEDOUT;

> >>>                     goto out;

> >>>             }

> >>>

> >>> @@ -1875,9 +1866,11 @@ static int fec_enet_mdio_read(struct mii_bus *bus, int mii_id, int regnum)

> >>>             FEC_MMFR_TA, fep->hwp + FEC_MII_DATA);

> >>>

> >>>     /* wait for end of transfer */

> >>> -   ret = fec_enet_mdio_wait(fep);

> >>> -   if (ret) {

> >>> +   time_left = wait_for_completion_timeout(&fep->mdio_done,

> >>> +                   usecs_to_jiffies(FEC_MII_TIMEOUT));

> >>> +   if (time_left == 0) {

> >>>             netdev_err(fep->netdev, "MDIO read timeout\n");

> >>> +           ret = -ETIMEDOUT;

> >>>             goto out;

> >>>     }

> >>>

> >>> @@ -1895,6 +1888,7 @@ static int fec_enet_mdio_write(struct mii_bus *bus, int mii_id, int regnum,

> >>>  {

> >>>     struct fec_enet_private *fep = bus->priv;

> >>>     struct device *dev = &fep->pdev->dev;

> >>> +   unsigned long time_left;

> >>>     int ret, frame_start, frame_addr;

> >>>     bool is_c45 = !!(regnum & MII_ADDR_C45);

> >>>

> >>> @@ -1904,6 +1898,8 @@ static int fec_enet_mdio_write(struct mii_bus *bus, int mii_id, int regnum,

> >>>     else

> >>>             ret = 0;

> >>>

> >>> +   reinit_completion(&fep->mdio_done);

> >>> +

> >>>     if (is_c45) {

> >>>             frame_start = FEC_MMFR_ST_C45;

> >>>

> >>> @@ -1915,9 +1911,11 @@ static int fec_enet_mdio_write(struct mii_bus *bus, int mii_id, int regnum,

> >>>                    fep->hwp + FEC_MII_DATA);

> >>>

> >>>             /* wait for end of transfer */

> >>> -           ret = fec_enet_mdio_wait(fep);

> >>> -           if (ret) {

> >>> +           time_left = wait_for_completion_timeout(&fep->mdio_done,

> >>> +                   usecs_to_jiffies(FEC_MII_TIMEOUT));

> >>> +           if (time_left == 0) {

> >>>                     netdev_err(fep->netdev, "MDIO address write timeout\n");

> >>> +                   ret = -ETIMEDOUT;

> >>>                     goto out;

> >>>             }

> >>>     } else {

> >>> @@ -1933,9 +1931,12 @@ static int fec_enet_mdio_write(struct mii_bus *bus, int mii_id, int regnum,

> >>>             fep->hwp + FEC_MII_DATA);

> >>>

> >>>     /* wait for end of transfer */

> >>> -   ret = fec_enet_mdio_wait(fep);

> >>> -   if (ret)

> >>> +   time_left = wait_for_completion_timeout(&fep->mdio_done,

> >>> +                   usecs_to_jiffies(FEC_MII_TIMEOUT));

> >>> +   if (time_left == 0) {

> >>>             netdev_err(fep->netdev, "MDIO write timeout\n");

> >>> +           ret  = -ETIMEDOUT;

> >>> +   }

> >>>

> >>>  out:

> >>>     pm_runtime_mark_last_busy(dev);

> >>> @@ -2144,9 +2145,6 @@ static int fec_enet_mii_init(struct platform_device *pdev)

> >>>

> >>>     writel(fep->phy_speed, fep->hwp + FEC_MII_SPEED);

> >>>

> >>> -   /* Clear any pending transaction complete indication */

> >>> -   writel(FEC_ENET_MII, fep->hwp + FEC_IEVENT);

> >>> -

> >>>     fep->mii_bus = mdiobus_alloc();

> >>>     if (fep->mii_bus == NULL) {

> >>>             err = -ENOMEM;

> >>> @@ -3688,6 +3686,7 @@ fec_probe(struct platform_device *pdev)

> >>>             fep->irq[i] = irq;

> >>>     }

> >>>

> >>> +   init_completion(&fep->mdio_done);

> >>>     ret = fec_enet_mii_init(pdev);

> >>>     if (ret)

> >>>             goto failed_mii_init;


-- 
Regards,

Laurent Pinchart
Laurent Pinchart July 27, 2020, 2:39 a.m. UTC | #10
Hi Chris,

On Sun, Jul 26, 2020 at 07:35:51PM -0700, Chris Healy wrote:
> Hi Laurent,

> 

> I have the exact same copper PHY.  I just reverted a patch specific to

> this PHY and went from broken to working.  Give this a try:

> 

> git revert bcf3440c6dd78bfe5836ec0990fe36d7b4bb7d20


Reverting this on top of v5.8-rc6 (without any revert of FEC commits)
fixes the issue too.

> On Sun, Jul 26, 2020 at 7:33 PM Laurent Pinchart wrote:

> > On Mon, Jul 27, 2020 at 04:14:32AM +0200, Andrew Lunn wrote:

> > > On Mon, Jul 27, 2020 at 05:06:31AM +0300, Laurent Pinchart wrote:

> > > > On Mon, Jul 27, 2020 at 04:24:02AM +0300, Laurent Pinchart wrote:

> > > > > On Mon, Apr 27, 2020 at 10:08:04PM +0800, Fugang Duan wrote:

> > > > > > This reverts commit 29ae6bd1b0d8a57d7c00ab12cbb949fc41986eef.

> > > > > >

> > > > > > The commit breaks ethernet function on i.MX6SX, i.MX7D, i.MX8MM,

> > > > > > i.MX8MQ, and i.MX8QXP platforms. Boot yocto system by NFS mounting

> > > > > > rootfs will be failed with the commit.

> > > > >

> > > > > I'm afraid this commit breaks networking on i.MX7D for me :-( My board

> > > > > is configured to boot over NFS root with IP autoconfiguration through

> > > > > DHCP. The DHCP request goes out, the reply is sent back by the server,

> > > > > but never noticed by the fec driver.

> > > > >

> > > > > v5.7 works fine. As 29ae6bd1b0d8a57d7c00ab12cbb949fc41986eef was merged

> > > > > during the v5.8 merge window, I suspect something else cropped in

> > > > > between 29ae6bd1b0d8a57d7c00ab12cbb949fc41986eef and this patch that

> > > > > needs to be reverted too. We're close to v5.8 and it would be annoying

> > > > > to see this regression ending up in the released kernel. I can test

> > > > > patches, but I'm not familiar enough with the driver (or the networking

> > > > > subsystem) to fix the issue myself.

> > > >

> > > > If it can be of any help, I've confirmed that, to get the network back

> > > > to usable state from v5.8-rc6, I have to revert all patches up to this

> > > > one. This is the top of my branch, on top of v5.8-rc6:

> > > >

> > > > 5bbe80c9efea Revert "net: ethernet: fec: Revert "net: ethernet: fec: Replace interrupt driven MDIO with polled IO""

> > > > 5462896a08c1 Revert "net: ethernet: fec: Replace interrupt driven MDIO with polled IO"

> > > > 824a82e2bdfa Revert "net: ethernet: fec: move GPR register offset and bit into DT"

> > > > bfe330591cab Revert "net: fec: disable correct clk in the err path of fec_enet_clk_enable"

> > > > 109958cad578 Revert "net: ethernet: fec: prevent tx starvation under high rx load"

> > >

> > > OK.

> > >

> > > What PHY are you using? A Micrel?

> >

> > KSZ9031RNXIA

> >

> > > And which DT file?

> >

> > It's out of tree.

> >

> > &fec1 {

> >         pinctrl-names = "default";

> >         pinctrl-0 = <&pinctrl_enet1>;

> >         assigned-clocks = <&clks IMX7D_ENET1_TIME_ROOT_SRC>,

> >                           <&clks IMX7D_ENET1_TIME_ROOT_CLK>;

> >         assigned-clock-parents = <&clks IMX7D_PLL_ENET_MAIN_100M_CLK>;

> >         assigned-clock-rates = <0>, <100000000>;

> >         phy-mode = "rgmii";

> >         phy-handle = <&ethphy0>;

> >         phy-reset-gpios = <&gpio1 13 GPIO_ACTIVE_LOW>;

> >         phy-supply = <&reg_3v3_sw>;

> >         fsl,magic-packet;

> >         status = "okay";

> >

> >         mdio {

> >                 #address-cells = <1>;

> >                 #size-cells = <0>;

> >

> >                 ethphy0: ethernet-phy@0 {

> >                         reg = <1>;

> >                 };

> >

> >                 ethphy1: ethernet-phy@1 {

> >                         reg = <2>;

> >                 };

> >         };

> > };

> >

> > I can provide the full DT if needed.


-- 
Regards,

Laurent Pinchart
Chris Healy July 27, 2020, 2:40 a.m. UTC | #11
Actually, I was a little quick to say it went from broken to working.

With net-next, I'm getting CRC errors on 100% of inbound packets.
With bcf3440c6dd78bfe5836ec0990fe36d7b4bb7d20 reverted, I drop down to
a 1% error rate.

This very much feels like a KSZ9031 RGMII timing issue to me...

On Sun, Jul 26, 2020 at 7:35 PM Chris Healy <cphealy@gmail.com> wrote:
>

> Hi Laurent,

>

> I have the exact same copper PHY.  I just reverted a patch specific to

> this PHY and went from broken to working.  Give this a try:

>

> git revert bcf3440c6dd78bfe5836ec0990fe36d7b4bb7d20

>

> Regards,

>

> Chris

>

> On Sun, Jul 26, 2020 at 7:33 PM Laurent Pinchart

> <laurent.pinchart@ideasonboard.com> wrote:

> >

> > Hi Andrew,

> >

> > On Mon, Jul 27, 2020 at 04:14:32AM +0200, Andrew Lunn wrote:

> > > On Mon, Jul 27, 2020 at 05:06:31AM +0300, Laurent Pinchart wrote:

> > > > On Mon, Jul 27, 2020 at 04:24:02AM +0300, Laurent Pinchart wrote:

> > > > > On Mon, Apr 27, 2020 at 10:08:04PM +0800, Fugang Duan wrote:

> > > > > > This reverts commit 29ae6bd1b0d8a57d7c00ab12cbb949fc41986eef.

> > > > > >

> > > > > > The commit breaks ethernet function on i.MX6SX, i.MX7D, i.MX8MM,

> > > > > > i.MX8MQ, and i.MX8QXP platforms. Boot yocto system by NFS mounting

> > > > > > rootfs will be failed with the commit.

> > > > >

> > > > > I'm afraid this commit breaks networking on i.MX7D for me :-( My board

> > > > > is configured to boot over NFS root with IP autoconfiguration through

> > > > > DHCP. The DHCP request goes out, the reply it sent back by the server,

> > > > > but never noticed by the fec driver.

> > > > >

> > > > > v5.7 works fine. As 29ae6bd1b0d8a57d7c00ab12cbb949fc41986eef was merged

> > > > > during the v5.8 merge window, I suspect something else cropped in

> > > > > between 29ae6bd1b0d8a57d7c00ab12cbb949fc41986eef and this patch that

> > > > > needs to be reverted too. We're close to v5.8 and it would be annoying

> > > > > to see this regression ending up in the released kernel. I can test

> > > > > patches, but I'm not familiar enough with the driver (or the networking

> > > > > subsystem) to fix the issue myself.

> > > >

> > > > If it can be of any help, I've confirmed that, to get the network back

> > > > to usable state from v5.8-rc6, I have to revert all patches up to this

> > > > one. This is the top of my branch, on top of v5.8-rc6:

> > > >

> > > > 5bbe80c9efea Revert "net: ethernet: fec: Revert "net: ethernet: fec: Replace interrupt driven MDIO with polled IO""

> > > > 5462896a08c1 Revert "net: ethernet: fec: Replace interrupt driven MDIO with polled IO"

> > > > 824a82e2bdfa Revert "net: ethernet: fec: move GPR register offset and bit into DT"

> > > > bfe330591cab Revert "net: fec: disable correct clk in the err path of fec_enet_clk_enable"

> > > > 109958cad578 Revert "net: ethernet: fec: prevent tx starvation under high rx load"

> > >

> > > OK.

> > >

> > > What PHY are you using? A Micrel?

> >

> > KSZ9031RNXIA

> >

> > > And which DT file?

> >

> > It's out of tree.

> >

> > &fec1 {

> >         pinctrl-names = "default";

> >         pinctrl-0 = <&pinctrl_enet1>;

> >         assigned-clocks = <&clks IMX7D_ENET1_TIME_ROOT_SRC>,

> >                           <&clks IMX7D_ENET1_TIME_ROOT_CLK>;

> >         assigned-clock-parents = <&clks IMX7D_PLL_ENET_MAIN_100M_CLK>;

> >         assigned-clock-rates = <0>, <100000000>;

> >         phy-mode = "rgmii";

> >         phy-handle = <&ethphy0>;

> >         phy-reset-gpios = <&gpio1 13 GPIO_ACTIVE_LOW>;

> >         phy-supply = <&reg_3v3_sw>;

> >         fsl,magic-packet;

> >         status = "okay";

> >

> >         mdio {

> >                 #address-cells = <1>;

> >                 #size-cells = <0>;

> >

> >                 ethphy0: ethernet-phy@0 {

> >                         reg = <1>;

> >                 };

> >

> >                 ethphy1: ethernet-phy@1 {

> >                         reg = <2>;

> >                 };

> >         };

> > };

> >

> > I can provide the full DT if needed.

> >

> > --

> > Regards,

> >

> > Laurent Pinchart
Andy Duan July 27, 2020, 2:51 a.m. UTC | #12
From: Chris Healy <cphealy@gmail.com> Sent: Monday, July 27, 2020 10:40 AM

> Actually, I was a little quick to say it went from broken to working.

> 

> With net-next, I'm getting CRC errors on 100% of inbound packets.

> With bcf3440c6dd78bfe5836ec0990fe36d7b4bb7d20 reverted, I drop down to a

> 1% error rate.

> 

> This very much feels like a KSZ9031 RGMII timing issue to me...


@Chris/@Laurent, I ran net-next on an imx7d sdb board with a BCM54220 switch PHY;
there is no NFS boot issue and no CRC error after receiving 1.1GiB of data.

~# ifconfig
eth0      Link encap:Ethernet  HWaddr 00:20:30:40:50:02
          inet addr:10.192.242.202  Bcast:10.192.242.255  Mask:255.255.255.0
          inet6 addr: fe80::220:30ff:fe40:5002/64 Scope:Link
          UP BROADCAST RUNNING MULTICAST  MTU:1500  Metric:1
          RX packets:853885 errors:0 dropped:0 overruns:0 frame:0
          TX packets:22370 errors:0 dropped:0 overruns:0 carrier:0
          collisions:0 txqueuelen:1000
          RX bytes:1284666734 (1.1 GiB)  TX bytes:3147617 (3.0 MiB)

lo        Link encap:Local Loopback
          inet addr:127.0.0.1  Mask:255.0.0.0
          inet6 addr: ::1/128 Scope:Host
          UP LOOPBACK RUNNING  MTU:65536  Metric:1
          RX packets:322 errors:0 dropped:0 overruns:0 frame:0
          TX packets:322 errors:0 dropped:0 overruns:0 carrier:0
          collisions:0 txqueuelen:1000
          RX bytes:24460 (23.8 KiB)  TX bytes:24460 (23.8 KiB)

~# uname -r
5.8.0-rc6-01914-ga57066b1a019


So, the issue you hit is related to the Micrel PHY RGMII timing.



Regards,
Fugang

> 

> On Sun, Jul 26, 2020 at 7:35 PM Chris Healy <cphealy@gmail.com> wrote:

> >

> > Hi Laurent,

> >

> > I have the exact same copper PHY.  I just reverted a patch specific to

> > this PHY and went from broken to working.  Give this a try:

> >

> > git revert bcf3440c6dd78bfe5836ec0990fe36d7b4bb7d20

> >

> > Regards,

> >

> > Chris

> >

> > On Sun, Jul 26, 2020 at 7:33 PM Laurent Pinchart

> > <laurent.pinchart@ideasonboard.com> wrote:

> > >

> > > Hi Andrew,

> > >

> > > On Mon, Jul 27, 2020 at 04:14:32AM +0200, Andrew Lunn wrote:

> > > > On Mon, Jul 27, 2020 at 05:06:31AM +0300, Laurent Pinchart wrote:

> > > > > On Mon, Jul 27, 2020 at 04:24:02AM +0300, Laurent Pinchart wrote:

> > > > > > On Mon, Apr 27, 2020 at 10:08:04PM +0800, Fugang Duan wrote:

> > > > > > > This reverts commit 29ae6bd1b0d8a57d7c00ab12cbb949fc41986eef.

> > > > > > >

> > > > > > > The commit breaks ethernet function on i.MX6SX, i.MX7D,

> > > > > > > i.MX8MM, i.MX8MQ, and i.MX8QXP platforms. Boot yocto system

> > > > > > > by NFS mounting rootfs will be failed with the commit.

> > > > > >

> > > > > > I'm afraid this commit breaks networking on i.MX7D for me :-(

> > > > > > My board is configured to boot over NFS root with IP

> > > > > > autoconfiguration through DHCP. The DHCP request goes out, the

> > > > > > reply it sent back by the server, but never noticed by the fec driver.

> > > > > >

> > > > > > v5.7 works fine. As 29ae6bd1b0d8a57d7c00ab12cbb949fc41986eef

> > > > > > was merged during the v5.8 merge window, I suspect something

> > > > > > else cropped in between

> > > > > > 29ae6bd1b0d8a57d7c00ab12cbb949fc41986eef and this patch that

> > > > > > needs to be reverted too. We're close to v5.8 and it would be

> > > > > > annoying to see this regression ending up in the released

> > > > > > kernel. I can test patches, but I'm not familiar enough with

> > > > > > the driver (or the networking

> > > > > > subsystem) to fix the issue myself.

> > > > >

> > > > > If it can be of any help, I've confirmed that, to get the

> > > > > network back to usable state from v5.8-rc6, I have to revert all

> > > > > patches up to this one. This is the top of my branch, on top of v5.8-rc6:

> > > > >

> > > > > 5bbe80c9efea Revert "net: ethernet: fec: Revert "net: ethernet: fec:

> Replace interrupt driven MDIO with polled IO""

> > > > > 5462896a08c1 Revert "net: ethernet: fec: Replace interrupt driven

> MDIO with polled IO"

> > > > > 824a82e2bdfa Revert "net: ethernet: fec: move GPR register offset and

> bit into DT"

> > > > > bfe330591cab Revert "net: fec: disable correct clk in the err path of

> fec_enet_clk_enable"

> > > > > 109958cad578 Revert "net: ethernet: fec: prevent tx starvation under

> high rx load"

> > > >

> > > > OK.

> > > >

> > > > What PHY are you using? A Micrel?

> > >

> > > KSZ9031RNXIA

> > >

> > > > And which DT file?

> > >

> > > It's out of tree.

> > >

> > > &fec1 {

> > >         pinctrl-names = "default";

> > >         pinctrl-0 = <&pinctrl_enet1>;

> > >         assigned-clocks = <&clks IMX7D_ENET1_TIME_ROOT_SRC>,

> > >                           <&clks IMX7D_ENET1_TIME_ROOT_CLK>;

> > >         assigned-clock-parents = <&clks

> IMX7D_PLL_ENET_MAIN_100M_CLK>;

> > >         assigned-clock-rates = <0>, <100000000>;

> > >         phy-mode = "rgmii";

> > >         phy-handle = <&ethphy0>;

> > >         phy-reset-gpios = <&gpio1 13 GPIO_ACTIVE_LOW>;

> > >         phy-supply = <&reg_3v3_sw>;

> > >         fsl,magic-packet;

> > >         status = "okay";

> > >

> > >         mdio {

> > >                 #address-cells = <1>;

> > >                 #size-cells = <0>;

> > >

> > >                 ethphy0: ethernet-phy@0 {

> > >                         reg = <1>;

> > >                 };

> > >

> > >                 ethphy1: ethernet-phy@1 {

> > >                         reg = <2>;

> > >                 };

> > >         };

> > > };

> > >

> > > I can provide the full DT if needed.

> > >

> > > --

> > > Regards,

> > >

> > > Laurent Pinchart
Chris Healy July 27, 2020, 3:01 a.m. UTC | #13
It appears quite a few boards were affected by this micrel PHY driver change:

2ccb0161a0e9eb06f538557d38987e436fc39b8d
80bf72598663496d08b3c0231377db6a99d7fd68
2de00450c0126ec8838f72157577578e85cae5d8
820f8a870f6575acda1bf7f1a03c701c43ed5d79

I just updated the phy-mode with my board from rgmii to rgmii-id and
everything started working fine with net-next again:

eth0      Link encap:Ethernet  HWaddr E6:85:48:8F:93:64
          inet addr:172.16.1.1  Bcast:172.16.255.255  Mask:255.255.0.0
          UP BROADCAST RUNNING MULTICAST  MTU:1500  Metric:1
          RX packets:4643690 errors:0 dropped:0 overruns:0 frame:0
          TX packets:76178 errors:0 dropped:0 overruns:0 carrier:0
          collisions:0 txqueuelen:1000
          RX bytes:2762845502 (2.5 GiB)  TX bytes:5026376 (4.7 MiB)



On Sun, Jul 26, 2020 at 7:40 PM Chris Healy <cphealy@gmail.com> wrote:
>

> Actually, I was a little quick to say it went from broken to working.

>

> With net-next, I'm getting CRC errors on 100% of inbound packets.

> With bcf3440c6dd78bfe5836ec0990fe36d7b4bb7d20 reverted, I drop down to

> a 1% error rate.

>

> This very much feels like a KSZ9031 RGMII timing issue to me...

>

> On Sun, Jul 26, 2020 at 7:35 PM Chris Healy <cphealy@gmail.com> wrote:

> >

> > Hi Laurent,

> >

> > I have the exact same copper PHY.  I just reverted a patch specific to

> > this PHY and went from broken to working.  Give this a try:

> >

> > git revert bcf3440c6dd78bfe5836ec0990fe36d7b4bb7d20

> >

> > Regards,

> >

> > Chris

> >

> > On Sun, Jul 26, 2020 at 7:33 PM Laurent Pinchart

> > <laurent.pinchart@ideasonboard.com> wrote:

> > >

> > > Hi Andrew,

> > >

> > > On Mon, Jul 27, 2020 at 04:14:32AM +0200, Andrew Lunn wrote:

> > > > On Mon, Jul 27, 2020 at 05:06:31AM +0300, Laurent Pinchart wrote:

> > > > > On Mon, Jul 27, 2020 at 04:24:02AM +0300, Laurent Pinchart wrote:

> > > > > > On Mon, Apr 27, 2020 at 10:08:04PM +0800, Fugang Duan wrote:

> > > > > > > This reverts commit 29ae6bd1b0d8a57d7c00ab12cbb949fc41986eef.

> > > > > > >

> > > > > > > The commit breaks ethernet function on i.MX6SX, i.MX7D, i.MX8MM,

> > > > > > > i.MX8MQ, and i.MX8QXP platforms. Boot yocto system by NFS mounting

> > > > > > > rootfs will be failed with the commit.

> > > > > >

> > > > > > I'm afraid this commit breaks networking on i.MX7D for me :-( My board

> > > > > > is configured to boot over NFS root with IP autoconfiguration through

> > > > > > DHCP. The DHCP request goes out, the reply it sent back by the server,

> > > > > > but never noticed by the fec driver.

> > > > > >

> > > > > > v5.7 works fine. As 29ae6bd1b0d8a57d7c00ab12cbb949fc41986eef was merged

> > > > > > during the v5.8 merge window, I suspect something else cropped in

> > > > > > between 29ae6bd1b0d8a57d7c00ab12cbb949fc41986eef and this patch that

> > > > > > needs to be reverted too. We're close to v5.8 and it would be annoying

> > > > > > to see this regression ending up in the released kernel. I can test

> > > > > > patches, but I'm not familiar enough with the driver (or the networking

> > > > > > subsystem) to fix the issue myself.

> > > > >

> > > > > If it can be of any help, I've confirmed that, to get the network back

> > > > > to usable state from v5.8-rc6, I have to revert all patches up to this

> > > > > one. This is the top of my branch, on top of v5.8-rc6:

> > > > >

> > > > > 5bbe80c9efea Revert "net: ethernet: fec: Revert "net: ethernet: fec: Replace interrupt driven MDIO with polled IO""

> > > > > 5462896a08c1 Revert "net: ethernet: fec: Replace interrupt driven MDIO with polled IO"

> > > > > 824a82e2bdfa Revert "net: ethernet: fec: move GPR register offset and bit into DT"

> > > > > bfe330591cab Revert "net: fec: disable correct clk in the err path of fec_enet_clk_enable"

> > > > > 109958cad578 Revert "net: ethernet: fec: prevent tx starvation under high rx load"

> > > >

> > > > OK.

> > > >

> > > > What PHY are you using? A Micrel?

> > >

> > > KSZ9031RNXIA

> > >

> > > > And which DT file?

> > >

> > > It's out of tree.

> > >

> > > &fec1 {

> > >         pinctrl-names = "default";

> > >         pinctrl-0 = <&pinctrl_enet1>;

> > >         assigned-clocks = <&clks IMX7D_ENET1_TIME_ROOT_SRC>,

> > >                           <&clks IMX7D_ENET1_TIME_ROOT_CLK>;

> > >         assigned-clock-parents = <&clks IMX7D_PLL_ENET_MAIN_100M_CLK>;

> > >         assigned-clock-rates = <0>, <100000000>;

> > >         phy-mode = "rgmii";

> > >         phy-handle = <&ethphy0>;

> > >         phy-reset-gpios = <&gpio1 13 GPIO_ACTIVE_LOW>;

> > >         phy-supply = <&reg_3v3_sw>;

> > >         fsl,magic-packet;

> > >         status = "okay";

> > >

> > >         mdio {

> > >                 #address-cells = <1>;

> > >                 #size-cells = <0>;

> > >

> > >                 ethphy0: ethernet-phy@0 {

> > >                         reg = <1>;

> > >                 };

> > >

> > >                 ethphy1: ethernet-phy@1 {

> > >                         reg = <2>;

> > >                 };

> > >         };

> > > };

> > >

> > > I can provide the full DT if needed.

> > >

> > > --

> > > Regards,

> > >

> > > Laurent Pinchart
Andy Duan July 27, 2020, 3:08 a.m. UTC | #14
From: Chris Healy <cphealy@gmail.com> Sent: Monday, July 27, 2020 11:01 AM

> It appears quite a few boards were affected by this micrel PHY driver change:

> 

> 2ccb0161a0e9eb06f538557d38987e436fc39b8d

> 80bf72598663496d08b3c0231377db6a99d7fd68

> 2de00450c0126ec8838f72157577578e85cae5d8

> 820f8a870f6575acda1bf7f1a03c701c43ed5d79

> 

> I just updated the phy-mode with my board from rgmii to rgmii-id and

> everything started working fine with net-next again:

> 

> eth0      Link encap:Ethernet  HWaddr E6:85:48:8F:93:64

>           inet addr:172.16.1.1  Bcast:172.16.255.255  Mask:255.255.0.0

>           UP BROADCAST RUNNING MULTICAST  MTU:1500  Metric:1

>           RX packets:4643690 errors:0 dropped:0 overruns:0 frame:0

>           TX packets:76178 errors:0 dropped:0 overruns:0 carrier:0

>           collisions:0 txqueuelen:1000

>           RX bytes:2762845502 (2.5 GiB)  TX bytes:5026376 (4.7 MiB)

> 

> 


It is reasonable to change phy-mode to "rgmii-id" to let the PHY supply the
TX/RX skew, since the MAC doesn't support delay.


Regards,
Fugang
> 

> On Sun, Jul 26, 2020 at 7:40 PM Chris Healy <cphealy@gmail.com> wrote:

> >

> > Actually, I was a little quick to say it went from broken to working.

> >

> > With net-next, I'm getting CRC errors on 100% of inbound packets.

> > With bcf3440c6dd78bfe5836ec0990fe36d7b4bb7d20 reverted, I drop down to

> > a 1% error rate.

> >

> > This very much feels like a KSZ9031 RGMII timing issue to me...

> >

> > On Sun, Jul 26, 2020 at 7:35 PM Chris Healy <cphealy@gmail.com> wrote:

> > >

> > > Hi Laurent,

> > >

> > > I have the exact same copper PHY.  I just reverted a patch specific

> > > to this PHY and went from broken to working.  Give this a try:

> > >

> > > git revert bcf3440c6dd78bfe5836ec0990fe36d7b4bb7d20

> > >

> > > Regards,

> > >

> > > Chris

> > >

> > > On Sun, Jul 26, 2020 at 7:33 PM Laurent Pinchart

> > > <laurent.pinchart@ideasonboard.com> wrote:

> > > >

> > > > Hi Andrew,

> > > >

> > > > On Mon, Jul 27, 2020 at 04:14:32AM +0200, Andrew Lunn wrote:

> > > > > On Mon, Jul 27, 2020 at 05:06:31AM +0300, Laurent Pinchart wrote:

> > > > > > On Mon, Jul 27, 2020 at 04:24:02AM +0300, Laurent Pinchart wrote:

> > > > > > > On Mon, Apr 27, 2020 at 10:08:04PM +0800, Fugang Duan wrote:

> > > > > > > > This reverts commit

> 29ae6bd1b0d8a57d7c00ab12cbb949fc41986eef.

> > > > > > > >

> > > > > > > > The commit breaks ethernet function on i.MX6SX, i.MX7D,

> > > > > > > > i.MX8MM, i.MX8MQ, and i.MX8QXP platforms. Boot yocto

> > > > > > > > system by NFS mounting rootfs will be failed with the commit.

> > > > > > >

> > > > > > > I'm afraid this commit breaks networking on i.MX7D for me

> > > > > > > :-( My board is configured to boot over NFS root with IP

> > > > > > > autoconfiguration through DHCP. The DHCP request goes out,

> > > > > > > the reply it sent back by the server, but never noticed by the fec

> driver.

> > > > > > >

> > > > > > > v5.7 works fine. As 29ae6bd1b0d8a57d7c00ab12cbb949fc41986eef

> > > > > > > was merged during the v5.8 merge window, I suspect something

> > > > > > > else cropped in between

> > > > > > > 29ae6bd1b0d8a57d7c00ab12cbb949fc41986eef and this patch that

> > > > > > > needs to be reverted too. We're close to v5.8 and it would

> > > > > > > be annoying to see this regression ending up in the released

> > > > > > > kernel. I can test patches, but I'm not familiar enough with

> > > > > > > the driver (or the networking

> > > > > > > subsystem) to fix the issue myself.

> > > > > >

> > > > > > If it can be of any help, I've confirmed that, to get the

> > > > > > network back to usable state from v5.8-rc6, I have to revert

> > > > > > all patches up to this one. This is the top of my branch, on top of

> v5.8-rc6:

> > > > > >

> > > > > > 5bbe80c9efea Revert "net: ethernet: fec: Revert "net: ethernet: fec:

> Replace interrupt driven MDIO with polled IO""

> > > > > > 5462896a08c1 Revert "net: ethernet: fec: Replace interrupt driven

> MDIO with polled IO"

> > > > > > 824a82e2bdfa Revert "net: ethernet: fec: move GPR register offset

> and bit into DT"

> > > > > > bfe330591cab Revert "net: fec: disable correct clk in the err path of

> fec_enet_clk_enable"

> > > > > > 109958cad578 Revert "net: ethernet: fec: prevent tx starvation under

> high rx load"

> > > > >

> > > > > OK.

> > > > >

> > > > > What PHY are you using? A Micrel?

> > > >

> > > > KSZ9031RNXIA

> > > >

> > > > > And which DT file?

> > > >

> > > > It's out of tree.

> > > >

> > > > &fec1 {

> > > >         pinctrl-names = "default";

> > > >         pinctrl-0 = <&pinctrl_enet1>;

> > > >         assigned-clocks = <&clks IMX7D_ENET1_TIME_ROOT_SRC>,

> > > >                           <&clks

> IMX7D_ENET1_TIME_ROOT_CLK>;

> > > >         assigned-clock-parents = <&clks

> IMX7D_PLL_ENET_MAIN_100M_CLK>;

> > > >         assigned-clock-rates = <0>, <100000000>;

> > > >         phy-mode = "rgmii";

> > > >         phy-handle = <&ethphy0>;

> > > >         phy-reset-gpios = <&gpio1 13 GPIO_ACTIVE_LOW>;

> > > >         phy-supply = <&reg_3v3_sw>;

> > > >         fsl,magic-packet;

> > > >         status = "okay";

> > > >

> > > >         mdio {

> > > >                 #address-cells = <1>;

> > > >                 #size-cells = <0>;

> > > >

> > > >                 ethphy0: ethernet-phy@0 {

> > > >                         reg = <1>;

> > > >                 };

> > > >

> > > >                 ethphy1: ethernet-phy@1 {

> > > >                         reg = <2>;

> > > >                 };

> > > >         };

> > > > };

> > > >

> > > > I can provide the full DT if needed.

> > > >

> > > > --

> > > > Regards,

> > > >

> > > > Laurent Pinchart
Andrew Lunn July 27, 2020, 12:05 p.m. UTC | #15
On Sun, Jul 26, 2020 at 08:01:25PM -0700, Chris Healy wrote:
> It appears quite a few boards were affected by this micrel PHY driver change:

> 

> 2ccb0161a0e9eb06f538557d38987e436fc39b8d

> 80bf72598663496d08b3c0231377db6a99d7fd68

> 2de00450c0126ec8838f72157577578e85cae5d8

> 820f8a870f6575acda1bf7f1a03c701c43ed5d79

> 

> I just updated the phy-mode with my board from rgmii to rgmii-id and

> everything started working fine with net-next again:


Hi Chris

Is this a mainline supported board? Do you plan to submit a patch?

Laurent, does the change also work for your board? This is another one
of those cases where a bug in the PHY driver, not respecting the
phy-mode, has masked a bug in the device tree, using the wrong
phy-mode. We had the same issue with the Atheros PHY a while back.

   Andrew
Chris Healy July 27, 2020, 1:30 p.m. UTC | #16
> > I just updated the phy-mode with my board from rgmii to rgmii-id and

> > everything started working fine with net-next again:

>

> Hi Chris

>

> Is this a mainline supported board? Do you plan to submit a patch?

>

Yes, my board is in mainline so I plan on submitting a patch.

> Laurent, does the change also work for your board? This is another one

> of those cases were a bug in the PHY driver, not respecting the

> phy-mode, has masked a bug in the device tree, using the wrong

> phy-mode. We had the same issue with the Atheros PHY a while back.

>

>    Andrew
Laurent Pinchart July 27, 2020, 3:24 p.m. UTC | #17
Hi Andrew,

On Mon, Jul 27, 2020 at 02:05:45PM +0200, Andrew Lunn wrote:
> On Sun, Jul 26, 2020 at 08:01:25PM -0700, Chris Healy wrote:

> > It appears quite a few boards were affected by this micrel PHY driver change:

> > 

> > 2ccb0161a0e9eb06f538557d38987e436fc39b8d

> > 80bf72598663496d08b3c0231377db6a99d7fd68

> > 2de00450c0126ec8838f72157577578e85cae5d8

> > 820f8a870f6575acda1bf7f1a03c701c43ed5d79

> > 

> > I just updated the phy-mode with my board from rgmii to rgmii-id and

> > everything started working fine with net-next again:

> 

> Hi Chris

> 

> Is this a mainline supported board? Do you plan to submit a patch?

> 

> Laurent, does the change also work for your board? This is another one

> of those cases were a bug in the PHY driver, not respecting the

> phy-mode, has masked a bug in the device tree, using the wrong

> phy-mode. We had the same issue with the Atheros PHY a while back.


Yes, setting the phy-mode to rgmii-id fixes the issue.

Thank you everybody for your quick responses and very useful help !

On a side note, when the kernel boots, there's a ~10s delay for the
ethernet connection to come up:

[    4.050754] Micrel KSZ9031 Gigabit PHY 30be0000.ethernet-1:01: attached PHY driver [Micrel KSZ9031 Gigabit PHY] (mii_bus:phy_addr=30be0000.ethernet-1:01, irq=POLL)
[   15.628528] fec 30be0000.ethernet eth0: Link is Up - 1Gbps/Full - flow control rx/tx
[   15.676961] Sending DHCP requests ., OK
[   15.720925] IP-Config: Got DHCP answer from 192.168.2.47, my address is 192.168.2.210

The LED on the connected switch confirms this: it lights up synchronously
with the "Link is up" message. It's not an urgent issue, but if someone
had a few pointers on how I could debug that, it would be appreciated.

-- 
Regards,

Laurent Pinchart
Chris Healy July 27, 2020, 3:41 p.m. UTC | #18
On Mon, Jul 27, 2020 at 8:24 AM Laurent Pinchart
<laurent.pinchart@ideasonboard.com> wrote:
>

> Hi Andrew,

>

> On Mon, Jul 27, 2020 at 02:05:45PM +0200, Andrew Lunn wrote:

> > On Sun, Jul 26, 2020 at 08:01:25PM -0700, Chris Healy wrote:

> > > It appears quite a few boards were affected by this micrel PHY driver change:

> > >

> > > 2ccb0161a0e9eb06f538557d38987e436fc39b8d

> > > 80bf72598663496d08b3c0231377db6a99d7fd68

> > > 2de00450c0126ec8838f72157577578e85cae5d8

> > > 820f8a870f6575acda1bf7f1a03c701c43ed5d79

> > >

> > > I just updated the phy-mode with my board from rgmii to rgmii-id and

> > > everything started working fine with net-next again:

> >

> > Hi Chris

> >

> > Is this a mainline supported board? Do you plan to submit a patch?

> >

> > Laurent, does the change also work for your board? This is another one

> > of those cases were a bug in the PHY driver, not respecting the

> > phy-mode, has masked a bug in the device tree, using the wrong

> > phy-mode. We had the same issue with the Atheros PHY a while back.

>

> Yes, setting the phy-mode to rgmii-id fixes the issue.

>

> Thank you everybody for your quick responses and very useful help !

>

> On a side note, when the kernel boots, there's a ~10s delay for the

> ethernet connection to come up:

>

> [    4.050754] Micrel KSZ9031 Gigabit PHY 30be0000.ethernet-1:01: attached PHY driver [Micrel KSZ9031 Gigabit PHY] (mii_bus:phy_addr=30be0000.ethernet-1:01, irq=POLL)

> [   15.628528] fec 30be0000.ethernet eth0: Link is Up - 1Gbps/Full - flow control rx/tx

> [   15.676961] Sending DHCP requests ., OK

> [   15.720925] IP-Config: Got DHCP answer from 192.168.2.47, my address is 192.168.2.210

>

> The LED on the connected switch confirms this, it lits up synchronously

> with the "Link is up" message. It's not an urgent issue, but if someone

> had a few pointers on how I could debug that, it would be appreciated.


Here are a few suggestions that could help in learning more:

1) Review the KSZ9031 HW errata and compare against the PHY driver
code.  There's a number of errata that could cause this from my quick
review.
2) Based on what I read in the HW errata, try different link partners
that utilize different copper PHYs to see if it results in different
behaviour.
3) Try setting your autonegotiate advertisement to only advertise
100Mbps and see if this affects the timing.  Obviously this would not
be a solution but might help in better understanding the issue.

>

> --

> Regards,

>

> Laurent Pinchart
Laurent Pinchart July 27, 2020, 5:37 p.m. UTC | #19
Hi Chris,

On Mon, Jul 27, 2020 at 08:41:23AM -0700, Chris Healy wrote:
> On Mon, Jul 27, 2020 at 8:24 AM Laurent Pinchart wrote:

> > On Mon, Jul 27, 2020 at 02:05:45PM +0200, Andrew Lunn wrote:

> > > On Sun, Jul 26, 2020 at 08:01:25PM -0700, Chris Healy wrote:

> > > > It appears quite a few boards were affected by this micrel PHY driver change:

> > > >

> > > > 2ccb0161a0e9eb06f538557d38987e436fc39b8d

> > > > 80bf72598663496d08b3c0231377db6a99d7fd68

> > > > 2de00450c0126ec8838f72157577578e85cae5d8

> > > > 820f8a870f6575acda1bf7f1a03c701c43ed5d79

> > > >

> > > > I just updated the phy-mode with my board from rgmii to rgmii-id and

> > > > everything started working fine with net-next again:

> > >

> > > Hi Chris

> > >

> > > Is this a mainline supported board? Do you plan to submit a patch?

> > >

> > > Laurent, does the change also work for your board? This is another one

> > > of those cases were a bug in the PHY driver, not respecting the

> > > phy-mode, has masked a bug in the device tree, using the wrong

> > > phy-mode. We had the same issue with the Atheros PHY a while back.

> >

> > Yes, setting the phy-mode to rgmii-id fixes the issue.

> >

> > Thank you everybody for your quick responses and very useful help !

> >

> > On a side note, when the kernel boots, there's a ~10s delay for the

> > ethernet connection to come up:

> >

> > [    4.050754] Micrel KSZ9031 Gigabit PHY 30be0000.ethernet-1:01: attached PHY driver [Micrel KSZ9031 Gigabit PHY] (mii_bus:phy_addr=30be0000.ethernet-1:01, irq=POLL)

> > [   15.628528] fec 30be0000.ethernet eth0: Link is Up - 1Gbps/Full - flow control rx/tx

> > [   15.676961] Sending DHCP requests ., OK

> > [   15.720925] IP-Config: Got DHCP answer from 192.168.2.47, my address is 192.168.2.210

> >

> > The LED on the connected switch confirms this, it lits up synchronously

> > with the "Link is up" message. It's not an urgent issue, but if someone

> > had a few pointers on how I could debug that, it would be appreciated.

> 

> Here's a few suggestions that could help in learning more:

> 

> 1) Review the KSZ9031 HW errata and compare against the PHY driver

> code.  There's a number of errata that could cause this from my quick

> review.


I'll have a look at that, thanks.

> 2) Based on what I read in the HW errata, try different link partners

> that utilize different copper PHYs to see if it results in different

> behaviour.


I have limited available test equipment, but I can give it a try.

> 3) Try setting your autonegotiate advertisement to only advertise

> 100Mbps and see if this affects the timing.  Obviously this would not

> be a solution but might help in better understanding the issue.


I've tested this, and the link then comes up in ~2 seconds instead of
~10. That's clearly an improvement, but I have no idea what it implies
:-)

[    4.090655] Micrel KSZ9031 Gigabit PHY 30be0000.ethernet-1:01: attached PHY driver [Micrel KSZ9031 Gigabit PHY] (mii_bus:phy_addr=30be0000.ethernet-1:01, irq=POLL)
[    6.188347] fec 30be0000.ethernet eth0: Link is Up - 100Mbps/Full - flow control rx/tx
[    6.236843] Sending DHCP requests ., OK
[    6.280807] IP-Config: Got DHCP answer from 192.168.2.47, my address is 192.168.2.210

-- 
Regards,

Laurent Pinchart
Laurent Pinchart July 27, 2020, 6:03 p.m. UTC | #20
On Mon, Jul 27, 2020 at 08:37:20PM +0300, Laurent Pinchart wrote:
> On Mon, Jul 27, 2020 at 08:41:23AM -0700, Chris Healy wrote:

> > On Mon, Jul 27, 2020 at 8:24 AM Laurent Pinchart wrote:

> > > On Mon, Jul 27, 2020 at 02:05:45PM +0200, Andrew Lunn wrote:

> > > > On Sun, Jul 26, 2020 at 08:01:25PM -0700, Chris Healy wrote:

> > > > > It appears quite a few boards were affected by this micrel PHY driver change:

> > > > >

> > > > > 2ccb0161a0e9eb06f538557d38987e436fc39b8d

> > > > > 80bf72598663496d08b3c0231377db6a99d7fd68

> > > > > 2de00450c0126ec8838f72157577578e85cae5d8

> > > > > 820f8a870f6575acda1bf7f1a03c701c43ed5d79

> > > > >

> > > > > I just updated the phy-mode with my board from rgmii to rgmii-id and

> > > > > everything started working fine with net-next again:

> > > >

> > > > Hi Chris

> > > >

> > > > Is this a mainline supported board? Do you plan to submit a patch?

> > > >

> > > > Laurent, does the change also work for your board? This is another one

> > > > of those cases where a bug in the PHY driver, not respecting the

> > > > phy-mode, has masked a bug in the device tree, using the wrong

> > > > phy-mode. We had the same issue with the Atheros PHY a while back.

> > >

> > > Yes, setting the phy-mode to rgmii-id fixes the issue.

> > >

> > > Thank you everybody for your quick responses and very useful help !

> > >

> > > On a side note, when the kernel boots, there's a ~10s delay for the

> > > ethernet connection to come up:

> > >

> > > [    4.050754] Micrel KSZ9031 Gigabit PHY 30be0000.ethernet-1:01: attached PHY driver [Micrel KSZ9031 Gigabit PHY] (mii_bus:phy_addr=30be0000.ethernet-1:01, irq=POLL)

> > > [   15.628528] fec 30be0000.ethernet eth0: Link is Up - 1Gbps/Full - flow control rx/tx

> > > [   15.676961] Sending DHCP requests ., OK

> > > [   15.720925] IP-Config: Got DHCP answer from 192.168.2.47, my address is 192.168.2.210

> > >

> > > The LED on the connected switch confirms this, it lights up synchronously

> > > with the "Link is up" message. It's not an urgent issue, but if someone

> > > had a few pointers on how I could debug that, it would be appreciated.

> > 

> > Here's a few suggestions that could help in learning more:

> > 

> > 1) Review the KSZ9031 HW errata and compare against the PHY driver

> > code.  There's a number of errata that could cause this from my quick

> > review.

> 

> I'll have a look at that, thanks.


I thought issue 5 ("Auto-Negotiation link-up failure / long link-up time
due to default FLP interval setting") was a likely candidate, but it
seems it's already handled in the driver (implemented in
ksz9031_center_flp_timing()).

I've run a few more tests, adding a WARN_ON in ksz9031_config_init() to
trace its callers. It looks like the initial negotiation fails, until
ksz9031_read_status() restarts it after the maximum number of
iterations:

[    4.047515] ------------[ cut here ]------------
[    4.052388] WARNING: CPU: 0 PID: 1 at drivers/net/phy/micrel.c:693 ksz9031_config_init+0x34/0x344
[    4.061827] Modules linked in:
[    4.064932] CPU: 0 PID: 1 Comm: swapper/0 Not tainted 5.8.0-rc6-00102-g6a84d7a1fa75-dirty #505
[    4.073570] Hardware name: Freescale i.MX7 Dual (Device Tree)
[    4.079364] [<c01119d4>] (unwind_backtrace) from [<c010c0d0>] (show_stack+0x10/0x14)
[    4.087147] [<c010c0d0>] (show_stack) from [<c055bd78>] (dump_stack+0xe8/0x120)
[    4.094498] [<c055bd78>] (dump_stack) from [<c0123888>] (__warn+0xc0/0x108)
[    4.101497] [<c0123888>] (__warn) from [<c0123c60>] (warn_slowpath_fmt+0x60/0xbc)
[    4.109018] [<c0123c60>] (warn_slowpath_fmt) from [<c0760558>] (ksz9031_config_init+0x34/0x344)
[    4.117754] [<c0760558>] (ksz9031_config_init) from [<c075b388>] (phy_attach_direct+0xfc/0x2b0)
[    4.126487] [<c075b388>] (phy_attach_direct) from [<c075b680>] (phy_connect_direct+0x1c/0x58)
[    4.135052] [<c075b680>] (phy_connect_direct) from [<c08d9bb8>] (of_phy_connect+0x38/0x60)
[    4.143354] [<c08d9bb8>] (of_phy_connect) from [<c0766920>] (fec_enet_mii_probe+0x40/0x1ac)
[    4.151741] [<c0766920>] (fec_enet_mii_probe) from [<c0769370>] (fec_enet_open+0x27c/0x340)
[    4.160130] [<c0769370>] (fec_enet_open) from [<c096f45c>] (__dev_open+0xd0/0x158)
[    4.167734] [<c096f45c>] (__dev_open) from [<c096f830>] (__dev_change_flags+0x168/0x1d4)
[    4.175860] [<c096f830>] (__dev_change_flags) from [<c096f8b4>] (dev_change_flags+0x18/0x48)
[    4.184338] [<c096f8b4>] (dev_change_flags) from [<c113d390>] (ip_auto_config+0x270/0x1068)
[    4.192727] [<c113d390>] (ip_auto_config) from [<c01021a4>] (do_one_initcall+0x80/0x348)
[    4.200855] [<c01021a4>] (do_one_initcall) from [<c1100fd0>] (kernel_init_freeable+0x15c/0x20c)
[    4.209591] [<c1100fd0>] (kernel_init_freeable) from [<c0bd7d44>] (kernel_init+0x8/0x114)
[    4.217804] [<c0bd7d44>] (kernel_init) from [<c0100134>] (ret_from_fork+0x14/0x20)
[    4.225402] Exception stack(0xec0edfb0 to 0xec0edff8)
[    4.230484] dfa0:                                     00000000 00000000 00000000 00000000
[    4.238692] dfc0: 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000
[    4.246899] dfe0: 00000000 00000000 00000000 00000000 00000013 00000000
[    4.254068] irq event stamp: 270123
[    4.257672] hardirqs last  enabled at (270141): [<c018dddc>] console_unlock+0x41c/0x5d8
[    4.265710] hardirqs last disabled at (270148): [<c018da68>] console_unlock+0xa8/0x5d8
[    4.273754] softirqs last  enabled at (270166): [<c0101580>] __do_softirq+0x2b8/0x508
[    4.281683] softirqs last disabled at (270177): [<c012b974>] irq_exit+0xfc/0x17c
[    4.289194] ---[ end trace 620517544bb2f528 ]---
[    4.295734] ksz9031_center_flp_timing
[    4.299781] ksz9031_center_flp_timing: restart negotiation
[    4.305402] ksz9031_config_init: 0
[    4.311116] Micrel KSZ9031 Gigabit PHY 30be0000.ethernet-1:01: attached PHY driver [Micrel KSZ9031 Gigabit PHY] (mii_bus:phy_addr=30be0000.ethernet-1:01, irq=POLL)
[   11.677512] ------------[ cut here ]------------
[   11.682341] WARNING: CPU: 0 PID: 12 at drivers/net/phy/micrel.c:693 ksz9031_config_init+0x34/0x344
[   11.691410] Modules linked in:
[   11.694515] CPU: 0 PID: 12 Comm: kworker/0:1 Tainted: G        W         5.8.0-rc6-00102-g6a84d7a1fa75-dirty #505
[   11.704803] Hardware name: Freescale i.MX7 Dual (Device Tree)
[   11.710591] Workqueue: events_power_efficient phy_state_machine
[   11.716561] [<c01119d4>] (unwind_backtrace) from [<c010c0d0>] (show_stack+0x10/0x14)
[   11.724346] [<c010c0d0>] (show_stack) from [<c055bd78>] (dump_stack+0xe8/0x120)
[   11.731699] [<c055bd78>] (dump_stack) from [<c0123888>] (__warn+0xc0/0x108)
[   11.738700] [<c0123888>] (__warn) from [<c0123c60>] (warn_slowpath_fmt+0x60/0xbc)
[   11.746223] [<c0123c60>] (warn_slowpath_fmt) from [<c0760558>] (ksz9031_config_init+0x34/0x344)
[   11.754960] [<c0760558>] (ksz9031_config_init) from [<c0760b78>] (ksz9031_read_status+0x40/0x84)
[   11.763785] [<c0760b78>] (ksz9031_read_status) from [<c0757414>] (phy_check_link_status+0x54/0xec)
[   11.772783] [<c0757414>] (phy_check_link_status) from [<c0758318>] (phy_state_machine+0x190/0x204)
[   11.781781] [<c0758318>] (phy_state_machine) from [<c0143a5c>] (process_one_work+0x2d0/0x778)
[   11.790343] [<c0143a5c>] (process_one_work) from [<c0143f30>] (worker_thread+0x2c/0x588)
[   11.798472] [<c0143f30>] (worker_thread) from [<c014b240>] (kthread+0x130/0x144)
[   11.805907] [<c014b240>] (kthread) from [<c0100134>] (ret_from_fork+0x14/0x20)
[   11.813159] Exception stack(0xec12bfb0 to 0xec12bff8)
[   11.818243] bfa0:                                     00000000 00000000 00000000 00000000
[   11.826454] bfc0: 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000
[   11.834662] bfe0: 00000000 00000000 00000000 00000000 00000013 00000000
[   11.841402] irq event stamp: 15245
[   11.844848] hardirqs last  enabled at (15253): [<c018dddc>] console_unlock+0x41c/0x5d8
[   11.852868] hardirqs last disabled at (15270): [<c018da68>] console_unlock+0xa8/0x5d8
[   11.860800] softirqs last  enabled at (15286): [<c0101580>] __do_softirq+0x2b8/0x508
[   11.868647] softirqs last disabled at (15297): [<c012b974>] irq_exit+0xfc/0x17c
[   11.875990] ---[ end trace 620517544bb2f529 ]---
[   11.881668] ksz9031_center_flp_timing
[   11.885649] ksz9031_center_flp_timing: restart negotiation
[   11.891308] ksz9031_config_init: 0
[   14.988788] fec 30be0000.ethernet eth0: Link is Up - 1Gbps/Full - flow control rx/tx

> > 2) Based on what I read in the HW errata, try different link partners

> > that utilize different copper PHYs to see if it results in different

> > behaviour.

> 

> I have limited available test equipment, but I can give it a try.

> 

> > 3) Try setting your autonegotiate advertisement to only advertise

> > 100Mbps and see if this affects the timing.  Obviously this would not

> > be a solution but might help in better understanding the issue.

> 

> I've tested this, and the link then comes up in ~2 seconds instead of

> ~10. That's clearly an improvement, but I have no idea what it implies

> :-)

> 

> [    4.090655] Micrel KSZ9031 Gigabit PHY 30be0000.ethernet-1:01: attached PHY driver [Micrel KSZ9031 Gigabit PHY] (mii_bus:phy_addr=30be0000.ethernet-1:01, irq=POLL)

> [    6.188347] fec 30be0000.ethernet eth0: Link is Up - 100Mbps/Full - flow control rx/tx

> [    6.236843] Sending DHCP requests ., OK

> [    6.280807] IP-Config: Got DHCP answer from 192.168.2.47, my address is 192.168.2.210


-- 
Regards,

Laurent Pinchart
diff mbox series

Patch

diff --git a/drivers/net/ethernet/freescale/fec.h b/drivers/net/ethernet/freescale/fec.h
index a6cdd5b..e74dd1f 100644
--- a/drivers/net/ethernet/freescale/fec.h
+++ b/drivers/net/ethernet/freescale/fec.h
@@ -376,7 +376,8 @@  struct bufdesc_ex {
 #define FEC_ENET_TS_AVAIL       ((uint)0x00010000)
 #define FEC_ENET_TS_TIMER       ((uint)0x00008000)
 
-#define FEC_DEFAULT_IMASK (FEC_ENET_TXF | FEC_ENET_RXF)
+#define FEC_DEFAULT_IMASK (FEC_ENET_TXF | FEC_ENET_RXF | FEC_ENET_MII)
+#define FEC_NAPI_IMASK	FEC_ENET_MII
 #define FEC_RX_DISABLED_IMASK (FEC_DEFAULT_IMASK & (~FEC_ENET_RXF))
 
 /* ENET interrupt coalescing macro define */
@@ -542,6 +543,7 @@  struct fec_enet_private {
 	int	link;
 	int	full_duplex;
 	int	speed;
+	struct	completion mdio_done;
 	int	irq[FEC_IRQ_NUM];
 	bool	bufdesc_ex;
 	int	pause_flag;
diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c
index 1ae075a..c7b84bb 100644
--- a/drivers/net/ethernet/freescale/fec_main.c
+++ b/drivers/net/ethernet/freescale/fec_main.c
@@ -976,8 +976,8 @@  fec_restart(struct net_device *ndev)
 	writel((__force u32)cpu_to_be32(temp_mac[1]),
 	       fep->hwp + FEC_ADDR_HIGH);
 
-	/* Clear any outstanding interrupt, except MDIO. */
-	writel((0xffffffff & ~FEC_ENET_MII), fep->hwp + FEC_IEVENT);
+	/* Clear any outstanding interrupt. */
+	writel(0xffffffff, fep->hwp + FEC_IEVENT);
 
 	fec_enet_bd_init(ndev);
 
@@ -1123,7 +1123,7 @@  fec_restart(struct net_device *ndev)
 	if (fep->link)
 		writel(FEC_DEFAULT_IMASK, fep->hwp + FEC_IMASK);
 	else
-		writel(0, fep->hwp + FEC_IMASK);
+		writel(FEC_ENET_MII, fep->hwp + FEC_IMASK);
 
 	/* Init the interrupt coalescing */
 	fec_enet_itr_coal_init(ndev);
@@ -1652,10 +1652,6 @@  fec_enet_interrupt(int irq, void *dev_id)
 	irqreturn_t ret = IRQ_NONE;
 
 	int_events = readl(fep->hwp + FEC_IEVENT);
-
-	/* Don't clear MDIO events, we poll for those */
-	int_events &= ~FEC_ENET_MII;
-
 	writel(int_events, fep->hwp + FEC_IEVENT);
 	fec_enet_collect_events(fep, int_events);
 
@@ -1663,12 +1659,16 @@  fec_enet_interrupt(int irq, void *dev_id)
 		ret = IRQ_HANDLED;
 
 		if (napi_schedule_prep(&fep->napi)) {
-			/* Disable interrupts */
-			writel(0, fep->hwp + FEC_IMASK);
+			/* Disable the NAPI interrupts */
+			writel(FEC_NAPI_IMASK, fep->hwp + FEC_IMASK);
 			__napi_schedule(&fep->napi);
 		}
 	}
 
+	if (int_events & FEC_ENET_MII) {
+		ret = IRQ_HANDLED;
+		complete(&fep->mdio_done);
+	}
 	return ret;
 }
 
@@ -1818,24 +1818,11 @@  static void fec_enet_adjust_link(struct net_device *ndev)
 		phy_print_status(phy_dev);
 }
 
-static int fec_enet_mdio_wait(struct fec_enet_private *fep)
-{
-	uint ievent;
-	int ret;
-
-	ret = readl_poll_timeout_atomic(fep->hwp + FEC_IEVENT, ievent,
-					ievent & FEC_ENET_MII, 2, 30000);
-
-	if (!ret)
-		writel(FEC_ENET_MII, fep->hwp + FEC_IEVENT);
-
-	return ret;
-}
-
 static int fec_enet_mdio_read(struct mii_bus *bus, int mii_id, int regnum)
 {
 	struct fec_enet_private *fep = bus->priv;
 	struct device *dev = &fep->pdev->dev;
+	unsigned long time_left;
 	int ret = 0, frame_start, frame_addr, frame_op;
 	bool is_c45 = !!(regnum & MII_ADDR_C45);
 
@@ -1843,6 +1830,8 @@  static int fec_enet_mdio_read(struct mii_bus *bus, int mii_id, int regnum)
 	if (ret < 0)
 		return ret;
 
+	reinit_completion(&fep->mdio_done);
+
 	if (is_c45) {
 		frame_start = FEC_MMFR_ST_C45;
 
@@ -1854,9 +1843,11 @@  static int fec_enet_mdio_read(struct mii_bus *bus, int mii_id, int regnum)
 		       fep->hwp + FEC_MII_DATA);
 
 		/* wait for end of transfer */
-		ret = fec_enet_mdio_wait(fep);
-		if (ret) {
+		time_left = wait_for_completion_timeout(&fep->mdio_done,
+				usecs_to_jiffies(FEC_MII_TIMEOUT));
+		if (time_left == 0) {
 			netdev_err(fep->netdev, "MDIO address write timeout\n");
+			ret = -ETIMEDOUT;
 			goto out;
 		}
 
@@ -1875,9 +1866,11 @@  static int fec_enet_mdio_read(struct mii_bus *bus, int mii_id, int regnum)
 		FEC_MMFR_TA, fep->hwp + FEC_MII_DATA);
 
 	/* wait for end of transfer */
-	ret = fec_enet_mdio_wait(fep);
-	if (ret) {
+	time_left = wait_for_completion_timeout(&fep->mdio_done,
+			usecs_to_jiffies(FEC_MII_TIMEOUT));
+	if (time_left == 0) {
 		netdev_err(fep->netdev, "MDIO read timeout\n");
+		ret = -ETIMEDOUT;
 		goto out;
 	}
 
@@ -1895,6 +1888,7 @@  static int fec_enet_mdio_write(struct mii_bus *bus, int mii_id, int regnum,
 {
 	struct fec_enet_private *fep = bus->priv;
 	struct device *dev = &fep->pdev->dev;
+	unsigned long time_left;
 	int ret, frame_start, frame_addr;
 	bool is_c45 = !!(regnum & MII_ADDR_C45);
 
@@ -1904,6 +1898,8 @@  static int fec_enet_mdio_write(struct mii_bus *bus, int mii_id, int regnum,
 	else
 		ret = 0;
 
+	reinit_completion(&fep->mdio_done);
+
 	if (is_c45) {
 		frame_start = FEC_MMFR_ST_C45;
 
@@ -1915,9 +1911,11 @@  static int fec_enet_mdio_write(struct mii_bus *bus, int mii_id, int regnum,
 		       fep->hwp + FEC_MII_DATA);
 
 		/* wait for end of transfer */
-		ret = fec_enet_mdio_wait(fep);
-		if (ret) {
+		time_left = wait_for_completion_timeout(&fep->mdio_done,
+			usecs_to_jiffies(FEC_MII_TIMEOUT));
+		if (time_left == 0) {
 			netdev_err(fep->netdev, "MDIO address write timeout\n");
+			ret = -ETIMEDOUT;
 			goto out;
 		}
 	} else {
@@ -1933,9 +1931,12 @@  static int fec_enet_mdio_write(struct mii_bus *bus, int mii_id, int regnum,
 		fep->hwp + FEC_MII_DATA);
 
 	/* wait for end of transfer */
-	ret = fec_enet_mdio_wait(fep);
-	if (ret)
+	time_left = wait_for_completion_timeout(&fep->mdio_done,
+			usecs_to_jiffies(FEC_MII_TIMEOUT));
+	if (time_left == 0) {
 		netdev_err(fep->netdev, "MDIO write timeout\n");
+		ret  = -ETIMEDOUT;
+	}
 
 out:
 	pm_runtime_mark_last_busy(dev);
@@ -2144,9 +2145,6 @@  static int fec_enet_mii_init(struct platform_device *pdev)
 
 	writel(fep->phy_speed, fep->hwp + FEC_MII_SPEED);
 
-	/* Clear any pending transaction complete indication */
-	writel(FEC_ENET_MII, fep->hwp + FEC_IEVENT);
-
 	fep->mii_bus = mdiobus_alloc();
 	if (fep->mii_bus == NULL) {
 		err = -ENOMEM;
@@ -3688,6 +3686,7 @@  fec_probe(struct platform_device *pdev)
 		fep->irq[i] = irq;
 	}
 
+	init_completion(&fep->mdio_done);
 	ret = fec_enet_mii_init(pdev);
 	if (ret)
 		goto failed_mii_init;