diff mbox

[3/3] net: hisilicon: new hip04 ethernet driver

Message ID 1395414570-25515-4-git-send-email-zhangfei.gao@linaro.org
State Changes Requested
Headers show

Commit Message

Zhangfei Gao March 21, 2014, 3:09 p.m. UTC
Support Hisilicon hip04 ethernet driver, including 100M / 1000M controller

Signed-off-by: Zhangfei Gao <zhangfei.gao@linaro.org>
---
 drivers/net/ethernet/hisilicon/Makefile    |    2 +-
 drivers/net/ethernet/hisilicon/hip04_eth.c |  730 ++++++++++++++++++++++++++++
 2 files changed, 731 insertions(+), 1 deletion(-)
 create mode 100644 drivers/net/ethernet/hisilicon/hip04_eth.c

Comments

Arnd Bergmann March 21, 2014, 3:27 p.m. UTC | #1
On Friday 21 March 2014 23:09:30 Zhangfei Gao wrote:

> +
> +static void __iomem *ppebase;

Any reason why you still have this, rather than using a separate
driver for it as we discussed? If you have comments that you still
plan to address, please mention those in the introductory mail,
so you don't get the same review comments multiple times.


> +static void hip04_tx_reclaim(struct net_device *ndev, bool force)
> +{
> +	struct hip04_priv *priv = netdev_priv(ndev);
> +	unsigned tx_head = priv->tx_head;
> +	unsigned tx_tail = priv->tx_tail;
> +	struct tx_desc *desc = &priv->tx_desc[priv->tx_tail];
> +
> +	while (tx_tail != tx_head) {
> +		if (desc->send_addr != 0) {
> +			if (force)
> +				desc->send_addr = 0;
> +			else
> +				break;
> +		}
> +		if (priv->tx_phys[tx_tail]) {
> +			dma_unmap_single(&ndev->dev, priv->tx_phys[tx_tail],
> +				priv->tx_skb[tx_tail]->len, DMA_TO_DEVICE);
> +			priv->tx_phys[tx_tail] = 0;
> +		}
> +		dev_kfree_skb_irq(priv->tx_skb[tx_tail]);
> +		priv->tx_skb[tx_tail] = NULL;
> +		tx_tail = TX_NEXT(tx_tail);
> +		priv->tx_count--;
> +	}
> +	priv->tx_tail = tx_tail;
> +}

You call this function from start_xmit(), which may be too early, causing the
dma_unmap_single() and dev_kfree_skb_irq() functions to be called while the
device is still accessing the data. This is bad.

You have to ensure that you only ever clean up tx buffers that have been
successfully transmitted. Also, you should use an interrupt to notify you
of this in case there is no further xmit packet. Otherwise you may have
a user space program waiting indefinitely for a single packet to get sent
on a socket.

It's ok to also call the cleanup from start_xmit, but calling it from the
poll() function or another appropriate place is required.

> +	priv->id = of_alias_get_id(node, "ethernet");
> +	if (priv->id < 0) {
> +		dev_warn(d, "no ethernet alias\n");
> +		ret = -EINVAL;
> +		goto init_fail;
> +	}

Apparently you try to rely on the alias to refer to a specific piece
of hardware, which is not correct. The alias is meant to be selectable
to match e.g. the numbering written on the external connector, which
is totally independent of the internal hardware.

	Arnd

--
To unsubscribe from this list: send the line "unsubscribe devicetree" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Zhangfei Gao March 22, 2014, 1:18 a.m. UTC | #2
Dear Arnd

On 03/21/2014 11:27 PM, Arnd Bergmann wrote:
> On Friday 21 March 2014 23:09:30 Zhangfei Gao wrote:
>
>> +
>> +static void __iomem *ppebase;
>
> Any reason why you still have this, rather than using a separate
> driver for it as we discussed? If you have comments that you still
> plan to address, please mention those in the introductory mail,
> so you don't get the same review comments multiple times.

Sorry for my bad understanding.
I thought you agreed to use this.
Will check your earlier comments more carefully.

>
>
>> +static void hip04_tx_reclaim(struct net_device *ndev, bool force)
>> +{
>> +	struct hip04_priv *priv = netdev_priv(ndev);
>> +	unsigned tx_head = priv->tx_head;
>> +	unsigned tx_tail = priv->tx_tail;
>> +	struct tx_desc *desc = &priv->tx_desc[priv->tx_tail];
>> +
>> +	while (tx_tail != tx_head) {
>> +		if (desc->send_addr != 0) {
>> +			if (force)
>> +				desc->send_addr = 0;
>> +			else
>> +				break;
>> +		}
>> +		if (priv->tx_phys[tx_tail]) {
>> +			dma_unmap_single(&ndev->dev, priv->tx_phys[tx_tail],
>> +				priv->tx_skb[tx_tail]->len, DMA_TO_DEVICE);
>> +			priv->tx_phys[tx_tail] = 0;
>> +		}
>> +		dev_kfree_skb_irq(priv->tx_skb[tx_tail]);
>> +		priv->tx_skb[tx_tail] = NULL;
>> +		tx_tail = TX_NEXT(tx_tail);
>> +		priv->tx_count--;
>> +	}
>> +	priv->tx_tail = tx_tail;
>> +}
>
> You call this function from start_xmit(), which may be too early, causing the
> dma_unmap_single() and dev_kfree_skb_irq() functions to be called while the
> device is still accessing the data. This is bad.

There is a protection.
Only after xmit done, desc->send_addr = 0, which is cleared by hardware.

>
> You have to ensure that you only ever clean up tx buffers that have been
> successfully transmitted. Also, you should use an interrupt to notify you
> of this in case there is no further xmit packet. Otherwise you may have
> a user space program waiting indefinitely for a single packet to get sent
> on a socket.
>
> It's ok to also call the cleanup from start_xmit, but calling it from the
> poll() function or another appropriate place is required.

There is no transmit done interrupt, so relying on every xmit to reclaim 
finished buffer.
I thought it would be enough, since there are TX_DESC_NUM descs.
It is a good idea also put reclaim in poll, then will add spin lock etc.

>
>> +	priv->id = of_alias_get_id(node, "ethernet");
>> +	if (priv->id < 0) {
>> +		dev_warn(d, "no ethernet alias\n");
>> +		ret = -EINVAL;
>> +		goto init_fail;
>> +	}
>
> Apparently you try to rely on the alias to refer to a specific piece
> of hardware, which is not correct. The alias is meant to be selectable
> to match e.g. the numbering written on the external connector, which
> is totally independent of the internal hardware.

Thanks for clarifying alisa.
The id will be used for start channel in ppe, RX_DESC_NUM * priv->id;
Is it suitable directly use id in the dts, or other name such as start-chan?

Thanks
--
To unsubscribe from this list: send the line "unsubscribe devicetree" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Arnd Bergmann March 22, 2014, 8:08 a.m. UTC | #3
On Saturday 22 March 2014 09:18:35 zhangfei wrote:
> >> +static void hip04_tx_reclaim(struct net_device *ndev, bool force)
> >> +{
> >> +	struct hip04_priv *priv = netdev_priv(ndev);
> >> +	unsigned tx_head = priv->tx_head;
> >> +	unsigned tx_tail = priv->tx_tail;
> >> +	struct tx_desc *desc = &priv->tx_desc[priv->tx_tail];
> >> +
> >> +	while (tx_tail != tx_head) {
> >> +		if (desc->send_addr != 0) {
> >> +			if (force)
> >> +				desc->send_addr = 0;
> >> +			else
> >> +				break;
> >> +		}
> >> +		if (priv->tx_phys[tx_tail]) {
> >> +			dma_unmap_single(&ndev->dev, priv->tx_phys[tx_tail],
> >> +				priv->tx_skb[tx_tail]->len, DMA_TO_DEVICE);
> >> +			priv->tx_phys[tx_tail] = 0;
> >> +		}
> >> +		dev_kfree_skb_irq(priv->tx_skb[tx_tail]);
> >> +		priv->tx_skb[tx_tail] = NULL;
> >> +		tx_tail = TX_NEXT(tx_tail);
> >> +		priv->tx_count--;
> >> +	}
> >> +	priv->tx_tail = tx_tail;
> >> +}
> >
> > You call this function from start_xmit(), which may be too early, causing the
> > dma_unmap_single() and dev_kfree_skb_irq() functions to be called while the
> > device is still accessing the data. This is bad.
> 
> There is a protection.
> Only after xmit done, desc->send_addr = 0, which is cleared by hardware.

Ok, I see.

> > You have to ensure that you only ever clean up tx buffers that have been
> > successfully transmitted. Also, you should use an interrupt to notify you
> > of this in case there is no further xmit packet. Otherwise you may have
> > a user space program waiting indefinitely for a single packet to get sent
> > on a socket.
> >
> > It's ok to also call the cleanup from start_xmit, but calling it from the
> > poll() function or another appropriate place is required.
> 
> There is no transmit done interrupt, so relying on every xmit to reclaim 
> finished buffer.
> I thought it would be enough, since there are TX_DESC_NUM descs.
> It is a good idea also put reclaim in poll, then will add spin lock etc.

It may be simpler to call napi_schedule() when you want to do tx descriptor
cleanup and have the function always be called in one place. If you do this
carefully, you can probably avoid most of the locking.

> >> +	priv->id = of_alias_get_id(node, "ethernet");
> >> +	if (priv->id < 0) {
> >> +		dev_warn(d, "no ethernet alias\n");
> >> +		ret = -EINVAL;
> >> +		goto init_fail;
> >> +	}
> >
> > Apparently you try to rely on the alias to refer to a specific piece
> > of hardware, which is not correct. The alias is meant to be selectable
> > to match e.g. the numbering written on the external connector, which
> > is totally independent of the internal hardware.
> 
> Thanks for clarifying alisa.
> The id will be used for start channel in ppe, RX_DESC_NUM * priv->id;
> Is it suitable directly use id in the dts, or other name such as start-chan?

Yes, I think it's best to just pass the id the same way as the channel
number.

	Arnd
--
To unsubscribe from this list: send the line "unsubscribe devicetree" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/drivers/net/ethernet/hisilicon/Makefile b/drivers/net/ethernet/hisilicon/Makefile
index 1d6eb6e..e6fe7af 100644
--- a/drivers/net/ethernet/hisilicon/Makefile
+++ b/drivers/net/ethernet/hisilicon/Makefile
@@ -2,4 +2,4 @@ 
 # Makefile for the HISILICON network device drivers.
 #
 
-obj-$(CONFIG_HIP04_ETH) += hip04_mdio.o
+obj-$(CONFIG_HIP04_ETH) += hip04_eth.o hip04_mdio.o
diff --git a/drivers/net/ethernet/hisilicon/hip04_eth.c b/drivers/net/ethernet/hisilicon/hip04_eth.c
new file mode 100644
index 0000000..1211fb4
--- /dev/null
+++ b/drivers/net/ethernet/hisilicon/hip04_eth.c
@@ -0,0 +1,730 @@ 
+
+/* Copyright (c) 2014 Linaro Ltd.
+ * Copyright (c) 2014 Hisilicon Limited.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/module.h>
+#include <linux/etherdevice.h>
+#include <linux/platform_device.h>
+#include <linux/interrupt.h>
+#include <linux/of_address.h>
+#include <linux/phy.h>
+#include <linux/of_mdio.h>
+#include <linux/of_net.h>
+
+#define PPE_CFG_RX_CFF_ADDR		0x100
+#define PPE_CFG_POOL_GRP		0x300
+#define PPE_CFG_RX_BUF_SIZE		0x400
+#define PPE_CFG_RX_FIFO_SIZE		0x500
+#define PPE_CURR_BUF_CNT_REG		0xa200
+
+#define GE_DUPLEX_TYPE			0x8
+#define GE_MAX_FRM_SIZE_REG		0x3c
+#define GE_PORT_MODE			0x40
+#define GE_PORT_EN			0x44
+#define GE_SHORT_RUNTS_THR_REG		0x50
+#define GE_TX_LOCAL_PAGE_REG		0x5c
+#define GE_TRANSMIT_CONTROL_REG		0x60
+#define GE_CF_CRC_STRIP_REG		0x1b0
+#define GE_MODE_CHANGE_EN		0x1b4
+#define GE_RECV_CONTROL_REG		0x1e0
+#define GE_STATION_MAC_ADDRESS		0x210
+#define PPE_CFG_TX_PKT_BD_ADDR		0x420
+#define PPE_CFG_MAX_FRAME_LEN_REG	0x408
+#define PPE_CFG_BUS_CTRL_REG		0x424
+#define PPE_CFG_RX_CTRL_REG		0x428
+#define PPE_CFG_RX_PKT_MODE_REG		0x438
+#define PPE_CFG_QOS_VMID_GEN		0x500
+#define PPE_CFG_RX_PKT_INT		0x538
+#define PPE_INTEN			0x600
+#define PPE_INTSTS			0x608
+#define PPE_RINT			0x604
+#define PPE_CFG_STS_MODE		0x700
+#define PPE_HIS_RX_PKT_CNT		0x804
+
+/* REG_INTERRUPT */
+#define RCV_INT				BIT(10)
+#define RCV_NOBUF			BIT(8)
+#define DEF_INT_MASK			0x41fdf
+
+#define RX_DESC_NUM			64
+#define TX_DESC_NUM			64
+#define TX_NEXT(N)			(((N) + 1) & (TX_DESC_NUM-1))
+#define RX_NEXT(N)			(((N) + 1) & (RX_DESC_NUM-1))
+
+#define GMAC_PPE_RX_PKT_MAX_LEN		379
+#define GMAC_MAX_PKT_LEN		1516
+#define DESC_DEF_CFG			0x14
+#define RX_BUF_SIZE			1600
+#define RX_PKT_ERR			0x3
+#define TX_TIMEOUT			(6 * HZ)
+
+#define DRV_NAME			"hip04-ether"
+
+struct tx_desc {
+	u32 send_addr;
+	u16 reserved_16;
+	u16 send_size;
+	u32 reserved_32;
+	u32 cfg;
+	u32 wb_addr;
+} ____cacheline_aligned;
+
+struct rx_desc {
+	u16 reserved_16;
+	u16 pkt_len;
+	u32 reserve1[3];
+	u32 pkt_err;
+	u32 reserve2[4];
+};
+
+struct hip04_priv {
+	void __iomem *base;
+	int phy_mode;
+	int id;
+	unsigned int port;
+	unsigned int speed;
+	unsigned int duplex;
+	unsigned int reg_inten;
+
+	struct napi_struct napi;
+	struct net_device *ndev;
+
+	struct tx_desc *tx_desc;
+	dma_addr_t tx_desc_dma;
+	struct sk_buff *tx_skb[TX_DESC_NUM];
+	dma_addr_t tx_phys[TX_DESC_NUM];
+	unsigned int tx_head;
+	unsigned int tx_tail;
+	unsigned int tx_count;
+
+	unsigned char *rx_buf[RX_DESC_NUM];
+	dma_addr_t rx_phys[RX_DESC_NUM];
+	unsigned int rx_head;
+	unsigned int rx_buf_size;
+
+	struct device_node *phy_node;
+	struct phy_device *phy;
+};
+
+static void __iomem *ppebase;
+
+static void hip04_config_port(struct hip04_priv *priv, u32 speed, u32 duplex)
+{
+	u32 val;
+
+	priv->speed = speed;
+	priv->duplex = duplex;
+
+	switch (priv->phy_mode) {
+	case PHY_INTERFACE_MODE_SGMII:
+		if (speed == SPEED_1000)
+			val = 8;
+		else
+			val = 7;
+		break;
+	case PHY_INTERFACE_MODE_MII:
+		val = 1;	/* SPEED_100 */
+		break;
+	default:
+		val = 0;
+		break;
+	}
+	writel_relaxed(val, priv->base + GE_PORT_MODE);
+
+	val = (duplex) ? BIT(0) : 0;
+	writel_relaxed(val, priv->base + GE_DUPLEX_TYPE);
+
+	val = BIT(0);
+	writel_relaxed(val, priv->base + GE_MODE_CHANGE_EN);
+}
+
+static void hip04_reset_ppe(struct hip04_priv *priv)
+{
+	u32 val;
+
+	do {
+		val =
+		readl_relaxed(ppebase + priv->port * 4 + PPE_CURR_BUF_CNT_REG);
+		readl_relaxed(ppebase + priv->port * 4 + PPE_CFG_RX_CFF_ADDR);
+	} while (val & 0xfff);
+}
+
+static void hip04_config_fifo(struct hip04_priv *priv)
+{
+	u32 val;
+
+	val = readl_relaxed(priv->base + PPE_CFG_STS_MODE);
+	val |= BIT(12);			/* PPE_HIS_RX_PKT_CNT read clear */
+	writel_relaxed(val, priv->base + PPE_CFG_STS_MODE);
+
+	val = BIT(priv->port);
+	writel_relaxed(val, ppebase + priv->port * 4 + PPE_CFG_POOL_GRP);
+
+	val = priv->port << 8;
+	val |= BIT(14);
+	writel_relaxed(val, priv->base + PPE_CFG_QOS_VMID_GEN);
+
+	val = RX_BUF_SIZE;
+	writel_relaxed(val, ppebase + priv->port * 4 + PPE_CFG_RX_BUF_SIZE);
+
+	val = RX_DESC_NUM << 16;	/* depth */
+	val |= BIT(11);			/* seq: first set first ues */
+	val |= RX_DESC_NUM * priv->id;	/* start_addr */
+	writel_relaxed(val, ppebase + priv->port * 4 + PPE_CFG_RX_FIFO_SIZE);
+
+	/* pkt store format */
+	val = NET_IP_ALIGN << 11;	/* align */
+	writel_relaxed(val, priv->base + PPE_CFG_RX_CTRL_REG);
+
+	/* following cfg required for 1000M */
+	/* pkt mode */
+	val = BIT(18);			/* align */
+	writel_relaxed(val, priv->base + PPE_CFG_RX_PKT_MODE_REG);
+
+	/* set bus ctrl */
+	val = BIT(14);			/* buffer locally release */
+	val |= BIT(0);			/* big endian */
+	writel_relaxed(val, priv->base + PPE_CFG_BUS_CTRL_REG);
+
+	/* set max pkt len, curtail if exceed */
+	val = GMAC_PPE_RX_PKT_MAX_LEN;	/* max buffer len */
+	writel_relaxed(val, priv->base + PPE_CFG_MAX_FRAME_LEN_REG);
+
+	/* set max len of each pkt */
+	val = GMAC_MAX_PKT_LEN;		/* max buffer len */
+	writel_relaxed(val, priv->base + GE_MAX_FRM_SIZE_REG);
+
+	/* set min len of each pkt */
+	val = 31;			/* min buffer len */
+	writel_relaxed(val, priv->base + GE_SHORT_RUNTS_THR_REG);
+
+	/* tx */
+	val = readl_relaxed(priv->base + GE_TRANSMIT_CONTROL_REG);
+	val |= BIT(5);			/* tx auto neg */
+	val |= BIT(6);			/* tx add crc */
+	val |= BIT(7);			/* tx short pad through */
+	writel_relaxed(val, priv->base + GE_TRANSMIT_CONTROL_REG);
+
+	/* rx crc */
+	val = BIT(0);			/* rx strip crc */
+	writel_relaxed(val, priv->base + GE_CF_CRC_STRIP_REG);
+
+	/* rx */
+	val = readl_relaxed(priv->base + GE_RECV_CONTROL_REG);
+	val |= BIT(3);			/* rx strip pad */
+	val |= BIT(4);			/* run pkt en */
+	writel_relaxed(val, priv->base + GE_RECV_CONTROL_REG);
+
+	/* auto neg control */
+	val = BIT(0);
+	writel_relaxed(val, priv->base + GE_TX_LOCAL_PAGE_REG);
+}
+
+static void hip04_mac_enable(struct net_device *ndev, bool enable)
+{
+	struct hip04_priv *priv = netdev_priv(ndev);
+	u32 val;
+
+	if (enable) {
+		/* enable tx & rx */
+		val = readl_relaxed(priv->base + GE_PORT_EN);
+		val |= BIT(1);		/* rx*/
+		val |= BIT(2);		/* tx*/
+		writel_relaxed(val, priv->base + GE_PORT_EN);
+
+		/* enable interrupt */
+		priv->reg_inten = DEF_INT_MASK;
+		writel_relaxed(priv->reg_inten, priv->base + PPE_INTEN);
+
+		/* clear rx int */
+		val = RCV_INT;
+		writel_relaxed(val, priv->base + PPE_RINT);
+
+		/* config recv int*/
+		val = BIT(6);		/* int threshold 1 package */
+		val |= 0x4;		/* recv timeout */
+		writel_relaxed(val, priv->base + PPE_CFG_RX_PKT_INT);
+	} else {
+		/* disable int */
+		priv->reg_inten &= ~(RCV_INT | RCV_NOBUF);
+		writel_relaxed(priv->reg_inten, priv->base + PPE_INTEN);
+
+		/* disable tx & rx */
+		val = readl_relaxed(priv->base + GE_PORT_EN);
+		val &= ~(BIT(1));	/* rx*/
+		val &= ~(BIT(2));	/* tx*/
+		writel_relaxed(val, priv->base + GE_PORT_EN);
+	}
+}
+
+static void hip04_set_xmit_desc(struct hip04_priv *priv, dma_addr_t phys)
+{
+	writel(phys, priv->base + PPE_CFG_TX_PKT_BD_ADDR);
+}
+
+static void hip04_set_recv_desc(struct hip04_priv *priv, dma_addr_t phys)
+{
+	writel(phys, ppebase + priv->port * 4 + PPE_CFG_RX_CFF_ADDR);
+}
+
+static u32 hip04_recv_cnt(struct hip04_priv *priv)
+{
+	return readl(priv->base + PPE_HIS_RX_PKT_CNT);
+}
+
+static void hip04_update_mac_address(struct net_device *ndev)
+{
+	struct hip04_priv *priv = netdev_priv(ndev);
+
+	writel_relaxed(((ndev->dev_addr[0] << 8) | (ndev->dev_addr[1])),
+			priv->base + GE_STATION_MAC_ADDRESS);
+	writel_relaxed(((ndev->dev_addr[2] << 24) | (ndev->dev_addr[3] << 16) |
+			(ndev->dev_addr[4] << 8) | (ndev->dev_addr[5])),
+			priv->base + GE_STATION_MAC_ADDRESS + 4);
+}
+
+static int hip04_set_mac_address(struct net_device *ndev, void *addr)
+{
+	eth_mac_addr(ndev, addr);
+	hip04_update_mac_address(ndev);
+	return 0;
+}
+
+static int hip04_rx_poll(struct napi_struct *napi, int budget)
+{
+	struct hip04_priv *priv = container_of(napi, struct hip04_priv, napi);
+	struct net_device *ndev = priv->ndev;
+	struct net_device_stats *stats = &ndev->stats;
+	unsigned int cnt = hip04_recv_cnt(priv);
+	struct sk_buff *skb;
+	struct rx_desc *desc;
+	unsigned char *buf;
+	dma_addr_t phys;
+	int rx = 0;
+	u16 len;
+	u32 err;
+
+	while (cnt) {
+		buf = priv->rx_buf[priv->rx_head];
+		skb = build_skb(buf, priv->rx_buf_size);
+		if (unlikely(!skb))
+			net_dbg_ratelimited("build_skb failed\n");
+
+		dma_unmap_single(&ndev->dev, priv->rx_phys[priv->rx_head],
+				RX_BUF_SIZE, DMA_FROM_DEVICE);
+		priv->rx_phys[priv->rx_head] = 0;
+
+		desc = (struct rx_desc *)skb->data;
+		len = be16_to_cpu(desc->pkt_len);
+		err = be32_to_cpu(desc->pkt_err);
+
+		if (len > RX_BUF_SIZE)
+			len = RX_BUF_SIZE;
+		if (0 == len)
+			break;
+
+		if (err & RX_PKT_ERR) {
+			dev_kfree_skb_any(skb);
+			stats->rx_dropped++;
+			stats->rx_errors++;
+			continue;
+		}
+
+		stats->rx_packets++;
+		stats->rx_bytes += len;
+
+		skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN);
+		skb_put(skb, len);
+		skb->protocol = eth_type_trans(skb, ndev);
+		napi_gro_receive(&priv->napi, skb);
+
+		buf = netdev_alloc_frag(priv->rx_buf_size);
+		if (!buf)
+			return -ENOMEM;
+		phys = dma_map_single(&ndev->dev, buf,
+				RX_BUF_SIZE, DMA_FROM_DEVICE);
+		priv->rx_buf[priv->rx_head] = buf;
+		priv->rx_phys[priv->rx_head] = phys;
+		hip04_set_recv_desc(priv, phys);
+
+		priv->rx_head = RX_NEXT(priv->rx_head);
+		if (rx++ >= budget)
+			break;
+
+		if (--cnt == 0)
+			cnt = hip04_recv_cnt(priv);
+	}
+
+	if (rx < budget) {
+		napi_complete(napi);
+
+		/* enable rx interrupt */
+		priv->reg_inten |= RCV_INT | RCV_NOBUF;
+		writel(priv->reg_inten, priv->base + PPE_INTEN);
+	}
+
+	return rx;
+}
+
+static irqreturn_t hip04_mac_interrupt(int irq, void *dev_id)
+{
+	struct net_device *ndev = (struct net_device *) dev_id;
+	struct hip04_priv *priv = netdev_priv(ndev);
+	u32 ists = readl_relaxed(priv->base + PPE_INTSTS);
+	u32 val = DEF_INT_MASK;
+
+	writel_relaxed(val, priv->base + PPE_RINT);
+
+	if (ists & (RCV_INT | RCV_NOBUF)) {
+		if (napi_schedule_prep(&priv->napi)) {
+			/* disable rx interrupt */
+			priv->reg_inten &= ~(RCV_INT | RCV_NOBUF);
+			writel_relaxed(priv->reg_inten, priv->base + PPE_INTEN);
+			__napi_schedule(&priv->napi);
+		}
+	}
+
+	return IRQ_HANDLED;
+}
+
+static void hip04_tx_reclaim(struct net_device *ndev, bool force)
+{
+	struct hip04_priv *priv = netdev_priv(ndev);
+	unsigned tx_head = priv->tx_head;
+	unsigned tx_tail = priv->tx_tail;
+	struct tx_desc *desc = &priv->tx_desc[priv->tx_tail];
+
+	while (tx_tail != tx_head) {
+		if (desc->send_addr != 0) {
+			if (force)
+				desc->send_addr = 0;
+			else
+				break;
+		}
+		if (priv->tx_phys[tx_tail]) {
+			dma_unmap_single(&ndev->dev, priv->tx_phys[tx_tail],
+				priv->tx_skb[tx_tail]->len, DMA_TO_DEVICE);
+			priv->tx_phys[tx_tail] = 0;
+		}
+		dev_kfree_skb_irq(priv->tx_skb[tx_tail]);
+		priv->tx_skb[tx_tail] = NULL;
+		tx_tail = TX_NEXT(tx_tail);
+		priv->tx_count--;
+	}
+	priv->tx_tail = tx_tail;
+}
+
+static int hip04_mac_start_xmit(struct sk_buff *skb, struct net_device *ndev)
+{
+	struct hip04_priv *priv = netdev_priv(ndev);
+	struct net_device_stats *stats = &ndev->stats;
+	unsigned int tx_head = priv->tx_head;
+	struct tx_desc *desc = &priv->tx_desc[tx_head];
+	dma_addr_t phys;
+
+	hip04_tx_reclaim(ndev, false);
+
+	if (priv->tx_count++ >= TX_DESC_NUM) {
+		net_dbg_ratelimited("no TX space for packet\n");
+		netif_stop_queue(ndev);
+		return NETDEV_TX_BUSY;
+	}
+
+	phys = dma_map_single(&ndev->dev, skb->data, skb->len, DMA_TO_DEVICE);
+	priv->tx_skb[tx_head] = skb;
+	priv->tx_phys[tx_head] = phys;
+	desc->send_addr = cpu_to_be32(phys);
+	desc->send_size = cpu_to_be16(skb->len);
+	desc->cfg = cpu_to_be32(DESC_DEF_CFG);
+	phys = priv->tx_desc_dma + tx_head * sizeof(struct tx_desc);
+	desc->wb_addr = cpu_to_be32(phys);
+	skb_tx_timestamp(skb);
+	hip04_set_xmit_desc(priv, phys);
+	priv->tx_head = TX_NEXT(tx_head);
+
+	stats->tx_bytes += skb->len;
+	stats->tx_packets++;
+
+	return NETDEV_TX_OK;
+}
+
+static void hip04_adjust_link(struct net_device *ndev)
+{
+	struct hip04_priv *priv = netdev_priv(ndev);
+	struct phy_device *phy = priv->phy;
+
+	if ((priv->speed != phy->speed) || (priv->duplex != phy->duplex)) {
+		hip04_config_port(priv, phy->speed, phy->duplex);
+		phy_print_status(phy);
+	}
+}
+
+static int hip04_mac_open(struct net_device *ndev)
+{
+	struct hip04_priv *priv = netdev_priv(ndev);
+	int i;
+
+	priv->rx_head = 0;
+	priv->tx_head = 0;
+	priv->tx_tail = 0;
+	priv->tx_count = 0;
+
+	hip04_reset_ppe(priv);
+	for (i = 0; i < RX_DESC_NUM; i++) {
+		dma_addr_t phys;
+
+		phys = dma_map_single(&ndev->dev, priv->rx_buf[i],
+				RX_BUF_SIZE, DMA_FROM_DEVICE);
+		priv->rx_phys[i] = phys;
+		hip04_set_recv_desc(priv, phys);
+	}
+
+	if (priv->phy_node) {
+		priv->phy = of_phy_connect(ndev, priv->phy_node,
+			&hip04_adjust_link, 0, priv->phy_mode);
+		if (!priv->phy)
+			return -ENODEV;
+		phy_start(priv->phy);
+	}
+
+	netif_start_queue(ndev);
+	hip04_mac_enable(ndev, true);
+	napi_enable(&priv->napi);
+	return 0;
+}
+
+static int hip04_mac_stop(struct net_device *ndev)
+{
+	struct hip04_priv *priv = netdev_priv(ndev);
+	int i;
+
+	if (priv->phy)
+		phy_disconnect(priv->phy);
+
+	priv->phy = NULL;
+	napi_disable(&priv->napi);
+	netif_stop_queue(ndev);
+	hip04_mac_enable(ndev, false);
+	hip04_tx_reclaim(ndev, true);
+	hip04_reset_ppe(priv);
+
+	for (i = 0; i < RX_DESC_NUM; i++) {
+		if (priv->rx_phys[i]) {
+			dma_unmap_single(&ndev->dev, priv->rx_phys[i],
+					RX_BUF_SIZE, DMA_FROM_DEVICE);
+			priv->rx_phys[i] = 0;
+		}
+	}
+
+	return 0;
+}
+
+static void hip04_timeout(struct net_device *ndev)
+{
+	netif_wake_queue(ndev);
+	return;
+}
+
+static struct net_device_ops hip04_netdev_ops = {
+	.ndo_open		= hip04_mac_open,
+	.ndo_stop		= hip04_mac_stop,
+	.ndo_start_xmit		= hip04_mac_start_xmit,
+	.ndo_set_mac_address	= hip04_set_mac_address,
+	.ndo_tx_timeout         = hip04_timeout,
+	.ndo_validate_addr	= eth_validate_addr,
+	.ndo_change_mtu		= eth_change_mtu,
+};
+
+static int hip04_alloc_ring(struct net_device *ndev, struct device *d)
+{
+	struct hip04_priv *priv = netdev_priv(ndev);
+	int i;
+
+	priv->tx_desc = dma_alloc_coherent(d,
+			TX_DESC_NUM * sizeof(struct tx_desc),
+			&priv->tx_desc_dma, GFP_KERNEL);
+	if (!priv->tx_desc)
+		return -ENOMEM;
+
+	priv->rx_buf_size = RX_BUF_SIZE +
+			    SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
+	for (i = 0; i < RX_DESC_NUM; i++) {
+		priv->rx_buf[i] = netdev_alloc_frag(priv->rx_buf_size);
+		if (!priv->rx_buf[i])
+			return -ENOMEM;
+	}
+
+	return 0;
+}
+
+static void hip04_free_ring(struct net_device *ndev, struct device *d)
+{
+	struct hip04_priv *priv = netdev_priv(ndev);
+	int i;
+
+	for (i = 0; i < RX_DESC_NUM; i++)
+		if (priv->rx_buf[i])
+			put_page(virt_to_head_page(priv->rx_buf[i]));
+
+	for (i = 0; i < TX_DESC_NUM; i++)
+		if (priv->tx_skb[i])
+			dev_kfree_skb_any(priv->tx_skb[i]);
+
+	dma_free_coherent(d, TX_DESC_NUM * sizeof(struct tx_desc),
+			priv->tx_desc, priv->tx_desc_dma);
+}
+
+static int hip04_mac_probe(struct platform_device *pdev)
+{
+	struct device *d = &pdev->dev;
+	struct device_node *n, *node = d->of_node;
+	struct net_device *ndev;
+	struct hip04_priv *priv;
+	struct resource *res;
+	unsigned int irq;
+	int ret;
+
+	ndev = alloc_etherdev(sizeof(struct hip04_priv));
+	if (!ndev)
+		return -ENOMEM;
+
+	priv = netdev_priv(ndev);
+	priv->ndev = ndev;
+	platform_set_drvdata(pdev, ndev);
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	priv->base = devm_ioremap_resource(d, res);
+	if (IS_ERR(priv->base)) {
+		ret = PTR_ERR(priv->base);
+		goto init_fail;
+	}
+
+	if (!ppebase) {
+		n = of_find_compatible_node(NULL, NULL, "hisilicon,hip04-ppe");
+		if (!n) {
+			ret = -EINVAL;
+			netdev_err(ndev, "not find hisilicon,ppe\n");
+			goto init_fail;
+		}
+		ppebase = of_iomap(n, 0);
+	}
+
+	n = of_parse_phandle(node, "port-handle", 0);
+	if (n) {
+		ret = of_property_read_u32(n, "reg", &priv->port);
+		if (ret) {
+			dev_warn(d, "no reg info\n");
+			goto init_fail;
+		}
+	} else {
+		dev_warn(d, "no port-handle\n");
+		ret = -EINVAL;
+		goto init_fail;
+	}
+
+	priv->id = of_alias_get_id(node, "ethernet");
+	if (priv->id < 0) {
+		dev_warn(d, "no ethernet alias\n");
+		ret = -EINVAL;
+		goto init_fail;
+	}
+
+	priv->phy_mode = of_get_phy_mode(node);
+	if (priv->phy_mode < 0) {
+		dev_warn(d, "not find phy-mode\n");
+		ret = -EINVAL;
+		goto init_fail;
+	}
+
+	irq = platform_get_irq(pdev, 0);
+	if (irq <= 0) {
+		ret = -EINVAL;
+		goto init_fail;
+	}
+
+	ether_setup(ndev);
+	ndev->netdev_ops = &hip04_netdev_ops;
+	ndev->watchdog_timeo = TX_TIMEOUT;
+	ndev->priv_flags |= IFF_UNICAST_FLT;
+	ndev->irq = irq;
+	netif_napi_add(ndev, &priv->napi, hip04_rx_poll, RX_DESC_NUM);
+	SET_NETDEV_DEV(ndev, &pdev->dev);
+
+	hip04_reset_ppe(priv);
+	if (priv->phy_mode == PHY_INTERFACE_MODE_MII)
+		hip04_config_port(priv, SPEED_100, DUPLEX_FULL);
+
+	hip04_config_fifo(priv);
+	random_ether_addr(ndev->dev_addr);
+	hip04_update_mac_address(ndev);
+
+	ret = hip04_alloc_ring(ndev, d);
+	if (ret) {
+		netdev_err(ndev, "alloc ring fail\n");
+		goto alloc_fail;
+	}
+
+	ret = devm_request_irq(d, irq, hip04_mac_interrupt,
+				0, pdev->name, ndev);
+	if (ret) {
+		netdev_err(ndev, "devm_request_irq failed\n");
+		goto alloc_fail;
+	}
+
+	priv->phy_node = of_parse_phandle(node, "phy-handle", 0);
+
+	ret = register_netdev(ndev);
+	if (ret) {
+		free_netdev(ndev);
+		goto alloc_fail;
+	}
+
+	return 0;
+alloc_fail:
+	hip04_free_ring(ndev, d);
+init_fail:
+	of_node_put(priv->phy_node);
+	free_netdev(ndev);
+	return ret;
+}
+
+static int hip04_remove(struct platform_device *pdev)
+{
+	struct net_device *ndev = platform_get_drvdata(pdev);
+	struct hip04_priv *priv = netdev_priv(ndev);
+	struct device *d = &pdev->dev;
+
+	hip04_free_ring(ndev, d);
+	unregister_netdev(ndev);
+	free_irq(ndev->irq, ndev);
+	of_node_put(priv->phy_node);
+	free_netdev(ndev);
+
+	return 0;
+}
+
+static const struct of_device_id hip04_mac_match[] = {
+	{ .compatible = "hisilicon,hip04-mac" },
+	{ }
+};
+
+static struct platform_driver hip04_mac_driver = {
+	.probe	= hip04_mac_probe,
+	.remove	= hip04_remove,
+	.driver	= {
+		.name		= DRV_NAME,
+		.owner		= THIS_MODULE,
+		.of_match_table	= hip04_mac_match,
+	},
+};
+module_platform_driver(hip04_mac_driver);
+
+MODULE_DESCRIPTION("HISILICON P04 Ethernet driver");
+MODULE_LICENSE("GPL v2");
+MODULE_ALIAS("platform:hip04-ether");