diff mbox series

[net-next,2/3] dpaa2-eth: add rx copybreak support

Message ID 20210401163956.766628-3-ciorneiioana@gmail.com
State Superseded
Headers show
Series dpaa2-eth: add rx copybreak support | expand

Commit Message

Ioana Ciornei April 1, 2021, 4:39 p.m. UTC
From: Ioana Ciornei <ioana.ciornei@nxp.com>

DMA unmapping, allocating a new buffer and DMA mapping it back on the
refill path is really not that efficient. Proper buffer recycling (page
pool, flipping the page and using the other half) cannot be done for
DPAA2 since it's not a ring-based controller but rather deals with
multiple queues which all get their buffers from the same buffer pool on
Rx.

To circumvent these limitations, add support for Rx copybreak. For small
packets, instead of creating an skb around the buffer in which the
frame was received, allocate a new skb altogether, copy the
contents of the frame and release the initial page back into the buffer
pool.

Signed-off-by: Ioana Ciornei <ioana.ciornei@nxp.com>
---
 .../net/ethernet/freescale/dpaa2/dpaa2-eth.c  | 36 +++++++++++++++++--
 .../net/ethernet/freescale/dpaa2/dpaa2-eth.h  |  2 ++
 2 files changed, 35 insertions(+), 3 deletions(-)

Comments

Ioana Ciornei April 1, 2021, 8:13 p.m. UTC | #1
On Thu, Apr 01, 2021 at 08:49:43PM +0200, Andrew Lunn wrote:
> Hi Ioana
> 
> > +#define DPAA2_ETH_DEFAULT_COPYBREAK	512
> 
> This is quite big. A quick grep suggests other drivers use 256.
> 
> Do you have some performance figures for this? 
> 

Hi Andrew,

Yes, I did some tests which made me end up with this default value.

A bit about the setup - a LS2088A SoC, 8 x Cortex A72 @ 1.8GHz, IPfwd
zero loss test @ 20Gbit/s throughput.  I tested multiple frame sizes to
get an idea of where the break-even point is.

Here are 2 sets of results: (1) is the baseline and (2) is just
allocating a new skb for all frame sizes received (as if the copybreak
was equal to the MTU). All numbers are in Mpps.

         64   128    256   512  640   768   896

(1)     3.23  3.23  3.24  3.21  3.1  2.76  2.71
(2)     3.95  3.88  3.79  3.62  3.3  3.02  2.65

It seems that even for 512-byte frames it's comfortably better to
allocate a new skb. Beyond that, we see diminishing returns or even a regression.

Ioana
Andrew Lunn April 1, 2021, 8:19 p.m. UTC | #2
On Thu, Apr 01, 2021 at 11:13:50PM +0300, Ioana Ciornei wrote:
> On Thu, Apr 01, 2021 at 08:49:43PM +0200, Andrew Lunn wrote:
> > Hi Ioana
> > 
> > > +#define DPAA2_ETH_DEFAULT_COPYBREAK	512
> > 
> > This is quite big. A quick grep suggests other drivers use 256.
> > 
> > Do you have some performance figures for this? 
> > 
> 
> Hi Andrew,
> 
> Yes, I did some tests which made me end up with this default value.
> 
> A bit about the setup - a LS2088A SoC, 8 x Cortex A72 @ 1.8GHz, IPfwd
> zero loss test @ 20Gbit/s throughput.  I tested multiple frame sizes to
> get an idea of where the break-even point is.
> 
> Here are 2 sets of results: (1) is the baseline and (2) is just
> allocating a new skb for all frame sizes received (as if the copybreak
> was equal to the MTU). All numbers are in Mpps.
> 
>          64   128    256   512  640   768   896
> 
> (1)     3.23  3.23  3.24  3.21  3.1  2.76  2.71
> (2)     3.95  3.88  3.79  3.62  3.3  3.02  2.65
> 
> It seems that even for 512-byte frames it's comfortably better to
> allocate a new skb. Beyond that, we see diminishing returns or even a regression.

Nice. If you need to respin, consider putting this in patch 0/3.

      Andrew
diff mbox series

Patch

diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
index f545cb99388a..200831b41078 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
+++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
@@ -418,6 +418,33 @@  static u32 dpaa2_eth_run_xdp(struct dpaa2_eth_priv *priv,
 	return xdp_act;
 }
 
+/* Rx copybreak: copy a small frame into a new skb and recycle its buffer */
+static struct sk_buff *dpaa2_eth_copybreak(struct dpaa2_eth_channel *ch,
+					   const struct dpaa2_fd *fd,
+					   void *fd_vaddr)
+{
+	u16 fd_offset = dpaa2_fd_get_offset(fd);
+	u32 fd_length = dpaa2_fd_get_len(fd);
+	unsigned int skb_len;
+	struct sk_buff *skb;
+
+	/* A NULL return sends the caller down the regular build-skb path */
+	if (fd_length > DPAA2_ETH_DEFAULT_COPYBREAK)
+		return NULL;
+
+	skb_len = fd_length + dpaa2_eth_needed_headroom(NULL);
+	skb = napi_alloc_skb(&ch->napi, skb_len);
+	if (!skb)
+		return NULL;
+
+	skb_reserve(skb, dpaa2_eth_needed_headroom(NULL));
+	skb_put(skb, fd_length);
+	memcpy(skb->data, fd_vaddr + fd_offset, fd_length);
+
+	dpaa2_eth_recycle_buf(ch->priv, ch, dpaa2_fd_get_addr(fd));
+	return skb;
+}
+
 /* Main Rx frame processing routine */
 static void dpaa2_eth_rx(struct dpaa2_eth_priv *priv,
 			 struct dpaa2_eth_channel *ch,
@@ -459,9 +486,12 @@  static void dpaa2_eth_rx(struct dpaa2_eth_priv *priv,
 			return;
 		}
 
-		dma_unmap_page(dev, addr, priv->rx_buf_size,
-			       DMA_BIDIRECTIONAL);
-		skb = dpaa2_eth_build_linear_skb(ch, fd, vaddr);
+		skb = dpaa2_eth_copybreak(ch, fd, vaddr);
+		if (!skb) {
+			dma_unmap_page(dev, addr, priv->rx_buf_size,
+				       DMA_BIDIRECTIONAL);
+			skb = dpaa2_eth_build_linear_skb(ch, fd, vaddr);
+		}
 	} else if (fd_format == dpaa2_fd_sg) {
 		WARN_ON(priv->xdp_prog);
 
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h
index 9ba31c2706bb..f8d2b4769983 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h
+++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h
@@ -489,6 +489,8 @@  struct dpaa2_eth_trap_data {
 	struct dpaa2_eth_priv *priv;
 };
 
+#define DPAA2_ETH_DEFAULT_COPYBREAK	512
+
 /* Driver private data */
 struct dpaa2_eth_priv {
 	struct net_device *net_dev;