diff mbox series

[v3,bpf-next] cpumap: bulk skb using netif_receive_skb_list

Message ID 01cd8afa22786b2c8a4cd7250d165741e990a771.1618927173.git.lorenzo@kernel.org
State New
Headers show
Series [v3,bpf-next] cpumap: bulk skb using netif_receive_skb_list | expand

Commit Message

Lorenzo Bianconi April 20, 2021, 2:05 p.m. UTC
Rely on netif_receive_skb_list routine to send skbs converted from
xdp_frames in cpu_map_kthread_run in order to improve i-cache usage.
The proposed patch has been tested running xdp_redirect_cpu bpf sample
available in the kernel tree that is used to redirect UDP frames from
ixgbe driver to a cpumap entry and then to the networking stack.
UDP frames are generated using pkt_gen. Packets are discarded by the
UDP layer.

$xdp_redirect_cpu  --cpu <cpu> --progname xdp_cpu_map0 --dev <eth>

bpf-next: ~2.35Mpps
bpf-next + cpumap skb-list: ~2.72Mpps

Since netif_receive_skb_list does not return number of discarded packets,
remove drop counter from xdp_cpumap_kthread tracepoint and update related
xdp samples.

Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
---
Changes since v2:
- remove drop counter and update related xdp samples
- rebased on top of bpf-next

Changes since v1:
- fixed comment
- rebased on top of bpf-next tree
---
 include/trace/events/xdp.h          | 14 +++++---------
 kernel/bpf/cpumap.c                 | 16 +++++++---------
 samples/bpf/xdp_monitor_kern.c      |  6 ++----
 samples/bpf/xdp_monitor_user.c      | 14 ++++++--------
 samples/bpf/xdp_redirect_cpu_kern.c | 12 +++++-------
 samples/bpf/xdp_redirect_cpu_user.c | 10 ++++------
 6 files changed, 29 insertions(+), 43 deletions(-)

Comments

Jesper Dangaard Brouer April 20, 2021, 4:54 p.m. UTC | #1
On Tue, 20 Apr 2021 16:05:14 +0200
Lorenzo Bianconi <lorenzo@kernel.org> wrote:

> Rely on netif_receive_skb_list routine to send skbs converted from
> xdp_frames in cpu_map_kthread_run in order to improve i-cache usage.
> The proposed patch has been tested running xdp_redirect_cpu bpf sample
> available in the kernel tree that is used to redirect UDP frames from
> ixgbe driver to a cpumap entry and then to the networking stack.
> UDP frames are generated using pkt_gen. Packets are discarded by the
> UDP layer.
> 
> $xdp_redirect_cpu  --cpu <cpu> --progname xdp_cpu_map0 --dev <eth>
> 
> bpf-next: ~2.35Mpps
> bpf-next + cpumap skb-list: ~2.72Mpps
> 
> Since netif_receive_skb_list does not return number of discarded packets,
> remove drop counter from xdp_cpumap_kthread tracepoint and update related
> xdp samples.
> 
> Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
> ---
> Changes since v2:
> - remove drop counter and update related xdp samples
> - rebased on top of bpf-next
> 
> Changes since v1:
> - fixed comment
> - rebased on top of bpf-next tree
> ---
>  include/trace/events/xdp.h          | 14 +++++---------
>  kernel/bpf/cpumap.c                 | 16 +++++++---------
>  samples/bpf/xdp_monitor_kern.c      |  6 ++----
>  samples/bpf/xdp_monitor_user.c      | 14 ++++++--------
>  samples/bpf/xdp_redirect_cpu_kern.c | 12 +++++-------
>  samples/bpf/xdp_redirect_cpu_user.c | 10 ++++------
>  6 files changed, 29 insertions(+), 43 deletions(-)
> 
> diff --git a/include/trace/events/xdp.h b/include/trace/events/xdp.h
> index fcad3645a70b..52ecfe9c7e25 100644
> --- a/include/trace/events/xdp.h
> +++ b/include/trace/events/xdp.h
> @@ -184,16 +184,15 @@ DEFINE_EVENT(xdp_redirect_template, xdp_redirect_map_err,
>  
>  TRACE_EVENT(xdp_cpumap_kthread,
>  
> -	TP_PROTO(int map_id, unsigned int processed,  unsigned int drops,
> -		 int sched, struct xdp_cpumap_stats *xdp_stats),
> +	TP_PROTO(int map_id, unsigned int processed, int sched,
> +		 struct xdp_cpumap_stats *xdp_stats),
>  
> -	TP_ARGS(map_id, processed, drops, sched, xdp_stats),
> +	TP_ARGS(map_id, processed, sched, xdp_stats),
>  
>  	TP_STRUCT__entry(
>  		__field(int, map_id)
>  		__field(u32, act)
>  		__field(int, cpu)
> -		__field(unsigned int, drops)
>  		__field(unsigned int, processed)

So, struct member @processed will takeover the room for @drops.

Can you please test how an old xdp_monitor program will react to this?
Will it fail, or extract and show wrong values?

The xdp_mointor tool is in several external git repos:

 https://github.com/netoptimizer/prototype-kernel/blob/master/kernel/samples/bpf/xdp_monitor_kern.c
 https://github.com/xdp-project/xdp-tutorial/tree/master/tracing02-xdp-monitor

Do you have any plans for fixing those tools?


>  		__field(int, sched)
>  		__field(unsigned int, xdp_pass)
> @@ -205,7 +204,6 @@ TRACE_EVENT(xdp_cpumap_kthread,
>  		__entry->map_id		= map_id;
>  		__entry->act		= XDP_REDIRECT;
>  		__entry->cpu		= smp_processor_id();
> -		__entry->drops		= drops;
>  		__entry->processed	= processed;
>  		__entry->sched	= sched;
>  		__entry->xdp_pass	= xdp_stats->pass;
> @@ -215,13 +213,11 @@ TRACE_EVENT(xdp_cpumap_kthread,
>  
>  	TP_printk("kthread"
>  		  " cpu=%d map_id=%d action=%s"
> -		  " processed=%u drops=%u"
> -		  " sched=%d"
> +		  " processed=%u sched=%u"
>  		  " xdp_pass=%u xdp_drop=%u xdp_redirect=%u",
>  		  __entry->cpu, __entry->map_id,
>  		  __print_symbolic(__entry->act, __XDP_ACT_SYM_TAB),
> -		  __entry->processed, __entry->drops,
> -		  __entry->sched,
> +		  __entry->processed, __entry->sched,
>  		  __entry->xdp_pass, __entry->xdp_drop, __entry->xdp_redirect)
>  );
Lorenzo Bianconi April 20, 2021, 5:09 p.m. UTC | #2
[...]
> > +	TP_ARGS(map_id, processed, sched, xdp_stats),
> >  
> >  	TP_STRUCT__entry(
> >  		__field(int, map_id)
> >  		__field(u32, act)
> >  		__field(int, cpu)
> > -		__field(unsigned int, drops)
> >  		__field(unsigned int, processed)
> 
> So, struct member @processed will takeover the room for @drops.
> 
> Can you please test how an old xdp_monitor program will react to this?
> Will it fail, or extract and show wrong values?

Ack, right. I think we should keep the struct layout in order to maintain
back-compatibility. I will fix it in v4.

> 
> The xdp_mointor tool is in several external git repos:
> 
>  https://github.com/netoptimizer/prototype-kernel/blob/master/kernel/samples/bpf/xdp_monitor_kern.c
>  https://github.com/xdp-project/xdp-tutorial/tree/master/tracing02-xdp-monitor
> 
> Do you have any plans for fixing those tools?

I update xdp_monitor_{kern,user}.c and xdp_redirect_cpu_{kern,user}.c in the
patch. Do you mean to post a dedicated patch for xdp-project tutorial?

Regards,
Lorenzo

> 
> 
> 
> 
> -- 
> Best regards,
>   Jesper Dangaard Brouer
>   MSc.CS, Principal Kernel Engineer at Red Hat
>   LinkedIn: http://www.linkedin.com/in/brouer
>
Lorenzo Bianconi April 22, 2021, 10:59 a.m. UTC | #3
>

> [...]

> > > +   TP_ARGS(map_id, processed, sched, xdp_stats),

> > >

> > >     TP_STRUCT__entry(

> > >             __field(int, map_id)

> > >             __field(u32, act)

> > >             __field(int, cpu)

> > > -           __field(unsigned int, drops)

> > >             __field(unsigned int, processed)

> >

> > So, struct member @processed will takeover the room for @drops.

> >

> > Can you please test how an old xdp_monitor program will react to this?

> > Will it fail, or extract and show wrong values?

>

> Ack, right. I think we should keep the struct layout in order to maintain

> back-compatibility. I will fix it in v4.

>

> >

> > The xdp_mointor tool is in several external git repos:

> >

> >  https://github.com/netoptimizer/prototype-kernel/blob/master/kernel/samples/bpf/xdp_monitor_kern.c

> >  https://github.com/xdp-project/xdp-tutorial/tree/master/tracing02-xdp-monitor


Running an old version of xdp_monitor with a patched kernel, I
verified the xdp sample does not crash but it reports wrong values
(e.g. pps are reported as drops for tracepoint disagliment).
I think we have two possibilities here:
- assuming tracepoints are not a stable ABI, we can just fix xdp
samples available in the kernel tree and provide a patch for
xdp-project
- keep current tracepoint layout and just rename current drop variable
in bpf/cpumap.c in something like skb_alloc_drop.

I am not against both of them. What do you think?

Regards,
Lorenzo

> >

> > Do you have any plans for fixing those tools?

>

> I update xdp_monitor_{kern,user}.c and xdp_redirect_cpu_{kern,user}.c in the

> patch. Do you mean to post a dedicated patch for xdp-project tutorial?

>

> Regards,

> Lorenzo

>

> >

> >

> >

> >

> > --

> > Best regards,

> >   Jesper Dangaard Brouer

> >   MSc.CS, Principal Kernel Engineer at Red Hat

> >   LinkedIn: http://www.linkedin.com/in/brouer

> >
Jesper Dangaard Brouer April 22, 2021, 12:30 p.m. UTC | #4
On Thu, 22 Apr 2021 12:59:31 +0200
Lorenzo Bianconi <lorenzo.bianconi@redhat.com> wrote:

> >

> > [...]  

> > > > +   TP_ARGS(map_id, processed, sched, xdp_stats),

> > > >

> > > >     TP_STRUCT__entry(

> > > >             __field(int, map_id)

> > > >             __field(u32, act)

> > > >             __field(int, cpu)

> > > > -           __field(unsigned int, drops)

> > > >             __field(unsigned int, processed)  

> > >

> > > So, struct member @processed will takeover the room for @drops.

> > >

> > > Can you please test how an old xdp_monitor program will react to this?

> > > Will it fail, or extract and show wrong values?  

> >

> > Ack, right. I think we should keep the struct layout in order to maintain

> > back-compatibility. I will fix it in v4.

> >  

> > >

> > > The xdp_mointor tool is in several external git repos:

> > >

> > >  https://github.com/netoptimizer/prototype-kernel/blob/master/kernel/samples/bpf/xdp_monitor_kern.c

> > >  https://github.com/xdp-project/xdp-tutorial/tree/master/tracing02-xdp-monitor  

> 

> Running an old version of xdp_monitor with a patched kernel, I

> verified the xdp sample does not crash but it reports wrong values

> (e.g. pps are reported as drops for tracepoint disagliment).

> I think we have two possibilities here:

> - assuming tracepoints are not a stable ABI, we can just fix xdp

> samples available in the kernel tree and provide a patch for

> xdp-project

>

> - keep current tracepoint layout and just rename current drop variable

> in bpf/cpumap.c in something like skb_alloc_drop.


I like this one the most, rename to skb_alloc_drop, but keep layout.

-- 
Best regards,
  Jesper Dangaard Brouer
  MSc.CS, Principal Kernel Engineer at Red Hat
  LinkedIn: http://www.linkedin.com/in/brouer
diff mbox series

Patch

diff --git a/include/trace/events/xdp.h b/include/trace/events/xdp.h
index fcad3645a70b..52ecfe9c7e25 100644
--- a/include/trace/events/xdp.h
+++ b/include/trace/events/xdp.h
@@ -184,16 +184,15 @@  DEFINE_EVENT(xdp_redirect_template, xdp_redirect_map_err,
 
 TRACE_EVENT(xdp_cpumap_kthread,
 
-	TP_PROTO(int map_id, unsigned int processed,  unsigned int drops,
-		 int sched, struct xdp_cpumap_stats *xdp_stats),
+	TP_PROTO(int map_id, unsigned int processed, int sched,
+		 struct xdp_cpumap_stats *xdp_stats),
 
-	TP_ARGS(map_id, processed, drops, sched, xdp_stats),
+	TP_ARGS(map_id, processed, sched, xdp_stats),
 
 	TP_STRUCT__entry(
 		__field(int, map_id)
 		__field(u32, act)
 		__field(int, cpu)
-		__field(unsigned int, drops)
 		__field(unsigned int, processed)
 		__field(int, sched)
 		__field(unsigned int, xdp_pass)
@@ -205,7 +204,6 @@  TRACE_EVENT(xdp_cpumap_kthread,
 		__entry->map_id		= map_id;
 		__entry->act		= XDP_REDIRECT;
 		__entry->cpu		= smp_processor_id();
-		__entry->drops		= drops;
 		__entry->processed	= processed;
 		__entry->sched	= sched;
 		__entry->xdp_pass	= xdp_stats->pass;
@@ -215,13 +213,11 @@  TRACE_EVENT(xdp_cpumap_kthread,
 
 	TP_printk("kthread"
 		  " cpu=%d map_id=%d action=%s"
-		  " processed=%u drops=%u"
-		  " sched=%d"
+		  " processed=%u sched=%u"
 		  " xdp_pass=%u xdp_drop=%u xdp_redirect=%u",
 		  __entry->cpu, __entry->map_id,
 		  __print_symbolic(__entry->act, __XDP_ACT_SYM_TAB),
-		  __entry->processed, __entry->drops,
-		  __entry->sched,
+		  __entry->processed, __entry->sched,
 		  __entry->xdp_pass, __entry->xdp_drop, __entry->xdp_redirect)
 );
 
diff --git a/kernel/bpf/cpumap.c b/kernel/bpf/cpumap.c
index 0cf2791d5099..1c22caadf78f 100644
--- a/kernel/bpf/cpumap.c
+++ b/kernel/bpf/cpumap.c
@@ -27,7 +27,7 @@ 
 #include <linux/capability.h>
 #include <trace/events/xdp.h>
 
-#include <linux/netdevice.h>   /* netif_receive_skb_core */
+#include <linux/netdevice.h>   /* netif_receive_skb_list */
 #include <linux/etherdevice.h> /* eth_type_trans */
 
 /* General idea: XDP packets getting XDP redirected to another CPU,
@@ -253,10 +253,11 @@  static int cpu_map_kthread_run(void *data)
 	while (!kthread_should_stop() || !__ptr_ring_empty(rcpu->queue)) {
 		struct xdp_cpumap_stats stats = {}; /* zero stats */
 		gfp_t gfp = __GFP_ZERO | GFP_ATOMIC;
-		unsigned int drops = 0, sched = 0;
 		void *frames[CPUMAP_BATCH];
 		void *skbs[CPUMAP_BATCH];
+		unsigned int sched = 0;
 		int i, n, m, nframes;
+		LIST_HEAD(list);
 
 		/* Release CPU reschedule checks */
 		if (__ptr_ring_empty(rcpu->queue)) {
@@ -297,7 +298,6 @@  static int cpu_map_kthread_run(void *data)
 			if (unlikely(m == 0)) {
 				for (i = 0; i < nframes; i++)
 					skbs[i] = NULL; /* effect: xdp_return_frame */
-				drops += nframes;
 			}
 		}
 
@@ -305,7 +305,6 @@  static int cpu_map_kthread_run(void *data)
 		for (i = 0; i < nframes; i++) {
 			struct xdp_frame *xdpf = frames[i];
 			struct sk_buff *skb = skbs[i];
-			int ret;
 
 			skb = __xdp_build_skb_from_frame(xdpf, skb,
 							 xdpf->dev_rx);
@@ -314,13 +313,12 @@  static int cpu_map_kthread_run(void *data)
 				continue;
 			}
 
-			/* Inject into network stack */
-			ret = netif_receive_skb_core(skb);
-			if (ret == NET_RX_DROP)
-				drops++;
+			list_add_tail(&skb->list, &list);
 		}
+		netif_receive_skb_list(&list);
+
 		/* Feedback loop via tracepoint */
-		trace_xdp_cpumap_kthread(rcpu->map_id, n, drops, sched, &stats);
+		trace_xdp_cpumap_kthread(rcpu->map_id, n, sched, &stats);
 
 		local_bh_enable(); /* resched point, may call do_softirq() */
 	}
diff --git a/samples/bpf/xdp_monitor_kern.c b/samples/bpf/xdp_monitor_kern.c
index 5c955b812c47..f7aeecfa046b 100644
--- a/samples/bpf/xdp_monitor_kern.c
+++ b/samples/bpf/xdp_monitor_kern.c
@@ -186,9 +186,8 @@  struct cpumap_kthread_ctx {
 	int map_id;		//	offset:8;  size:4; signed:1;
 	u32 act;		//	offset:12; size:4; signed:0;
 	int cpu;		//	offset:16; size:4; signed:1;
-	unsigned int drops;	//	offset:20; size:4; signed:0;
-	unsigned int processed;	//	offset:24; size:4; signed:0;
-	int sched;		//	offset:28; size:4; signed:1;
+	unsigned int processed;	//	offset:20; size:4; signed:0;
+	int sched;		//	offset:24; size:4; signed:1;
 };
 
 SEC("tracepoint/xdp/xdp_cpumap_kthread")
@@ -201,7 +200,6 @@  int trace_xdp_cpumap_kthread(struct cpumap_kthread_ctx *ctx)
 	if (!rec)
 		return 0;
 	rec->processed += ctx->processed;
-	rec->dropped   += ctx->drops;
 
 	/* Count times kthread yielded CPU via schedule call */
 	if (ctx->sched)
diff --git a/samples/bpf/xdp_monitor_user.c b/samples/bpf/xdp_monitor_user.c
index 49ebc49aefc3..f31796809cce 100644
--- a/samples/bpf/xdp_monitor_user.c
+++ b/samples/bpf/xdp_monitor_user.c
@@ -432,11 +432,11 @@  static void stats_print(struct stats_record *stats_rec,
 
 	/* cpumap kthread stats */
 	{
-		char *fmt1 = "%-15s %-7d %'-12.0f %'-12.0f %'-10.0f %s\n";
-		char *fmt2 = "%-15s %-7s %'-12.0f %'-12.0f %'-10.0f %s\n";
+		char *fmt1 = "%-15s %-7d %'-12.0f %-12s %'-10.0f %s\n";
+		char *fmt2 = "%-15s %-7s %'-12.0f %-12s %'-10.0f %s\n";
 		struct record *rec, *prev;
-		double drop, info;
 		char *i_str = "";
+		double info;
 
 		rec  =  &stats_rec->xdp_cpumap_kthread;
 		prev = &stats_prev->xdp_cpumap_kthread;
@@ -446,20 +446,18 @@  static void stats_print(struct stats_record *stats_rec,
 			struct datarec *p = &prev->cpu[i];
 
 			pps  = calc_pps(r, p, t);
-			drop = calc_drop(r, p, t);
 			info = calc_info(r, p, t);
 			if (info > 0)
 				i_str = "sched";
-			if (pps > 0 || drop > 0)
+			if (pps > 0)
 				printf(fmt1, "cpumap-kthread",
-				       i, pps, drop, info, i_str);
+				       i, pps, "-", info, i_str);
 		}
 		pps = calc_pps(&rec->total, &prev->total, t);
-		drop = calc_drop(&rec->total, &prev->total, t);
 		info = calc_info(&rec->total, &prev->total, t);
 		if (info > 0)
 			i_str = "sched-sum";
-		printf(fmt2, "cpumap-kthread", "total", pps, drop, info, i_str);
+		printf(fmt2, "cpumap-kthread", "total", pps, "-", info, i_str);
 	}
 
 	/* devmap ndo_xdp_xmit stats */
diff --git a/samples/bpf/xdp_redirect_cpu_kern.c b/samples/bpf/xdp_redirect_cpu_kern.c
index 8255025dea97..34332b0b7020 100644
--- a/samples/bpf/xdp_redirect_cpu_kern.c
+++ b/samples/bpf/xdp_redirect_cpu_kern.c
@@ -699,12 +699,11 @@  struct cpumap_kthread_ctx {
 	int map_id;			//	offset:8;  size:4; signed:1;
 	u32 act;			//	offset:12; size:4; signed:0;
 	int cpu;			//	offset:16; size:4; signed:1;
-	unsigned int drops;		//	offset:20; size:4; signed:0;
-	unsigned int processed;		//	offset:24; size:4; signed:0;
-	int sched;			//	offset:28; size:4; signed:1;
-	unsigned int xdp_pass;		//	offset:32; size:4; signed:0;
-	unsigned int xdp_drop;		//	offset:36; size:4; signed:0;
-	unsigned int xdp_redirect;	//	offset:40; size:4; signed:0;
+	unsigned int processed;		//	offset:20; size:4; signed:0;
+	int sched;			//	offset:24; size:4; signed:1;
+	unsigned int xdp_pass;		//	offset:28; size:4; signed:0;
+	unsigned int xdp_drop;		//	offset:32; size:4; signed:0;
+	unsigned int xdp_redirect;	//	offset:36; size:4; signed:0;
 };
 
 SEC("tracepoint/xdp/xdp_cpumap_kthread")
@@ -717,7 +716,6 @@  int trace_xdp_cpumap_kthread(struct cpumap_kthread_ctx *ctx)
 	if (!rec)
 		return 0;
 	rec->processed += ctx->processed;
-	rec->dropped   += ctx->drops;
 	rec->xdp_pass  += ctx->xdp_pass;
 	rec->xdp_drop  += ctx->xdp_drop;
 	rec->xdp_redirect  += ctx->xdp_redirect;
diff --git a/samples/bpf/xdp_redirect_cpu_user.c b/samples/bpf/xdp_redirect_cpu_user.c
index 576411612523..76346647fd5b 100644
--- a/samples/bpf/xdp_redirect_cpu_user.c
+++ b/samples/bpf/xdp_redirect_cpu_user.c
@@ -436,8 +436,8 @@  static void stats_print(struct stats_record *stats_rec,
 
 	/* cpumap kthread stats */
 	{
-		char *fmt_k = "%-15s %-7d %'-14.0f %'-11.0f %'-10.0f %s\n";
-		char *fm2_k = "%-15s %-7s %'-14.0f %'-11.0f %'-10.0f %s\n";
+		char *fmt_k = "%-15s %-7d %'-14.0f %-11s %'-10.0f %s\n";
+		char *fm2_k = "%-15s %-7s %'-14.0f %-11s %'-10.0f %s\n";
 		char *e_str = "";
 
 		rec  = &stats_rec->kthread;
@@ -448,20 +448,18 @@  static void stats_print(struct stats_record *stats_rec,
 			struct datarec *p = &prev->cpu[i];
 
 			pps  = calc_pps(r, p, t);
-			drop = calc_drop_pps(r, p, t);
 			err  = calc_errs_pps(r, p, t);
 			if (err > 0)
 				e_str = "sched";
 			if (pps > 0)
 				printf(fmt_k, "cpumap_kthread",
-				       i, pps, drop, err, e_str);
+				       i, pps, "-", err, e_str);
 		}
 		pps = calc_pps(&rec->total, &prev->total, t);
-		drop = calc_drop_pps(&rec->total, &prev->total, t);
 		err  = calc_errs_pps(&rec->total, &prev->total, t);
 		if (err > 0)
 			e_str = "sched-sum";
-		printf(fm2_k, "cpumap_kthread", "total", pps, drop, err, e_str);
+		printf(fm2_k, "cpumap_kthread", "total", pps, "-", err, e_str);
 	}
 
 	/* XDP redirect err tracepoints (very unlikely) */