[v2,bpf-next] cpumap: bulk skb using netif_receive_skb_list

Message ID bb627106428ea3223610f5623142c24270f0e14e.1618330734.git.lorenzo@kernel.org
State New
Series [v2,bpf-next] cpumap: bulk skb using netif_receive_skb_list

Commit Message

Lorenzo Bianconi April 13, 2021, 4:22 p.m. UTC
Rely on the netif_receive_skb_list routine to send skbs converted from
xdp_frames in cpu_map_kthread_run in order to improve i-cache usage.
The proposed patch has been tested by running the xdp_redirect_cpu bpf
sample available in the kernel tree, which is used to redirect UDP
frames from the ixgbe driver to a cpumap entry and then to the
networking stack. UDP frames are generated using pktgen.

$xdp_redirect_cpu  --cpu <cpu> --progname xdp_cpu_map0 --dev <eth>

bpf-next: ~2.2Mpps
bpf-next + cpumap skb-list: ~3.15Mpps

Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
---
Changes since v1:
- fixed comment
- rebased on top of bpf-next tree
---
 kernel/bpf/cpumap.c | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)
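
The core of the change is the batching pattern below - a condensed, lightly
paraphrased sketch of the cpu_map_kthread_run hunk (the authoritative diff is
in the Patch section at the bottom of this page); error handling is elided:

	LIST_HEAD(list);

	for (i = 0; i < nframes; i++) {
		struct xdp_frame *xdpf = frames[i];
		struct sk_buff *skb = skbs[i];

		skb = __xdp_build_skb_from_frame(xdpf, skb, xdpf->dev_rx);
		if (!skb)
			continue;	/* frame cleanup elided */

		/* queue the skb instead of injecting it into the stack one by one */
		list_add_tail(&skb->list, &list);
	}
	/* one call into the stack for the whole batch, replacing the per-skb
	 * netif_receive_skb_core() + NET_RX_DROP accounting
	 */
	netif_receive_skb_list(&list);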

Comments

Daniel Borkmann April 15, 2021, 3:05 p.m. UTC | #1
On 4/13/21 6:22 PM, Lorenzo Bianconi wrote:
> Rely on the netif_receive_skb_list routine to send skbs converted from
> xdp_frames in cpu_map_kthread_run in order to improve i-cache usage.
> The proposed patch has been tested by running the xdp_redirect_cpu bpf
> sample available in the kernel tree, which is used to redirect UDP
> frames from the ixgbe driver to a cpumap entry and then to the
> networking stack. UDP frames are generated using pktgen.
> 
> $xdp_redirect_cpu  --cpu <cpu> --progname xdp_cpu_map0 --dev <eth>
> 
> bpf-next: ~2.2Mpps
> bpf-next + cpumap skb-list: ~3.15Mpps
> 
> Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
> ---
> Changes since v1:
> - fixed comment
> - rebased on top of bpf-next tree
> ---
>   kernel/bpf/cpumap.c | 11 +++++------
>   1 file changed, 5 insertions(+), 6 deletions(-)
> 
> diff --git a/kernel/bpf/cpumap.c b/kernel/bpf/cpumap.c
> index 0cf2791d5099..d89551a508b2 100644
> --- a/kernel/bpf/cpumap.c
> +++ b/kernel/bpf/cpumap.c
> @@ -27,7 +27,7 @@
>   #include <linux/capability.h>
>   #include <trace/events/xdp.h>
>   
> -#include <linux/netdevice.h>   /* netif_receive_skb_core */
> +#include <linux/netdevice.h>   /* netif_receive_skb_list */
>   #include <linux/etherdevice.h> /* eth_type_trans */
>   
>   /* General idea: XDP packets getting XDP redirected to another CPU,
> @@ -257,6 +257,7 @@ static int cpu_map_kthread_run(void *data)
>   		void *frames[CPUMAP_BATCH];
>   		void *skbs[CPUMAP_BATCH];
>   		int i, n, m, nframes;
> +		LIST_HEAD(list);
>   
>   		/* Release CPU reschedule checks */
>   		if (__ptr_ring_empty(rcpu->queue)) {
> @@ -305,7 +306,6 @@ static int cpu_map_kthread_run(void *data)
>   		for (i = 0; i < nframes; i++) {
>   			struct xdp_frame *xdpf = frames[i];
>   			struct sk_buff *skb = skbs[i];
> -			int ret;
>   
>   			skb = __xdp_build_skb_from_frame(xdpf, skb,
>   							 xdpf->dev_rx);
> @@ -314,11 +314,10 @@ static int cpu_map_kthread_run(void *data)
>   				continue;
>   			}
>   
> -			/* Inject into network stack */
> -			ret = netif_receive_skb_core(skb);
> -			if (ret == NET_RX_DROP)
> -				drops++;
> +			list_add_tail(&skb->list, &list);
>   		}
> +		netif_receive_skb_list(&list);
> +
>   		/* Feedback loop via tracepoint */
>   		trace_xdp_cpumap_kthread(rcpu->map_id, n, drops, sched, &stats);

Given we stop counting drops with the netif_receive_skb_list(), we should then
also remove drops from trace_xdp_cpumap_kthread(), imho, as otherwise it is rather
misleading (as in: drops actually happening, but 0 are shown from the tracepoint).
Given they are not considered stable API, I would just remove those to make it clear
to users that they cannot rely on this counter anymore anyway.

Thanks,
Daniel
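
If the field were removed as suggested here, the caller-side change would be
roughly the one-liner below; this is a hypothetical sketch only (not part of
the posted patch), and the matching TRACE_EVENT(xdp_cpumap_kthread) definition
in include/trace/events/xdp.h plus any consumers of that field would need the
same update:

-		trace_xdp_cpumap_kthread(rcpu->map_id, n, drops, sched, &stats);
+		trace_xdp_cpumap_kthread(rcpu->map_id, n, sched, &stats);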
Jesper Dangaard Brouer April 15, 2021, 3:21 p.m. UTC | #2
On Thu, 15 Apr 2021 17:05:36 +0200
Daniel Borkmann <daniel@iogearbox.net> wrote:

> On 4/13/21 6:22 PM, Lorenzo Bianconi wrote:
[...]
> > @@ -314,11 +314,10 @@ static int cpu_map_kthread_run(void *data)
> >   				continue;
> >   			}
> >   
> > -			/* Inject into network stack */
> > -			ret = netif_receive_skb_core(skb);
> > -			if (ret == NET_RX_DROP)
> > -				drops++;
> > +			list_add_tail(&skb->list, &list);
> >   		}
> > +		netif_receive_skb_list(&list);
> > +
> >   		/* Feedback loop via tracepoint */
> >   		trace_xdp_cpumap_kthread(rcpu->map_id, n, drops, sched, &stats);
> 
> Given we stop counting drops with the netif_receive_skb_list(), we should then
> also remove drops from trace_xdp_cpumap_kthread(), imho, as otherwise it is rather
> misleading (as in: drops actually happening, but 0 are shown from the tracepoint).
> Given they are not considered stable API, I would just remove those to make it clear
> to users that they cannot rely on this counter anymore anyway.

After Lorenzo's change, 'drops' still counts the cases where kmem_cache_alloc_bulk
cannot allocate SKBs.  I guess that will not occur very often.  But how
can people/users debug such a case?  Maybe the MM layer can tell us?

-- 
Best regards,
  Jesper Dangaard Brouer
  MSc.CS, Principal Kernel Engineer at Red Hat
  LinkedIn: http://www.linkedin.com/in/brouer
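
For reference, the allocation-failure path Jesper refers to is the bulk SKB
allocation earlier in cpu_map_kthread_run, roughly as follows (paraphrased
from the bpf-next tree of the time, not quoted verbatim):

	m = kmem_cache_alloc_bulk(skbuff_head_cache, gfp, nframes, skbs);
	if (unlikely(m == 0)) {
		/* bulk allocation failed: mark every slot empty so the
		 * skb-build loop skips them, and count them as drops
		 */
		for (i = 0; i < nframes; i++)
			skbs[i] = NULL;
		drops += nframes;
	}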
David Ahern April 15, 2021, 3:55 p.m. UTC | #3
On 4/15/21 8:05 AM, Daniel Borkmann wrote:
> On 4/13/21 6:22 PM, Lorenzo Bianconi wrote:
[...]
>> @@ -314,11 +314,10 @@ static int cpu_map_kthread_run(void *data)
>>                   continue;
>>               }
>>
>> -            /* Inject into network stack */
>> -            ret = netif_receive_skb_core(skb);
>> -            if (ret == NET_RX_DROP)
>> -                drops++;
>> +            list_add_tail(&skb->list, &list);
>>           }
>> +        netif_receive_skb_list(&list);
>> +
>>           /* Feedback loop via tracepoint */
>>           trace_xdp_cpumap_kthread(rcpu->map_id, n, drops, sched,
>> &stats);
> 
> Given we stop counting drops with the netif_receive_skb_list(), we should then
> also remove drops from trace_xdp_cpumap_kthread(), imho, as otherwise it is rather
> misleading (as in: drops actually happening, but 0 are shown from the tracepoint).
> Given they are not considered stable API, I would just remove those to make it clear
> to users that they cannot rely on this counter anymore anyway.

What's the visibility into drops then? Seems like it would be fairly
easy to have netif_receive_skb_list return number of drops.
Lorenzo Bianconi April 15, 2021, 4:03 p.m. UTC | #4
> On 4/15/21 8:05 AM, Daniel Borkmann wrote:

[...]
> >> &stats);
> > 
> > Given we stop counting drops with the netif_receive_skb_list(), we should then
> > also remove drops from trace_xdp_cpumap_kthread(), imho, as otherwise it is rather
> > misleading (as in: drops actually happening, but 0 are shown from the tracepoint).
> > Given they are not considered stable API, I would just remove those to make it clear
> > to users that they cannot rely on this counter anymore anyway.
> 
> What's the visibility into drops then? Seems like it would be fairly
> easy to have netif_receive_skb_list return number of drops.

In order to return drops from netif_receive_skb_list() I guess we would need to
introduce some extra checks in the hot path. Moreover, packet drops are already
accounted for in the networking stack, and this would currently be the only
consumer of that info. Is it worth doing so?

Regards,
Lorenzo
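
One way to picture the extra cost Lorenzo mentions: keeping per-packet drop
accounting on the caller side would look roughly like the hypothetical
fallback below, which reintroduces a branch per packet and gives up the single
batched call this patch is after:

	struct sk_buff *skb, *next;

	/* hypothetical: per-skb delivery with drop accounting, i.e. exactly
	 * the pattern the list-based batching is meant to avoid
	 */
	list_for_each_entry_safe(skb, next, &list, list) {
		skb_list_del_init(skb);
		if (netif_receive_skb(skb) == NET_RX_DROP)
			drops++;
	}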
David Ahern April 15, 2021, 6 p.m. UTC | #5
On 4/15/21 9:03 AM, Lorenzo Bianconi wrote:
>> On 4/15/21 8:05 AM, Daniel Borkmann wrote:
> 
> [...]
>>>> &stats);
>>>
>>> Given we stop counting drops with the netif_receive_skb_list(), we should then
>>> also remove drops from trace_xdp_cpumap_kthread(), imho, as otherwise it is rather
>>> misleading (as in: drops actually happening, but 0 are shown from the tracepoint).
>>> Given they are not considered stable API, I would just remove those to make it clear
>>> to users that they cannot rely on this counter anymore anyway.
>>
>> What's the visibility into drops then? Seems like it would be fairly
>> easy to have netif_receive_skb_list return number of drops.
>>
> 
> In order to return drops from netif_receive_skb_list() I guess we would need to
> introduce some extra checks in the hot path. Moreover, packet drops are already
> accounted for in the networking stack, and this would currently be the only
> consumer of that info. Is it worth doing so?

right - softnet_stat shows the drop. So what is lost here is the fact that the
packet came from a cpumap XDP redirect.

Better insight into drops is needed, but I guess in this case knowing the packet
came from the cpumap does not really help explain why it was dropped - that is
internal to __netif_receive_skb_list_core. I guess it is ok to drop the counter
from the tracepoint.
Lorenzo Bianconi April 15, 2021, 8:10 p.m. UTC | #6
> On 4/15/21 9:03 AM, Lorenzo Bianconi wrote:
> >> On 4/15/21 8:05 AM, Daniel Borkmann wrote:
> > 
> > [...]
> >>>> &stats);
> >>>
> >>> Given we stop counting drops with the netif_receive_skb_list(), we should then
> >>> also remove drops from trace_xdp_cpumap_kthread(), imho, as otherwise it is rather
> >>> misleading (as in: drops actually happening, but 0 are shown from the tracepoint).
> >>> Given they are not considered stable API, I would just remove those to make it clear
> >>> to users that they cannot rely on this counter anymore anyway.
> >>
> >> What's the visibility into drops then? Seems like it would be fairly
> >> easy to have netif_receive_skb_list return number of drops.
> > 
> > In order to return drops from netif_receive_skb_list() I guess we would need to
> > introduce some extra checks in the hot path. Moreover, packet drops are already
> > accounted for in the networking stack, and this would currently be the only
> > consumer of that info. Is it worth doing so?
> 
> right - softnet_stat shows the drop. So what is lost here is the fact that the
> packet came from a cpumap XDP redirect.
> 
> Better insight into drops is needed, but I guess in this case knowing the packet
> came from the cpumap does not really help explain why it was dropped - that is
> internal to __netif_receive_skb_list_core. I guess it is ok to drop the counter
> from the tracepoint.

Applying the current patch, drops just counts the number of kmem_cache_alloc_bulk()
failures. Looking at the kmem_cache_alloc_bulk() code, it does not seem to me that
there are any failure counters there. So I am wondering, is this important info for
the user? If so, I guess we can just rename the counter to something more meaningful
(e.g. skb_alloc_failures).

Regards,
Lorenzo
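
If the rename route were taken, the caller side would only need something like
the sketch below (hypothetical; skb_alloc_failures is just an illustrative
name, and the corresponding field of the xdp_cpumap_kthread tracepoint plus
any tooling parsing it would need the same rename):

	unsigned int skb_alloc_failures = 0;
	/* ... */
	m = kmem_cache_alloc_bulk(skbuff_head_cache, gfp, nframes, skbs);
	if (unlikely(m == 0)) {
		for (i = 0; i < nframes; i++)
			skbs[i] = NULL;
		skb_alloc_failures += nframes;
	}
	/* ... */
	trace_xdp_cpumap_kthread(rcpu->map_id, n, skb_alloc_failures, sched, &stats);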
Daniel Borkmann April 15, 2021, 8:31 p.m. UTC | #7
On 4/15/21 10:10 PM, Lorenzo Bianconi wrote:
>> On 4/15/21 9:03 AM, Lorenzo Bianconi wrote:
>>>> On 4/15/21 8:05 AM, Daniel Borkmann wrote:
>>> [...]
>>>>>> &stats);
>>>>>
>>>>> Given we stop counting drops with the netif_receive_skb_list(), we should then
>>>>> also remove drops from trace_xdp_cpumap_kthread(), imho, as otherwise it is rather
>>>>> misleading (as in: drops actually happening, but 0 are shown from the tracepoint).
>>>>> Given they are not considered stable API, I would just remove those to make it clear
>>>>> to users that they cannot rely on this counter anymore anyway.
>>>>
>>>> What's the visibility into drops then? Seems like it would be fairly
>>>> easy to have netif_receive_skb_list return number of drops.
>>>
>>> In order to return drops from netif_receive_skb_list() I guess we would need to
>>> introduce some extra checks in the hot path. Moreover, packet drops are already
>>> accounted for in the networking stack, and this would currently be the only
>>> consumer of that info. Is it worth doing so?
>>
>> right - softnet_stat shows the drop. So what is lost here is the fact that the
>> packet came from a cpumap XDP redirect.
>>
>> Better insight into drops is needed, but I guess in this case knowing the packet
>> came from the cpumap does not really help explain why it was dropped - that is
>> internal to __netif_receive_skb_list_core. I guess it is ok to drop the counter
>> from the tracepoint.
> 
> Applying the current patch, drops just counts the number of kmem_cache_alloc_bulk()
> failures. Looking at the kmem_cache_alloc_bulk() code, it does not seem to me that
> there are any failure counters there. So I am wondering, is this important info for
> the user? If so, I guess we can just rename the counter to something more meaningful
> (e.g. skb_alloc_failures).

Right, at minimum it could be renamed, but I also wonder whether cpumap users really run
this tracepoint permanently to check for that ... presumably not, and if there is a
temporary drop due to that while the tracepoint is not enabled, you won't see it either.
So this field could probably be dropped and, if needed, the accounting in cpumap improved
in a different way.

Patch

diff --git a/kernel/bpf/cpumap.c b/kernel/bpf/cpumap.c
index 0cf2791d5099..d89551a508b2 100644
--- a/kernel/bpf/cpumap.c
+++ b/kernel/bpf/cpumap.c
@@ -27,7 +27,7 @@ 
 #include <linux/capability.h>
 #include <trace/events/xdp.h>
 
-#include <linux/netdevice.h>   /* netif_receive_skb_core */
+#include <linux/netdevice.h>   /* netif_receive_skb_list */
 #include <linux/etherdevice.h> /* eth_type_trans */
 
 /* General idea: XDP packets getting XDP redirected to another CPU,
@@ -257,6 +257,7 @@  static int cpu_map_kthread_run(void *data)
 		void *frames[CPUMAP_BATCH];
 		void *skbs[CPUMAP_BATCH];
 		int i, n, m, nframes;
+		LIST_HEAD(list);
 
 		/* Release CPU reschedule checks */
 		if (__ptr_ring_empty(rcpu->queue)) {
@@ -305,7 +306,6 @@  static int cpu_map_kthread_run(void *data)
 		for (i = 0; i < nframes; i++) {
 			struct xdp_frame *xdpf = frames[i];
 			struct sk_buff *skb = skbs[i];
-			int ret;
 
 			skb = __xdp_build_skb_from_frame(xdpf, skb,
 							 xdpf->dev_rx);
@@ -314,11 +314,10 @@  static int cpu_map_kthread_run(void *data)
 				continue;
 			}
 
-			/* Inject into network stack */
-			ret = netif_receive_skb_core(skb);
-			if (ret == NET_RX_DROP)
-				drops++;
+			list_add_tail(&skb->list, &list);
 		}
+		netif_receive_skb_list(&list);
+
 		/* Feedback loop via tracepoint */
 		trace_xdp_cpumap_kthread(rcpu->map_id, n, drops, sched, &stats);