diff mbox series

[v5,net-next,06/11] skbuff: remove __kfree_skb_flush()

Message ID 20210211185220.9753-7-alobakin@pm.me
State New
Headers show
Series skbuff: introduce skbuff_heads bulking and reusing | expand

Commit Message

Alexander Lobakin Feb. 11, 2021, 6:54 p.m. UTC
This function isn't much needed, as the NAPI skb queue gets bulk-freed
anyway when there's no more room, and it may even reduce the efficiency
of bulk operations.
It will be even less needed after the skb cache is reused on the allocation
path, so remove it and thereby lighten network softirqs a bit.

Suggested-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Alexander Lobakin <alobakin@pm.me>
---
 include/linux/skbuff.h |  1 -
 net/core/dev.c         |  7 +------
 net/core/skbuff.c      | 12 ------------
 3 files changed, 1 insertion(+), 19 deletions(-)

Comments

Alexander Duyck Feb. 12, 2021, 3:28 a.m. UTC | #1
On Thu, Feb 11, 2021 at 10:57 AM Alexander Lobakin <alobakin@pm.me> wrote:
>

> This function isn't much needed as NAPI skb queue gets bulk-freed

> anyway when there's no more room, and even may reduce the efficiency

> of bulk operations.

> It will be even less needed after reusing skb cache on allocation path,

> so remove it and this way lighten network softirqs a bit.

>

> Suggested-by: Eric Dumazet <edumazet@google.com>

> Signed-off-by: Alexander Lobakin <alobakin@pm.me>


I'm wondering if you have any actual gains to show from this patch?

The reason why I ask is because the flushing was happening at the end
of the softirq before the system basically gave control back over to
something else. As such there is a good chance for the memory to be
dropped from the cache by the time we come back to it. So it may be
just as expensive if not more so than accessing memory that was just
freed elsewhere and placed in the slab cache.

> ---

>  include/linux/skbuff.h |  1 -

>  net/core/dev.c         |  7 +------

>  net/core/skbuff.c      | 12 ------------

>  3 files changed, 1 insertion(+), 19 deletions(-)

>

> diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h

> index 0a4e91a2f873..0e0707296098 100644

> --- a/include/linux/skbuff.h

> +++ b/include/linux/skbuff.h

> @@ -2919,7 +2919,6 @@ static inline struct sk_buff *napi_alloc_skb(struct napi_struct *napi,

>  }

>  void napi_consume_skb(struct sk_buff *skb, int budget);

>

> -void __kfree_skb_flush(void);

>  void __kfree_skb_defer(struct sk_buff *skb);

>

>  /**

> diff --git a/net/core/dev.c b/net/core/dev.c

> index 321d41a110e7..4154d4683bb9 100644

> --- a/net/core/dev.c

> +++ b/net/core/dev.c

> @@ -4944,8 +4944,6 @@ static __latent_entropy void net_tx_action(struct softirq_action *h)

>                         else

>                                 __kfree_skb_defer(skb);

>                 }

> -

> -               __kfree_skb_flush();

>         }

>

>         if (sd->output_queue) {

> @@ -7012,7 +7010,6 @@ static int napi_threaded_poll(void *data)

>                         __napi_poll(napi, &repoll);

>                         netpoll_poll_unlock(have);

>

> -                       __kfree_skb_flush();

>                         local_bh_enable();

>

>                         if (!repoll)


So it looks like this is the one exception to my comment above. Here
we should probably be adding a "if (!repoll)" before calling
__kfree_skb_flush().

> @@ -7042,7 +7039,7 @@ static __latent_entropy void net_rx_action(struct softirq_action *h)

>

>                 if (list_empty(&list)) {

>                         if (!sd_has_rps_ipi_waiting(sd) && list_empty(&repoll))

> -                               goto out;

> +                               return;

>                         break;

>                 }

>

> @@ -7069,8 +7066,6 @@ static __latent_entropy void net_rx_action(struct softirq_action *h)

>                 __raise_softirq_irqoff(NET_RX_SOFTIRQ);

>

>         net_rps_action_and_irq_enable(sd);

> -out:

> -       __kfree_skb_flush();

>  }

>

>  struct netdev_adjacent {

> diff --git a/net/core/skbuff.c b/net/core/skbuff.c

> index 1c6f6ef70339..4be2bb969535 100644

> --- a/net/core/skbuff.c

> +++ b/net/core/skbuff.c

> @@ -838,18 +838,6 @@ void __consume_stateless_skb(struct sk_buff *skb)

>         kfree_skbmem(skb);

>  }

>

> -void __kfree_skb_flush(void)

> -{

> -       struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache);

> -

> -       /* flush skb_cache if containing objects */

> -       if (nc->skb_count) {

> -               kmem_cache_free_bulk(skbuff_head_cache, nc->skb_count,

> -                                    nc->skb_cache);

> -               nc->skb_count = 0;

> -       }

> -}

> -

>  static inline void _kfree_skb_defer(struct sk_buff *skb)

>  {

>         struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache);

> --

> 2.30.1

>

>
Alexander Lobakin Feb. 13, 2021, 1:56 p.m. UTC | #2
From: Alexander Duyck <alexander.duyck@gmail.com>

Date: Thu, 11 Feb 2021 19:28:52 -0800

> On Thu, Feb 11, 2021 at 10:57 AM Alexander Lobakin <alobakin@pm.me> wrote:

> >

> > This function isn't much needed as NAPI skb queue gets bulk-freed

> > anyway when there's no more room, and even may reduce the efficiency

> > of bulk operations.

> > It will be even less needed after reusing skb cache on allocation path,

> > so remove it and this way lighten network softirqs a bit.

> >

> > Suggested-by: Eric Dumazet <edumazet@google.com>

> > Signed-off-by: Alexander Lobakin <alobakin@pm.me>

> 

> I'm wondering if you have any actual gains to show from this patch?

> 

> The reason why I ask is because the flushing was happening at the end

> of the softirq before the system basically gave control back over to

> something else. As such there is a good chance for the memory to be

> dropped from the cache by the time we come back to it. So it may be

> just as expensive if not more so than accessing memory that was just

> freed elsewhere and placed in the slab cache.


Just retested after re-adding this function (and changing the logic so
it would drop the second half of the cache, like napi_skb_cache_put()
does) and saw a 10 Mbps drop with napi_build_skb() +
napi_gro_receive().

So it seems that getting a pointer from an array instead of calling
kmem_cache_alloc() is cheaper, even if the given object was pulled
out of the CPU caches.

> > ---

> >  include/linux/skbuff.h |  1 -

> >  net/core/dev.c         |  7 +------

> >  net/core/skbuff.c      | 12 ------------

> >  3 files changed, 1 insertion(+), 19 deletions(-)

> >

> > diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h

> > index 0a4e91a2f873..0e0707296098 100644

> > --- a/include/linux/skbuff.h

> > +++ b/include/linux/skbuff.h

> > @@ -2919,7 +2919,6 @@ static inline struct sk_buff *napi_alloc_skb(struct napi_struct *napi,

> >  }

> >  void napi_consume_skb(struct sk_buff *skb, int budget);

> >

> > -void __kfree_skb_flush(void);

> >  void __kfree_skb_defer(struct sk_buff *skb);

> >

> >  /**

> > diff --git a/net/core/dev.c b/net/core/dev.c

> > index 321d41a110e7..4154d4683bb9 100644

> > --- a/net/core/dev.c

> > +++ b/net/core/dev.c

> > @@ -4944,8 +4944,6 @@ static __latent_entropy void net_tx_action(struct softirq_action *h)

> >                         else

> >                                 __kfree_skb_defer(skb);

> >                 }

> > -

> > -               __kfree_skb_flush();

> >         }

> >

> >         if (sd->output_queue) {

> > @@ -7012,7 +7010,6 @@ static int napi_threaded_poll(void *data)

> >                         __napi_poll(napi, &repoll);

> >                         netpoll_poll_unlock(have);

> >

> > -                       __kfree_skb_flush();

> >                         local_bh_enable();

> >

> >                         if (!repoll)

> 

> So it looks like this is the one exception to my comment above. Here

> we should probably be adding a "if (!repoll)" before calling

> __kfree_skb_flush().

> 

> > @@ -7042,7 +7039,7 @@ static __latent_entropy void net_rx_action(struct softirq_action *h)

> >

> >                 if (list_empty(&list)) {

> >                         if (!sd_has_rps_ipi_waiting(sd) && list_empty(&repoll))

> > -                               goto out;

> > +                               return;

> >                         break;

> >                 }

> >

> > @@ -7069,8 +7066,6 @@ static __latent_entropy void net_rx_action(struct softirq_action *h)

> >                 __raise_softirq_irqoff(NET_RX_SOFTIRQ);

> >

> >         net_rps_action_and_irq_enable(sd);

> > -out:

> > -       __kfree_skb_flush();

> >  }

> >

> >  struct netdev_adjacent {

> > diff --git a/net/core/skbuff.c b/net/core/skbuff.c

> > index 1c6f6ef70339..4be2bb969535 100644

> > --- a/net/core/skbuff.c

> > +++ b/net/core/skbuff.c

> > @@ -838,18 +838,6 @@ void __consume_stateless_skb(struct sk_buff *skb)

> >         kfree_skbmem(skb);

> >  }

> >

> > -void __kfree_skb_flush(void)

> > -{

> > -       struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache);

> > -

> > -       /* flush skb_cache if containing objects */

> > -       if (nc->skb_count) {

> > -               kmem_cache_free_bulk(skbuff_head_cache, nc->skb_count,

> > -                                    nc->skb_cache);

> > -               nc->skb_count = 0;

> > -       }

> > -}

> > -

> >  static inline void _kfree_skb_defer(struct sk_buff *skb)

> >  {

> >         struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache);

> > --

> > 2.30.1


Al
diff mbox series

Patch

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 0a4e91a2f873..0e0707296098 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -2919,7 +2919,6 @@  static inline struct sk_buff *napi_alloc_skb(struct napi_struct *napi,
 }
 void napi_consume_skb(struct sk_buff *skb, int budget);
 
-void __kfree_skb_flush(void);
 void __kfree_skb_defer(struct sk_buff *skb);
 
 /**
diff --git a/net/core/dev.c b/net/core/dev.c
index 321d41a110e7..4154d4683bb9 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4944,8 +4944,6 @@  static __latent_entropy void net_tx_action(struct softirq_action *h)
 			else
 				__kfree_skb_defer(skb);
 		}
-
-		__kfree_skb_flush();
 	}
 
 	if (sd->output_queue) {
@@ -7012,7 +7010,6 @@  static int napi_threaded_poll(void *data)
 			__napi_poll(napi, &repoll);
 			netpoll_poll_unlock(have);
 
-			__kfree_skb_flush();
 			local_bh_enable();
 
 			if (!repoll)
@@ -7042,7 +7039,7 @@  static __latent_entropy void net_rx_action(struct softirq_action *h)
 
 		if (list_empty(&list)) {
 			if (!sd_has_rps_ipi_waiting(sd) && list_empty(&repoll))
-				goto out;
+				return;
 			break;
 		}
 
@@ -7069,8 +7066,6 @@  static __latent_entropy void net_rx_action(struct softirq_action *h)
 		__raise_softirq_irqoff(NET_RX_SOFTIRQ);
 
 	net_rps_action_and_irq_enable(sd);
-out:
-	__kfree_skb_flush();
 }
 
 struct netdev_adjacent {
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 1c6f6ef70339..4be2bb969535 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -838,18 +838,6 @@  void __consume_stateless_skb(struct sk_buff *skb)
 	kfree_skbmem(skb);
 }
 
-void __kfree_skb_flush(void)
-{
-	struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache);
-
-	/* flush skb_cache if containing objects */
-	if (nc->skb_count) {
-		kmem_cache_free_bulk(skbuff_head_cache, nc->skb_count,
-				     nc->skb_cache);
-		nc->skb_count = 0;
-	}
-}
-
 static inline void _kfree_skb_defer(struct sk_buff *skb)
 {
 	struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache);