diff mbox

[API-NEXT,PATCHv3] linux-generic: packet: tune alloc/free performance for non-reference paths

Message ID 20170114182418.21770-1-bill.fischofer@linaro.org
State New
Headers show

Commit Message

Bill Fischofer Jan. 14, 2017, 6:24 p.m. UTC
As reported in Bug https://bugs.linaro.org/show_bug.cgi?id=2816, packet
alloc/free performance for non-reference packets appears significantly
degraded after the introduction of packet reference support. Add fast paths
for non-reference packet frees to optimize this.

Signed-off-by: Bill Fischofer <bill.fischofer@linaro.org>

---
Changes for v3:
- Fix memory leak detected by odp_bench_packet.

Changes for v2:
- Reworked odp_packet_free_multi() to streamline processing for reference and
  non-reference paths.
  
 platform/linux-generic/include/odp_pool_internal.h | 22 ++++-
 platform/linux-generic/odp_packet.c                | 97 ++++++++++++++++++++--
 platform/linux-generic/odp_pool.c                  | 62 ++++----------
 3 files changed, 126 insertions(+), 55 deletions(-)

-- 
2.9.3

Comments

Bill Fischofer Jan. 24, 2017, 2:05 p.m. UTC | #1
Ping. This still needs a review.

On Sat, Jan 14, 2017 at 12:24 PM, Bill Fischofer
<bill.fischofer@linaro.org> wrote:
> As reported in Bug https://bugs.linaro.org/show_bug.cgi?id=2816 packet

> alloc/free performance for non-references appears significantly degraded

> after the introduction of packet reference support. Add fastpaths for

> non-reference packet frees to optimize this.

>

> Signed-off-by: Bill Fischofer <bill.fischofer@linaro.org>

> ---

> Changes for v3:

> - Fix memory leak detected by odp_bench_packet.

>

> Changes for v2:

> - Reworked odp_packet_free_multi() to streamline processing for reference and

>   non-reference paths.

>

>  platform/linux-generic/include/odp_pool_internal.h | 22 ++++-

>  platform/linux-generic/odp_packet.c                | 97 ++++++++++++++++++++--

>  platform/linux-generic/odp_pool.c                  | 62 ++++----------

>  3 files changed, 126 insertions(+), 55 deletions(-)

>

> diff --git a/platform/linux-generic/include/odp_pool_internal.h b/platform/linux-generic/include/odp_pool_internal.h

> index b0805ac..8fdc688 100644

> --- a/platform/linux-generic/include/odp_pool_internal.h

> +++ b/platform/linux-generic/include/odp_pool_internal.h

> @@ -121,9 +121,29 @@ static inline odp_buffer_hdr_t *buf_hdl_to_hdr(odp_buffer_t buf)

>         return pool_buf_hdl_to_hdr(pool, buf);

>  }

>

> +static inline odp_pool_t pool_index_to_handle(uint32_t pool_idx)

> +{

> +       return _odp_cast_scalar(odp_pool_t, pool_idx);

> +}

> +

> +static inline uint32_t pool_id_from_buf(odp_buffer_t buf)

> +{

> +       odp_buffer_bits_t handle;

> +

> +       handle.handle = buf;

> +       return handle.pool_id;

> +}

> +

>  int buffer_alloc_multi(pool_t *pool, odp_buffer_t buf[],

>                        odp_buffer_hdr_t *buf_hdr[], int num);

> -void buffer_free_multi(const odp_buffer_t buf[], int num_free);

> +void buffer_free_to_pool(uint32_t pool_id, const odp_buffer_t buf[], int num);

> +

> +static inline void buffer_free_multi(const odp_buffer_t buf[], int num_total)

> +{

> +       uint32_t pool_id = pool_id_from_buf(buf[0]);

> +

> +       buffer_free_to_pool(pool_id, buf, num_total);

> +}

>

>  #ifdef __cplusplus

>  }

> diff --git a/platform/linux-generic/odp_packet.c b/platform/linux-generic/odp_packet.c

> index 170965a..a0e65cc 100644

> --- a/platform/linux-generic/odp_packet.c

> +++ b/platform/linux-generic/odp_packet.c

> @@ -30,7 +30,7 @@ static inline odp_packet_t packet_handle(odp_packet_hdr_t *pkt_hdr)

>

>  static inline odp_buffer_t buffer_handle(odp_packet_hdr_t *pkt_hdr)

>  {

> -       return pkt_hdr->buf_hdr.handle.handle;

> +       return odp_hdr_to_buf(&pkt_hdr->buf_hdr);

>  }

>

>  static inline uint32_t packet_ref_inc(odp_packet_hdr_t *pkt_hdr)

> @@ -599,18 +599,69 @@ int odp_packet_alloc_multi(odp_pool_t pool_hdl, uint32_t len,

>         return num;

>  }

>

> -static inline void packet_free(odp_packet_hdr_t *pkt_hdr)

> +static inline odp_packet_hdr_t *packet_free_to_list(odp_packet_hdr_t *pkt_hdr,

> +                                                   odp_buffer_t buf[],

> +                                                   int nbufs,

> +                                                   int *nfree)

>  {

>         odp_packet_hdr_t *ref_hdr;

>         uint32_t ref_count;

> +       int num_seg, i;

>

>         do {

> +               ref_count = packet_ref_count(pkt_hdr) - 1;

> +               num_seg = pkt_hdr->buf_hdr.segcount;

>                 ref_hdr = pkt_hdr->ref_hdr;

> +

> +               if (odp_likely((CONFIG_PACKET_MAX_SEGS == 1 || num_seg == 1) &&

> +                              ref_count == 0)) {

> +                       if (*nfree >= nbufs)

> +                               break;

> +

> +                       buf[(*nfree)++] = buffer_handle(pkt_hdr);

> +               } else {

> +                       if (*nfree + num_seg >= nbufs)

> +                               break;

> +

> +                       for (i = 0; i < num_seg; i++) {

> +                               odp_packet_hdr_t *hdr =

> +                                       pkt_hdr->buf_hdr.seg[i].hdr;

> +

> +                               if (packet_ref_dec(hdr) == 1)

> +                                       buf[(*nfree)++] = buffer_handle(hdr);

> +                       }

> +

> +                       if (ref_count == 1)

> +                               pkt_hdr->unshared_len = pkt_hdr->frame_len;

> +               }

> +

> +               pkt_hdr = ref_hdr;

> +       } while (pkt_hdr);

> +

> +       return pkt_hdr;

> +}

> +

> +static inline void packet_free(odp_packet_hdr_t *pkt_hdr)

> +{

> +       odp_packet_hdr_t *ref_hdr;

> +       uint32_t ref_count;

> +       int num_seg;

> +

> +       do {

>                 ref_count = packet_ref_count(pkt_hdr) - 1;

> -               free_bufs(pkt_hdr, 0, pkt_hdr->buf_hdr.segcount);

> +               num_seg = pkt_hdr->buf_hdr.segcount;

> +               ref_hdr = pkt_hdr->ref_hdr;

>

> -               if (ref_count == 1)

> -                       pkt_hdr->unshared_len = pkt_hdr->frame_len;

> +               if (odp_likely((CONFIG_PACKET_MAX_SEGS == 1 || num_seg == 1) &&

> +                              ref_count == 0)) {

> +                       buffer_free_multi((odp_buffer_t *)

> +                                         &pkt_hdr->buf_hdr.handle.handle, 1);

> +               } else {

> +                       free_bufs(pkt_hdr, 0, num_seg);

> +

> +                       if (ref_count == 1)

> +                               pkt_hdr->unshared_len = pkt_hdr->frame_len;

> +               }

>

>                 pkt_hdr = ref_hdr;

>         } while (pkt_hdr);

> @@ -623,10 +674,40 @@ void odp_packet_free(odp_packet_t pkt)

>

>  void odp_packet_free_multi(const odp_packet_t pkt[], int num)

>  {

> -       int i;

> +       int nbufs = num * CONFIG_PACKET_MAX_SEGS * 2;

> +       odp_buffer_t buf[nbufs];

> +       int nfree = 0;

> +       int i = 0;

> +       odp_packet_hdr_t *pkt_hdr = odp_packet_hdr(pkt[0]);

> +       uint32_t pool_id = pool_id_from_buf(pkt_hdr->buf_hdr.handle.handle);

> +       uint32_t next_pool;

> +

> +       while (1) {

> +               pkt_hdr = packet_free_to_list(pkt_hdr, buf, nbufs, &nfree);

>

> -       for (i = 0; i < num; i++)

> -               packet_free(odp_packet_hdr(pkt[i]));

> +               if (pkt_hdr) {

> +                       buffer_free_to_pool(pool_id, buf, nfree);

> +                       nfree = 0;

> +                       continue;

> +               }

> +

> +               if (++i == num)

> +                       break;

> +

> +               pkt_hdr = odp_packet_hdr(pkt[i]);

> +               next_pool = pool_id_from_buf(pkt_hdr->buf_hdr.handle.handle);

> +

> +               if (odp_unlikely(next_pool != pool_id)) {

> +                       if (nfree > 0) {

> +                               buffer_free_to_pool(pool_id, buf, nfree);

> +                               nfree = 0;

> +                       }

> +                       pool_id = next_pool;

> +               }

> +       }

> +

> +       if (nfree > 0)

> +               buffer_free_to_pool(pool_id, buf, nfree);

>  }

>

>  int odp_packet_reset(odp_packet_t pkt, uint32_t len)

> diff --git a/platform/linux-generic/odp_pool.c b/platform/linux-generic/odp_pool.c

> index 090a55f..2648068 100644

> --- a/platform/linux-generic/odp_pool.c

> +++ b/platform/linux-generic/odp_pool.c

> @@ -47,19 +47,6 @@ typedef struct pool_local_t {

>  pool_table_t *pool_tbl;

>  static __thread pool_local_t local;

>

> -static inline odp_pool_t pool_index_to_handle(uint32_t pool_idx)

> -{

> -       return _odp_cast_scalar(odp_pool_t, pool_idx);

> -}

> -

> -static inline uint32_t pool_id_from_buf(odp_buffer_t buf)

> -{

> -       odp_buffer_bits_t handle;

> -

> -       handle.handle = buf;

> -       return handle.pool_id;

> -}

> -

>  int odp_pool_init_global(void)

>  {

>         uint32_t i;

> @@ -671,8 +658,8 @@ int buffer_alloc_multi(pool_t *pool, odp_buffer_t buf[],

>         return num_ch + num_deq;

>  }

>

> -static inline void buffer_free_to_pool(uint32_t pool_id,

> -                                      const odp_buffer_t buf[], int num)

> +void buffer_free_to_pool(uint32_t pool_id,

> +                        const odp_buffer_t buf[], int num)

>  {

>         pool_t *pool;

>         int i;

> @@ -732,36 +719,6 @@ static inline void buffer_free_to_pool(uint32_t pool_id,

>         cache->num = cache_num + num;

>  }

>

> -void buffer_free_multi(const odp_buffer_t buf[], int num_total)

> -{

> -       uint32_t pool_id;

> -       int num;

> -       int i;

> -       int first = 0;

> -

> -       while (1) {

> -               num = 1;

> -               i   = 1;

> -               pool_id = pool_id_from_buf(buf[first]);

> -

> -               /* 'num' buffers are from the same pool */

> -               if (num_total > 1) {

> -                       for (i = first; i < num_total; i++)

> -                               if (pool_id != pool_id_from_buf(buf[i]))

> -                                       break;

> -

> -                       num = i - first;

> -               }

> -

> -               buffer_free_to_pool(pool_id, &buf[first], num);

> -

> -               if (i == num_total)

> -                       return;

> -

> -               first = i;

> -       }

> -}

> -

>  odp_buffer_t odp_buffer_alloc(odp_pool_t pool_hdl)

>  {

>         odp_buffer_t buf;

> @@ -793,7 +750,20 @@ void odp_buffer_free(odp_buffer_t buf)

>

>  void odp_buffer_free_multi(const odp_buffer_t buf[], int num)

>  {

> -       buffer_free_multi(buf, num);

> +       uint32_t next_pool, pool_id = pool_id_from_buf(buf[0]);

> +       int i, first = 0;

> +

> +       for (i = 1; i < num; i++) {

> +               next_pool = pool_id_from_buf(buf[i]);

> +

> +               if (odp_unlikely(next_pool != pool_id)) {

> +                       buffer_free_to_pool(pool_id, &buf[first], i - first);

> +                       first = i;

> +                       pool_id = next_pool;

> +               }

> +       }

> +

> +       buffer_free_to_pool(pool_id, &buf[first], num - first);

>  }

>

>  int odp_pool_capability(odp_pool_capability_t *capa)

> --

> 2.9.3

>
Maxim Uvarov Jan. 25, 2017, 6:23 p.m. UTC | #2
Petri, can you please review/test this patch?

Maxim.

On 01/24/17 17:05, Bill Fischofer wrote:
> Ping. This still needs a review.

> 

> On Sat, Jan 14, 2017 at 12:24 PM, Bill Fischofer

> <bill.fischofer@linaro.org> wrote:

>> As reported in Bug https://bugs.linaro.org/show_bug.cgi?id=2816 packet

>> alloc/free performance for non-references appears significantly degraded

>> after the introduction of packet reference support. Add fastpaths for

>> non-reference packet frees to optimize this.

>>

>> Signed-off-by: Bill Fischofer <bill.fischofer@linaro.org>

>> ---

>> Changes for v3:

>> - Fix memory leak detected by odp_bench_packet.

>>

>> Changes for v2:

>> - Reworked odp_packet_free_multi() to streamline processing for reference and

>>   non-reference paths.

>>

>>  platform/linux-generic/include/odp_pool_internal.h | 22 ++++-

>>  platform/linux-generic/odp_packet.c                | 97 ++++++++++++++++++++--

>>  platform/linux-generic/odp_pool.c                  | 62 ++++----------

>>  3 files changed, 126 insertions(+), 55 deletions(-)

>>

>> diff --git a/platform/linux-generic/include/odp_pool_internal.h b/platform/linux-generic/include/odp_pool_internal.h

>> index b0805ac..8fdc688 100644

>> --- a/platform/linux-generic/include/odp_pool_internal.h

>> +++ b/platform/linux-generic/include/odp_pool_internal.h

>> @@ -121,9 +121,29 @@ static inline odp_buffer_hdr_t *buf_hdl_to_hdr(odp_buffer_t buf)

>>         return pool_buf_hdl_to_hdr(pool, buf);

>>  }

>>

>> +static inline odp_pool_t pool_index_to_handle(uint32_t pool_idx)

>> +{

>> +       return _odp_cast_scalar(odp_pool_t, pool_idx);

>> +}

>> +

>> +static inline uint32_t pool_id_from_buf(odp_buffer_t buf)

>> +{

>> +       odp_buffer_bits_t handle;

>> +

>> +       handle.handle = buf;

>> +       return handle.pool_id;

>> +}

>> +

>>  int buffer_alloc_multi(pool_t *pool, odp_buffer_t buf[],

>>                        odp_buffer_hdr_t *buf_hdr[], int num);

>> -void buffer_free_multi(const odp_buffer_t buf[], int num_free);

>> +void buffer_free_to_pool(uint32_t pool_id, const odp_buffer_t buf[], int num);

>> +

>> +static inline void buffer_free_multi(const odp_buffer_t buf[], int num_total)

>> +{

>> +       uint32_t pool_id = pool_id_from_buf(buf[0]);

>> +

>> +       buffer_free_to_pool(pool_id, buf, num_total);

>> +}

>>

>>  #ifdef __cplusplus

>>  }

>> diff --git a/platform/linux-generic/odp_packet.c b/platform/linux-generic/odp_packet.c

>> index 170965a..a0e65cc 100644

>> --- a/platform/linux-generic/odp_packet.c

>> +++ b/platform/linux-generic/odp_packet.c

>> @@ -30,7 +30,7 @@ static inline odp_packet_t packet_handle(odp_packet_hdr_t *pkt_hdr)

>>

>>  static inline odp_buffer_t buffer_handle(odp_packet_hdr_t *pkt_hdr)

>>  {

>> -       return pkt_hdr->buf_hdr.handle.handle;

>> +       return odp_hdr_to_buf(&pkt_hdr->buf_hdr);

>>  }

>>

>>  static inline uint32_t packet_ref_inc(odp_packet_hdr_t *pkt_hdr)

>> @@ -599,18 +599,69 @@ int odp_packet_alloc_multi(odp_pool_t pool_hdl, uint32_t len,

>>         return num;

>>  }

>>

>> -static inline void packet_free(odp_packet_hdr_t *pkt_hdr)

>> +static inline odp_packet_hdr_t *packet_free_to_list(odp_packet_hdr_t *pkt_hdr,

>> +                                                   odp_buffer_t buf[],

>> +                                                   int nbufs,

>> +                                                   int *nfree)

>>  {

>>         odp_packet_hdr_t *ref_hdr;

>>         uint32_t ref_count;

>> +       int num_seg, i;

>>

>>         do {

>> +               ref_count = packet_ref_count(pkt_hdr) - 1;

>> +               num_seg = pkt_hdr->buf_hdr.segcount;

>>                 ref_hdr = pkt_hdr->ref_hdr;

>> +

>> +               if (odp_likely((CONFIG_PACKET_MAX_SEGS == 1 || num_seg == 1) &&

>> +                              ref_count == 0)) {

>> +                       if (*nfree >= nbufs)

>> +                               break;

>> +

>> +                       buf[(*nfree)++] = buffer_handle(pkt_hdr);

>> +               } else {

>> +                       if (*nfree + num_seg >= nbufs)

>> +                               break;

>> +

>> +                       for (i = 0; i < num_seg; i++) {

>> +                               odp_packet_hdr_t *hdr =

>> +                                       pkt_hdr->buf_hdr.seg[i].hdr;

>> +

>> +                               if (packet_ref_dec(hdr) == 1)

>> +                                       buf[(*nfree)++] = buffer_handle(hdr);

>> +                       }

>> +

>> +                       if (ref_count == 1)

>> +                               pkt_hdr->unshared_len = pkt_hdr->frame_len;

>> +               }

>> +

>> +               pkt_hdr = ref_hdr;

>> +       } while (pkt_hdr);

>> +

>> +       return pkt_hdr;

>> +}

>> +

>> +static inline void packet_free(odp_packet_hdr_t *pkt_hdr)

>> +{

>> +       odp_packet_hdr_t *ref_hdr;

>> +       uint32_t ref_count;

>> +       int num_seg;

>> +

>> +       do {

>>                 ref_count = packet_ref_count(pkt_hdr) - 1;

>> -               free_bufs(pkt_hdr, 0, pkt_hdr->buf_hdr.segcount);

>> +               num_seg = pkt_hdr->buf_hdr.segcount;

>> +               ref_hdr = pkt_hdr->ref_hdr;

>>

>> -               if (ref_count == 1)

>> -                       pkt_hdr->unshared_len = pkt_hdr->frame_len;

>> +               if (odp_likely((CONFIG_PACKET_MAX_SEGS == 1 || num_seg == 1) &&

>> +                              ref_count == 0)) {

>> +                       buffer_free_multi((odp_buffer_t *)

>> +                                         &pkt_hdr->buf_hdr.handle.handle, 1);

>> +               } else {

>> +                       free_bufs(pkt_hdr, 0, num_seg);

>> +

>> +                       if (ref_count == 1)

>> +                               pkt_hdr->unshared_len = pkt_hdr->frame_len;

>> +               }

>>

>>                 pkt_hdr = ref_hdr;

>>         } while (pkt_hdr);

>> @@ -623,10 +674,40 @@ void odp_packet_free(odp_packet_t pkt)

>>

>>  void odp_packet_free_multi(const odp_packet_t pkt[], int num)

>>  {

>> -       int i;

>> +       int nbufs = num * CONFIG_PACKET_MAX_SEGS * 2;

>> +       odp_buffer_t buf[nbufs];

>> +       int nfree = 0;

>> +       int i = 0;

>> +       odp_packet_hdr_t *pkt_hdr = odp_packet_hdr(pkt[0]);

>> +       uint32_t pool_id = pool_id_from_buf(pkt_hdr->buf_hdr.handle.handle);

>> +       uint32_t next_pool;

>> +

>> +       while (1) {

>> +               pkt_hdr = packet_free_to_list(pkt_hdr, buf, nbufs, &nfree);

>>

>> -       for (i = 0; i < num; i++)

>> -               packet_free(odp_packet_hdr(pkt[i]));

>> +               if (pkt_hdr) {

>> +                       buffer_free_to_pool(pool_id, buf, nfree);

>> +                       nfree = 0;

>> +                       continue;

>> +               }

>> +

>> +               if (++i == num)

>> +                       break;

>> +

>> +               pkt_hdr = odp_packet_hdr(pkt[i]);

>> +               next_pool = pool_id_from_buf(pkt_hdr->buf_hdr.handle.handle);

>> +

>> +               if (odp_unlikely(next_pool != pool_id)) {

>> +                       if (nfree > 0) {

>> +                               buffer_free_to_pool(pool_id, buf, nfree);

>> +                               nfree = 0;

>> +                       }

>> +                       pool_id = next_pool;

>> +               }

>> +       }

>> +

>> +       if (nfree > 0)

>> +               buffer_free_to_pool(pool_id, buf, nfree);

>>  }

>>

>>  int odp_packet_reset(odp_packet_t pkt, uint32_t len)

>> diff --git a/platform/linux-generic/odp_pool.c b/platform/linux-generic/odp_pool.c

>> index 090a55f..2648068 100644

>> --- a/platform/linux-generic/odp_pool.c

>> +++ b/platform/linux-generic/odp_pool.c

>> @@ -47,19 +47,6 @@ typedef struct pool_local_t {

>>  pool_table_t *pool_tbl;

>>  static __thread pool_local_t local;

>>

>> -static inline odp_pool_t pool_index_to_handle(uint32_t pool_idx)

>> -{

>> -       return _odp_cast_scalar(odp_pool_t, pool_idx);

>> -}

>> -

>> -static inline uint32_t pool_id_from_buf(odp_buffer_t buf)

>> -{

>> -       odp_buffer_bits_t handle;

>> -

>> -       handle.handle = buf;

>> -       return handle.pool_id;

>> -}

>> -

>>  int odp_pool_init_global(void)

>>  {

>>         uint32_t i;

>> @@ -671,8 +658,8 @@ int buffer_alloc_multi(pool_t *pool, odp_buffer_t buf[],

>>         return num_ch + num_deq;

>>  }

>>

>> -static inline void buffer_free_to_pool(uint32_t pool_id,

>> -                                      const odp_buffer_t buf[], int num)

>> +void buffer_free_to_pool(uint32_t pool_id,

>> +                        const odp_buffer_t buf[], int num)

>>  {

>>         pool_t *pool;

>>         int i;

>> @@ -732,36 +719,6 @@ static inline void buffer_free_to_pool(uint32_t pool_id,

>>         cache->num = cache_num + num;

>>  }

>>

>> -void buffer_free_multi(const odp_buffer_t buf[], int num_total)

>> -{

>> -       uint32_t pool_id;

>> -       int num;

>> -       int i;

>> -       int first = 0;

>> -

>> -       while (1) {

>> -               num = 1;

>> -               i   = 1;

>> -               pool_id = pool_id_from_buf(buf[first]);

>> -

>> -               /* 'num' buffers are from the same pool */

>> -               if (num_total > 1) {

>> -                       for (i = first; i < num_total; i++)

>> -                               if (pool_id != pool_id_from_buf(buf[i]))

>> -                                       break;

>> -

>> -                       num = i - first;

>> -               }

>> -

>> -               buffer_free_to_pool(pool_id, &buf[first], num);

>> -

>> -               if (i == num_total)

>> -                       return;

>> -

>> -               first = i;

>> -       }

>> -}

>> -

>>  odp_buffer_t odp_buffer_alloc(odp_pool_t pool_hdl)

>>  {

>>         odp_buffer_t buf;

>> @@ -793,7 +750,20 @@ void odp_buffer_free(odp_buffer_t buf)

>>

>>  void odp_buffer_free_multi(const odp_buffer_t buf[], int num)

>>  {

>> -       buffer_free_multi(buf, num);

>> +       uint32_t next_pool, pool_id = pool_id_from_buf(buf[0]);

>> +       int i, first = 0;

>> +

>> +       for (i = 1; i < num; i++) {

>> +               next_pool = pool_id_from_buf(buf[i]);

>> +

>> +               if (odp_unlikely(next_pool != pool_id)) {

>> +                       buffer_free_to_pool(pool_id, &buf[first], i - first);

>> +                       first = i;

>> +                       pool_id = next_pool;

>> +               }

>> +       }

>> +

>> +       buffer_free_to_pool(pool_id, &buf[first], num - first);

>>  }

>>

>>  int odp_pool_capability(odp_pool_capability_t *capa)

>> --

>> 2.9.3

>>
diff mbox

Patch

diff --git a/platform/linux-generic/include/odp_pool_internal.h b/platform/linux-generic/include/odp_pool_internal.h
index b0805ac..8fdc688 100644
--- a/platform/linux-generic/include/odp_pool_internal.h
+++ b/platform/linux-generic/include/odp_pool_internal.h
@@ -121,9 +121,29 @@  static inline odp_buffer_hdr_t *buf_hdl_to_hdr(odp_buffer_t buf)
 	return pool_buf_hdl_to_hdr(pool, buf);
 }
 
+static inline odp_pool_t pool_index_to_handle(uint32_t pool_idx)
+{
+	return _odp_cast_scalar(odp_pool_t, pool_idx);
+}
+
+static inline uint32_t pool_id_from_buf(odp_buffer_t buf)
+{
+	odp_buffer_bits_t handle;
+
+	handle.handle = buf;
+	return handle.pool_id;
+}
+
 int buffer_alloc_multi(pool_t *pool, odp_buffer_t buf[],
 		       odp_buffer_hdr_t *buf_hdr[], int num);
-void buffer_free_multi(const odp_buffer_t buf[], int num_free);
+void buffer_free_to_pool(uint32_t pool_id, const odp_buffer_t buf[], int num);
+
+static inline void buffer_free_multi(const odp_buffer_t buf[], int num_total)
+{
+	uint32_t pool_id = pool_id_from_buf(buf[0]);
+
+	buffer_free_to_pool(pool_id, buf, num_total);
+}
 
 #ifdef __cplusplus
 }
diff --git a/platform/linux-generic/odp_packet.c b/platform/linux-generic/odp_packet.c
index 170965a..a0e65cc 100644
--- a/platform/linux-generic/odp_packet.c
+++ b/platform/linux-generic/odp_packet.c
@@ -30,7 +30,7 @@  static inline odp_packet_t packet_handle(odp_packet_hdr_t *pkt_hdr)
 
 static inline odp_buffer_t buffer_handle(odp_packet_hdr_t *pkt_hdr)
 {
-	return pkt_hdr->buf_hdr.handle.handle;
+	return odp_hdr_to_buf(&pkt_hdr->buf_hdr);
 }
 
 static inline uint32_t packet_ref_inc(odp_packet_hdr_t *pkt_hdr)
@@ -599,18 +599,69 @@  int odp_packet_alloc_multi(odp_pool_t pool_hdl, uint32_t len,
 	return num;
 }
 
-static inline void packet_free(odp_packet_hdr_t *pkt_hdr)
+static inline odp_packet_hdr_t *packet_free_to_list(odp_packet_hdr_t *pkt_hdr,
+						    odp_buffer_t buf[],
+						    int nbufs,
+						    int *nfree)
 {
 	odp_packet_hdr_t *ref_hdr;
 	uint32_t ref_count;
+	int num_seg, i;
 
 	do {
+		ref_count = packet_ref_count(pkt_hdr) - 1;
+		num_seg = pkt_hdr->buf_hdr.segcount;
 		ref_hdr = pkt_hdr->ref_hdr;
+
+		if (odp_likely((CONFIG_PACKET_MAX_SEGS == 1 || num_seg == 1) &&
+			       ref_count == 0)) {
+			if (*nfree >= nbufs)
+				break;
+
+			buf[(*nfree)++] = buffer_handle(pkt_hdr);
+		} else {
+			if (*nfree + num_seg >= nbufs)
+				break;
+
+			for (i = 0; i < num_seg; i++) {
+				odp_packet_hdr_t *hdr =
+					pkt_hdr->buf_hdr.seg[i].hdr;
+
+				if (packet_ref_dec(hdr) == 1)
+					buf[(*nfree)++] = buffer_handle(hdr);
+			}
+
+			if (ref_count == 1)
+				pkt_hdr->unshared_len = pkt_hdr->frame_len;
+		}
+
+		pkt_hdr = ref_hdr;
+	} while (pkt_hdr);
+
+	return pkt_hdr;
+}
+
+static inline void packet_free(odp_packet_hdr_t *pkt_hdr)
+{
+	odp_packet_hdr_t *ref_hdr;
+	uint32_t ref_count;
+	int num_seg;
+
+	do {
 		ref_count = packet_ref_count(pkt_hdr) - 1;
-		free_bufs(pkt_hdr, 0, pkt_hdr->buf_hdr.segcount);
+		num_seg = pkt_hdr->buf_hdr.segcount;
+		ref_hdr = pkt_hdr->ref_hdr;
 
-		if (ref_count == 1)
-			pkt_hdr->unshared_len = pkt_hdr->frame_len;
+		if (odp_likely((CONFIG_PACKET_MAX_SEGS == 1 || num_seg == 1) &&
+			       ref_count == 0)) {
+			buffer_free_multi((odp_buffer_t *)
+					  &pkt_hdr->buf_hdr.handle.handle, 1);
+		} else {
+			free_bufs(pkt_hdr, 0, num_seg);
+
+			if (ref_count == 1)
+				pkt_hdr->unshared_len = pkt_hdr->frame_len;
+		}
 
 		pkt_hdr = ref_hdr;
 	} while (pkt_hdr);
@@ -623,10 +674,40 @@  void odp_packet_free(odp_packet_t pkt)
 
 void odp_packet_free_multi(const odp_packet_t pkt[], int num)
 {
-	int i;
+	int nbufs = num * CONFIG_PACKET_MAX_SEGS * 2;
+	odp_buffer_t buf[nbufs];
+	int nfree = 0;
+	int i = 0;
+	odp_packet_hdr_t *pkt_hdr = odp_packet_hdr(pkt[0]);
+	uint32_t pool_id = pool_id_from_buf(pkt_hdr->buf_hdr.handle.handle);
+	uint32_t next_pool;
+
+	while (1) {
+		pkt_hdr = packet_free_to_list(pkt_hdr, buf, nbufs, &nfree);
 
-	for (i = 0; i < num; i++)
-		packet_free(odp_packet_hdr(pkt[i]));
+		if (pkt_hdr) {
+			buffer_free_to_pool(pool_id, buf, nfree);
+			nfree = 0;
+			continue;
+		}
+
+		if (++i == num)
+			break;
+
+		pkt_hdr = odp_packet_hdr(pkt[i]);
+		next_pool = pool_id_from_buf(pkt_hdr->buf_hdr.handle.handle);
+
+		if (odp_unlikely(next_pool != pool_id)) {
+			if (nfree > 0) {
+				buffer_free_to_pool(pool_id, buf, nfree);
+				nfree = 0;
+			}
+			pool_id = next_pool;
+		}
+	}
+
+	if (nfree > 0)
+		buffer_free_to_pool(pool_id, buf, nfree);
 }
 
 int odp_packet_reset(odp_packet_t pkt, uint32_t len)
diff --git a/platform/linux-generic/odp_pool.c b/platform/linux-generic/odp_pool.c
index 090a55f..2648068 100644
--- a/platform/linux-generic/odp_pool.c
+++ b/platform/linux-generic/odp_pool.c
@@ -47,19 +47,6 @@  typedef struct pool_local_t {
 pool_table_t *pool_tbl;
 static __thread pool_local_t local;
 
-static inline odp_pool_t pool_index_to_handle(uint32_t pool_idx)
-{
-	return _odp_cast_scalar(odp_pool_t, pool_idx);
-}
-
-static inline uint32_t pool_id_from_buf(odp_buffer_t buf)
-{
-	odp_buffer_bits_t handle;
-
-	handle.handle = buf;
-	return handle.pool_id;
-}
-
 int odp_pool_init_global(void)
 {
 	uint32_t i;
@@ -671,8 +658,8 @@  int buffer_alloc_multi(pool_t *pool, odp_buffer_t buf[],
 	return num_ch + num_deq;
 }
 
-static inline void buffer_free_to_pool(uint32_t pool_id,
-				       const odp_buffer_t buf[], int num)
+void buffer_free_to_pool(uint32_t pool_id,
+			 const odp_buffer_t buf[], int num)
 {
 	pool_t *pool;
 	int i;
@@ -732,36 +719,6 @@  static inline void buffer_free_to_pool(uint32_t pool_id,
 	cache->num = cache_num + num;
 }
 
-void buffer_free_multi(const odp_buffer_t buf[], int num_total)
-{
-	uint32_t pool_id;
-	int num;
-	int i;
-	int first = 0;
-
-	while (1) {
-		num = 1;
-		i   = 1;
-		pool_id = pool_id_from_buf(buf[first]);
-
-		/* 'num' buffers are from the same pool */
-		if (num_total > 1) {
-			for (i = first; i < num_total; i++)
-				if (pool_id != pool_id_from_buf(buf[i]))
-					break;
-
-			num = i - first;
-		}
-
-		buffer_free_to_pool(pool_id, &buf[first], num);
-
-		if (i == num_total)
-			return;
-
-		first = i;
-	}
-}
-
 odp_buffer_t odp_buffer_alloc(odp_pool_t pool_hdl)
 {
 	odp_buffer_t buf;
@@ -793,7 +750,20 @@  void odp_buffer_free(odp_buffer_t buf)
 
 void odp_buffer_free_multi(const odp_buffer_t buf[], int num)
 {
-	buffer_free_multi(buf, num);
+	uint32_t next_pool, pool_id = pool_id_from_buf(buf[0]);
+	int i, first = 0;
+
+	for (i = 1; i < num; i++) {
+		next_pool = pool_id_from_buf(buf[i]);
+
+		if (odp_unlikely(next_pool != pool_id)) {
+			buffer_free_to_pool(pool_id, &buf[first], i - first);
+			first = i;
+			pool_id = next_pool;
+		}
+	}
+
+	buffer_free_to_pool(pool_id, &buf[first], num - first);
 }
 
 int odp_pool_capability(odp_pool_capability_t *capa)