diff mbox

[API-NEXTv5,1/6] linux-gen: _ishm: adding buddy and slab allocation

Message ID 1483456215-40789-2-git-send-email-christophe.milard@linaro.org
State Superseded
Headers show

Commit Message

Christophe Milard Jan. 3, 2017, 3:10 p.m. UTC
_ishm now provides functions to create/destroy pools for buddy/slab
memory allocation, as well as functions to allocate/release memory
from the created pools.

Signed-off-by: Christophe Milard <christophe.milard@linaro.org>

---
 platform/linux-generic/Makefile.am                 |   2 +
 platform/linux-generic/_ishm.c                     |  14 +-
 platform/linux-generic/_ishmpool.c                 | 811 +++++++++++++++++++++
 .../linux-generic/include/_ishmpool_internal.h     |  56 ++
 4 files changed, 882 insertions(+), 1 deletion(-)
 create mode 100644 platform/linux-generic/_ishmpool.c
 create mode 100644 platform/linux-generic/include/_ishmpool_internal.h

-- 
2.7.4

Comments

Yi He Jan. 12, 2017, 7:52 a.m. UTC | #1
Hi, Christophe,

Q1: the patchset subject prefix looks slightly wrong: "API-NEXTv5" should be
"API-NEXT PATCHv5".

Q2: the 1/6 patch does not apply to api-next. I tried to fix it up so that I
could continue testing, but that did not work; it needs a rebase, and I'll test it then.

The patchset code looks OK to me; the questions below are to clarify the
usage of these APIs:

Q3: In practical usage, does this mean that all ishmpool creations need to
happen in the
first control thread, before any other threads are spawned?

Otherwise, if a worker ODP thread is spawned first and then creates an
ishmpool, it seems that
memory allocated from this pool won't be accessible to other ODP
threads, because
the shm_attach and mmap did not happen in the other ODP threads (processes).

So does the control thread need to collect all the worker threads'
small-memory size
requirements and create all ishmpools in a batch before spawning the
worker threads?

My understanding of the typical usage:

If the size requirements for small memory are not very specific, create one
or several pools with a
[min, max] range to support small memory allocation.

If the size requirements for small memory are specific, for example for a
specific data
structure or for small packets, create several pools, each with a dedicated [size].

Thanks and best regards, Yi

On 3 January 2017 at 23:10, Christophe Milard <christophe.milard@linaro.org>
wrote:

> _ishm now provides functions to create/destroy pools for buddy/slab

> memory allocation, as well as functions to allocated/release memory


from the created pools.
>

> Signed-off-by: Christophe Milard <christophe.milard@linaro.org>

> ---

>  platform/linux-generic/Makefile.am                 |   2 +

>  platform/linux-generic/_ishm.c                     |  14 +-

>  platform/linux-generic/_ishmpool.c                 | 811

> +++++++++++++++++++++

>  .../linux-generic/include/_ishmpool_internal.h     |  56 ++

>  4 files changed, 882 insertions(+), 1 deletion(-)

>  create mode 100644 platform/linux-generic/_ishmpool.c

>  create mode 100644 platform/linux-generic/include/_ishmpool_internal.h

>

> diff --git a/platform/linux-generic/Makefile.am

> b/platform/linux-generic/Makefile.am

> index 999a7f5..d153c5d 100644

> --- a/platform/linux-generic/Makefile.am

> +++ b/platform/linux-generic/Makefile.am

> @@ -127,6 +127,7 @@ noinst_HEADERS = \

>                   ${srcdir}/include/_fdserver_internal.h \

>                   ${srcdir}/include/_ishm_internal.h \

>                   ${srcdir}/include/_ishmphy_internal.h \

> +                 ${srcdir}/include/_ishmpool_internal.h \

>                   ${srcdir}/include/odp_align_internal.h \

>                   ${srcdir}/include/odp_atomic_internal.h \

>                   ${srcdir}/include/odp_buffer_inlines.h \

> @@ -171,6 +172,7 @@ __LIB__libodp_linux_la_SOURCES = \

>                            _fdserver.c \

>                            _ishm.c \

>                            _ishmphy.c \

> +                          _ishmpool.c \

>                            odp_atomic.c \

>                            odp_barrier.c \

>                            odp_buffer.c \

> diff --git a/platform/linux-generic/_ishm.c b/platform/linux-generic/_ishm

> .c

> index 6d5b752..0befdba 100644

> --- a/platform/linux-generic/_ishm.c

> +++ b/platform/linux-generic/_ishm.c

> @@ -59,6 +59,7 @@

>  #include <_fdserver_internal.h>

>  #include <_ishm_internal.h>

>  #include <_ishmphy_internal.h>

> +#include <_ishmpool_internal.h>

>  #include <stdlib.h>

>  #include <stdio.h>

>  #include <unistd.h>

> @@ -1441,8 +1442,19 @@ int _odp_ishm_init_global(void)

>          * is performed for the main thread... Many init_global() functions

>          * indeed assume the availability of odp_shm_reserve()...:

>          */

> -       return do_odp_ishm_init_local();

> +       if (do_odp_ishm_init_local()) {

> +               ODP_ERR("unable to init the main thread\n.");

> +               goto init_glob_err4;

> +       }

> +

> +       /* get ready to create pools: */

> +       _odp_ishm_pool_init();

>

> +       return 0;

> +

> +init_glob_err4:

> +       if (_odp_ishmphy_unbook_va())

> +               ODP_ERR("unable to unbook virtual space\n.");

>  init_glob_err3:

>         if (munmap(ishm_ftbl, sizeof(ishm_ftable_t)) < 0)

>                 ODP_ERR("unable to munmap main fragment table\n.");

> diff --git a/platform/linux-generic/_ishmpool.c

> b/platform/linux-generic/_ishmpool.c

> new file mode 100644

> index 0000000..df6e49e

> --- /dev/null

> +++ b/platform/linux-generic/_ishmpool.c

> @@ -0,0 +1,811 @@

> +/* Copyright (c) 2017, Linaro Limited

> + * All rights reserved.

> + *

> + * SPDX-License-Identifier:     BSD-3-Clause

> + */

> +

> +/* This file gathers the buddy and slab allocation functionality provided

> + * by _ishm.

> + * _odp_ishmpool_create() can be used to create a pool for buddy/slab

> + * allocation. _odp_ishmpool_create() will allocate a memory area using

> + * ishm_reserve() for both the control part (needed for tracking

> + * allocation/free...) and the user memory itself (part of which will be

> given

> + * at each ishmpool_alloc()).

> + * The element size provided at pool creation time determines whether

> + * to pool will of type buddy or slab.


> + * For buddy, all allocations are rounded up to the next power of 2.
> + *

> + * The implementation of the buddy allocator is very traditional: it

> + * maintains N lists of free buffers.

> + * The control part actually contains these N queue heads, (N-M are

> actually

> + * used), the free buffers themselves being used for chaining (the

> chaining info

> + * is in the buffers: as they are "free" they should not be touched by the

> + * user). The control part also contains a array of bytes for remembering


+ * the size (actually the order) of the allocated buffers:
> + * There are 2^(N-M) such bytes, this number being the maximum number of

> + * allocated buffers (when all allocation are <= 2^M bytes)

> + * Buddy allocators handle fragmentation by splitting or merging blocks

> by 2.

> + * They guarantee a minimum efficiency of 50%, at worse case

> fragmentation.

> + *

> + * Slab implementation is even simpler, all free elements being queued in

> + * one single queue at init, taken from this queue when allocated and

> + * returned to this same queue when freed.

> + *

> + * The reason for not using malloc() is that malloc does not guarantee

> + * memory sharability between ODP threads (regardless of their

> implememtation)

> + * which ishm_reserve() can do. see the comments around

> + * _odp_ishmbud_pool_create() and ishm_reserve() for more details.

> + *

> + * This file is divided in 3 sections: the first one regroups functions

> + * needed by the buddy allocation.

> + * The second one regroups the functions needed by the slab allocator.

> + * The third section regroups the common functions exported externally.

> + */

> +

> +#include <odp_posix_extensions.h>

> +#include <odp_internal.h>

> +#include <odp/api/spinlock.h>

> +#include <odp/api/align.h>

> +#include <odp/api/debug.h>

> +#include <odp/drv/shm.h>

> +#include <odp_shm_internal.h>

> +#include <odp_debug_internal.h>

> +#include <odp_align_internal.h>

> +#include <_ishm_internal.h>

> +#include <_ishmpool_internal.h>

> +#include <stdlib.h>

> +#include <stdio.h>

> +#include <unistd.h>

> +#include <string.h>

> +#include <inttypes.h>

> +

> +#define BUDDY_MIN_SIZE 32 /* minimal buddy allocation size */

> +

> +typedef _odp_ishm_pool_t pool_t; /* for shorter writing             */

> +

> +/* array of ishm block index used for pools. only used for pool

> + * lookup by name */

> +#define MAX_NB_POOL 100

> +static int pool_blk_idx[MAX_NB_POOL];


+
> +/* section 1: functions for buddy allocation:

>      */

> +

> +/* free buddy blocks contains the following structure, used to link the

> + * free blocks together.

> + */

> +typedef struct bblock_t {

> +       struct bblock_t *next;

> +       uint32_t order;

> +} bblock_t;

> +

> +/* value set in the 'order' table when the block is not allocated:   */

> +#define BBLOCK_FREE 0

> +

> +/* compute ceil(log2(size)) */

> +static uint8_t clog2(uint64_t size)

> +{

> +       uint64_t sz;

> +       uint32_t bit;

> +       uint8_t res;

> +

> +       sz = size;      /* we start by computing res = log2(sz)...   */

> +       res = 0;

> +       for (bit = 32; bit ; bit >>= 1) {

> +               if (sz >= ((uint64_t)1 << bit)) {

> +                       sz >>= bit;

> +                       res += bit;

> +               }

> +       }

> +       if (((uint64_t)1 << res) < size) /* ...and then ceil(x)      */

> +               res++;

> +

> +       return res;

> +}

> +

> +/*

> + * given a bblock address, and an order value, returns the address

> + * of the buddy bblock (the other "half")

> + */

> +static inline bblock_t *get_bblock_buddy(pool_t *bpool, bblock_t *addr,

> +                                        uint8_t order)

> +{

> +       uintptr_t b;

> +

> +       b = ((uintptr_t)addr - (uintptr_t)bpool->ctrl.user_addr);

> +       b ^= 1 << order;

> +       return (void *)(b + (uintptr_t)bpool->ctrl.user_addr);

> +}

> +

> +/*

> + * given a buddy block address, return its number (used for busy flags):

> + */

> +static inline uintptr_t get_bblock_nr(pool_t *bpool, void *addr)

> +{

> +       uintptr_t b;

> +       uint8_t min_order;

> +

> +       min_order = bpool->ctrl.min_order;

> +       b = ((uintptr_t)addr - (uintptr_t)bpool->ctrl.user_addr) >>

> min_order;

> +       return b;

> +}

> +

> +/* remove bblock from the list for bblocks of rank order. The bblock to be

> + * removed is really expected to be on the list: not finding it is an

> error */

> +static inline void remove_from_list(pool_t *bpool, uint8_t order,

> +                                   bblock_t *bblock)

> +{

> +       bblock_t *curr;       /* current bblock (when parsing list) */

> +       bblock_t *prev;       /* previous bblock (when parsing list) */

> +

> +       curr = bpool->ctrl.free_heads[order];

> +       if (!curr)

> +               goto remove_from_list_error;

> +

> +       if (curr == bblock) {

> +               bpool->ctrl.free_heads[order] = curr->next;

> +               return;

> +       }

> +

> +       while (curr) {

> +               if (curr == bblock) {

> +                       prev->next = curr->next;

> +                       return;

> +               }

> +               prev = curr;

> +               curr = curr->next;

> +       }

> +

> +remove_from_list_error:

> +       ODP_ERR("List corrupted\n");

> +}

> +

> +/*

> + * create a buddy memory pool of given size (actually nearest power of 2),

> + * where allocation will never be smaller than min_alloc.

> + * returns a pointer to the created buddy_pool

> + * The allocated area contains:

> + * - The _odp_ishm_pool_ctrl_t structure

> + * - The array of ((order - min_order) of free list heads

> + * - The array of 'order' values, remembering sizes of allocated bblocks

> + * - alignment to cache line

> + * - The user memory

> + */

> +static pool_t *_odp_ishmbud_pool_create(const char *pool_name, int

> store_idx,

> +                                       uint64_t size,

> +                                       uint64_t min_alloc, int flags)

> +{

> +       uint8_t  order;          /* pool order = ceil(log2(size))

>  */

> +       uint8_t  min_order;      /* pool min_order =

> ceil(log2(min_alloc))*/

> +       uint32_t max_nb_bblock;  /* max number of bblock, when smallest

>  */

> +       uint32_t control_sz;     /* size of control area

> */

> +       uint32_t free_head_sz;   /* mem area needed for list heads

> */

> +       uint32_t saved_order_sz; /* mem area to remember given sizes

> */

> +       uint64_t user_sz;        /* 2^order bytes

>  */

> +       uint64_t total_sz;       /* total size to request

>  */

> +       int      blk_idx;        /* as returned by _ishm_resrve()

>  */

> +       pool_t *bpool;

> +       int i;

> +       bblock_t *first_block;

> +

> +       /* a bblock_t must fit in the buffers for linked chain! */

> +       if (min_alloc < sizeof(bblock_t))

> +               min_alloc = sizeof(bblock_t);

> +

> +       /* pool order is such that 2^order = size. same for min_order   */

> +       order = clog2(size);

> +       min_order = clog2(min_alloc);

> +

> +       /* check parameters obvious wishes: */

> +       if (order >= 64)

> +               return NULL;

> +       if (order < min_order)

> +               return NULL;

> +

> +       /* at worst case, all bblocks have smallest (2^min_order) size  */

> +       max_nb_bblock = (1 << (order - min_order));

> +

> +       /* space needed for the control area (padded to cache line size)*/

> +       control_sz =

> +               ODP_CACHE_LINE_SIZE_ROUNDUP(s

> izeof(_odp_ishm_pool_ctrl_t));

> +

> +       /* space needed for 'order' free bblock list heads:             */

> +       /* Note that only lists from min_order to order are really used.*/

> +       free_head_sz = ODP_CACHE_LINE_SIZE_ROUNDUP(sizeof(void *) *

> +                                                  (order + 1));

> +

> +       /* space needed for order -i.e. size- storage of alloc'd bblock:*/

> +       saved_order_sz = ODP_CACHE_LINE_SIZE_ROUNDUP(max_nb_bblock *

> +                                                    sizeof(uint8_t));

> +

> +       /* space needed for user area is 2^order bytes: */

> +       user_sz = 1 << order;

> +

> +       total_sz = control_sz +

> +                  free_head_sz +

> +                  saved_order_sz +

> +                  user_sz;

> +

> +       /* allocate required memory: */

> +       blk_idx = _odp_ishm_reserve(pool_name, total_sz, -1,

> +                                   ODP_CACHE_LINE_SIZE, flags, 0);

> +       if (blk_idx < 0) {

> +               ODP_ERR("_odp_ishm_reserve failed.");

> +               return NULL;

> +       }

> +

> +       bpool = _odp_ishm_address(blk_idx);

> +       if (bpool == NULL) {

> +               ODP_ERR("_odp_ishm_address failed.");

> +               return NULL;

> +       }

> +

> +       /* store in pool array (needed for look up): */

> +       pool_blk_idx[store_idx] = blk_idx;

> +

> +       /* remember block index, needed when pool is destroyed */

> +       bpool->ctrl.ishm_blk_idx = blk_idx;

> +

> +       /* remember element size: 0 means unknown size, i.e. buddy

> alloation*/

> +       bpool->ctrl.element_sz = 0;

> +

> +       /* prepare mutex: */

> +       odp_spinlock_init(&bpool->ctrl.lock);

> +

> +       /* initialise pointers and things... */

> +       bpool->ctrl.order = order;

> +       bpool->ctrl.min_order = min_order;

> +       bpool->ctrl.free_heads =

> +               (void *)((uintptr_t)bpool + control_sz);

> +       bpool->ctrl.alloced_order =

> +               (uint8_t *)((uintptr_t)bpool->ctrl.free_heads +

> free_head_sz);

> +       bpool->ctrl.user_addr =

> +               (void *)((uintptr_t)bpool->ctrl.alloced_order +

> saved_order_sz);

> +

> +       /* initialize all free list to NULL, except the top biggest

> element:*/

> +       for (i = 0; i < (order - min_order); i++)

> +               bpool->ctrl.free_heads[i] = NULL;

> +       bpool->ctrl.free_heads[order] = bpool->ctrl.user_addr;

> +       first_block = (bblock_t *)bpool->ctrl.user_addr;

> +       first_block->next = NULL;

> +       first_block->order = order;

> +

> +       /* set all 'order' of allocated bblocks to free: */

> +       memset(bpool->ctrl.alloced_order, BBLOCK_FREE, saved_order_sz);

> +

> +       return bpool;

> +}

> +

> +/* allocated memory from the given buddy pool */

> +static void *_odp_ishmbud_alloc(pool_t *bpool, uint64_t size)

> +{

> +       uint32_t rq_order; /* requested order */

> +       uint32_t try_order;

> +       bblock_t *bblock;

> +       bblock_t *buddy;

> +       uintptr_t nr;

> +

> +       /* if size is zero or too big reject: */

> +       if ((!size) && (size > (1U << bpool->ctrl.order))) {

> +               ODP_ERR("Invalid alloc size (0 or larger than whole

> pool)\n");

> +               return NULL;

> +       }

> +

> +       /* compute ceil(log2(size)), to get the requested block order:

> */

> +       rq_order = clog2(size);

> +

> +       /* make sure the requested order is bigger (or same) as minimum!

> */

> +       if (rq_order < bpool->ctrl.min_order)

> +               rq_order = bpool->ctrl.min_order;

> +

> +       /* mutex from here: */

> +       odp_spinlock_lock(&bpool->ctrl.lock);

> +

> +       /* now, start trying to allocate a bblock of rq_order. If that

> +        * fails keep trying larger orders until pool order is reached

> */

> +       bblock = NULL;

> +       for (try_order = rq_order; try_order <= bpool->ctrl.order;

> +            try_order++) {

> +               if (bpool->ctrl.free_heads[try_order]) {

> +                       /* remove from list: */

> +                       bblock =

> +                               (bblock_t *)(bpool->ctrl.free_heads[try_

> order]);

> +                       bpool->ctrl.free_heads[try_order] = bblock->next;

> +                       break;

> +               }

> +       }

> +

> +       if (!bblock) {

> +               odp_spinlock_unlock(&bpool->ctrl.lock);

> +               ODP_ERR("Out of memory. (Buddy pool full)\n");

> +               return NULL;

> +       }

> +

> +       /* OK: we got a block, but possibbly too large (if

> try_order>rq_order)

> +        * return the extra halves to the pool hence splitting the bblock

> at

> +        * each 'extra' order: */

> +       while (try_order-- > rq_order) {

> +               /* split: */

> +               buddy = (bblock_t *)((uintptr_t)bblock + (1 << try_order));

> +               buddy->order = try_order;

> +               /* add to list: */

> +               buddy->next = bpool->ctrl.free_heads[try_order];

> +               bpool->ctrl.free_heads[try_order] = buddy;

> +               /* mark as free (non allocated block get size 0): */

> +               nr = get_bblock_nr(bpool, buddy);

> +               bpool->ctrl.alloced_order[nr] = BBLOCK_FREE;

> +       }

> +

> +       /* remember the size if the allocated block: */

> +       nr = get_bblock_nr(bpool, bblock);

> +       bpool->ctrl.alloced_order[nr] = rq_order;

> +

> +       /* and return the allocated block! */

> +       odp_spinlock_unlock(&bpool->ctrl.lock);

> +       return (void *)bblock;

> +}

> +

> +/* free a previously allocated buffer from a given buddy pool */

> +static int _odp_ishmbud_free(pool_t *bpool, void *addr)

> +{

> +       uintptr_t user_start; /* start of user area */

> +       uintptr_t user_stop;  /* stop  of user area */

> +       uintptr_t mask;       /* 2^min_order - 1    */

> +       bblock_t *bblock;     /* bblock being freed */

> +       bblock_t *buddy;      /* buddy bblock of bblock being freed */

> +       uint8_t order;        /* order of block being freed */

> +       uintptr_t nr;         /* block number */

> +

> +       /* freeing NULL is regarded as OK, though without any effect:   */

> +       if (!addr)

> +               return 0;

> +

> +       user_start = (uintptr_t)bpool->ctrl.user_addr;

> +       user_stop  = user_start + ((uintptr_t)1 << bpool->ctrl.order);

> +       mask = ((uintptr_t)1 << bpool->ctrl.min_order) - 1;

> +

> +       /* some sanity checks: check that given address is within pool and

> +        * that relative address has 2^min_order granularity:           */

> +       if (((uintptr_t)addr < user_start) ||

> +           ((uintptr_t)addr > user_stop)  ||

> +           (((uintptr_t)addr - user_start) & mask)) {

> +               ODP_ERR("Invalid address to be freed\n");

> +               return -1;

> +       }

> +

> +       /* mutex from here: */

> +       odp_spinlock_lock(&bpool->ctrl.lock);

> +

> +       /* collect saved block order and make sure bblock was allocated */

> +       bblock = (bblock_t *)addr;

> +       nr = get_bblock_nr(bpool, bblock);

> +       order = bpool->ctrl.alloced_order[nr];

> +       if (order == BBLOCK_FREE) {

> +               ODP_ERR("Double free error\n");

> +               odp_spinlock_unlock(&bpool->ctrl.lock);

> +               return -1;

> +       }

> +

> +       /* this looks like a valid free, mark at least this as free:   */

> +       bpool->ctrl.alloced_order[nr] = BBLOCK_FREE;

> +

> +       /* go up in orders, trying to merge buddies... */

> +       while (order < bpool->ctrl.order) {

> +               buddy = get_bblock_buddy(bpool, bblock, order);

> +               /*if buddy is not free: no further merge possible */

> +               nr = get_bblock_nr(bpool, buddy);

> +               if (bpool->ctrl.alloced_order[nr] != BBLOCK_FREE)

> +                       break;

> +               /*merge only bblock of same order:*/

> +               if (buddy->order != order)

> +                       break;

> +               /*merge: remove buddy from free list: */

> +               remove_from_list(bpool, order, buddy);

> +               /*merge: make sure we point at start of block: */

> +               if (bblock > buddy)

> +                       bblock = buddy;

> +               /*merge: size of bloack has dubbled: increse order: */

> +               order++;

> +       }

> +

> +       /* insert the bblock into its correct free block list: */

> +       bblock->next = bpool->ctrl.free_heads[order];

> +       bpool->ctrl.free_heads[order] = bblock;

> +

> +       /* remember the (possibly now merged) block order: */

> +       bblock->order = order;

> +

> +       odp_spinlock_unlock(&bpool->ctrl.lock);

> +       return 0;

> +}

> +

> +/* print buddy pool status and performs sanity checks */

> +static int _odp_ishmbud_pool_status(const char *title, pool_t *bpool)

> +{

> +       uint8_t order, pool_order, pool_min_order;

> +       uint64_t free_q_nb_bblocks[64];

> +       uint64_t allocated_nb_bblocks[64];

> +       uint64_t free_q_nb_bblocks_bytes[64];

> +       uint64_t allocated_nb_bblocks_bytes[64];

> +       uint64_t total_bytes_free;

> +       uint64_t total_bytes_allocated;

> +       uint64_t nr;

> +       bblock_t *bblock;

> +       int res = 0;

> +

> +       odp_spinlock_lock(&bpool->ctrl.lock);

> +

> +       pool_order = bpool->ctrl.order;

> +       pool_min_order = bpool->ctrl.min_order;

> +

> +       ODP_DBG("\n%s\n", title);

> +       ODP_DBG("Pool Type: BUDDY\n");

> +       ODP_DBG("pool size: %" PRIu64 " (bytes)\n", (1UL << pool_order));

> +       ODP_DBG("pool order: %d\n", (int)pool_order);

> +       ODP_DBG("pool min_order: %d\n", (int)pool_min_order);

> +

> +       /* a pool wholse order is more than 64 cannot even be reached on 64

> +        * bit machines! */

> +       if (pool_order > 64) {

> +               odp_spinlock_unlock(&bpool->ctrl.lock);

> +               return -1;

> +       }

> +

> +       total_bytes_free = 0;

> +       total_bytes_allocated = 0;

> +

> +       /* for each queue */

> +       for (order = pool_min_order; order <= pool_order; order++) {

> +               free_q_nb_bblocks[order] = 0;

> +               free_q_nb_bblocks_bytes[order] = 0;

> +               allocated_nb_bblocks[order] = 0;

> +               allocated_nb_bblocks_bytes[order] = 0;

> +

> +               /* get the number of buffs in the free queue for this

> order: */

> +               bblock = bpool->ctrl.free_heads[order];

> +               while (bblock) {

> +                       free_q_nb_bblocks[order]++;

> +                       free_q_nb_bblocks_bytes[order] += (1 << order);

> +                       bblock = bblock->next;

> +               }

> +

> +               total_bytes_free += free_q_nb_bblocks_bytes[order];

> +

> +               /* get the number of allocated buffers of this order */

> +               for (nr = 0;

> +                    nr < (1U << (pool_order - pool_min_order)); nr++) {

> +                       if (bpool->ctrl.alloced_order[nr] == order)

> +                               allocated_nb_bblocks[order]++;

> +               }

> +

> +               allocated_nb_bblocks_bytes[order] =

> +                       allocated_nb_bblocks[order] * (1 << order);

> +

> +               total_bytes_allocated += allocated_nb_bblocks_bytes[ord

> er];

> +

> +               ODP_DBG("Order %d => Free: %" PRIu64 " buffers "

> +                       "(%" PRIu64" bytes)   "

> +                       "Allocated %" PRIu64 " buffers (%" PRIu64 "

> bytes)   "

> +                       "Total: %" PRIu64 "  bytes\n",

> +                       (int)order, free_q_nb_bblocks[order],

> +                       free_q_nb_bblocks_bytes[order],

> +                       allocated_nb_bblocks[order],

> +                       allocated_nb_bblocks_bytes[order],

> +                       free_q_nb_bblocks_bytes[order] +

> +                       allocated_nb_bblocks_bytes[order]);

> +       }

> +

> +       ODP_DBG("Allocated space: %" PRIu64 " (bytes)\n",

> +               total_bytes_allocated);

> +       ODP_DBG("Free space: %" PRIu64 " (bytes)\n", total_bytes_free);

> +

> +       if (total_bytes_free + total_bytes_allocated != (1U <<

> pool_order)) {

> +               ODP_DBG("Lost bytes on this pool!\n");

> +               res = -1;

> +       }

> +

> +       if (res)

> +               ODP_DBG("Pool inconsistent!\n");

> +

> +       odp_spinlock_unlock(&bpool->ctrl.lock);

> +       return res;

> +}

> +

> +/* section 2: functions for slab allocation:

>     */

> +

> +/* free slab blocks contains the following structure, used to link the

> + * free blocks together.

> + */

> +typedef struct sblock_t {

> +       struct sblock_t *next;

> +} sblock_t;

> +

> +/*

> + * create a slab memory pool of given size (rounded up to the nearest

> integer

> + * number of element, where each element has size 'elt_size').

> + * returns a pointer to the created slab pool.

> + * The allocated area contains:

> + * - The _odp_ishm_pool_ctrl_t structure

> + * - alignment to cache line

> + * - The user memory

> + */

> +static pool_t *_odp_ishmslab_pool_create(const char *pool_name, int

> store_idx,

> +                                        uint64_t size,

> +                                        uint64_t elt_size, int flags)

> +{

> +       uint32_t nb_sblock;      /* number of elements in the pool

> */

> +       uint32_t control_sz;     /* size of control area

> */

> +       uint64_t total_sz;       /* total size to request

>  */

> +       uint64_t user_sz;        /* 2^order bytes

>  */

> +       int      blk_idx;        /* as returned by _ishm_reserve()

> */

> +       pool_t *spool;

> +       unsigned int i;

> +       sblock_t *block;

> +

> +       /* a sblock_t must fit in the buffers for linked chain! */

> +       if (elt_size < sizeof(bblock_t)) {

> +               elt_size = sizeof(bblock_t);

> +               size = size * (sizeof(bblock_t) / elt_size +

> +                              ((sizeof(bblock_t) % elt_size) ? 1 : 0));

> +       }

> +

> +       /* nb of element fitting in the pool is just ceil(size/elt_size)*/

> +       nb_sblock = (size / elt_size) + ((size % elt_size) ? 1 : 0);

> +

> +       /* space needed for the control area (padded to cache line size)*/

> +       control_sz =

> +               ODP_CACHE_LINE_SIZE_ROUNDUP(s

> izeof(_odp_ishm_pool_ctrl_t));

> +

> +       /* space needed for user area is : */

> +       user_sz = nb_sblock * elt_size;

> +

> +       total_sz = control_sz +

> +                  user_sz;

> +

> +       /* allocate required memory: */

> +       blk_idx = _odp_ishm_reserve(pool_name, total_sz, -1,

> +                                   ODP_CACHE_LINE_SIZE, flags, 0);

> +       if (blk_idx < 0) {

> +               ODP_ERR("_odp_ishm_reserve failed.");

> +               return NULL;

> +       }

> +

> +       spool = _odp_ishm_address(blk_idx);

> +       if (spool == NULL) {

> +               ODP_ERR("_odp_ishm_address failed.");

> +               return NULL;

> +       }

> +

> +       /* store in pool array (needed for look up): */

> +       pool_blk_idx[store_idx] = blk_idx;

> +

> +       /* remember block index, needed when pool is destroyed */

> +       spool->ctrl.ishm_blk_idx = blk_idx;

> +

> +       /* remember element (sblock) size and their number: */

> +       spool->ctrl.element_sz = elt_size;

> +       spool->ctrl.nb_elem = nb_sblock;

> +

> +       /* prepare mutex: */

> +       odp_spinlock_init(&spool->ctrl.lock);

> +

> +       /* initialise pointers and things... */

> +       spool->ctrl.user_addr =

> +               (void *)((uintptr_t)spool + control_sz);

> +

> +       /* initialise the free list with the list of all elements:*/

> +       spool->ctrl.free_head = spool->ctrl.user_addr;

> +       for (i = 0; i < nb_sblock - 1; i++) {

> +               block = (sblock_t *)((uintptr_t)spool->ctrl.user_addr +

> +                                    i * (uintptr_t)elt_size);

> +               block->next = (sblock_t *)((uintptr_t)block +

> +                                          (uintptr_t)elt_size);

> +       }

> +       block = (sblock_t *)((uintptr_t)spool->ctrl.user_addr +

> +                            (nb_sblock - 1) * (uintptr_t)elt_size);

> +       block->next = NULL;

> +

> +       return spool;

> +}

> +

> +/* allocated memory from the given slab pool */

> +static void *_odp_ishmslab_alloc(pool_t *spool, uint64_t size)

> +{

> +       void *ret;

> +       sblock_t *block;

> +

> +       if (size > spool->ctrl.element_sz)

> +               return NULL;

> +

> +       odp_spinlock_lock(&spool->ctrl.lock);

> +       ret = spool->ctrl.free_head;

> +       if (!ret) {

> +               odp_spinlock_unlock(&spool->ctrl.lock);

> +               ODP_ERR("Out of memory. (Slab pool full)\n");

> +               return NULL;

> +       }

> +

> +       block = (sblock_t *)ret;

> +       spool->ctrl.free_head = block->next;

> +

> +       odp_spinlock_unlock(&spool->ctrl.lock);

> +       return ret;

> +}

> +

> +/* free a previously allocated buffer from a given slab pool */

> +static int _odp_ishmslab_free(pool_t *spool, void *addr)

> +{

> +       uintptr_t user_start; /* start of user area */

> +       uintptr_t user_stop;  /* stop  of user area */

> +       sblock_t *block;

> +

> +       /* freeing NULL is regarded as OK, though without any effect:   */

> +       if (!addr)

> +               return 0;

> +

> +       user_start = (uintptr_t)spool->ctrl.user_addr;

> +       user_stop  = user_start + spool->ctrl.element_sz *

> spool->ctrl.nb_elem;

> +

> +       /* some sanity checks: check that given address is within pool and

> +        * that relative address has element_sz granularity:           */

> +       if (((uintptr_t)addr < user_start) ||

> +           ((uintptr_t)addr > user_stop)  ||

> +           (((uintptr_t)addr - user_start) % spool->ctrl.element_sz)) {

> +               ODP_ERR("Invalid address to be freed\n");

> +               return -1;

> +       }

> +

> +       odp_spinlock_lock(&spool->ctrl.lock);

> +       block = (sblock_t *)addr;

> +       block->next = (sblock_t *)spool->ctrl.free_head;

> +       spool->ctrl.free_head = addr;

> +       odp_spinlock_unlock(&spool->ctrl.lock);

> +

> +       return 0;

> +}

> +

> +/* print slab pool status and performs sanity checks */

> +static int _odp_ishmslab_pool_status(const char *title, pool_t *spool)

> +{

> +       sblock_t *sblock;

> +       uint64_t nb_free_elts; /* number of free elements */

> +

> +       odp_spinlock_lock(&spool->ctrl.lock);

> +

> +       ODP_DBG("\n%s\n", title);

> +       ODP_DBG("Pool Type: FIXED SIZE\n");

> +       ODP_DBG("pool size: %" PRIu64 " (bytes)\n",

> +               spool->ctrl.nb_elem * spool->ctrl.element_sz);

> +

> +       /* count the number of free elements in the free list: */

> +       nb_free_elts = 0;

> +       sblock = (sblock_t *)spool->ctrl.free_head;

> +       while (sblock) {

> +               nb_free_elts++;

> +               sblock = sblock->next;

> +       }

> +

> +       ODP_DBG("%" PRIu64 "/%" PRIu64 " available elements.\n",

> +               nb_free_elts, spool->ctrl.nb_elem);

> +

> +       odp_spinlock_unlock(&spool->ctrl.lock);

> +       return 0;

> +}

> +

> +/* section 3: common, external functions:

>      */

> +

> +/* create a pool: either with fixed alloc size (if max_alloc/min_alloc<2)

> or

> + * of variable block size (if max_alloc == 0) */

> +pool_t *_odp_ishm_pool_create(const char *pool_name, uint64_t size,

> +                             uint64_t min_alloc, uint64_t max_alloc, int

> flags)

> +{

> +       int store_idx;

> +       uint64_t real_pool_sz;

> +

> +       if (min_alloc > max_alloc) {

> +               ODP_ERR("invalid parameter: min_alloc > max_alloc");

> +               return NULL;

> +       }

> +

> +       /* search for a free index in pool_blk_idx for the pool */

> +       for (store_idx = 0; store_idx < MAX_NB_POOL; store_idx++) {

> +               if (pool_blk_idx[store_idx] < 0)

> +                       break;

> +       }

> +       if (store_idx == MAX_NB_POOL) {

> +               ODP_ERR("Max number of pool reached (MAX_NB_POOL)");

> +               return NULL;

> +       }

> +

> +       if ((min_alloc == 0) || ((max_alloc / min_alloc) > 2)) {

> +               /* alloc variation is not constant enough: we go for a

> buddy

> +                * allocator. The pool efficiency may go as low as 50%

> +                * so we double the required size to make sure we can

> satisfy

> +                * the user request */

> +               real_pool_sz = 2 * size;

> +               return _odp_ishmbud_pool_create(pool_name, store_idx,

> +                                               real_pool_sz,

> +                                               BUDDY_MIN_SIZE, flags);

> +       } else {

> +               /* min and max are close enough so we go for constant size

> +                * allocator:

> +                * make sure the pool can fit the required size, even when

> +                * only min_alloc allocation are performed: */

> +               real_pool_sz = ((size / min_alloc) +

> +                               ((size % min_alloc) ? 1 : 0))

> +                              * max_alloc;

> +               return _odp_ishmslab_pool_create(pool_name, store_idx,

> +                                                real_pool_sz,

> +                                                max_alloc, flags);

> +       }

> +}

> +

> +/* destroy a pool. everything goes away. no operation on the pool should

> + * follow. */

> +int _odp_ishm_pool_destroy(pool_t *pool)

> +{

> +       int store_idx;

> +

> +       for (store_idx = 0; store_idx < MAX_NB_POOL; store_idx++) {

> +               if (pool_blk_idx[store_idx] == pool->ctrl.ishm_blk_idx) {

> +                       pool_blk_idx[store_idx] = -1;

> +                       break;

> +               }

> +       }

> +

> +       return _odp_ishm_free_by_index(pool->ctrl.ishm_blk_idx);

> +}

> +

> +/* allocated a buffer from a pool */

> +void *_odp_ishm_pool_alloc(_odp_ishm_pool_t *pool, uint64_t size)

> +{

> +       if (!pool->ctrl.element_sz)

> +               return _odp_ishmbud_alloc(pool, size);

> +       else

> +               return _odp_ishmslab_alloc(pool, size);

> +}

> +

> +/* free a previously allocated buffer from a pool */

> +int _odp_ishm_pool_free(_odp_ishm_pool_t *pool, void *addr)

> +{

> +       if (!pool->ctrl.element_sz)

> +               return _odp_ishmbud_free(pool, addr);

> +       else

> +               return _odp_ishmslab_free(pool, addr);

> +}

> +

> +/* Print a pool status */

> +int _odp_ishm_pool_status(const char *title, _odp_ishm_pool_t *pool)

> +{

> +       if (!pool->ctrl.element_sz)

> +               return _odp_ishmbud_pool_status(title, pool);

> +       else

> +               return _odp_ishmslab_pool_status(title, pool);

> +}

> +

> +void _odp_ishm_pool_init(void)

> +{

> +       int i;

> +

> +       for (i = 0; i < MAX_NB_POOL; i++)

> +               pool_blk_idx[i] = -1;

> +}

> +

> +_odp_ishm_pool_t *_odp_ishm_pool_lookup(const char *pool_name)

> +{

> +       int block_idx;

> +       int store_idx;

> +

> +       /* search for a _ishm block with the given name */

> +       block_idx = _odp_ishm_lookup_by_name(pool_name);

> +       if (block_idx < 0)

> +               return NULL;

> +

> +       /* a block with that name exists: make sure it is within

> +        * the registered pools */

> +       for (store_idx = 0; store_idx < MAX_NB_POOL; store_idx++) {

> +               if (pool_blk_idx[store_idx] == block_idx)

> +                       return  _odp_ishm_address(block_idx);

> +       }

> +

> +       return NULL;

> +}

> diff --git a/platform/linux-generic/include/_ishmpool_internal.h

> b/platform/linux-generic/include/_ishmpool_internal.h

> new file mode 100644

> index 0000000..5c5304a

> --- /dev/null

> +++ b/platform/linux-generic/include/_ishmpool_internal.h

> @@ -0,0 +1,56 @@

> +/* Copyright (c) 2017, Linaro Limited

> + * All rights reserved.

> + *

> + * SPDX-License-Identifier:     BSD-3-Clause

> + */

> +

> +#ifndef ODP_ISHMBUDDY_INTERNAL_H_

> +#define ODP_ISHMBUDDY_INTERNAL_H_

> +

> +#ifdef __cplusplus

> +extern "C" {

> +#endif

> +

> +#include <stdint.h>

> +#include <odp/api/spinlock.h>

> +

> +typedef struct _odp_ishm_pool_ctrl_t {

> +       uint32_t element_sz;    /* 0 for buddy pools, >0 for slab.

>    */

> +       int ishm_blk_idx;       /* the block index returned by

> _ishm_resrve()*/

> +       odp_spinlock_t  lock;   /* for pool access mutex

>    */

> +       void *user_addr;        /* user pool area ('real user pool')

>    */

> +       union {

> +               struct {        /* things needed for buddy pools:

>   */

> +                       uint8_t order;  /* pool is 2^order bytes long

>   */

> +                       uint8_t min_order; /*alloc won't go below

> 2^min_order*/

> +                       void **free_heads; /* 'order' free list heads.

>    */

> +                       uint8_t *alloced_order; /* size of blocks, 0=free

>   */

> +               };

> +               struct {        /* things needed for slab pools:

>    */

> +                       void *free_head; /* free element list head

>    */

> +                       uint64_t nb_elem;/* total number of elements in

> pool */

> +               };

> +       };

> +} _odp_ishm_pool_ctrl_t;

> +

> +typedef struct _odp_ishm_pool_t {

> +       _odp_ishm_pool_ctrl_t ctrl;     /* control part

>   */

> +       uint8_t mem[1];         /* area for heads, saved alloc'd orders,

> data*/

> +} _odp_ishm_pool_t;

> +

> +_odp_ishm_pool_t *_odp_ishm_pool_create(const char *pool_name,

> +                                       uint64_t size,

> +                                       uint64_t min_alloc,

> +                                       uint64_t max_alloc, int flags);

> +int _odp_ishm_pool_destroy(_odp_ishm_pool_t *pool);

> +void *_odp_ishm_pool_alloc(_odp_ishm_pool_t *pool, uint64_t size);

> +int _odp_ishm_pool_free(_odp_ishm_pool_t *pool, void *addr);

> +int _odp_ishm_pool_status(const char *title, _odp_ishm_pool_t *pool);

> +_odp_ishm_pool_t *_odp_ishm_pool_lookup(const char *pool_name);

> +void _odp_ishm_pool_init(void);

> +

> +#ifdef __cplusplus

> +}

> +#endif

> +

> +#endif

> --

> 2.7.4

>

>
Christophe Milard Jan. 12, 2017, 8:42 a.m. UTC | #2
On 12 January 2017 at 08:52, Yi He <yi.he@linaro.org> wrote:
> Hi, Christophe,

>

> Q1: this patchset name seems a little problem: API-NEXTv5 -> API-NEXT

> PATCHv5


oh god!  Will fix in V6!

>

> Q2: the 1/6 patch cannot be applied to the api-next, I've tried to fix to

> continue the test but did not work, need to re-base and I'll test then.


OK. will rebase for v6

>

> For the patchset code looks OK to me and below questions are to clarify the

> usage of these APIs:

>

> Q3: In the practical usage does it mean all ishmpool creations need to be

> happened in

> 1st control thread before all other threads spawning?


no: you can do that, but it is not needed:
If a pool is created during ODP initialisation (typically during one of
the xxx_init_global functions), then a normal _ishm_reserve(), with
flags=0 can be performed: as you said, the memory allocated will be
inherited by all.
My first intention was to create a pool this way for the driver
interface so that all drivers could share the same pool. I am coming
from an embedded system world where memory consumption matters :-)
Petri objected that this would make memory corruption harder to debug,
which is true: if a driver exceeds its buffer size, that could make
another driver crash. If each driver creates its own pools, this
becomes much less probable.
But this is not a problem either (except that this meant that I had to
expose the pool creation/deletion to the drv API), because _ishm
supports the _ODP_ISHM_SINGLE_VA flag, which makes any memory allocated
at any time visible to all ODP threads (including linux threads). So
your question really relates to _ishm and not _ishmpool. I suggest you
read the large comment I made at the top of
platform/linux-generic/_ishm.c which describes that.
>

> Otherwise if a worker ODP thread spawn firstly and after then create an

> ishmpool, the

> memories allocated from this pool seems won't be accessible to other ODP

> threads? because

> the shm_attach and mmap did not happen in other ODP threads (processes).


no: the _ODP_ISHM_SINGLE_VA flag can work around this issue.

>

> So control thread needs to collect all worker threads requirements on small

> memory size

> requirements and creates all ishmpools in a batch manner before spawning

> worker threads?


no. same as above.

>

> Understand the typical usage:

>

> If the size requirements of small memory are not very specific, create one

> or several pools of

> range [min, max] to support small memory allocation.

>

> If the size requirements of small memory are specific, for example to

> specific data

> structure or small packets, create several pools of dedicated [size].

>

> Thanks and best regards, Yi


Yi: You really comment on the fundamentals, and I REALLY appreciate
that! Your comments are truly well thought out! What you missed here was the
_ishm capability to allocate memory after fork and still guarantee
sharability of addresses among all ODP threads (linux threads or
processes).
I suggest you read that comment in platform/linux-generic/_ishm.c.

Within the ODP code one can use the _odp_ishm_pool_create(...flags...) function:
Note the flags argument there: this flags argument is passed directly
to _ishm_reserve().
Typically, during any XXX_init_global function, flags would be 0.
Because you are running from the ODP instantiation, you are guaranteed
that any memory allocated at this point will be inherited by all.
If ODP later needs a fully sharable pool, it would probably call
_odp_ishm_pool_create() with the _ODP_ISHM_SINGLE_VA flag set,
hence requiring fully sharable memory.

One could use this flag at all times, but this would consume some of the
pre-reserved address space for no reason when run from init.

The function exposed to the driver, odpdrv_shm_pool_create(), always sets
this flag, so drivers can create fully sharable pools at any time.

I will fix the rebase, add "PATCH" to the title and send a v6.

Thanks for your review,

Christophe.
>

> On 3 January 2017 at 23:10, Christophe Milard <christophe.milard@linaro.org>

> wrote:

>>

>> _ishm now provides functions to create/destroy pools for buddy/slab

>> memory allocation, as well as functions to allocated/release memory

>>

>> from the created pools.

>>

>> Signed-off-by: Christophe Milard <christophe.milard@linaro.org>

>> ---

>>  platform/linux-generic/Makefile.am                 |   2 +

>>  platform/linux-generic/_ishm.c                     |  14 +-

>>  platform/linux-generic/_ishmpool.c                 | 811

>> +++++++++++++++++++++

>>  .../linux-generic/include/_ishmpool_internal.h     |  56 ++

>>  4 files changed, 882 insertions(+), 1 deletion(-)

>>  create mode 100644 platform/linux-generic/_ishmpool.c

>>  create mode 100644 platform/linux-generic/include/_ishmpool_internal.h

>>

>> diff --git a/platform/linux-generic/Makefile.am

>> b/platform/linux-generic/Makefile.am

>> index 999a7f5..d153c5d 100644

>> --- a/platform/linux-generic/Makefile.am

>> +++ b/platform/linux-generic/Makefile.am

>> @@ -127,6 +127,7 @@ noinst_HEADERS = \

>>                   ${srcdir}/include/_fdserver_internal.h \

>>                   ${srcdir}/include/_ishm_internal.h \

>>                   ${srcdir}/include/_ishmphy_internal.h \

>> +                 ${srcdir}/include/_ishmpool_internal.h \

>>                   ${srcdir}/include/odp_align_internal.h \

>>                   ${srcdir}/include/odp_atomic_internal.h \

>>                   ${srcdir}/include/odp_buffer_inlines.h \

>> @@ -171,6 +172,7 @@ __LIB__libodp_linux_la_SOURCES = \

>>                            _fdserver.c \

>>                            _ishm.c \

>>                            _ishmphy.c \

>> +                          _ishmpool.c \

>>                            odp_atomic.c \

>>                            odp_barrier.c \

>>                            odp_buffer.c \

>> diff --git a/platform/linux-generic/_ishm.c

>> b/platform/linux-generic/_ishm.c

>> index 6d5b752..0befdba 100644

>> --- a/platform/linux-generic/_ishm.c

>> +++ b/platform/linux-generic/_ishm.c

>> @@ -59,6 +59,7 @@

>>  #include <_fdserver_internal.h>

>>  #include <_ishm_internal.h>

>>  #include <_ishmphy_internal.h>

>> +#include <_ishmpool_internal.h>

>>  #include <stdlib.h>

>>  #include <stdio.h>

>>  #include <unistd.h>

>> @@ -1441,8 +1442,19 @@ int _odp_ishm_init_global(void)

>>          * is performed for the main thread... Many init_global()

>> functions

>>          * indeed assume the availability of odp_shm_reserve()...:

>>          */

>> -       return do_odp_ishm_init_local();

>> +       if (do_odp_ishm_init_local()) {

>> +               ODP_ERR("unable to init the main thread\n.");

>> +               goto init_glob_err4;

>> +       }

>> +

>> +       /* get ready to create pools: */

>> +       _odp_ishm_pool_init();

>>

>> +       return 0;

>> +

>> +init_glob_err4:

>> +       if (_odp_ishmphy_unbook_va())

>> +               ODP_ERR("unable to unbook virtual space\n.");

>>  init_glob_err3:

>>         if (munmap(ishm_ftbl, sizeof(ishm_ftable_t)) < 0)

>>                 ODP_ERR("unable to munmap main fragment table\n.");

>> diff --git a/platform/linux-generic/_ishmpool.c

>> b/platform/linux-generic/_ishmpool.c

>> new file mode 100644

>> index 0000000..df6e49e

>> --- /dev/null

>> +++ b/platform/linux-generic/_ishmpool.c

>> @@ -0,0 +1,811 @@

>> +/* Copyright (c) 2017, Linaro Limited

>> + * All rights reserved.

>> + *

>> + * SPDX-License-Identifier:     BSD-3-Clause

>> + */

>> +

>> +/* This file gathers the buddy and slab allocation functionality provided

>> + * by _ishm.

>> + * _odp_ishmpool_create() can be used to create a pool for buddy/slab

>> + * allocation. _odp_ishmpool_create() will allocate a memory area using

>> + * ishm_reserve() for both the control part (needed for tracking

>> + * allocation/free...) and the user memory itself (part of which will be

>> given

>> + * at each ishmpool_alloc()).

>> + * The element size provided at pool creation time determines whether

>> + * to pool will of type buddy or slab.

>>

>> + * For buddy, all allocations are rounded to the nearest power of 2.

>> + *

>> + * The implementation of the buddy allocator is very traditional: it

>> + * maintains N lists of free buffers.

>> + * The control part actually contains these N queue heads, (N-M are

>> actually

>> + * used), the free buffers themselves being used for chaining (the

>> chaining info

>> + * is in the buffers: as they are "free" they should not be touched by

>> the

>> + * user). The control part also contains a array of bytes for remembering

>>

>> + * the size (actually the order) of the allocated buffers:

>> + * There are 2^(N-M) such bytes, this number being the maximum number of

>> + * allocated buffers (when all allocation are <= 2^M bytes)

>> + * Buddy allocators handle fragmentation by splitting or merging blocks

>> by 2.

>> + * They guarantee a minimum efficiency of 50%, at worse case

>> fragmentation.

>> + *

>> + * Slab implementation is even simpler, all free elements being queued in

>> + * one single queue at init, taken from this queue when allocated and

>> + * returned to this same queue when freed.

>> + *

>> + * The reason for not using malloc() is that malloc does not guarantee

>> + * memory sharability between ODP threads (regardless of their

>> implememtation)

>> + * which ishm_reserve() can do. see the comments around

>> + * _odp_ishmbud_pool_create() and ishm_reserve() for more details.

>> + *

>> + * This file is divided in 3 sections: the first one regroups functions

>> + * needed by the buddy allocation.

>> + * The second one regroups the functions needed by the slab allocator.

>> + * The third section regroups the common functions exported externally.

>> + */

>> +

>> +#include <odp_posix_extensions.h>

>> +#include <odp_internal.h>

>> +#include <odp/api/spinlock.h>

>> +#include <odp/api/align.h>

>> +#include <odp/api/debug.h>

>> +#include <odp/drv/shm.h>

>> +#include <odp_shm_internal.h>

>> +#include <odp_debug_internal.h>

>> +#include <odp_align_internal.h>

>> +#include <_ishm_internal.h>

>> +#include <_ishmpool_internal.h>

>> +#include <stdlib.h>

>> +#include <stdio.h>

>> +#include <unistd.h>

>> +#include <string.h>

>> +#include <inttypes.h>

>> +

>> +#define BUDDY_MIN_SIZE 32 /* minimal buddy allocation size */

>> +

>> +typedef _odp_ishm_pool_t pool_t; /* for shorter writing             */

>> +

>> +/* array of ishm block index used for pools. only used for pool

>> + * lookup by name */

>> +#define MAX_NB_POOL 100

>> +static int pool_blk_idx[MAX_NB_POOL];

>>

>> +

>> +/* section 1: functions for buddy allocation:

>> */

>> +

>> +/* free buddy blocks contains the following structure, used to link the

>> + * free blocks together.

>> + */

>> +typedef struct bblock_t {

>> +       struct bblock_t *next;

>> +       uint32_t order;

>> +} bblock_t;

>> +

>> +/* value set in the 'order' table when the block is not allocated:   */

>> +#define BBLOCK_FREE 0

>> +

>> +/* compute ceil(log2(size)) */

>> +static uint8_t clog2(uint64_t size)

>> +{

>> +       uint64_t sz;

>> +       uint32_t bit;

>> +       uint8_t res;

>> +

>> +       sz = size;      /* we start by computing res = log2(sz)...   */

>> +       res = 0;

>> +       for (bit = 32; bit ; bit >>= 1) {

>> +               if (sz >= ((uint64_t)1 << bit)) {

>> +                       sz >>= bit;

>> +                       res += bit;

>> +               }

>> +       }

>> +       if (((uint64_t)1 << res) < size) /* ...and then ceil(x)      */

>> +               res++;

>> +

>> +       return res;

>> +}

>> +

>> +/*

>> + * given a bblock address, and an order value, returns the address

>> + * of the buddy bblock (the other "half")

>> + */

>> +static inline bblock_t *get_bblock_buddy(pool_t *bpool, bblock_t *addr,

>> +                                        uint8_t order)

>> +{

>> +       uintptr_t b;

>> +

>> +       b = ((uintptr_t)addr - (uintptr_t)bpool->ctrl.user_addr);

>> +       b ^= 1 << order;

>> +       return (void *)(b + (uintptr_t)bpool->ctrl.user_addr);

>> +}

>> +

>> +/*

>> + * given a buddy block address, return its number (used for busy flags):

>> + */

>> +static inline uintptr_t get_bblock_nr(pool_t *bpool, void *addr)

>> +{

>> +       uintptr_t b;

>> +       uint8_t min_order;

>> +

>> +       min_order = bpool->ctrl.min_order;

>> +       b = ((uintptr_t)addr - (uintptr_t)bpool->ctrl.user_addr) >>

>> min_order;

>> +       return b;

>> +}

>> +

>> +/* remove bblock from the list for bblocks of rank order. The bblock to

>> be

>> + * removed is really expected to be on the list: not finding it is an

>> error */

>> +static inline void remove_from_list(pool_t *bpool, uint8_t order,

>> +                                   bblock_t *bblock)

>> +{

>> +       bblock_t *curr;       /* current bblock (when parsing list) */

>> +       bblock_t *prev;       /* previous bblock (when parsing list) */

>> +

>> +       curr = bpool->ctrl.free_heads[order];

>> +       if (!curr)

>> +               goto remove_from_list_error;

>> +

>> +       if (curr == bblock) {

>> +               bpool->ctrl.free_heads[order] = curr->next;

>> +               return;

>> +       }

>> +

>> +       while (curr) {

>> +               if (curr == bblock) {

>> +                       prev->next = curr->next;

>> +                       return;

>> +               }

>> +               prev = curr;

>> +               curr = curr->next;

>> +       }

>> +

>> +remove_from_list_error:

>> +       ODP_ERR("List corrupted\n");

>> +}

>> +

>> +/*

>> + * create a buddy memory pool of given size (actually nearest power of

>> 2),

>> + * where allocation will never be smaller than min_alloc.

>> + * returns a pointer to the created buddy_pool

>> + * The allocated area contains:

>> + * - The _odp_ishm_pool_ctrl_t structure

>> + * - The array of ((order - min_order) of free list heads

>> + * - The array of 'order' values, remembering sizes of allocated bblocks

>> + * - alignment to cache line

>> + * - The user memory

>> + */

>> +static pool_t *_odp_ishmbud_pool_create(const char *pool_name, int

>> store_idx,

>> +                                       uint64_t size,

>> +                                       uint64_t min_alloc, int flags)

>> +{

>> +       uint8_t  order;          /* pool order = ceil(log2(size))

>> */

>> +       uint8_t  min_order;      /* pool min_order =

>> ceil(log2(min_alloc))*/

>> +       uint32_t max_nb_bblock;  /* max number of bblock, when smallest

>> */

>> +       uint32_t control_sz;     /* size of control area

>> */

>> +       uint32_t free_head_sz;   /* mem area needed for list heads

>> */

>> +       uint32_t saved_order_sz; /* mem area to remember given sizes

>> */

>> +       uint64_t user_sz;        /* 2^order bytes

>> */

>> +       uint64_t total_sz;       /* total size to request

>> */

>> +       int      blk_idx;        /* as returned by _ishm_resrve()

>> */

>> +       pool_t *bpool;

>> +       int i;

>> +       bblock_t *first_block;

>> +

>> +       /* a bblock_t must fit in the buffers for linked chain! */

>> +       if (min_alloc < sizeof(bblock_t))

>> +               min_alloc = sizeof(bblock_t);

>> +

>> +       /* pool order is such that 2^order = size. same for min_order   */

>> +       order = clog2(size);

>> +       min_order = clog2(min_alloc);

>> +

>> +       /* check parameters obvious wishes: */

>> +       if (order >= 64)

>> +               return NULL;

>> +       if (order < min_order)

>> +               return NULL;

>> +

>> +       /* at worst case, all bblocks have smallest (2^min_order) size  */

>> +       max_nb_bblock = (1 << (order - min_order));

>> +

>> +       /* space needed for the control area (padded to cache line size)*/

>> +       control_sz =

>> +

>> ODP_CACHE_LINE_SIZE_ROUNDUP(sizeof(_odp_ishm_pool_ctrl_t));

>> +

>> +       /* space needed for 'order' free bblock list heads:             */

>> +       /* Note that only lists from min_order to order are really used.*/

>> +       free_head_sz = ODP_CACHE_LINE_SIZE_ROUNDUP(sizeof(void *) *

>> +                                                  (order + 1));

>> +

>> +       /* space needed for order -i.e. size- storage of alloc'd bblock:*/

>> +       saved_order_sz = ODP_CACHE_LINE_SIZE_ROUNDUP(max_nb_bblock *

>> +                                                    sizeof(uint8_t));

>> +

>> +       /* space needed for user area is 2^order bytes: */

>> +       user_sz = 1 << order;

>> +

>> +       total_sz = control_sz +

>> +                  free_head_sz +

>> +                  saved_order_sz +

>> +                  user_sz;

>> +

>> +       /* allocate required memory: */

>> +       blk_idx = _odp_ishm_reserve(pool_name, total_sz, -1,

>> +                                   ODP_CACHE_LINE_SIZE, flags, 0);

>> +       if (blk_idx < 0) {

>> +               ODP_ERR("_odp_ishm_reserve failed.");

>> +               return NULL;

>> +       }

>> +

>> +       bpool = _odp_ishm_address(blk_idx);

>> +       if (bpool == NULL) {

>> +               ODP_ERR("_odp_ishm_address failed.");

>> +               return NULL;

>> +       }

>> +

>> +       /* store in pool array (needed for look up): */

>> +       pool_blk_idx[store_idx] = blk_idx;

>> +

>> +       /* remember block index, needed when pool is destroyed */

>> +       bpool->ctrl.ishm_blk_idx = blk_idx;

>> +

>> +       /* remember element size: 0 means unknown size, i.e. buddy

>> alloation*/

>> +       bpool->ctrl.element_sz = 0;

>> +

>> +       /* prepare mutex: */

>> +       odp_spinlock_init(&bpool->ctrl.lock);

>> +

>> +       /* initialise pointers and things... */

>> +       bpool->ctrl.order = order;

>> +       bpool->ctrl.min_order = min_order;

>> +       bpool->ctrl.free_heads =

>> +               (void *)((uintptr_t)bpool + control_sz);

>> +       bpool->ctrl.alloced_order =

>> +               (uint8_t *)((uintptr_t)bpool->ctrl.free_heads +

>> free_head_sz);

>> +       bpool->ctrl.user_addr =

>> +               (void *)((uintptr_t)bpool->ctrl.alloced_order +

>> saved_order_sz);

>> +

>> +       /* initialize all free list to NULL, except the top biggest

>> element:*/

>> +       for (i = 0; i < (order - min_order); i++)

>> +               bpool->ctrl.free_heads[i] = NULL;

>> +       bpool->ctrl.free_heads[order] = bpool->ctrl.user_addr;

>> +       first_block = (bblock_t *)bpool->ctrl.user_addr;

>> +       first_block->next = NULL;

>> +       first_block->order = order;

>> +

>> +       /* set all 'order' of allocated bblocks to free: */

>> +       memset(bpool->ctrl.alloced_order, BBLOCK_FREE, saved_order_sz);

>> +

>> +       return bpool;

>> +}

>> +

>> +/* allocated memory from the given buddy pool */

>> +static void *_odp_ishmbud_alloc(pool_t *bpool, uint64_t size)

>> +{

>> +       uint32_t rq_order; /* requested order */

>> +       uint32_t try_order;

>> +       bblock_t *bblock;

>> +       bblock_t *buddy;

>> +       uintptr_t nr;

>> +

>> +       /* if size is zero or too big reject: */

>> +       if ((!size) && (size > (1U << bpool->ctrl.order))) {

>> +               ODP_ERR("Invalid alloc size (0 or larger than whole

>> pool)\n");

>> +               return NULL;

>> +       }

>> +

>> +       /* compute ceil(log2(size)), to get the requested block order:

>> */

>> +       rq_order = clog2(size);

>> +

>> +       /* make sure the requested order is bigger (or same) as minimum!

>> */

>> +       if (rq_order < bpool->ctrl.min_order)

>> +               rq_order = bpool->ctrl.min_order;

>> +

>> +       /* mutex from here: */

>> +       odp_spinlock_lock(&bpool->ctrl.lock);

>> +

>> +       /* now, start trying to allocate a bblock of rq_order. If that

>> +        * fails keep trying larger orders until pool order is reached

>> */

>> +       bblock = NULL;

>> +       for (try_order = rq_order; try_order <= bpool->ctrl.order;

>> +            try_order++) {

>> +               if (bpool->ctrl.free_heads[try_order]) {

>> +                       /* remove from list: */

>> +                       bblock =

>> +                               (bblock_t

>> *)(bpool->ctrl.free_heads[try_order]);

>> +                       bpool->ctrl.free_heads[try_order] = bblock->next;

>> +                       break;

>> +               }

>> +       }

>> +

>> +       if (!bblock) {

>> +               odp_spinlock_unlock(&bpool->ctrl.lock);

>> +               ODP_ERR("Out of memory. (Buddy pool full)\n");

>> +               return NULL;

>> +       }

>> +

>> +       /* OK: we got a block, but possibbly too large (if

>> try_order>rq_order)

>> +        * return the extra halves to the pool hence splitting the bblock

>> at

>> +        * each 'extra' order: */

>> +       while (try_order-- > rq_order) {

>> +               /* split: */

>> +               buddy = (bblock_t *)((uintptr_t)bblock + (1 <<

>> try_order));

>> +               buddy->order = try_order;

>> +               /* add to list: */

>> +               buddy->next = bpool->ctrl.free_heads[try_order];

>> +               bpool->ctrl.free_heads[try_order] = buddy;

>> +               /* mark as free (non allocated block get size 0): */

>> +               nr = get_bblock_nr(bpool, buddy);

>> +               bpool->ctrl.alloced_order[nr] = BBLOCK_FREE;

>> +       }

>> +

>> +       /* remember the size if the allocated block: */

>> +       nr = get_bblock_nr(bpool, bblock);

>> +       bpool->ctrl.alloced_order[nr] = rq_order;

>> +

>> +       /* and return the allocated block! */

>> +       odp_spinlock_unlock(&bpool->ctrl.lock);

>> +       return (void *)bblock;

>> +}

>> +

>> +/* free a previously allocated buffer from a given buddy pool */

>> +static int _odp_ishmbud_free(pool_t *bpool, void *addr)

>> +{

>> +       uintptr_t user_start; /* start of user area */

>> +       uintptr_t user_stop;  /* stop  of user area */

>> +       uintptr_t mask;       /* 2^min_order - 1    */

>> +       bblock_t *bblock;     /* bblock being freed */

>> +       bblock_t *buddy;      /* buddy bblock of bblock being freed */

>> +       uint8_t order;        /* order of block being freed */

>> +       uintptr_t nr;         /* block number */

>> +

>> +       /* freeing NULL is regarded as OK, though without any effect:   */

>> +       if (!addr)

>> +               return 0;

>> +

>> +       user_start = (uintptr_t)bpool->ctrl.user_addr;

>> +       user_stop  = user_start + ((uintptr_t)1 << bpool->ctrl.order);

>> +       mask = ((uintptr_t)1 << bpool->ctrl.min_order) - 1;

>> +

>> +       /* some sanity checks: check that given address is within pool and

>> +        * that relative address has 2^min_order granularity:           */

>> +       if (((uintptr_t)addr < user_start) ||

>> +           ((uintptr_t)addr > user_stop)  ||

>> +           (((uintptr_t)addr - user_start) & mask)) {

>> +               ODP_ERR("Invalid address to be freed\n");

>> +               return -1;

>> +       }

>> +

>> +       /* mutex from here: */

>> +       odp_spinlock_lock(&bpool->ctrl.lock);

>> +

>> +       /* collect saved block order and make sure bblock was allocated */

>> +       bblock = (bblock_t *)addr;

>> +       nr = get_bblock_nr(bpool, bblock);

>> +       order = bpool->ctrl.alloced_order[nr];

>> +       if (order == BBLOCK_FREE) {

>> +               ODP_ERR("Double free error\n");

>> +               odp_spinlock_unlock(&bpool->ctrl.lock);

>> +               return -1;

>> +       }

>> +

>> +       /* this looks like a valid free, mark at least this as free:   */

>> +       bpool->ctrl.alloced_order[nr] = BBLOCK_FREE;

>> +

>> +       /* go up in orders, trying to merge buddies... */

>> +       while (order < bpool->ctrl.order) {

>> +               buddy = get_bblock_buddy(bpool, bblock, order);

>> +               /*if buddy is not free: no further merge possible */

>> +               nr = get_bblock_nr(bpool, buddy);

>> +               if (bpool->ctrl.alloced_order[nr] != BBLOCK_FREE)

>> +                       break;

>> +               /*merge only bblock of same order:*/

>> +               if (buddy->order != order)

>> +                       break;

>> +               /*merge: remove buddy from free list: */

>> +               remove_from_list(bpool, order, buddy);

>> +               /*merge: make sure we point at start of block: */

>> +               if (bblock > buddy)

>> +                       bblock = buddy;

>> +               /*merge: size of block has doubled: increase order: */

>> +               order++;

>> +       }

>> +

>> +       /* insert the bblock into its correct free block list: */

>> +       bblock->next = bpool->ctrl.free_heads[order];

>> +       bpool->ctrl.free_heads[order] = bblock;

>> +

>> +       /* remember the (possibly now merged) block order: */

>> +       bblock->order = order;

>> +

>> +       odp_spinlock_unlock(&bpool->ctrl.lock);

>> +       return 0;

>> +}

>> +

>> +/* print buddy pool status and performs sanity checks */

>> +static int _odp_ishmbud_pool_status(const char *title, pool_t *bpool)

>> +{

>> +       uint8_t order, pool_order, pool_min_order;

>> +       uint64_t free_q_nb_bblocks[64];

>> +       uint64_t allocated_nb_bblocks[64];

>> +       uint64_t free_q_nb_bblocks_bytes[64];

>> +       uint64_t allocated_nb_bblocks_bytes[64];

>> +       uint64_t total_bytes_free;

>> +       uint64_t total_bytes_allocated;

>> +       uint64_t nr;

>> +       bblock_t *bblock;

>> +       int res = 0;

>> +

>> +       odp_spinlock_lock(&bpool->ctrl.lock);

>> +

>> +       pool_order = bpool->ctrl.order;

>> +       pool_min_order = bpool->ctrl.min_order;

>> +

>> +       ODP_DBG("\n%s\n", title);

>> +       ODP_DBG("Pool Type: BUDDY\n");

>> +       ODP_DBG("pool size: %" PRIu64 " (bytes)\n", (1UL << pool_order));

>> +       ODP_DBG("pool order: %d\n", (int)pool_order);

>> +       ODP_DBG("pool min_order: %d\n", (int)pool_min_order);

>> +

>> +       /* a pool whose order is more than 64 cannot even be reached on

>> 64

>> +        * bit machines! */

>> +       if (pool_order > 64) {

>> +               odp_spinlock_unlock(&bpool->ctrl.lock);

>> +               return -1;

>> +       }

>> +

>> +       total_bytes_free = 0;

>> +       total_bytes_allocated = 0;

>> +

>> +       /* for each queue */

>> +       for (order = pool_min_order; order <= pool_order; order++) {

>> +               free_q_nb_bblocks[order] = 0;

>> +               free_q_nb_bblocks_bytes[order] = 0;

>> +               allocated_nb_bblocks[order] = 0;

>> +               allocated_nb_bblocks_bytes[order] = 0;

>> +

>> +               /* get the number of buffs in the free queue for this

>> order: */

>> +               bblock = bpool->ctrl.free_heads[order];

>> +               while (bblock) {

>> +                       free_q_nb_bblocks[order]++;

>> +                       free_q_nb_bblocks_bytes[order] += (1 << order);

>> +                       bblock = bblock->next;

>> +               }

>> +

>> +               total_bytes_free += free_q_nb_bblocks_bytes[order];

>> +

>> +               /* get the number of allocated buffers of this order */

>> +               for (nr = 0;

>> +                    nr < (1U << (pool_order - pool_min_order)); nr++) {

>> +                       if (bpool->ctrl.alloced_order[nr] == order)

>> +                               allocated_nb_bblocks[order]++;

>> +               }

>> +

>> +               allocated_nb_bblocks_bytes[order] =

>> +                       allocated_nb_bblocks[order] * (1 << order);

>> +

>> +               total_bytes_allocated +=

>> allocated_nb_bblocks_bytes[order];

>> +

>> +               ODP_DBG("Order %d => Free: %" PRIu64 " buffers "

>> +                       "(%" PRIu64" bytes)   "

>> +                       "Allocated %" PRIu64 " buffers (%" PRIu64 "

>> bytes)   "

>> +                       "Total: %" PRIu64 "  bytes\n",

>> +                       (int)order, free_q_nb_bblocks[order],

>> +                       free_q_nb_bblocks_bytes[order],

>> +                       allocated_nb_bblocks[order],

>> +                       allocated_nb_bblocks_bytes[order],

>> +                       free_q_nb_bblocks_bytes[order] +

>> +                       allocated_nb_bblocks_bytes[order]);

>> +       }

>> +

>> +       ODP_DBG("Allocated space: %" PRIu64 " (bytes)\n",

>> +               total_bytes_allocated);

>> +       ODP_DBG("Free space: %" PRIu64 " (bytes)\n", total_bytes_free);

>> +

>> +       if (total_bytes_free + total_bytes_allocated != (1U <<

>> pool_order)) {

>> +               ODP_DBG("Lost bytes on this pool!\n");

>> +               res = -1;

>> +       }

>> +

>> +       if (res)

>> +               ODP_DBG("Pool inconsistent!\n");

>> +

>> +       odp_spinlock_unlock(&bpool->ctrl.lock);

>> +       return res;

>> +}

>> +

>> +/* section 2: functions for slab allocation:

>> */

>> +

>> +/* free slab blocks contains the following structure, used to link the

>> + * free blocks together.

>> + */

>> +typedef struct sblock_t {

>> +       struct sblock_t *next;

>> +} sblock_t;

>> +

>> +/*

>> + * create a slab memory pool of given size (rounded up to the nearest

>> integer

>> + * number of element, where each element has size 'elt_size').

>> + * returns a pointer to the created slab pool.

>> + * The allocated area contains:

>> + * - The _odp_ishm_pool_ctrl_t structure

>> + * - alignment to cache line

>> + * - The user memory

>> + */

>> +static pool_t *_odp_ishmslab_pool_create(const char *pool_name, int

>> store_idx,

>> +                                        uint64_t size,

>> +                                        uint64_t elt_size, int flags)

>> +{

>> +       uint32_t nb_sblock;      /* number of elements in the pool

>> */

>> +       uint32_t control_sz;     /* size of control area

>> */

>> +       uint64_t total_sz;       /* total size to request

>> */

>> +       uint64_t user_sz;        /* 2^order bytes

>> */

>> +       int      blk_idx;        /* as returned by _ishm_reserve()

>> */

>> +       pool_t *spool;

>> +       unsigned int i;

>> +       sblock_t *block;

>> +

>> +       /* a sblock_t must fit in the buffers for linked chain! */

>> +       if (elt_size < sizeof(bblock_t)) {

>> +               elt_size = sizeof(bblock_t);

>> +               size = size * (sizeof(bblock_t) / elt_size +

>> +                              ((sizeof(bblock_t) % elt_size) ? 1 : 0));

>> +       }

>> +

>> +       /* nb of element fitting in the pool is just ceil(size/elt_size)*/

>> +       nb_sblock = (size / elt_size) + ((size % elt_size) ? 1 : 0);

>> +

>> +       /* space needed for the control area (padded to cache line size)*/

>> +       control_sz =

>> +

>> ODP_CACHE_LINE_SIZE_ROUNDUP(sizeof(_odp_ishm_pool_ctrl_t));

>> +

>> +       /* space needed for user area is : */

>> +       user_sz = nb_sblock * elt_size;

>> +

>> +       total_sz = control_sz +

>> +                  user_sz;

>> +

>> +       /* allocate required memory: */

>> +       blk_idx = _odp_ishm_reserve(pool_name, total_sz, -1,

>> +                                   ODP_CACHE_LINE_SIZE, flags, 0);

>> +       if (blk_idx < 0) {

>> +               ODP_ERR("_odp_ishm_reserve failed.");

>> +               return NULL;

>> +       }

>> +

>> +       spool = _odp_ishm_address(blk_idx);

>> +       if (spool == NULL) {

>> +               ODP_ERR("_odp_ishm_address failed.");

>> +               return NULL;

>> +       }

>> +

>> +       /* store in pool array (needed for look up): */

>> +       pool_blk_idx[store_idx] = blk_idx;

>> +

>> +       /* remember block index, needed when pool is destroyed */

>> +       spool->ctrl.ishm_blk_idx = blk_idx;

>> +

>> +       /* remember element (sblock) size and their number: */

>> +       spool->ctrl.element_sz = elt_size;

>> +       spool->ctrl.nb_elem = nb_sblock;

>> +

>> +       /* prepare mutex: */

>> +       odp_spinlock_init(&spool->ctrl.lock);

>> +

>> +       /* initialise pointers and things... */

>> +       spool->ctrl.user_addr =

>> +               (void *)((uintptr_t)spool + control_sz);

>> +

>> +       /* initialise the free list with the list of all elements:*/

>> +       spool->ctrl.free_head = spool->ctrl.user_addr;

>> +       for (i = 0; i < nb_sblock - 1; i++) {

>> +               block = (sblock_t *)((uintptr_t)spool->ctrl.user_addr +

>> +                                    i * (uintptr_t)elt_size);

>> +               block->next = (sblock_t *)((uintptr_t)block +

>> +                                          (uintptr_t)elt_size);

>> +       }

>> +       block = (sblock_t *)((uintptr_t)spool->ctrl.user_addr +

>> +                            (nb_sblock - 1) * (uintptr_t)elt_size);

>> +       block->next = NULL;

>> +

>> +       return spool;

>> +}

>> +

>> +/* allocate memory from the given slab pool */

>> +static void *_odp_ishmslab_alloc(pool_t *spool, uint64_t size)

>> +{

>> +       void *ret;

>> +       sblock_t *block;

>> +

>> +       if (size > spool->ctrl.element_sz)

>> +               return NULL;

>> +

>> +       odp_spinlock_lock(&spool->ctrl.lock);

>> +       ret = spool->ctrl.free_head;

>> +       if (!ret) {

>> +               odp_spinlock_unlock(&spool->ctrl.lock);

>> +               ODP_ERR("Out of memory. (Slab pool full)\n");

>> +               return NULL;

>> +       }

>> +

>> +       block = (sblock_t *)ret;

>> +       spool->ctrl.free_head = block->next;

>> +

>> +       odp_spinlock_unlock(&spool->ctrl.lock);

>> +       return ret;

>> +}

>> +

>> +/* free a previously allocated buffer from a given slab pool */

>> +static int _odp_ishmslab_free(pool_t *spool, void *addr)

>> +{

>> +       uintptr_t user_start; /* start of user area */

>> +       uintptr_t user_stop;  /* stop  of user area */

>> +       sblock_t *block;

>> +

>> +       /* freeing NULL is regarded as OK, though without any effect:   */

>> +       if (!addr)

>> +               return 0;

>> +

>> +       user_start = (uintptr_t)spool->ctrl.user_addr;

>> +       user_stop  = user_start + spool->ctrl.element_sz *

>> spool->ctrl.nb_elem;

>> +

>> +       /* some sanity checks: check that given address is within pool and

>> +        * that relative address has element_sz granularity:           */

>> +       if (((uintptr_t)addr < user_start) ||

>> +           ((uintptr_t)addr > user_stop)  ||

>> +           (((uintptr_t)addr - user_start) % spool->ctrl.element_sz)) {

>> +               ODP_ERR("Invalid address to be freed\n");

>> +               return -1;

>> +       }

>> +

>> +       odp_spinlock_lock(&spool->ctrl.lock);

>> +       block = (sblock_t *)addr;

>> +       block->next = (sblock_t *)spool->ctrl.free_head;

>> +       spool->ctrl.free_head = addr;

>> +       odp_spinlock_unlock(&spool->ctrl.lock);

>> +

>> +       return 0;

>> +}

>> +

>> +/* print slab pool status and performs sanity checks */

>> +static int _odp_ishmslab_pool_status(const char *title, pool_t *spool)

>> +{

>> +       sblock_t *sblock;

>> +       uint64_t nb_free_elts; /* number of free elements */

>> +

>> +       odp_spinlock_lock(&spool->ctrl.lock);

>> +

>> +       ODP_DBG("\n%s\n", title);

>> +       ODP_DBG("Pool Type: FIXED SIZE\n");

>> +       ODP_DBG("pool size: %" PRIu64 " (bytes)\n",

>> +               spool->ctrl.nb_elem * spool->ctrl.element_sz);

>> +

>> +       /* count the number of free elements in the free list: */

>> +       nb_free_elts = 0;

>> +       sblock = (sblock_t *)spool->ctrl.free_head;

>> +       while (sblock) {

>> +               nb_free_elts++;

>> +               sblock = sblock->next;

>> +       }

>> +

>> +       ODP_DBG("%" PRIu64 "/%" PRIu64 " available elements.\n",

>> +               nb_free_elts, spool->ctrl.nb_elem);

>> +

>> +       odp_spinlock_unlock(&spool->ctrl.lock);

>> +       return 0;

>> +}

>> +

>> +/* section 3: common, external functions:

>> */

>> +

>> +/* create a pool: either with fixed alloc size (if max_alloc/min_alloc<=2)

>> or

>> + * of variable block size (if min_alloc == 0 or max_alloc/min_alloc>2) */

>> +pool_t *_odp_ishm_pool_create(const char *pool_name, uint64_t size,

>> +                             uint64_t min_alloc, uint64_t max_alloc, int

>> flags)

>> +{

>> +       int store_idx;

>> +       uint64_t real_pool_sz;

>> +

>> +       if (min_alloc > max_alloc) {

>> +               ODP_ERR("invalid parameter: min_alloc > max_alloc");

>> +               return NULL;

>> +       }

>> +

>> +       /* search for a free index in pool_blk_idx for the pool */

>> +       for (store_idx = 0; store_idx < MAX_NB_POOL; store_idx++) {

>> +               if (pool_blk_idx[store_idx] < 0)

>> +                       break;

>> +       }

>> +       if (store_idx == MAX_NB_POOL) {

>> +               ODP_ERR("Max number of pool reached (MAX_NB_POOL)");

>> +               return NULL;

>> +       }

>> +

>> +       if ((min_alloc == 0) || ((max_alloc / min_alloc) > 2)) {

>> +               /* alloc variation is not constant enough: we go for a

>> buddy

>> +                * allocator. The pool efficiency may go as low as 50%

>> +                * so we double the required size to make sure we can

>> satisfy

>> +                * the user request */

>> +               real_pool_sz = 2 * size;

>> +               return _odp_ishmbud_pool_create(pool_name, store_idx,

>> +                                               real_pool_sz,

>> +                                               BUDDY_MIN_SIZE, flags);

>> +       } else {

>> +               /* min and max are close enough so we go for constant size

>> +                * allocator:

>> +                * make sure the pool can fit the required size, even when

>> +                * only min_alloc allocation are performed: */

>> +               real_pool_sz = ((size / min_alloc) +

>> +                               ((size % min_alloc) ? 1 : 0))

>> +                              * max_alloc;

>> +               return _odp_ishmslab_pool_create(pool_name, store_idx,

>> +                                                real_pool_sz,

>> +                                                max_alloc, flags);

>> +       }

>> +}

>> +

>> +/* destroy a pool. everything goes away. no operation on the pool should

>> + * follow. */

>> +int _odp_ishm_pool_destroy(pool_t *pool)

>> +{

>> +       int store_idx;

>> +

>> +       for (store_idx = 0; store_idx < MAX_NB_POOL; store_idx++) {

>> +               if (pool_blk_idx[store_idx] == pool->ctrl.ishm_blk_idx) {

>> +                       pool_blk_idx[store_idx] = -1;

>> +                       break;

>> +               }

>> +       }

>> +

>> +       return _odp_ishm_free_by_index(pool->ctrl.ishm_blk_idx);

>> +}

>> +

>> +/* allocate a buffer from a pool */

>> +void *_odp_ishm_pool_alloc(_odp_ishm_pool_t *pool, uint64_t size)

>> +{

>> +       if (!pool->ctrl.element_sz)

>> +               return _odp_ishmbud_alloc(pool, size);

>> +       else

>> +               return _odp_ishmslab_alloc(pool, size);

>> +}

>> +

>> +/* free a previously allocated buffer from a pool */

>> +int _odp_ishm_pool_free(_odp_ishm_pool_t *pool, void *addr)

>> +{

>> +       if (!pool->ctrl.element_sz)

>> +               return _odp_ishmbud_free(pool, addr);

>> +       else

>> +               return _odp_ishmslab_free(pool, addr);

>> +}

>> +

>> +/* Print a pool status */

>> +int _odp_ishm_pool_status(const char *title, _odp_ishm_pool_t *pool)

>> +{

>> +       if (!pool->ctrl.element_sz)

>> +               return _odp_ishmbud_pool_status(title, pool);

>> +       else

>> +               return _odp_ishmslab_pool_status(title, pool);

>> +}

>> +

>> +void _odp_ishm_pool_init(void)

>> +{

>> +       int i;

>> +

>> +       for (i = 0; i < MAX_NB_POOL; i++)

>> +               pool_blk_idx[i] = -1;

>> +}

>> +

>> +_odp_ishm_pool_t *_odp_ishm_pool_lookup(const char *pool_name)

>> +{

>> +       int block_idx;

>> +       int store_idx;

>> +

>> +       /* search for a _ishm block with the given name */

>> +       block_idx = _odp_ishm_lookup_by_name(pool_name);

>> +       if (block_idx < 0)

>> +               return NULL;

>> +

>> +       /* a block with that name exists: make sure it is within

>> +        * the registered pools */

>> +       for (store_idx = 0; store_idx < MAX_NB_POOL; store_idx++) {

>> +               if (pool_blk_idx[store_idx] == block_idx)

>> +                       return  _odp_ishm_address(block_idx);

>> +       }

>> +

>> +       return NULL;

>> +}

>> diff --git a/platform/linux-generic/include/_ishmpool_internal.h

>> b/platform/linux-generic/include/_ishmpool_internal.h

>> new file mode 100644

>> index 0000000..5c5304a

>> --- /dev/null

>> +++ b/platform/linux-generic/include/_ishmpool_internal.h

>> @@ -0,0 +1,56 @@

>> +/* Copyright (c) 2017, Linaro Limited

>> + * All rights reserved.

>> + *

>> + * SPDX-License-Identifier:     BSD-3-Clause

>> + */

>> +

>> +#ifndef ODP_ISHMBUDDY_INTERNAL_H_

>> +#define ODP_ISHMBUDDY_INTERNAL_H_

>> +

>> +#ifdef __cplusplus

>> +extern "C" {

>> +#endif

>> +

>> +#include <stdint.h>

>> +#include <odp/api/spinlock.h>

>> +

>> +typedef struct _odp_ishm_pool_ctrl_t {

>> +       uint32_t element_sz;    /* 0 for buddy pools, >0 for slab.

>> */

>> +       int ishm_blk_idx;       /* the block index returned by

>> _ishm_reserve()*/

>> +       odp_spinlock_t  lock;   /* for pool access mutex

>> */

>> +       void *user_addr;        /* user pool area ('real user pool')

>> */

>> +       union {

>> +               struct {        /* things needed for buddy pools:

>> */

>> +                       uint8_t order;  /* pool is 2^order bytes long

>> */

>> +                       uint8_t min_order; /*alloc won't go below

>> 2^min_order*/

>> +                       void **free_heads; /* 'order' free list heads.

>> */

>> +                       uint8_t *alloced_order; /* size of blocks, 0=free

>> */

>> +               };

>> +               struct {        /* things needed for slab pools:

>> */

>> +                       void *free_head; /* free element list head

>> */

>> +                       uint64_t nb_elem;/* total number of elements in

>> pool */

>> +               };

>> +       };

>> +} _odp_ishm_pool_ctrl_t;

>> +

>> +typedef struct _odp_ishm_pool_t {

>> +       _odp_ishm_pool_ctrl_t ctrl;     /* control part

>> */

>> +       uint8_t mem[1];         /* area for heads, saved alloc'd orders,

>> data*/

>> +} _odp_ishm_pool_t;

>> +

>> +_odp_ishm_pool_t *_odp_ishm_pool_create(const char *pool_name,

>> +                                       uint64_t size,

>> +                                       uint64_t min_alloc,

>> +                                       uint64_t max_alloc, int flags);

>> +int _odp_ishm_pool_destroy(_odp_ishm_pool_t *pool);

>> +void *_odp_ishm_pool_alloc(_odp_ishm_pool_t *pool, uint64_t size);

>> +int _odp_ishm_pool_free(_odp_ishm_pool_t *pool, void *addr);

>> +int _odp_ishm_pool_status(const char *title, _odp_ishm_pool_t *pool);

>> +_odp_ishm_pool_t *_odp_ishm_pool_lookup(const char *pool_name);

>> +void _odp_ishm_pool_init(void);

>> +

>> +#ifdef __cplusplus

>> +}

>> +#endif

>> +

>> +#endif

>> --

>> 2.7.4

>>

>
diff mbox

Patch

diff --git a/platform/linux-generic/Makefile.am b/platform/linux-generic/Makefile.am
index 999a7f5..d153c5d 100644
--- a/platform/linux-generic/Makefile.am
+++ b/platform/linux-generic/Makefile.am
@@ -127,6 +127,7 @@  noinst_HEADERS = \
 		  ${srcdir}/include/_fdserver_internal.h \
 		  ${srcdir}/include/_ishm_internal.h \
 		  ${srcdir}/include/_ishmphy_internal.h \
+		  ${srcdir}/include/_ishmpool_internal.h \
 		  ${srcdir}/include/odp_align_internal.h \
 		  ${srcdir}/include/odp_atomic_internal.h \
 		  ${srcdir}/include/odp_buffer_inlines.h \
@@ -171,6 +172,7 @@  __LIB__libodp_linux_la_SOURCES = \
 			   _fdserver.c \
 			   _ishm.c \
 			   _ishmphy.c \
+			   _ishmpool.c \
 			   odp_atomic.c \
 			   odp_barrier.c \
 			   odp_buffer.c \
diff --git a/platform/linux-generic/_ishm.c b/platform/linux-generic/_ishm.c
index 6d5b752..0befdba 100644
--- a/platform/linux-generic/_ishm.c
+++ b/platform/linux-generic/_ishm.c
@@ -59,6 +59,7 @@ 
 #include <_fdserver_internal.h>
 #include <_ishm_internal.h>
 #include <_ishmphy_internal.h>
+#include <_ishmpool_internal.h>
 #include <stdlib.h>
 #include <stdio.h>
 #include <unistd.h>
@@ -1441,8 +1442,19 @@  int _odp_ishm_init_global(void)
 	 * is performed for the main thread... Many init_global() functions
 	 * indeed assume the availability of odp_shm_reserve()...:
 	 */
-	return do_odp_ishm_init_local();
+	if (do_odp_ishm_init_local()) {
+		ODP_ERR("unable to init the main thread\n.");
+		goto init_glob_err4;
+	}
+
+	/* get ready to create pools: */
+	_odp_ishm_pool_init();
 
+	return 0;
+
+init_glob_err4:
+	if (_odp_ishmphy_unbook_va())
+		ODP_ERR("unable to unbook virtual space\n.");
 init_glob_err3:
 	if (munmap(ishm_ftbl, sizeof(ishm_ftable_t)) < 0)
 		ODP_ERR("unable to munmap main fragment table\n.");
diff --git a/platform/linux-generic/_ishmpool.c b/platform/linux-generic/_ishmpool.c
new file mode 100644
index 0000000..df6e49e
--- /dev/null
+++ b/platform/linux-generic/_ishmpool.c
@@ -0,0 +1,811 @@ 
+/* Copyright (c) 2017, Linaro Limited
+ * All rights reserved.
+ *
+ * SPDX-License-Identifier:     BSD-3-Clause
+ */
+
+/* This file gathers the buddy and slab allocation functionality provided
+ * by _ishm.
+ * _odp_ishm_pool_create() can be used to create a pool for buddy/slab
+ * allocation. _odp_ishm_pool_create() will allocate a memory area using
+ * _odp_ishm_reserve() for both the control part (needed for tracking
+ * allocation/free...) and the user memory itself (part of which will be given
+ * at each _odp_ishm_pool_alloc()).
+ * The min/max allocation sizes provided at pool creation time determine
+ * whether the pool will be of type buddy or slab.
+ * For buddy, all allocations are rounded up to the next power of 2.
+ *
+ * The implementation of the buddy allocator is very traditional: it
+ * maintains N lists of free buffers.
+ * The control part actually contains these N queue heads, (N-M are actually
+ * used), the free buffers themselves being used for chaining (the chaining info
+ * is in the buffers: as they are "free" they should not be touched by the
+ * user). The control part also contains a array of bytes for remembering
+ * the size (actually the order) of the allocated buffers:
+ * There are 2^(N-M) such bytes, this number being the maximum number of
+ * allocated buffers (when all allocation are <= 2^M bytes)
+ * Buddy allocators handle fragmentation by splitting or merging blocks by 2.
+ * They guarantee a minimum efficiency of 50%, in the worst fragmentation case.
+ *
+ * Slab implementation is even simpler, all free elements being queued in
+ * one single queue at init, taken from this queue when allocated and
+ * returned to this same queue when freed.
+ *
+ * The reason for not using malloc() is that malloc does not guarantee
+ * memory sharability between ODP threads (regardless of their implementation)
+ * which _odp_ishm_reserve() can do. See the comments around
+ * _odp_ishmbud_pool_create() and _odp_ishm_reserve() for more details.
+ *
+ * This file is divided in 3 sections: the first one regroups functions
+ * needed by the buddy allocation.
+ * The second one regroups the functions needed by the slab allocator.
+ * The third section regroups the common functions exported externally.
+ */
+
+#include <odp_posix_extensions.h>
+#include <odp_internal.h>
+#include <odp/api/spinlock.h>
+#include <odp/api/align.h>
+#include <odp/api/debug.h>
+#include <odp/drv/shm.h>
+#include <odp_shm_internal.h>
+#include <odp_debug_internal.h>
+#include <odp_align_internal.h>
+#include <_ishm_internal.h>
+#include <_ishmpool_internal.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <string.h>
+#include <inttypes.h>
+
+#define BUDDY_MIN_SIZE 32 /* minimal buddy allocation size, in bytes */
+
+typedef _odp_ishm_pool_t pool_t; /* for shorter writing             */
+
+/* array of ishm block indexes used for pools. Only used for pool
+ * lookup by name. A negative entry marks an unused slot. */
+#define MAX_NB_POOL 100
+static int pool_blk_idx[MAX_NB_POOL];
+
+/* section 1: functions for buddy allocation:                                 */
+
+/* free buddy blocks contain the following structure, used to link the
+ * free blocks together. The structure is written at the very start of
+ * the free block itself.
+ */
+typedef struct bblock_t {
+	struct bblock_t *next; /* next free block on the same list, or NULL */
+	uint32_t order; /* this free block is 2^order bytes long */
+} bblock_t;
+
+/* value set in the 'order' table when the block is not allocated:   */
+#define BBLOCK_FREE 0
+
+/* return ceil(log2(size)), i.e. the smallest n such that 2^n >= size.
+ * Sizes 0 and 1 both give 0. */
+static uint8_t clog2(uint64_t size)
+{
+	uint64_t remaining = size;
+	uint32_t shift = 32;
+	uint8_t lg = 0;
+
+	/* first floor(log2(size)): try shifts of 32, 16, 8, 4, 2 and 1 bits.
+	 * Whenever something remains above 'shift' bits, drop the low part
+	 * and account for it in the result */
+	while (shift) {
+		if (remaining >> shift) {
+			remaining >>= shift;
+			lg += shift;
+		}
+		shift >>= 1;
+	}
+
+	/* then round up if size is not an exact power of 2: */
+	if (size > ((uint64_t)1 << lg))
+		lg++;
+
+	return lg;
+}
+
+/*
+ * given a bblock address, and an order value, returns the address
+ * of the buddy bblock (the other "half"): the buddy of a 2^order bytes
+ * block is found by flipping bit 'order' of the block offset within the
+ * user area.
+ */
+static inline bblock_t *get_bblock_buddy(pool_t *bpool, bblock_t *addr,
+					 uint8_t order)
+{
+	uintptr_t b;
+
+	b = ((uintptr_t)addr - (uintptr_t)bpool->ctrl.user_addr);
+	/* the shift must be done on a pointer sized integer: a plain
+	 * 'int' 1 overflows as soon as order reaches 31 */
+	b ^= (uintptr_t)1 << order;
+	return (void *)(b + (uintptr_t)bpool->ctrl.user_addr);
+}
+
+/*
+ * translate a buddy block address into its number, i.e. its index in the
+ * 'alloced_order' table: this is the offset of the block within the user
+ * area, counted in 2^min_order bytes units.
+ */
+static inline uintptr_t get_bblock_nr(pool_t *bpool, void *addr)
+{
+	uintptr_t offset = (uintptr_t)addr - (uintptr_t)bpool->ctrl.user_addr;
+
+	return offset >> bpool->ctrl.min_order;
+}
+
+/* unlink bblock from the free list of rank 'order'. The bblock is expected
+ * to be on that list: an error is logged if it cannot be found there. */
+static inline void remove_from_list(pool_t *bpool, uint8_t order,
+				    bblock_t *bblock)
+{
+	bblock_t *before = NULL; /* element preceding 'walk', NULL at head */
+	bblock_t *walk;
+
+	for (walk = bpool->ctrl.free_heads[order]; walk;
+	     before = walk, walk = walk->next) {
+		if (walk != bblock)
+			continue;
+
+		/* found: bypass it, from the head or from its predecessor */
+		if (before)
+			before->next = walk->next;
+		else
+			bpool->ctrl.free_heads[order] = walk->next;
+		return;
+	}
+
+	ODP_ERR("List corrupted\n");
+}
+
+/*
+ * create a buddy memory pool of given size (actually rounded up to the
+ * next power of 2), where allocation will never be smaller than min_alloc.
+ * pool_name and flags are passed to _odp_ishm_reserve(); store_idx is the
+ * entry of pool_blk_idx[] where the reserved block index is recorded.
+ * returns a pointer to the created buddy pool, or NULL on error.
+ * The allocated area contains:
+ * - The _odp_ishm_pool_ctrl_t structure
+ * - The array of (order + 1) free list heads, indexed by block order
+ * - The array of 'order' values, remembering sizes of allocated bblocks
+ * - alignment to cache line
+ * - The user memory
+ */
+static pool_t *_odp_ishmbud_pool_create(const char *pool_name, int store_idx,
+					uint64_t size,
+					uint64_t min_alloc, int flags)
+{
+	uint8_t  order;          /* pool order = ceil(log2(size))         */
+	uint8_t  min_order;      /* pool min_order = ceil(log2(min_alloc))*/
+	uint32_t max_nb_bblock;  /* max number of bblock, when smallest   */
+	uint32_t control_sz;     /* size of control area                  */
+	uint32_t free_head_sz;   /* mem area needed for list heads        */
+	uint32_t saved_order_sz; /* mem area to remember given sizes      */
+	uint64_t user_sz;        /* 2^order bytes		          */
+	uint64_t total_sz;	 /* total size to request                 */
+	int	 blk_idx;	 /* as returned by _odp_ishm_reserve()    */
+	pool_t *bpool;
+	int i;
+	bblock_t *first_block;
+
+	/* a bblock_t must fit in the buffers for linked chain! */
+	if (min_alloc < sizeof(bblock_t))
+		min_alloc = sizeof(bblock_t);
+
+	/* pool order is such that 2^order >= size. same for min_order  */
+	order = clog2(size);
+	min_order = clog2(min_alloc);
+
+	/* check parameters obvious wishes: */
+	if (order >= 64)
+		return NULL;
+	if (order < min_order)
+		return NULL;
+	/* the number of bblocks is kept on 32 bits: */
+	if ((order - min_order) > 31)
+		return NULL;
+
+	/* at worst case, all bblocks have smallest (2^min_order) size  */
+	max_nb_bblock = (uint32_t)1 << (order - min_order);
+
+	/* space needed for the control area (padded to cache line size)*/
+	control_sz =
+		ODP_CACHE_LINE_SIZE_ROUNDUP(sizeof(_odp_ishm_pool_ctrl_t));
+
+	/* space needed for 'order' free bblock list heads:             */
+	/* Note that only lists from min_order to order are really used.*/
+	free_head_sz = ODP_CACHE_LINE_SIZE_ROUNDUP(sizeof(void *) *
+						   (order + 1));
+
+	/* space needed for order -i.e. size- storage of alloc'd bblock:*/
+	saved_order_sz = ODP_CACHE_LINE_SIZE_ROUNDUP(max_nb_bblock *
+						     sizeof(uint8_t));
+
+	/* space needed for user area is 2^order bytes (64 bits shift, as
+	 * order may well exceed 31): */
+	user_sz = (uint64_t)1 << order;
+
+	total_sz = control_sz +
+		   free_head_sz +
+		   saved_order_sz +
+		   user_sz;
+
+	/* allocate required memory: */
+	blk_idx = _odp_ishm_reserve(pool_name, total_sz, -1,
+				    ODP_CACHE_LINE_SIZE, flags, 0);
+	if (blk_idx < 0) {
+		ODP_ERR("_odp_ishm_reserve failed.");
+		return NULL;
+	}
+
+	bpool = _odp_ishm_address(blk_idx);
+	if (bpool == NULL) {
+		ODP_ERR("_odp_ishm_address failed.");
+		/* do not leak the block which was just reserved: */
+		_odp_ishm_free_by_index(blk_idx);
+		return NULL;
+	}
+
+	/* store in pool array (needed for look up): */
+	pool_blk_idx[store_idx] = blk_idx;
+
+	/* remember block index, needed when pool is destroyed */
+	bpool->ctrl.ishm_blk_idx = blk_idx;
+
+	/* remember element size: 0 means unknown size, i.e. buddy allocation*/
+	bpool->ctrl.element_sz = 0;
+
+	/* prepare mutex: */
+	odp_spinlock_init(&bpool->ctrl.lock);
+
+	/* initialise pointers and things... */
+	bpool->ctrl.order = order;
+	bpool->ctrl.min_order = min_order;
+	bpool->ctrl.free_heads =
+		(void *)((uintptr_t)bpool + control_sz);
+	bpool->ctrl.alloced_order =
+		(uint8_t *)((uintptr_t)bpool->ctrl.free_heads + free_head_sz);
+	bpool->ctrl.user_addr =
+		(void *)((uintptr_t)bpool->ctrl.alloced_order + saved_order_sz);
+
+	/* initialize all free lists to NULL, except the top biggest element.
+	 * The heads are indexed by block order, so every index below 'order'
+	 * has to be cleared (the allocator reads heads min_order..order): */
+	for (i = 0; i < order; i++)
+		bpool->ctrl.free_heads[i] = NULL;
+	bpool->ctrl.free_heads[order] = bpool->ctrl.user_addr;
+	first_block = (bblock_t *)bpool->ctrl.user_addr;
+	first_block->next = NULL;
+	first_block->order = order;
+
+	/* set all 'order' of allocated bblocks to free: */
+	memset(bpool->ctrl.alloced_order, BBLOCK_FREE, saved_order_sz);
+
+	return bpool;
+}
+
+/* allocate memory from the given buddy pool.
+ * The size is rounded up to the next power of 2, and to at least
+ * 2^min_order bytes.
+ * returns the address of the allocated block, or NULL if size is zero,
+ * larger than the whole pool, or if no large enough free block is left. */
+static void *_odp_ishmbud_alloc(pool_t *bpool, uint64_t size)
+{
+	uint32_t rq_order; /* requested order */
+	uint32_t try_order;
+	bblock_t *bblock;
+	bblock_t *buddy;
+	uintptr_t nr;
+
+	/* if size is zero or too big reject (either condition is enough,
+	 * and the pool size needs 64 bits as order may exceed 31): */
+	if ((!size) || (size > ((uint64_t)1 << bpool->ctrl.order))) {
+		ODP_ERR("Invalid alloc size (0 or larger than whole pool)\n");
+		return NULL;
+	}
+
+	/* compute ceil(log2(size)), to get the requested block order:    */
+	rq_order = clog2(size);
+
+	/* make sure the requested order is bigger (or same) as minimum!  */
+	if (rq_order < bpool->ctrl.min_order)
+		rq_order = bpool->ctrl.min_order;
+
+	/* mutex from here: */
+	odp_spinlock_lock(&bpool->ctrl.lock);
+
+	/* now, start trying to allocate a bblock of rq_order. If that
+	 * fails keep trying larger orders until pool order is reached    */
+	bblock = NULL;
+	for (try_order = rq_order; try_order <= bpool->ctrl.order;
+	     try_order++) {
+		if (bpool->ctrl.free_heads[try_order]) {
+			/* remove from list: */
+			bblock =
+				(bblock_t *)(bpool->ctrl.free_heads[try_order]);
+			bpool->ctrl.free_heads[try_order] = bblock->next;
+			break;
+		}
+	}
+
+	if (!bblock) {
+		odp_spinlock_unlock(&bpool->ctrl.lock);
+		ODP_ERR("Out of memory. (Buddy pool full)\n");
+		return NULL;
+	}
+
+	/* OK: we got a block, but possibly too large (if try_order>rq_order)
+	 * return the extra halves to the pool hence splitting the bblock at
+	 * each 'extra' order: */
+	while (try_order-- > rq_order) {
+		/* split: the upper half becomes a free block of try_order */
+		buddy = (bblock_t *)((uintptr_t)bblock +
+				     ((uintptr_t)1 << try_order));
+		buddy->order = try_order;
+		/* add to list: */
+		buddy->next = bpool->ctrl.free_heads[try_order];
+		bpool->ctrl.free_heads[try_order] = buddy;
+		/* mark as free (non allocated block get size 0): */
+		nr = get_bblock_nr(bpool, buddy);
+		bpool->ctrl.alloced_order[nr] = BBLOCK_FREE;
+	}
+
+	/* remember the size of the allocated block: */
+	nr = get_bblock_nr(bpool, bblock);
+	bpool->ctrl.alloced_order[nr] = rq_order;
+
+	/* and return the allocated block! */
+	odp_spinlock_unlock(&bpool->ctrl.lock);
+	return (void *)bblock;
+}
+
+/* free a previously allocated buffer from a given buddy pool.
+ * The freed block is merged with its buddy, repeatedly, as long as the
+ * buddy is free and of the same order.
+ * returns 0 on success (freeing NULL is accepted and does nothing), or -1
+ * if addr is not a valid block address of this pool or is not allocated. */
+static int _odp_ishmbud_free(pool_t *bpool, void *addr)
+{
+	uintptr_t user_start; /* start of user area */
+	uintptr_t user_stop;  /* stop  of user area (first byte after it) */
+	uintptr_t mask;	      /* 2^min_order - 1    */
+	bblock_t *bblock;     /* bblock being freed */
+	bblock_t *buddy;      /* buddy bblock of bblock being freed */
+	uint8_t order;	      /* order of block being freed */
+	uintptr_t nr;	      /* block number */
+
+	/* freeing NULL is regarded as OK, though without any effect:   */
+	if (!addr)
+		return 0;
+
+	user_start = (uintptr_t)bpool->ctrl.user_addr;
+	user_stop  = user_start + ((uintptr_t)1 << bpool->ctrl.order);
+	mask = ((uintptr_t)1 << bpool->ctrl.min_order) - 1;
+
+	/* some sanity checks: check that given address is within pool and
+	 * that relative address has 2^min_order granularity. user_stop is
+	 * already outside the pool, so it must be rejected as well (it would
+	 * otherwise index one entry past the end of the 'order' table): */
+	if (((uintptr_t)addr < user_start) ||
+	    ((uintptr_t)addr >= user_stop)  ||
+	    (((uintptr_t)addr - user_start) & mask)) {
+		ODP_ERR("Invalid address to be freed\n");
+		return -1;
+	}
+
+	/* mutex from here: */
+	odp_spinlock_lock(&bpool->ctrl.lock);
+
+	/* collect saved block order and make sure bblock was allocated */
+	bblock = (bblock_t *)addr;
+	nr = get_bblock_nr(bpool, bblock);
+	order = bpool->ctrl.alloced_order[nr];
+	if (order == BBLOCK_FREE) {
+		ODP_ERR("Double free error\n");
+		odp_spinlock_unlock(&bpool->ctrl.lock);
+		return -1;
+	}
+
+	/* this looks like a valid free, mark at least this as free:   */
+	bpool->ctrl.alloced_order[nr] = BBLOCK_FREE;
+
+	/* go up in orders, trying to merge buddies... */
+	while (order < bpool->ctrl.order) {
+		buddy = get_bblock_buddy(bpool, bblock, order);
+		/*if buddy is not free: no further merge possible */
+		nr = get_bblock_nr(bpool, buddy);
+		if (bpool->ctrl.alloced_order[nr] != BBLOCK_FREE)
+			break;
+		/*merge only bblock of same order:*/
+		if (buddy->order != order)
+			break;
+		/*merge: remove buddy from free list: */
+		remove_from_list(bpool, order, buddy);
+		/*merge: make sure we point at start of block: */
+		if (bblock > buddy)
+			bblock = buddy;
+		/*merge: size of block has doubled: increase order: */
+		order++;
+	}
+
+	/* insert the bblock into its correct free block list: */
+	bblock->next = bpool->ctrl.free_heads[order];
+	bpool->ctrl.free_heads[order] = bblock;
+
+	/* remember the (possibly now merged) block order: */
+	bblock->order = order;
+
+	odp_spinlock_unlock(&bpool->ctrl.lock);
+	return 0;
+}
+
+/* print buddy pool status and perform sanity checks: for each order, the
+ * free blocks (from the free lists) and the allocated blocks (from the
+ * 'order' table) are counted, and the total is compared to the pool size.
+ * returns 0 if the pool looks consistent, -1 otherwise. */
+static int _odp_ishmbud_pool_status(const char *title, pool_t *bpool)
+{
+	uint8_t order, pool_order, pool_min_order;
+	uint64_t free_q_nb_bblocks[64];
+	uint64_t allocated_nb_bblocks[64];
+	uint64_t free_q_nb_bblocks_bytes[64];
+	uint64_t allocated_nb_bblocks_bytes[64];
+	uint64_t total_bytes_free;
+	uint64_t total_bytes_allocated;
+	uint64_t pool_sz;	/* 2^pool_order bytes */
+	uint64_t max_nb_bblock;	/* number of entries in the 'order' table */
+	uint64_t nr;
+	bblock_t *bblock;
+	int res = 0;
+
+	odp_spinlock_lock(&bpool->ctrl.lock);
+
+	pool_order = bpool->ctrl.order;
+	pool_min_order = bpool->ctrl.min_order;
+
+	ODP_DBG("\n%s\n", title);
+	ODP_DBG("Pool Type: BUDDY\n");
+	ODP_DBG("pool order: %d\n", (int)pool_order);
+	ODP_DBG("pool min_order: %d\n", (int)pool_min_order);
+
+	/* the per order tables above have 64 entries (orders 0..63) and
+	 * sizes are computed on 64 bits: a pool whose order is 64 or more
+	 * cannot be handled (nor reached on 64 bit machines!). The minimum
+	 * order cannot exceed the pool order either. */
+	if ((pool_order >= 64) || (pool_min_order > pool_order)) {
+		odp_spinlock_unlock(&bpool->ctrl.lock);
+		return -1;
+	}
+
+	/* orders are now known to be sane: shifts below cannot overflow */
+	pool_sz = (uint64_t)1 << pool_order;
+	max_nb_bblock = (uint64_t)1 << (pool_order - pool_min_order);
+	ODP_DBG("pool size: %" PRIu64 " (bytes)\n", pool_sz);
+
+	total_bytes_free = 0;
+	total_bytes_allocated = 0;
+
+	/* for each queue */
+	for (order = pool_min_order; order <= pool_order; order++) {
+		free_q_nb_bblocks[order] = 0;
+		free_q_nb_bblocks_bytes[order] = 0;
+		allocated_nb_bblocks[order] = 0;
+		allocated_nb_bblocks_bytes[order] = 0;
+
+		/* get the number of buffs in the free queue for this order: */
+		bblock = bpool->ctrl.free_heads[order];
+		while (bblock) {
+			free_q_nb_bblocks[order]++;
+			free_q_nb_bblocks_bytes[order] +=
+				((uint64_t)1 << order);
+			bblock = bblock->next;
+		}
+
+		total_bytes_free += free_q_nb_bblocks_bytes[order];
+
+		/* get the number of allocated buffers of this order */
+		for (nr = 0; nr < max_nb_bblock; nr++) {
+			if (bpool->ctrl.alloced_order[nr] == order)
+				allocated_nb_bblocks[order]++;
+		}
+
+		allocated_nb_bblocks_bytes[order] =
+			allocated_nb_bblocks[order] * ((uint64_t)1 << order);
+
+		total_bytes_allocated += allocated_nb_bblocks_bytes[order];
+
+		ODP_DBG("Order %d => Free: %" PRIu64 " buffers "
+			"(%" PRIu64" bytes)   "
+			"Allocated %" PRIu64 " buffers (%" PRIu64 "  bytes)   "
+			"Total: %" PRIu64 "  bytes\n",
+			(int)order, free_q_nb_bblocks[order],
+			free_q_nb_bblocks_bytes[order],
+			allocated_nb_bblocks[order],
+			allocated_nb_bblocks_bytes[order],
+			free_q_nb_bblocks_bytes[order] +
+			allocated_nb_bblocks_bytes[order]);
+	}
+
+	ODP_DBG("Allocated space: %" PRIu64 " (bytes)\n",
+		total_bytes_allocated);
+	ODP_DBG("Free space: %" PRIu64 " (bytes)\n", total_bytes_free);
+
+	/* every byte of the pool must be either free or allocated: */
+	if (total_bytes_free + total_bytes_allocated != pool_sz) {
+		ODP_DBG("Lost bytes on this pool!\n");
+		res = -1;
+	}
+
+	if (res)
+		ODP_DBG("Pool inconsistent!\n");
+
+	odp_spinlock_unlock(&bpool->ctrl.lock);
+	return res;
+}
+
+/* section 2: functions for slab allocation:                                  */
+
+/* free slab blocks contain the following structure, used to link the
+ * free blocks together. The structure is written at the very start of
+ * the free element itself.
+ */
+typedef struct sblock_t {
+	struct sblock_t *next; /* next free element, or NULL at list end */
+} sblock_t;
+
+/*
+ * create a slab memory pool of given size (rounded up to the nearest integer
+ * number of element, where each element has size 'elt_size').
+ * pool_name and flags are passed to _odp_ishm_reserve(); store_idx is the
+ * entry of pool_blk_idx[] where the reserved block index is recorded.
+ * returns a pointer to the created slab pool, or NULL on error (including
+ * a zero size or element size).
+ * The allocated area contains:
+ * - The _odp_ishm_pool_ctrl_t structure
+ * - alignment to cache line
+ * - The user memory
+ */
+static pool_t *_odp_ishmslab_pool_create(const char *pool_name, int store_idx,
+					 uint64_t size,
+					 uint64_t elt_size, int flags)
+{
+	uint32_t nb_sblock;      /* number of elements in the pool        */
+	uint32_t control_sz;     /* size of control area                  */
+	uint64_t total_sz;	 /* total size to request                 */
+	uint64_t user_sz;        /* nb_sblock * elt_size bytes            */
+	uint64_t scale;          /* element size increase factor          */
+	int	 blk_idx;	 /* as returned by _odp_ishm_reserve()    */
+	pool_t *spool;
+	unsigned int i;
+	sblock_t *block;
+
+	/* an empty pool or zero sized elements make no sense, and would
+	 * break the divisions and the free list set-up done below:     */
+	if ((!size) || (!elt_size)) {
+		ODP_ERR("Invalid slab pool size or element size\n");
+		return NULL;
+	}
+
+	/* a sblock_t must fit in the buffers for linked chain! (the minimum
+	 * used is sizeof(bblock_t), which is large enough for a sblock_t).
+	 * When the element size is raised, the pool size is scaled by
+	 * ceil(minimum / requested element size) so that the pool can still
+	 * hold as many elements as requested: the factor must therefore be
+	 * computed before elt_size is overwritten. */
+	if (elt_size < sizeof(bblock_t)) {
+		scale = sizeof(bblock_t) / elt_size +
+			((sizeof(bblock_t) % elt_size) ? 1 : 0);
+		size = size * scale;
+		elt_size = sizeof(bblock_t);
+	}
+
+	/* nb of element fitting in the pool is just ceil(size/elt_size)*/
+	nb_sblock = (size / elt_size) + ((size % elt_size) ? 1 : 0);
+
+	/* space needed for the control area (padded to cache line size)*/
+	control_sz =
+		ODP_CACHE_LINE_SIZE_ROUNDUP(sizeof(_odp_ishm_pool_ctrl_t));
+
+	/* space needed for user area is : */
+	user_sz = nb_sblock * elt_size;
+
+	total_sz = control_sz +
+		   user_sz;
+
+	/* allocate required memory: */
+	blk_idx = _odp_ishm_reserve(pool_name, total_sz, -1,
+				    ODP_CACHE_LINE_SIZE, flags, 0);
+	if (blk_idx < 0) {
+		ODP_ERR("_odp_ishm_reserve failed.");
+		return NULL;
+	}
+
+	spool = _odp_ishm_address(blk_idx);
+	if (spool == NULL) {
+		ODP_ERR("_odp_ishm_address failed.");
+		/* do not leak the block which was just reserved: */
+		_odp_ishm_free_by_index(blk_idx);
+		return NULL;
+	}
+
+	/* store in pool array (needed for look up): */
+	pool_blk_idx[store_idx] = blk_idx;
+
+	/* remember block index, needed when pool is destroyed */
+	spool->ctrl.ishm_blk_idx = blk_idx;
+
+	/* remember element (sblock) size and their number: */
+	spool->ctrl.element_sz = elt_size;
+	spool->ctrl.nb_elem = nb_sblock;
+
+	/* prepare mutex: */
+	odp_spinlock_init(&spool->ctrl.lock);
+
+	/* initialise pointers and things... */
+	spool->ctrl.user_addr =
+		(void *)((uintptr_t)spool + control_sz);
+
+	/* initialise the free list with the list of all elements: each
+	 * element points to the one following it in memory...          */
+	spool->ctrl.free_head = spool->ctrl.user_addr;
+	for (i = 0; i < nb_sblock - 1; i++) {
+		block = (sblock_t *)((uintptr_t)spool->ctrl.user_addr +
+				     i * (uintptr_t)elt_size);
+		block->next = (sblock_t *)((uintptr_t)block +
+					   (uintptr_t)elt_size);
+	}
+	/* ...and the last element terminates the list: */
+	block = (sblock_t *)((uintptr_t)spool->ctrl.user_addr +
+			     (nb_sblock - 1) * (uintptr_t)elt_size);
+	block->next = NULL;
+
+	return spool;
+}
+
+/* take one element from the given slab pool.
+ * returns the element address, or NULL if size is larger than the pool
+ * element size or if no free element is left. */
+static void *_odp_ishmslab_alloc(pool_t *spool, uint64_t size)
+{
+	sblock_t *head;
+
+	if (spool->ctrl.element_sz < size)
+		return NULL;
+
+	/* pop the first element of the free list, if any: */
+	odp_spinlock_lock(&spool->ctrl.lock);
+	head = (sblock_t *)spool->ctrl.free_head;
+	if (head)
+		spool->ctrl.free_head = head->next;
+	odp_spinlock_unlock(&spool->ctrl.lock);
+
+	if (!head)
+		ODP_ERR("Out of memory. (Slab pool full)\n");
+
+	return head;
+}
+
+/* free a previously allocated buffer from a given slab pool: the element
+ * is pushed back at the head of the pool free list.
+ * returns 0 on success (freeing NULL is accepted and does nothing), or -1
+ * if addr is not the address of an element of this pool. */
+static int _odp_ishmslab_free(pool_t *spool, void *addr)
+{
+	uintptr_t user_start; /* start of user area */
+	uintptr_t user_stop;  /* stop  of user area (first byte after it) */
+	sblock_t *block;
+
+	/* freeing NULL is regarded as OK, though without any effect:   */
+	if (!addr)
+		return 0;
+
+	user_start = (uintptr_t)spool->ctrl.user_addr;
+	user_stop  = user_start + spool->ctrl.element_sz * spool->ctrl.nb_elem;
+
+	/* some sanity checks: check that given address is within pool and
+	 * that relative address has element_sz granularity. user_stop is
+	 * already outside the pool, so it must be rejected as well (the
+	 * chaining below would otherwise write past the end of the pool): */
+	if (((uintptr_t)addr < user_start) ||
+	    ((uintptr_t)addr >= user_stop)  ||
+	    (((uintptr_t)addr - user_start) % spool->ctrl.element_sz)) {
+		ODP_ERR("Invalid address to be freed\n");
+		return -1;
+	}
+
+	odp_spinlock_lock(&spool->ctrl.lock);
+	block = (sblock_t *)addr;
+	block->next = (sblock_t *)spool->ctrl.free_head;
+	spool->ctrl.free_head = addr;
+	odp_spinlock_unlock(&spool->ctrl.lock);
+
+	return 0;
+}
+
+/* print the status of a slab pool: its total size and the number of
+ * elements currently on its free list. Always returns 0. */
+static int _odp_ishmslab_pool_status(const char *title, pool_t *spool)
+{
+	uint64_t free_count = 0; /* elements found on the free list */
+	sblock_t *elt;
+
+	odp_spinlock_lock(&spool->ctrl.lock);
+
+	ODP_DBG("\n%s\n", title);
+	ODP_DBG("Pool Type: FIXED SIZE\n");
+	ODP_DBG("pool size: %" PRIu64 " (bytes)\n",
+		spool->ctrl.nb_elem * spool->ctrl.element_sz);
+
+	/* walk the free list to count the available elements: */
+	for (elt = (sblock_t *)spool->ctrl.free_head; elt; elt = elt->next)
+		free_count++;
+
+	ODP_DBG("%" PRIu64 "/%" PRIu64 " available elements.\n",
+		free_count, spool->ctrl.nb_elem);
+
+	odp_spinlock_unlock(&spool->ctrl.lock);
+	return 0;
+}
+
+/* section 3: common, external functions:                                     */
+
+/* create a pool: a buddy pool (variable block size) if min_alloc is 0 or if
+ * the integer quotient max_alloc/min_alloc exceeds 2, a slab pool (fixed
+ * element size of max_alloc bytes) otherwise.
+ * returns the created pool, or NULL on error. */
+pool_t *_odp_ishm_pool_create(const char *pool_name, uint64_t size,
+			      uint64_t min_alloc, uint64_t max_alloc, int flags)
+{
+	int slot = 0;
+	uint64_t nb_elem;
+
+	if (max_alloc < min_alloc) {
+		ODP_ERR("invalid parameter: min_alloc > max_alloc");
+		return NULL;
+	}
+
+	/* look for the first unused (negative) entry in pool_blk_idx */
+	while ((slot < MAX_NB_POOL) && (pool_blk_idx[slot] >= 0))
+		slot++;
+	if (slot == MAX_NB_POOL) {
+		ODP_ERR("Max number of pool reached (MAX_NB_POOL)");
+		return NULL;
+	}
+
+	/* alloc sizes vary too much for fixed size elements: go for a buddy
+	 * allocator. As its efficiency may go as low as 50%, twice the
+	 * required size is requested, to make sure the user request can be
+	 * satisfied */
+	if ((!min_alloc) || ((max_alloc / min_alloc) > 2))
+		return _odp_ishmbud_pool_create(pool_name, slot,
+						2 * size,
+						BUDDY_MIN_SIZE, flags);
+
+	/* min and max are close enough: go for a constant size allocator,
+	 * with room for ceil(size / min_alloc) elements of max_alloc bytes
+	 * so that the required size fits even when only min_alloc
+	 * allocations are performed */
+	nb_elem = size / min_alloc;
+	if (size % min_alloc)
+		nb_elem++;
+
+	return _odp_ishmslab_pool_create(pool_name, slot,
+					 nb_elem * max_alloc,
+					 max_alloc, flags);
+}
+
+/* destroy a pool: its entry in pool_blk_idx (if any) is released and the
+ * underlying ishm block is freed. No operation on the pool should follow.
+ * returns the value returned by _odp_ishm_free_by_index(). */
+int _odp_ishm_pool_destroy(pool_t *pool)
+{
+	int blk_idx = pool->ctrl.ishm_blk_idx;
+	int slot = 0;
+
+	/* find the entry holding this pool's block index... */
+	while ((slot < MAX_NB_POOL) && (pool_blk_idx[slot] != blk_idx))
+		slot++;
+
+	/* ...and mark it as unused: */
+	if (slot < MAX_NB_POOL)
+		pool_blk_idx[slot] = -1;
+
+	return _odp_ishm_free_by_index(blk_idx);
+}
+
+/* allocate a buffer from a pool, using the allocator matching the pool
+ * type (element_sz is 0 for buddy pools, non zero for slab pools) */
+void *_odp_ishm_pool_alloc(_odp_ishm_pool_t *pool, uint64_t size)
+{
+	if (pool->ctrl.element_sz)
+		return _odp_ishmslab_alloc(pool, size);
+
+	return _odp_ishmbud_alloc(pool, size);
+}
+
+/* free a previously allocated buffer from a pool, using the free function
+ * matching the pool type (element_sz is 0 for buddy pools, non zero for
+ * slab pools) */
+int _odp_ishm_pool_free(_odp_ishm_pool_t *pool, void *addr)
+{
+	if (pool->ctrl.element_sz)
+		return _odp_ishmslab_free(pool, addr);
+
+	return _odp_ishmbud_free(pool, addr);
+}
+
+/* Print a pool status, using the status function matching the pool type
+ * (element_sz is 0 for buddy pools, non zero for slab pools) */
+int _odp_ishm_pool_status(const char *title, _odp_ishm_pool_t *pool)
+{
+	if (pool->ctrl.element_sz)
+		return _odp_ishmslab_pool_status(title, pool);
+
+	return _odp_ishmbud_pool_status(title, pool);
+}
+
+/* mark every entry of the pool lookup table as unused (-1) */
+void _odp_ishm_pool_init(void)
+{
+	int slot = MAX_NB_POOL;
+
+	while (slot--)
+		pool_blk_idx[slot] = -1;
+}
+
+/* look up a pool by name.
+ * returns the pool address, or NULL if no ishm block has that name or if
+ * the block bearing that name is not registered as a pool. */
+_odp_ishm_pool_t *_odp_ishm_pool_lookup(const char *pool_name)
+{
+	int slot;
+	int blk_idx = _odp_ishm_lookup_by_name(pool_name);
+
+	/* no _ishm block with the given name: */
+	if (blk_idx < 0)
+		return NULL;
+
+	/* a block with that name exists: it is only returned if it is
+	 * one of the registered pools */
+	for (slot = 0; slot < MAX_NB_POOL; slot++) {
+		if (pool_blk_idx[slot] != blk_idx)
+			continue;
+		return  _odp_ishm_address(blk_idx);
+	}
+
+	return NULL;
+}
diff --git a/platform/linux-generic/include/_ishmpool_internal.h b/platform/linux-generic/include/_ishmpool_internal.h
new file mode 100644
index 0000000..5c5304a
--- /dev/null
+++ b/platform/linux-generic/include/_ishmpool_internal.h
@@ -0,0 +1,56 @@ 
+/* Copyright (c) 2017, Linaro Limited
+ * All rights reserved.
+ *
+ * SPDX-License-Identifier:     BSD-3-Clause
+ */
+
+#ifndef ODP_ISHMBUDDY_INTERNAL_H_
+#define ODP_ISHMBUDDY_INTERNAL_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdint.h>
+#include <odp/api/spinlock.h>
+
+typedef struct _odp_ishm_pool_ctrl_t {
+	uint32_t element_sz;    /* 0 for buddy pools, >0 for slab.           */
+	int ishm_blk_idx;       /* block index returned by _odp_ishm_reserve()*/
+	odp_spinlock_t  lock;   /* for pool access mutex		     */
+	void *user_addr;	/* user pool area ('real user pool')         */
+	union {
+		struct {	/* things needed for buddy pools:	     */
+			uint8_t order;	/* pool is 2^order bytes long	     */
+			uint8_t min_order; /*alloc won't go below 2^min_order*/
+			void **free_heads; /* free list heads, one per order */
+			uint8_t *alloced_order;	/* order of blocks, 0=free   */
+		};
+		struct {	/* things needed for slab pools:	     */
+			void *free_head; /* free element list head	     */
+			uint64_t nb_elem;/* total number of elements in pool */
+		};
+	};
+} _odp_ishm_pool_ctrl_t;
+
+typedef struct _odp_ishm_pool_t {
+	_odp_ishm_pool_ctrl_t ctrl;	/* control part			     */
+	uint8_t mem[1];		/* start of area for heads, saved orders, data*/
+} _odp_ishm_pool_t;
+
+_odp_ishm_pool_t *_odp_ishm_pool_create(const char *pool_name,
+					uint64_t size,
+					uint64_t min_alloc,
+					uint64_t max_alloc, int flags);
+int _odp_ishm_pool_destroy(_odp_ishm_pool_t *pool);
+void *_odp_ishm_pool_alloc(_odp_ishm_pool_t *pool, uint64_t size);
+int _odp_ishm_pool_free(_odp_ishm_pool_t *pool, void *addr);
+int _odp_ishm_pool_status(const char *title, _odp_ishm_pool_t *pool);
+_odp_ishm_pool_t *_odp_ishm_pool_lookup(const char *pool_name);
+void _odp_ishm_pool_init(void);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif