
[PATCHv4] Timer API and priority queue-based implementation

Message ID 1412263404-12041-1-git-send-email-ola.liljedahl@linaro.org
State New

Commit Message

Ola Liljedahl Oct. 2, 2014, 3:23 p.m. UTC
Signed-off-by: Ola Liljedahl <ola.liljedahl@linaro.org>
---
Fixed review comments for v3 from Anders R.
* Example code snippets use @code/@endcode.
* Added some missing doxygen comments.
* Updated some comments.
* Reverted year in copyright notices.
* Added odp_likely() hint.
* Made some variables self-descriptive and removed redundant comments.
Changed to use ticket locks instead of spin locks (ticket locks are more fair).
Changed to use ODP_ABORT() which has become available since the last patch.
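
The locking change can be pictured with a minimal sketch (the struct and function
names below are illustrative, not the patch's exact internals; the odp_ticketlock_*
calls are the linux-generic API that odp_timer.c now includes):

#include <odp_ticketlock.h>

/* Illustrative sketch only: a shared timer-pool operation guarded by a
 * ticket lock instead of a spinlock. Ticket locks hand out the lock in
 * FIFO order, so no worker thread can be starved on a busy timer pool. */
typedef struct {
	odp_ticketlock_t lock;   /* was a spinlock in v3 of the patch */
	uint64_t cur_tick;
} shared_pool_sketch_t;

static void sketch_init(shared_pool_sketch_t *p)
{
	odp_ticketlock_init(&p->lock);
	p->cur_tick = 0;
}

static void sketch_tick(shared_pool_sketch_t *p)
{
	odp_ticketlock_lock(&p->lock);   /* waiters are served in arrival order */
	p->cur_tick++;
	/* ... pop expired timers from the priority queue and enqueue timeouts ... */
	odp_ticketlock_unlock(&p->lock);
}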

 example/timer/odp_timer_test.c                     | 125 +--
 platform/linux-generic/Makefile.am                 |   1 +
 platform/linux-generic/include/api/odp_timer.h     | 570 +++++++++++--
 .../include/odp_priority_queue_internal.h          | 108 +++
 .../linux-generic/include/odp_timer_internal.h     |  71 +-
 platform/linux-generic/odp_priority_queue.c        | 283 +++++++
 platform/linux-generic/odp_timer.c                 | 923 ++++++++++++++-------
 test/api_test/odp_timer_ping.c                     |  73 +-
 8 files changed, 1648 insertions(+), 506 deletions(-)
 create mode 100644 platform/linux-generic/include/odp_priority_queue_internal.h
 create mode 100644 platform/linux-generic/odp_priority_queue.c
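
For reviewers who want the shape of the new API without reading the whole header
diff, here is a condensed, non-normative usage sketch assembled from the calls
introduced below (error handling trimmed; buffer_pool, queue, buf and user_context
are assumed to be set up by the application, as in example/timer/odp_timer_test.c):

#include <odp.h>   /* pulls in the revised odp_timer.h */

static void timer_api_sketch(odp_buffer_pool_t buffer_pool, odp_queue_t queue,
			     odp_buffer_t buf, void *user_context)
{
	/* One pool: 1 ms resolution, timeouts up to 60 s, 1000 shared timers */
	odp_timer_pool_t tp = odp_timer_pool_create("sketch", buffer_pool,
						    1000000, 0,
						    60ULL * 1000000000ULL,
						    1000, true, ODP_CLOCK_CPU);
	odp_timer_pool_start();

	/* Allocate a timer bound to a destination queue and arm it for 1 s */
	odp_timer_t tim = odp_timer_alloc(tp, queue, user_context);
	odp_timer_set_rel(tim, odp_timer_ns_to_tick(tp, 1000000000ULL));

	/* Later: a timeout arrives on 'queue' as buffer 'buf' */
	odp_timer_tmo_t tmo = odp_timeout_from_buffer(buf);
	if (odp_timer_tmo_status(tmo) == ODP_TMO_FRESH) {
		/* fresh timeout: do the application work */
	}
	odp_timer_return_tmo(tmo);   /* every received timeout must be returned */

	/* Teardown */
	odp_timer_cancel(tim);
	odp_timer_free(tim);
	odp_timer_pool_destroy(tp);
}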

Comments

Mike Holmes Oct. 3, 2014, 6:57 p.m. UTC | #1
On 2 October 2014 11:23, Ola Liljedahl <ola.liljedahl@linaro.org> wrote:

> Signed-off-by: Ola Liljedahl <ola.liljedahl@linaro.org>
> ---
> Fixed review comments for v3 from Anders R.
> * Example code snippets use @code/@endcode.
> * Added some missing doxygen comments.
> * Updated some comments.
> * Reverted year in copyright notices.
> * Added odp_likely() hint.
> * Made some variables self-descriptive and removed redundant comments.
> Changed to use ticket locks instead of spin locks (ticket locks are more
> fair).
> Changed to use ODP_ABORT() which has become available since the last patch.
>
>  example/timer/odp_timer_test.c                     | 125 +--
>

Should this "test"  be under odp/test instead of odp/example, or should the
"test " be renamed if it makes a good example ?

>  platform/linux-generic/Makefile.am                 |   1 +
>  platform/linux-generic/include/api/odp_timer.h     | 570 +++++++++++--
>  .../include/odp_priority_queue_internal.h          | 108 +++
>  .../linux-generic/include/odp_timer_internal.h     |  71 +-
>  platform/linux-generic/odp_priority_queue.c        | 283 +++++++
>  platform/linux-generic/odp_timer.c                 | 923
> ++++++++++++++-------
>  test/api_test/odp_timer_ping.c                     |  73 +-
>  8 files changed, 1648 insertions(+), 506 deletions(-)
>  create mode 100644
> platform/linux-generic/include/odp_priority_queue_internal.h
>  create mode 100644 platform/linux-generic/odp_priority_queue.c
>
> diff --git a/example/timer/odp_timer_test.c
> b/example/timer/odp_timer_test.c
> index 6e1715d..750d785 100644
> --- a/example/timer/odp_timer_test.c
> +++ b/example/timer/odp_timer_test.c
> @@ -41,67 +41,89 @@ typedef struct {
>  /** @private Barrier for test synchronisation */
>  static odp_barrier_t test_barrier;
>
> -/** @private Timer handle*/
> -static odp_timer_t test_timer;
> +/** @private Timer pool handle */
> +static odp_timer_pool_t tp;
>
>
> +/** @private Timeout status ASCII strings */
> +static const char *const status2str[] = {
> +       "fresh", "stale", "orphaned"
> +};
> +
>  /** @private test timeout */
>  static void test_abs_timeouts(int thr, test_args_t *args)
>  {
> -       uint64_t tick;
>         uint64_t period;
>         uint64_t period_ns;
>         odp_queue_t queue;
> -       odp_buffer_t buf;
> -       int num;
> +       int remain = args->tmo_count;
> +       odp_timer_t hdl;
> +       uint64_t tick;
>
>         ODP_DBG("  [%i] test_timeouts\n", thr);
>
>         queue = odp_queue_lookup("timer_queue");
>
>         period_ns = args->period_us*ODP_TIME_USEC;
> -       period    = odp_timer_ns_to_tick(test_timer, period_ns);
> +       period    = odp_timer_ns_to_tick(tp, period_ns);
>
>         ODP_DBG("  [%i] period %"PRIu64" ticks,  %"PRIu64" ns\n", thr,
>                 period, period_ns);
>
> -       tick = odp_timer_current_tick(test_timer);
> -
> -       ODP_DBG("  [%i] current tick %"PRIu64"\n", thr, tick);
> -
> -       tick += period;
> +       ODP_DBG("  [%i] current tick %"PRIu64"\n", thr,
> +               odp_timer_current_tick(tp));
>
> -       if (odp_timer_absolute_tmo(test_timer, tick, queue,
> ODP_BUFFER_INVALID)
> -           == ODP_TIMER_TMO_INVALID){
> -               ODP_DBG("Timeout request failed\n");
> +       odp_timer_t test_timer;
> +       test_timer = odp_timer_alloc(tp, queue, NULL);
> +       if (test_timer == ODP_TIMER_INVALID) {
> +               ODP_ERR("Failed to allocate timer\n");
>                 return;
>         }
> +       tick = odp_timer_current_tick(tp);
> +       hdl = test_timer;
>
> -       num = args->tmo_count;
> -
> -       while (1) {
> -               odp_timeout_t tmo;
> -
> -               buf = odp_schedule_one(&queue, ODP_SCHED_WAIT);
> -
> -               tmo  = odp_timeout_from_buffer(buf);
> -               tick = odp_timeout_tick(tmo);
> -
> -               ODP_DBG("  [%i] timeout, tick %"PRIu64"\n", thr, tick);
> -
> -               odp_buffer_free(buf);
> -
> -               num--;
> -
> -               if (num == 0)
> -                       break;
> +       while (remain != 0) {
> +               odp_buffer_t buf;
> +               odp_timer_tmo_t tmo;
> +               odp_timer_tmo_status_t stat;
> +               odp_timer_set_t rc;
>
>                 tick += period;
> +               rc = odp_timer_set_abs(hdl, tick);
> +               if (odp_unlikely(rc != ODP_TIMER_SET_SUCCESS)) {
> +                       ODP_ERR("odp_timer_set_abs() failed (%u)\n", rc);
> +                       abort();
> +               }
>
> -               odp_timer_absolute_tmo(test_timer, tick,
> -                                      queue, ODP_BUFFER_INVALID);
> +               /* Get the next ready buffer/timeout */
> +               buf = odp_schedule_one(&queue, ODP_SCHED_WAIT);
> +               if (odp_unlikely(odp_buffer_type(buf) !=
> +                                ODP_BUFFER_TYPE_TIMEOUT)) {
> +                       ODP_ERR("Unexpected buffer type received\n");
> +                       abort();
> +               }
> +               tmo = odp_timeout_from_buffer(buf);
> +               stat = odp_timer_tmo_status(tmo);
> +               tick = odp_timer_expiration(tmo);
> +               hdl = odp_timer_handle(tmo);
> +               ODP_DBG("  [%i] timeout, tick %"PRIu64", status %s\n",
> +                       thr, tick, status2str[stat]);
> +               /* if (stat == ODP_TMO_FRESH)  - do your thing! */
> +               if (odp_unlikely(stat == ODP_TMO_ORPHAN)) {
> +                       /* Some other thread freed the corresponding
> +                          timer after the timeout was already
> +                          enqueued */
> +                       /* Timeout handle is invalid, use our own timer */
> +                       hdl = test_timer;
> +               }
> +               /* Return timeout to timer manager, regardless of status */
> +               odp_timer_return_tmo(tmo);
> +               remain--;
>         }
>
> +       odp_timer_cancel(test_timer);
> +       odp_timer_free(test_timer);
> +
>         if (odp_queue_sched_type(queue) == ODP_SCHED_SYNC_ATOMIC)
>                 odp_schedule_release_atomic();
>  }
> @@ -155,7 +177,6 @@ static void print_usage(void)
>         printf("Options:\n");
>         printf("  -c, --count <number>    core count, core IDs start from
> 1\n");
>         printf("  -r, --resolution <us>   timeout resolution in usec\n");
> -       printf("  -m, --min <us>          minimum timeout in usec\n");
>         printf("  -x, --max <us>          maximum timeout in usec\n");
>         printf("  -p, --period <us>       timeout period in usec\n");
>         printf("  -t, --timeouts <count>  timeout repeat count\n");
> @@ -190,14 +211,14 @@ static void parse_args(int argc, char *argv[],
> test_args_t *args)
>         /* defaults */
>         args->core_count    = 0; /* all cores */
>         args->resolution_us = 10000;
> -       args->min_us        = args->resolution_us;
> +       args->min_us        = 0;
>         args->max_us        = 10000000;
>         args->period_us     = 1000000;
>         args->tmo_count     = 30;
>
>         while (1) {
>                 opt = getopt_long(argc, argv, "+c:r:m:x:p:t:h",
> -                                longopts, &long_index);
> +                                 longopts, &long_index);
>
>                 if (opt == -1)
>                         break;  /* No more options */
> @@ -321,10 +342,25 @@ int main(int argc, char *argv[])
>                                       ODP_BUFFER_TYPE_TIMEOUT);
>
>         if (pool == ODP_BUFFER_POOL_INVALID) {
> -               ODP_ERR("Pool create failed.\n");
> +               ODP_ERR("Buffer pool create failed.\n");
>                 return -1;
>         }
>
> +       tp = odp_timer_pool_create("timer_pool", pool,
> +                                  args.resolution_us*ODP_TIME_USEC,
> +                                  args.min_us*ODP_TIME_USEC,
> +                                  args.max_us*ODP_TIME_USEC,
> +                                  num_workers, /* One timer per worker */
> +                                  true,
> +                                  ODP_CLOCK_CPU);
> +       if (tp == ODP_TIMER_POOL_INVALID) {
> +               ODP_ERR("Timer pool create failed.\n");
> +               return -1;
> +       }
> +       odp_timer_pool_start();
> +
> +       odp_shm_print_all();
> +
>         /*
>          * Create a queue for timer test
>          */
> @@ -340,19 +376,6 @@ int main(int argc, char *argv[])
>                 return -1;
>         }
>
> -       test_timer = odp_timer_create("test_timer", pool,
> -                                     args.resolution_us*ODP_TIME_USEC,
> -                                     args.min_us*ODP_TIME_USEC,
> -                                     args.max_us*ODP_TIME_USEC);
> -
> -       if (test_timer == ODP_TIMER_INVALID) {
> -               ODP_ERR("Timer create failed.\n");
> -               return -1;
> -       }
> -
> -
> -       odp_shm_print_all();
> -
>         printf("CPU freq %"PRIu64" hz\n", odp_sys_cpu_hz());
>         printf("Cycles vs nanoseconds:\n");
>         ns = 0;
> diff --git a/platform/linux-generic/Makefile.am
> b/platform/linux-generic/Makefile.am
> index d076d50..71f923c 100644
> --- a/platform/linux-generic/Makefile.am
> +++ b/platform/linux-generic/Makefile.am
> @@ -59,6 +59,7 @@ __LIB__libodp_la_SOURCES = \
>                            odp_packet_flags.c \
>                            odp_packet_io.c \
>                            odp_packet_socket.c \
> +                          odp_priority_queue.c \
>                            odp_queue.c \
>                            odp_ring.c \
>                            odp_rwlock.c \
> diff --git a/platform/linux-generic/include/api/odp_timer.h
> b/platform/linux-generic/include/api/odp_timer.h
> index 01db839..82a1e05 100644
> --- a/platform/linux-generic/include/api/odp_timer.h
> +++ b/platform/linux-generic/include/api/odp_timer.h
> @@ -8,9 +8,193 @@
>  /**
>   * @file
>   *
> - * ODP timer
> + * ODP timer service
>   */
>
> +/** Example #1 Retransmission timer (e.g. for reliable connections)
> + @code
> +
> +//Create timer pool for reliable connections
> +#define SEC 1000000000ULL //1s expressed in nanoseconds
> +odp_timer_pool_t tcp_tpid =
> +    odp_timer_pool_create("TCP",
> +                         buffer_pool,
> +                         1000000,//resolution 1ms
> +                         0,//min tmo
> +                         7200 * SEC,//max tmo length 2hours
> +                         40000,//num_timers
> +                         true,//shared
> +                         ODP_CLOCK_CPU
> +                        );
> +if (tcp_tpid == ODP_TIMER_POOL_INVALID)
> +{
> +       //Failed to create timer pool => fatal error
> +}
> +
> +
> +//Setting up a new connection
> +//Allocate retransmission timeout (identical for supervision timeout)
> +//The user pointer points back to the connection context
> +conn->ret_tim = odp_timer_alloc(tcp_tpid, queue, conn);
> +//Check if all resources were successfully allocated
> +if (conn->ret_tim == ODP_TIMER_INVALID)
> +{
> +       //Failed to allocate all resources for connection => tear down
> +       //Destroy timeout
> +       odp_timer_free(conn->ret_tim);
> +       //Tear down connection
> +       ...
> +       return false;
> +}
> +//All necessary resources successfully allocated
> +//Compute initial retransmission length in timer ticks
> +conn->ret_len = odp_timer_ns_to_tick(tcp_tpid, 3 * SEC);//Per RFC1122
> +//Arm the timer
> +odp_timer_set_rel(conn->ret_tim, conn->ret_len);
> +return true;
> +
> +
> +//A packet for the connection has just been transmitted
> +//Reset the retransmission timer
> +odp_timer_set_rel(conn->ret_tim, conn->ret_len);
> +
> +
> +//A retransmission timeout buffer for the connection has been received
> +odp_timer_tmo_t tmo = odp_timeout_from_buffer(buf);
> +odp_timer_tmo_status_t stat = odp_timer_tmo_status(tmo);
> +//Check if timeout is fresh or stale, for stale timeouts we need to reset the
> +//timer
> +if (stat == ODP_TMO_FRESH) {
> +       //Fresh timeout, last transmitted packet not acked in time => retransmit
> +       //Get connection from timeout event
> +       conn = odp_timer_get_userptr(tmo);
> +       //Retransmit last packet (e.g. TCP segment)
> +       ...
> +       //Re-arm timer using original delta value
> +       odp_timer_set_rel(conn->ret_tim, conn->ret_len);
> +} else if (stat == ODP_TMO_ORPHAN) {
> +       odp_buffer_free(buf);
> +       return;//Get out of here
> +} // else stat == ODP_TMO_STALE, do nothing
> +//Finished processing, return timeout
> +odp_timer_return_tmo(tmo);
> +
> + @endcode
> +*/
> +
> +/** Example #2 Periodic tick
> + @code
> +
> +//Create timer pool for periodic ticks
> +odp_timer_pool_t per_tpid =
> +    odp_timer_pool_create("periodic-tick",
> +                         buffer_pool,
> +                         1,//resolution 1ns
> +                         1,//minimum timeout length 1ns
> +                         1000000000,//maximum timeout length 1s
> +                         10,//num_timers
> +                         false,//not shared
> +                         ODP_CLOCK_CPU
> +                        );
> +if (per_tpid == ODP_TIMER_POOL_INVALID)
> +{
> +    //Failed to create timer pool => fatal error
> +}
> +
> +
> +//Allocate periodic timer
> +tim_1733 = odp_timer_alloc(per_tpid, queue, NULL);
> +//Check if all resources were successfully allocated
> +if (tim_1733 == ODP_TIMER_INVALID)
> +{
> +       //Failed to allocate all resources => tear down
> +       //Destroy timeout
> +       odp_timer_free(tim_1733);
> +       //Tear down other state
> +       ...
> +       return false;
> +}
> +//All necessary resources successfully allocated
> +//Compute tick period in timer ticks
> +period_1733 = odp_timer_ns_to_tick(per_tpid, 1000000000U / 1733U);//1733Hz
> +//Compute when next tick should expire
> +next_1733 = odp_timer_current_tick(per_tpid) + period_1733;
> +//Arm the periodic timer
> +odp_timer_set_abs(tim_1733, next_1733);
> +return true;
> +
> +
> +
> +//A periodic timer timeout has been received
> +odp_timer_tmo_t tmo = odp_timeout_from_buffer(buf);
> +//Get status of timeout
> +odp_timer_tmo_status_t stat = odp_timer_tmo_status(tmo);
> +//We expect the timeout is always fresh since we are not calling set or cancel
> +//on active or expired timers in this example
> +assert(stat == ODP_TMO_FRESH);
> +//Do processing driven by timeout *before*
> +...
> +do {
> +       //Compute when the timer should expire next
> +       next_1733 += period_1733;
> +       //Check that this is in the future
> +       if (odp_likely(next_1733 > odp_timer_current_tick(per_tpid)))
> +               break;//Yes, done
> +       //Else we missed a timeout
> +       //Optionally attempt some recovery and/or logging of the problem
> +       ...
> +} while (0);
> +//Re-arm periodic timer
> +odp_timer_set_abs(tim_1733, next_1733);
> +//Or do processing driven by timeout *after*
> +...
> +odp_timer_return_tmo(tmo);
> +return;
> +
> + @endcode
> +*/
> +
> +/** Example #3 Tear down of flow
> + @code
> +//ctx points to flow context data structure owned by application
> +//Free the timer, cancelling any timeout
> +odp_timer_free(ctx->timer);//Any enqueued timeout will be made invalid
> +//Continue tearing down and eventually freeing context
> +...
> +return;
> +
> +//A timeout has been received, check status
> +odp_timer_tmo_t tmo = odp_timeout_from_buffer(buf);
> +switch (odp_timer_tmo_status(tmo))
> +{
> +    case ODP_TMO_FRESH :
> +       //A flow has timed out, tear it down
> +       //Find flow context from timeout
> +       ctx = (context *)odp_timer_get_userptr(tmo);
> +       //Free the supervision timer, any enqueued timeout will remain
> +       odp_timer_free(ctx->tim);
> +       //Free other flow related resources
> +       ...
> +       //Free the timeout buffer
> +       odp_buffer_free(buf);
> +       //Flow torn down
> +       break;
> +    case ODP_TMO_STALE :
> +       //A stale timeout was received, return timeout and update timer
> +       odp_timer_return_tmo(tmo);
> +       break;
> +    case ODP_TMO_ORPHAN :
> +       //Orphaned timeout (from previously torn down flow)
> +       //No corresponding timer or flow context
> +       //Free the timeout buffer
> +       odp_buffer_free(buf);
> +       break;
> +}
> +
> + @endcode
> +*/
> +
>  #ifndef ODP_TIMER_H_
>  #define ODP_TIMER_H_
>
> @@ -18,144 +202,408 @@
>  extern "C" {
>  #endif
>
> +#include <stdlib.h>
>  #include <odp_std_types.h>
>  #include <odp_buffer.h>
>  #include <odp_buffer_pool.h>
>  #include <odp_queue.h>
>
> +struct odp_timer_pool_s; /**< Forward declaration */
> +
> +/**
> +* ODP timer pool handle (platform dependent)
> +*/
> +typedef struct odp_timer_pool_s *odp_timer_pool_t;
> +
> +/**
> + * Invalid timer pool handle (platform dependent).
> + */
> +#define ODP_TIMER_POOL_INVALID NULL
>
>  /**
> - * ODP timer handle
> + * Clock sources for timers in timer pool.
>   */
> -typedef uint32_t odp_timer_t;
> +typedef enum odp_timer_clk_src_e {
> +       /** Use CPU clock as clock source for timers */
> +       ODP_CLOCK_CPU,
> +       /** Use external clock as clock source for timers */
> +       ODP_CLOCK_EXT
> +       /* Platform dependent which other clock sources exist */
> +} odp_timer_clk_src_t;
>
> -/** Invalid timer */
> -#define ODP_TIMER_INVALID 0
> +struct odp_timer_s; /**< Forward declaration */
>
> +/**
> +* ODP timer handle (platform dependent).
> +*/
> +typedef struct odp_timer_s *odp_timer_t;
>
>  /**
> - * ODP timeout handle
> + * Invalid timer handle (platform dependent).
>   */
> -typedef odp_buffer_t odp_timer_tmo_t;
> -
> -/** Invalid timeout */
> -#define ODP_TIMER_TMO_INVALID 0
> +#define ODP_TIMER_INVALID NULL
>
> +/**
> + * Return values of timer set calls.
> + */
> +typedef enum odp_timer_set_e {
> +       /** Timer set operation successful */
> +       ODP_TIMER_SET_SUCCESS,
> +       /** Timer set operation failed, expiration too early */
> +       ODP_TIMER_SET_TOOEARLY,
> +       /** Timer set operation failed, expiration too late */
> +       ODP_TIMER_SET_TOOLATE
> +} odp_timer_set_t;
>
>  /**
> - * Timeout notification
> + * Timeout event handle.
>   */
> -typedef odp_buffer_t odp_timeout_t;
> +typedef odp_buffer_t odp_timer_tmo_t;
>
> +/**
> + * Status of a timeout event.
> + */
> +typedef enum odp_timer_tmo_status_e {
> +       /** Timeout is fresh, process it and return timeout */
> +       ODP_TMO_FRESH,
> +       /** Timer reset or cancelled, just return timeout  */
> +       ODP_TMO_STALE,
> +       /** Timer deleted, return or free timeout */
> +       ODP_TMO_ORPHAN
> +} odp_timer_tmo_status_t;
>
>  /**
> - * Create a timer
> + * Create a timer pool
>   *
> - * Creates a new timer with requested properties.
> + * Create a new timer pool.
>   *
>   * @param name       Name
> - * @param pool       Buffer pool for allocating timeout notifications
> + * @param buf_pool   Buffer pool for allocating timeouts (and only timeouts)
>   * @param resolution Timeout resolution in nanoseconds
> - * @param min_tmo    Minimum timeout duration in nanoseconds
> - * @param max_tmo    Maximum timeout duration in nanoseconds
> + * @param min_tmo    Minimum relative timeout in nanoseconds
> + * @param max_tmo    Maximum relative timeout in nanoseconds
> + * @param num_timers Number of supported timers (minimum)
> + * @param shared     Shared or private timer pool.
> + *                Operations on shared timers will include the necessary
> + *                mutual exclusion, operations on private timers may not
> + *                (mutual exclusion is the responsibility of the caller).
> + * @param clk_src    Clock source to use
>   *
> - * @return Timer handle if successful, otherwise ODP_TIMER_INVALID
> + * @return Timer pool handle if successful, otherwise ODP_TIMER_POOL_INVALID
> + * and errno set
>   */
> -odp_timer_t odp_timer_create(const char *name, odp_buffer_pool_t pool,
> -                            uint64_t resolution, uint64_t min_tmo,
> -                            uint64_t max_tmo);
> +odp_timer_pool_t
> +odp_timer_pool_create(const char *name,
> +                     odp_buffer_pool_t buf_pool,
> +                     uint64_t resolution,
> +                     uint64_t min_tmo,
> +                     uint64_t max_tmo,
> +                     uint32_t num_timers,
> +                     bool shared,
> +                     odp_timer_clk_src_t clk_src);
> +
> +/**
> + * Start a timer pool
> + *
> + * Start all created timer pools, enabling the allocation of timers.
> + * The purpose of this call is to coordinate the creation of multiple timer
> + * pools that may use the same underlying HW resources.
> + * This function may be called multiple times.
> + */
> +void odp_timer_pool_start(void);
> +
> +/**
> + * Destroy a timer pool
> + *
> + * Destroy a timer pool, freeing all resources.
> + * All timers must have been freed.
> + *
> + * @param tpid  Timer pool identifier
> + */
> +void odp_timer_pool_destroy(odp_timer_pool_t tpid);
>
>  /**
>   * Convert timer ticks to nanoseconds
>   *
> - * @param timer Timer
> + * @param tpid  Timer pool identifier
>   * @param ticks Timer ticks
>   *
>   * @return Nanoseconds
>   */
> -uint64_t odp_timer_tick_to_ns(odp_timer_t timer, uint64_t ticks);
> +uint64_t odp_timer_tick_to_ns(odp_timer_pool_t tpid, uint64_t ticks);
>
>  /**
>   * Convert nanoseconds to timer ticks
>   *
> - * @param timer Timer
> + * @param tpid  Timer pool identifier
>   * @param ns    Nanoseconds
>   *
>   * @return Timer ticks
>   */
> -uint64_t odp_timer_ns_to_tick(odp_timer_t timer, uint64_t ns);
> +uint64_t odp_timer_ns_to_tick(odp_timer_pool_t tpid, uint64_t ns);
>
>  /**
> - * Timer resolution in nanoseconds
> + * Current tick value
>   *
> - * @param timer Timer
> + * @param tpid Timer pool identifier
>   *
> - * @return Resolution in nanoseconds
> + * @return Current time in timer ticks
> + */
> +uint64_t odp_timer_current_tick(odp_timer_pool_t tpid);
> +
> +/**
> + * ODP timer configurations
>   */
> -uint64_t odp_timer_resolution(odp_timer_t timer);
> +
> +typedef enum odp_timer_pool_conf_e {
> +       ODP_TIMER_NAME,      /**< Return name of timer pool */
> +       ODP_TIMER_RESOLUTION,/**< Return the timer resolution (in ns) */
> +       ODP_TIMER_MIN_TICKS, /**< Return the min supported rel timeout (ticks)*/
> +       ODP_TIMER_MAX_TICKS, /**< Return the max supported rel timeout (ticks)*/
> +       ODP_TIMER_NUM_TIMERS,/**< Return number of supported timers */
> +       ODP_TIMER_SHARED     /**< Return shared flag */
> +} odp_timer_pool_conf_t;
>
>  /**
> - * Maximum timeout in timer ticks
> + * Query different timer pool configurations, e.g.
> + *  Timer resolution in nanoseconds
> + *  Maximum timeout in timer ticks
> + *  Number of supported timers
> + *  Shared or private timer pool
>   *
> - * @param timer Timer
> + * @param tpid Timer pool identifier
> + * @param item Configuration item being queried
>   *
> - * @return Maximum timeout in timer ticks
> + * @return the requested piece of information or 0 for unknown item.
>   */
> -uint64_t odp_timer_maximum_tmo(odp_timer_t timer);
> +uintptr_t odp_timer_pool_query_conf(odp_timer_pool_t tpid,
> +                                   odp_timer_pool_conf_t item);
>
>  /**
> - * Current timer tick
> + * Allocate a timer
>   *
> - * @param timer Timer
> + * Create a timer (allocating all necessary resources e.g. timeout event) from
> + * the timer pool.
>   *
> - * @return Current time in timer ticks
> + * @param tpid     Timer pool identifier
> + * @param queue    Destination queue for timeout notifications
> + * @param user_ptr User defined pointer or NULL (copied to timeouts)
> + *
> + * @return Timer handle if successful, otherwise ODP_TIMER_INVALID and
> + *        errno set.
>   */
> -uint64_t odp_timer_current_tick(odp_timer_t timer);
> +odp_timer_t odp_timer_alloc(odp_timer_pool_t tpid,
> +                           odp_queue_t queue,
> +                           void *user_ptr);
>
>  /**
> - * Request timeout with an absolute timer tick
> + * Free a timer
> + *
> + * Free (destroy) a timer, freeing all associated resources (e.g. default
> + * timeout event). An expired and enqueued timeout event will not be freed.
> + * It is the responsibility of the application to free this timeout when it
> + * is received.
>   *
> - * When tick reaches tmo_tick, the timer enqueues the timeout notification into
> - * the destination queue.
> + * @param tim      Timer handle
> + */
> +void odp_timer_free(odp_timer_t tim);
> +
> +/**
> + * Set a timer (absolute time) with a user-defined timeout buffer
>   *
> - * @param timer    Timer
> - * @param tmo_tick Absolute timer tick value which triggers the timeout
> - * @param queue    Destination queue for the timeout notification
> - * @param buf      User defined timeout notification buffer. When
> - *                 ODP_BUFFER_INVALID, default timeout notification is used.
> + * Set (arm) the timer to expire at a specific time. The user-defined
> + * buffer will be enqueued when the timer expires.
> + * Arming may fail (if the timer is in state EXPIRED), an earlier timeout
> + * will then be received. odp_timer_tmo_status() must be used to check if
> + * the received timeout is valid.
>   *
> - * @return Timeout handle if successful, otherwise ODP_TIMER_TMO_INVALID
> + * Note: any invalid parameters will be treated as programming errors and will
> + * cause the application to abort.
> + *
> + * @param tim      Timer
> + * @param abs_tck  Expiration time in absolute timer ticks
> + * @param user_buf The buffer to use as timeout event
> + *
> + * @return Success or failure code
>   */
> -odp_timer_tmo_t odp_timer_absolute_tmo(odp_timer_t timer, uint64_t
> tmo_tick,
> -                                      odp_queue_t queue, odp_buffer_t
> buf);
> +odp_timer_set_t odp_timer_set_abs_w_buf(odp_timer_t tim,
> +                                       uint64_t abs_tck,
> +                                       odp_buffer_t user_buf);
>
>  /**
> - * Cancel a timeout
> + * Set a timer with an absolute expiration time
> + *
> + * Set (arm) the timer to expire at a specific time.
> + * Arming may fail (if the timer is in state EXPIRED), an earlier timeout
> + * will then be received. odp_timer_tmo_status() must be used to check if
> + * the received timeout is valid.
> + *
> + * Note: any invalid parameters will be treated as programming errors and will
> + * cause the application to abort.
>   *
> - * @param timer Timer
> - * @param tmo   Timeout to cancel
> + * @param tim     Timer
> + * @param abs_tck Expiration time in absolute timer ticks
>   *
> - * @return 0 if successful
> + * @return Success or failure code
>   */
> -int odp_timer_cancel_tmo(odp_timer_t timer, odp_timer_tmo_t tmo);
> +odp_timer_set_t odp_timer_set_abs(odp_timer_t tim, uint64_t abs_tck);
>
>  /**
> - * Convert buffer handle to timeout handle
> + * Set a timer with a relative expiration time and user-defined buffer.
>   *
> - * @param buf  Buffer handle
> + * Set (arm) the timer to expire at a relative future time.
> + * Arming may fail (if the timer is in state EXPIRED),
> + * an earlier timeout will then be received. odp_timer_tmo_status() must
> + * be used to check if the received timeout is valid.
>   *
> - * @return Timeout buffer handle
> + * Note: any invalid parameters will be treated as programming errors and will
> + * cause the application to abort.
> + *
> + * @param tim      Timer
> + * @param rel_tck  Expiration time in timer ticks relative to current time of
> + *                the timer pool the timer belongs to
> + * @param user_buf The buffer to use as timeout event
> + *
> + * @return Success or failure code
>   */
> -odp_timeout_t odp_timeout_from_buffer(odp_buffer_t buf);
> +odp_timer_set_t odp_timer_set_rel_w_buf(odp_timer_t tim,
> +                                       uint64_t rel_tck,
> +                                       odp_buffer_t user_buf);
> +/**
> + * Set a timer with a relative expiration time
> + *
> + * Set (arm) the timer to expire at a relative future time.
> + * Arming may fail (if the timer is in state EXPIRED),
> + * an earlier timeout will then be received. odp_timer_tmo_status() must
> + * be used to check if the received timeout is valid.
> + *
> + * Note: any invalid parameters will be treated as programming errors and will
> + * cause the application to abort.
> + *
> + * @param tim     Timer
> + * @param rel_tck Expiration time in timer ticks relative to current time of
> + *               the timer pool the timer belongs to
> + *
> + * @return Success or failure code
> + */
> +odp_timer_set_t odp_timer_set_rel(odp_timer_t tim, uint64_t rel_tck);
>
>  /**
> - * Return absolute timeout tick
> + * Cancel a timer
> + *
> + * Cancel a timer, preventing future expiration and delivery.
> + *
> + * A timer that has already expired and been enqueued for delivery may be
> + * impossible to cancel and will instead be delivered to the destination queue.
> + * Use odp_timer_tmo_status() to check whether a received timeout is fresh or
> + * stale (cancelled). Stale timeouts will automatically be recycled.
> + *
> + * Note: any invalid parameters will be treated as programming errors and will
> + * cause the application to abort.
> + *
> + * @param tim    Timer handle
> + */
> +void odp_timer_cancel(odp_timer_t tim);
> +
> +/**
> + * Translate from buffer to timeout
> + *
> + * Return the timeout handle that corresponds to the specified buffer handle.
> + * The buffer must be of type ODP_BUFFER_TYPE_TIMEOUT.
> + *
> + * @param buf   Buffer handle to translate.
> + *
> + * @return      The corresponding timeout handle.
> + */
> +static inline odp_timer_tmo_t odp_timeout_from_buffer(odp_buffer_t buf)
> +{
> +       if (odp_unlikely(odp_buffer_type(buf) != ODP_BUFFER_TYPE_TIMEOUT))
> {
> +               ODP_ERR("Buffer type %u not timeout\n", buf);
> +               abort();
> +       }
> +       /* In this implementation, timeout == buffer */
> +       return (odp_timer_tmo_t)buf;
> +}
> +
> +/**
> + * Translate from timeout to buffer
> + *
> + * Return the buffer handle that corresponds to the specified timeout handle.
> + *
> + * @param tmo   Timeout handle to translate.
> + *
> + * @return      The corresponding buffer handle.
> + */
> +static inline odp_buffer_t odp_buffer_from_timeout(odp_timer_tmo_t tmo)
> +{
> +       /* In this implementation, buffer == timeout */
> +       return (odp_buffer_t)tmo;
> +}
> +
> +/**
> + * Return timeout to timer
> + *
> + * Return a received timeout for reuse with the parent timer.
> + * Note: odp_timer_return_tmo() must be called on all received timeouts!
> + * (Excluding user defined timeout buffers).
> + * The timeout must not be accessed after this call; the semantics are
> + * equivalent to those of a free call.
> + *
> + * @param tmo    Timeout
> + */
> +void odp_timer_return_tmo(odp_timer_tmo_t tmo);
> +
> +/**
> + * Return fresh/stale/orphan status of timeout.
> + *
> + * Check a received timeout for orphanhood (i.e. parent timer freed) and
> + * staleness (i.e. parent timer has been reset or cancelled after the timeout
> + * expired and was enqueued).
> + * If the timeout is fresh, it should be processed.
> + * If the timeout is stale or orphaned, it should be ignored.
> + * All timeouts must be returned using the odp_timer_return_tmo() call.
> + *
> + * @param tmo    Timeout
> + *
> + * @return One of ODP_TMO_FRESH, ODP_TMO_STALE or ODP_TMO_ORPHAN.
> + */
> +odp_timer_tmo_status_t odp_timer_tmo_status(odp_timer_tmo_t tmo);
> +
> +/**
> + * Get timer handle
> + *
> + * Return Handle of parent timer.
> + *
> + * @param tmo   Timeout
> + *
> + * @return Timer handle or ODP_TIMER_INVALID for orphaned timeouts.
> + *         Note that the parent timer could be freed by some other thread
> + *         at any time and thus the timeout becomes orphaned.
> + */
> +odp_timer_t odp_timer_handle(odp_timer_tmo_t tmo);
> +
> +/**
> + * Get expiration time
> + *
> + * Return (requested) expiration time of timeout.
> + *
> + * @param tmo   Timeout
> + *
> + * @return Expiration time
> + */
> +uint64_t odp_timer_expiration(odp_timer_tmo_t tmo);
> +
> +/**
> + * Get user pointer
> + *
> + * Return User pointer of timer associated with timeout.
> + * The user pointer is often used to point to some associated context.
>   *
> - * @param tmo Timeout buffer handle
> + * @param tmo   Timeout
>   *
> - * @return Absolute timeout tick
> + * @return User pointer
>   */
> -uint64_t odp_timeout_tick(odp_timeout_t tmo);
> +void *odp_timer_userptr(odp_timer_tmo_t tmo);
>
>  #ifdef __cplusplus
>  }
> diff --git a/platform/linux-generic/include/odp_priority_queue_internal.h
> b/platform/linux-generic/include/odp_priority_queue_internal.h
> new file mode 100644
> index 0000000..7d7f3a2
> --- /dev/null
> +++ b/platform/linux-generic/include/odp_priority_queue_internal.h
> @@ -0,0 +1,108 @@
> +#ifndef _PRIORITY_QUEUE_H
> +#define _PRIORITY_QUEUE_H
> +
> +#include <assert.h>
> +#include <stddef.h>
> +#include <stdint.h>
> +#include <stdbool.h>
> +#include <odp_align.h>
> +
> +#define INVALID_INDEX ~0U
> +#define INVALID_PRIORITY ((pq_priority_t)~0ULL)
> +
> +typedef uint64_t pq_priority_t;
> +
> +struct heap_node;
> +
> +typedef struct priority_queue {
> +       uint32_t max_elems;/* Number of elements in heap */
> +       /* Number of registered elements (active + inactive) */
> +       uint32_t reg_elems;
> +       uint32_t num_elems;/* Number of active elements */
> +       struct heap_node *heap;
> +       struct heap_node *org_ptr;
> +} priority_queue ODP_ALIGNED(sizeof(uint64_t));
> +
> +/* The user gets a pointer to this structure */
> +typedef struct {
> +       /* Set when pq_element registered with priority queue */
> +       priority_queue *pq;
> +       uint32_t index;/* Index into heap array */
> +       pq_priority_t prio;
> +} pq_element;
> +
> +/*** Operations on pq_element ***/
> +
> +static inline void pq_element_con(pq_element *this)
> +{
> +       this->pq = NULL;
> +       this->index = INVALID_INDEX;
> +       this->prio = 0U;
> +}
> +
> +static inline void pq_element_des(pq_element *this)
> +{
> +       (void)this;
> +       assert(this->index == INVALID_INDEX);
> +}
> +
> +static inline priority_queue *get_pq(const pq_element *this)
> +{
> +       return this->pq;
> +}
> +
> +static inline pq_priority_t get_prio(const pq_element *this)
> +{
> +       return this->prio;
> +}
> +
> +static inline uint32_t get_index(const pq_element *this)
> +{
> +       return this->index;
> +}
> +
> +static inline bool is_active(const pq_element *this)
> +{
> +       return this->index != INVALID_INDEX;
> +}
> +
> +/*** Operations on priority_queue ***/
> +
> +extern uint32_t pq_smallest_child(priority_queue *, uint32_t,
> pq_priority_t);
> +extern void pq_bubble_down(priority_queue *, pq_element *);
> +extern void pq_bubble_up(priority_queue *, pq_element *);
> +
> +static inline bool valid_index(priority_queue *this, uint32_t idx)
> +{
> +       return idx < this->num_elems;
> +}
> +
> +extern void priority_queue_con(priority_queue *, uint32_t _max_elems);
> +extern void priority_queue_des(priority_queue *);
> +
> +/* Register pq_element with priority queue */
> +/* Return false if priority queue full */
> +extern bool pq_register_element(priority_queue *, pq_element *);
> +
> +/* Activate and add pq_element to priority queue */
> +/* Element must be disarmed */
> +extern void pq_activate_element(priority_queue *, pq_element *,
> pq_priority_t);
> +
> +/* Reset (increase) priority for pq_element */
> +/* Element may be active or inactive (released) */
> +extern void pq_reset_element(priority_queue *, pq_element *,
> pq_priority_t);
> +
> +/* Deactivate and remove element from priority queue */
> +/* Element may be active or inactive (released) */
> +extern void pq_deactivate_element(priority_queue *, pq_element *);
> +
> +/* Unregister pq_element */
> +extern void pq_unregister_element(priority_queue *, pq_element *);
> +
> +/* Return priority of first element (lowest numerical value) */
> +extern pq_priority_t pq_first_priority(const priority_queue *);
> +
> +/* Deactivate and return first element if it's prio is <= threshold */
> +extern pq_element *pq_release_element(priority_queue *, pq_priority_t
> thresh);
> +
> +#endif /* _PRIORITY_QUEUE_H */
> diff --git a/platform/linux-generic/include/odp_timer_internal.h
> b/platform/linux-generic/include/odp_timer_internal.h
> index ad28f53..461f28c 100644
> --- a/platform/linux-generic/include/odp_timer_internal.h
> +++ b/platform/linux-generic/include/odp_timer_internal.h
> @@ -1,4 +1,4 @@
> -/* Copyright (c) 2013, Linaro Limited
> +/* Copyright (c) 2014, Linaro Limited
>   * All rights reserved.
>   *
>   * SPDX-License-Identifier:     BSD-3-Clause
> @@ -8,72 +8,51 @@
>  /**
>   * @file
>   *
> - * ODP timer timeout descriptor - implementation internal
> + * ODP timeout descriptor - implementation internal
>   */
>
>  #ifndef ODP_TIMER_INTERNAL_H_
>  #define ODP_TIMER_INTERNAL_H_
>
> -#ifdef __cplusplus
> -extern "C" {
> -#endif
> -
> -#include <odp_std_types.h>
> -#include <odp_queue.h>
> -#include <odp_buffer.h>
> +#include <odp_align.h>
> +#include <odp_debug.h>
>  #include <odp_buffer_internal.h>
>  #include <odp_buffer_pool_internal.h>
>  #include <odp_timer.h>
>
> -struct timeout_t;
> -
> -typedef struct timeout_t {
> -       struct timeout_t *next;
> -       int               timer_id;
> -       int               tick;
> -       uint64_t          tmo_tick;
> -       odp_queue_t       queue;
> -       odp_buffer_t      buf;
> -       odp_buffer_t      tmo_buf;
> -} timeout_t;
> -
> -
> -struct odp_timeout_hdr_t;
> -
>  /**
> - * Timeout notification header
> + * Internal Timeout header
>   */
> -typedef struct odp_timeout_hdr_t {
> +typedef struct {
> +       /* common buffer header */
>         odp_buffer_hdr_t buf_hdr;
>
> -       timeout_t meta;
> -
> -       uint8_t buf_data[];
> +       /* Requested expiration time */
> +       uint64_t expiration;
> +       /* User ptr inherited from parent timer */
> +       void *user_ptr;
> +       /* Parent timer */
> +       odp_timer_t timer;
> +       /* Tag inherited from parent timer at time of expiration */
> +       uint32_t tag;
> +       /* Gen-cnt inherited from parent timer at time of creation */
> +       uint16_t gencnt;
> +       uint16_t pad;
> +       uint8_t buf_data[0];
>  } odp_timeout_hdr_t;
>
> -
> -
>  ODP_STATIC_ASSERT(sizeof(odp_timeout_hdr_t) ==
> -          ODP_OFFSETOF(odp_timeout_hdr_t, buf_data),
> -          "ODP_TIMEOUT_HDR_T__SIZE_ERR");
> -
> +                 ODP_OFFSETOF(odp_timeout_hdr_t, buf_data),
> +                 "sizeof(odp_timeout_hdr_t) ==
> ODP_OFFSETOF(odp_timeout_hdr_t, buf_data)");
>  ODP_STATIC_ASSERT(sizeof(odp_timeout_hdr_t) % sizeof(uint64_t) == 0,
> -          "ODP_TIMEOUT_HDR_T__SIZE_ERR2");
> -
> +                 "sizeof(odp_timeout_hdr_t) % sizeof(uint64_t) == 0");
>
>  /**
> - * Return timeout header
> + * Return the timeout header
>   */
> -static inline odp_timeout_hdr_t *odp_timeout_hdr(odp_timeout_t tmo)
> +static inline odp_timeout_hdr_t *odp_timeout_hdr(odp_buffer_t buf)
>  {
> -       odp_buffer_hdr_t *buf_hdr = odp_buf_to_hdr((odp_buffer_t)tmo);
> -       return (odp_timeout_hdr_t *)(uintptr_t)buf_hdr;
> -}
> -
> -
> -
> -#ifdef __cplusplus
> +       return (odp_timeout_hdr_t *)odp_buf_to_hdr(buf);
>  }
> -#endif
>
>  #endif
> diff --git a/platform/linux-generic/odp_priority_queue.c
> b/platform/linux-generic/odp_priority_queue.c
> new file mode 100644
> index 0000000..b72c26f
> --- /dev/null
> +++ b/platform/linux-generic/odp_priority_queue.c
> @@ -0,0 +1,283 @@
> +#define NDEBUG /* Enabled by default by ODP build system */
> +#include <assert.h>
> +#include <unistd.h>
> +#include <stdlib.h>
> +#include <string.h>
> +#include <strings.h>
> +#include <odp_hints.h>
> +#include <odp_align.h>
> +#include <odp_debug.h>
> +
> +#include "odp_priority_queue_internal.h"
> +
> +
> +#define NUM_CHILDREN 4
> +#define CHILD(n) (NUM_CHILDREN * (n) + 1)
> +#define PARENT(n) (((n) - 1) / NUM_CHILDREN)
> +
> +/* Internal nodes in the array */
> +typedef struct heap_node {
> +       pq_element *elem;
> +       /* Copy of elem->prio so we avoid unnecessary dereferencing */
> +       pq_priority_t prio;
> +} heap_node;
> +
> +static void pq_assert_heap(priority_queue *this);
> +
> +#define ALIGNMENT(p) (1U << ((unsigned)ffs((int)p) - 1U))
> +
> +void priority_queue_con(priority_queue *this, uint32_t _max_elems)
> +{
> +       this->max_elems = _max_elems;
> +       this->reg_elems = 0;
> +       this->num_elems = 0;
> +       this->org_ptr = malloc((_max_elems + 64 / sizeof(heap_node)) *
> +                              sizeof(heap_node));
> +       if (odp_unlikely(this->org_ptr == NULL)) {
> +               ODP_ERR("malloc failed\n");
> +               abort();
> +       }
> +       this->heap = this->org_ptr;
> +       assert((size_t)&this->heap[1] % 8 == 0);
> +       /* Increment base address until first child (index 1) is cache line */
> +       /* aligned and thus all children (e.g. index 1-4) stored in the */
> +       /* same cache line. We are not interested in the alignment of */
> +       /* heap[0] as this is a lone node */
> +       while ((size_t)&this->heap[1] % ODP_CACHE_LINE_SIZE != 0) {
> +               /* Cast to ptr to struct member with the greatest alignment */
> +               /* requirement */
> +               this->heap = (heap_node *)((pq_priority_t *)this->heap +
> 1);
> +       }
> +       pq_assert_heap(this);
> +}
> +
> +void priority_queue_des(priority_queue *this)
> +{
> +       pq_assert_heap(this);
> +       free(this->org_ptr);
> +}
> +
> +#ifndef NDEBUG
> +static uint32_t
> +pq_assert_elem(priority_queue *this, uint32_t index, bool recurse)
> +{
> +       uint32_t num = 1;
> +       const pq_element *elem = this->heap[index].elem;
> +       assert(elem->index == index);
> +       assert(elem->prio == this->heap[index].prio);
> +       uint32_t child = CHILD(index);
> +       uint32_t i;
> +       for (i = 0; i < NUM_CHILDREN; i++, child++) {
> +               if (valid_index(this, child)) {
> +                       assert(this->heap[child].elem != NULL);
> +                       assert(this->heap[child].prio >= elem->prio);
> +                       if (recurse)
> +                               num += pq_assert_elem(this, child,
> recurse);
> +               }
> +       }
> +       return num;
> +}
> +#endif
> +
> +static void
> +pq_assert_heap(priority_queue *this)
> +{
> +       (void)this;
> +#ifndef NDEBUG
> +       uint32_t num = 0;
> +       if (odp_likely(this->num_elems != 0)) {
> +               assert(this->heap[0].elem != NULL);
> +               num += pq_assert_elem(this, 0, true);
> +       }
> +       assert(num == this->num_elems);
> +       unsigned i;
> +       for (i = 0; i < this->num_elems; i++) {
> +               assert(this->heap[i].elem != NULL);
> +               assert(this->heap[i].prio != INVALID_PRIORITY);
> +       }
> +#endif
> +}
> +
> +/* Bubble up to proper position */
> +void
> +pq_bubble_up(priority_queue *this, pq_element *elem)
> +{
> +       assert(this->heap[elem->index].elem == elem);
> +       assert(this->heap[elem->index].prio == elem->prio);
> +       uint32_t current = elem->index;
> +       pq_priority_t prio = elem->prio;
> +       assert(current == 0 || this->heap[PARENT(current)].elem != NULL);
> +       /* Move up into proper position */
> +       while (current != 0 && this->heap[PARENT(current)].prio > prio) {
> +               uint32_t parent = PARENT(current);
> +               assert(this->heap[parent].elem != NULL);
> +               /* Swap current with parent */
> +               /* 1) Move parent down */
> +               this->heap[current].elem = this->heap[parent].elem;
> +               this->heap[current].prio = this->heap[parent].prio;
> +               this->heap[current].elem->index = current;
> +               /* 2) Move current up to parent */
> +               this->heap[parent].elem = elem;
> +               this->heap[parent].prio = prio;
> +               this->heap[parent].elem->index = parent;
> +               /* Continue moving elem until it is in the right place */
> +               current = parent;
> +       }
> +       pq_assert_heap(this);
> +}
> +
> +/* Find the smallest child that is smaller than the specified priority */
> +/* Very hot function, can we decrease the number of cache misses? */
> +uint32_t pq_smallest_child(priority_queue *this,
> +                          uint32_t index,
> +                          pq_priority_t val)
> +{
> +       uint32_t smallest = index;
> +       uint32_t child = CHILD(index);
> +#if NUM_CHILDREN == 4
> +       /* Unroll loop when all children exist */
> +       if (odp_likely(valid_index(this, child + 3))) {
> +               if (this->heap[child + 0].prio < val)
> +                       val = this->heap[smallest = child + 0].prio;
> +               if (this->heap[child + 1].prio < val)
> +                       val = this->heap[smallest = child + 1].prio;
> +               if (this->heap[child + 2].prio < val)
> +                       val = this->heap[smallest = child + 2].prio;
> +               if (this->heap[child + 3].prio < val)
> +                       (void)this->heap[smallest = child + 3].prio;
> +               return smallest;
> +       }
> +#endif
> +       uint32_t i;
> +       for (i = 0; i < NUM_CHILDREN; i++) {
> +               if (odp_unlikely(!valid_index(this, child + i)))
> +                       break;
> +               if (this->heap[child + i].prio < val) {
> +                       smallest = child + i;
> +                       val = this->heap[smallest].prio;
> +               }
> +       }
> +       return smallest;
> +}
> +
> +/* Very hot function, can it be optimised? */
> +void
> +pq_bubble_down(priority_queue *this, pq_element *elem)
> +{
> +       assert(this->heap[elem->index].elem == elem);
> +       assert(this->heap[elem->index].prio == elem->prio);
> +       uint32_t current = elem->index;
> +       pq_priority_t prio = elem->prio;
> +       for (;;) {
> +               uint32_t child = pq_smallest_child(this, current, prio);
> +               if (current == child) {
> +                       /* No smaller child, we are done */
> +                       pq_assert_heap(this);
> +                       return;
> +               }
> +               /* Element larger than smaller child, must move down */
> +               assert(this->heap[child].elem != NULL);
> +               /* 1) Move child up to current */
> +               this->heap[current].elem = this->heap[child].elem;
> +               this->heap[current].prio = this->heap[child].prio;
> +               /* 2) Move current down to child */
> +               this->heap[child].elem = elem;
> +               this->heap[child].prio = prio;
> +               this->heap[child].elem->index = child;
> +
> +               this->heap[current].elem->index = current; /* cache misses! */
> +               /* Continue moving element until it is in the right place */
> +               current = child;
> +       }
> +}
> +
> +bool
> +pq_register_element(priority_queue *this, pq_element *elem)
> +{
> +       if (odp_likely(this->reg_elems < this->max_elems)) {
> +               elem->pq = this;
> +               this->reg_elems++;
> +               return true;
> +       }
> +       return false;
> +}
> +
> +void
> +pq_unregister_element(priority_queue *this, pq_element *elem)
> +{
> +       assert(elem->pq == this);
> +       if (is_active(elem))
> +               pq_deactivate_element(this, elem);
> +       this->reg_elems--;
> +}
> +
> +void
> +pq_activate_element(priority_queue *this, pq_element *elem, pq_priority_t
> prio)
> +{
> +       assert(elem->index == INVALID_INDEX);
> +       /* Insert element at end */
> +       uint32_t index = this->num_elems++;
> +       this->heap[index].elem = elem;
> +       this->heap[index].prio = prio;
> +       elem->index = index;
> +       elem->prio = prio;
> +       pq_bubble_up(this, elem);
> +}
> +
> +void
> +pq_deactivate_element(priority_queue *this, pq_element *elem)
> +{
> +       assert(elem->pq == this);
> +       if (odp_likely(is_active(elem))) {
> +               /* Swap element with last element */
> +               uint32_t current = elem->index;
> +               uint32_t last = --this->num_elems;
> +               if (odp_likely(last != current)) {
> +                       /* Move last element to current */
> +                       this->heap[current].elem = this->heap[last].elem;
> +                       this->heap[current].prio = this->heap[last].prio;
> +                       this->heap[current].elem->index = current;
> +                       /* Bubble down old 'last' element to its proper place */
> +                       if (this->heap[current].prio < elem->prio)
> +                               pq_bubble_up(this,
> this->heap[current].elem);
> +                       else
> +                               pq_bubble_down(this,
> this->heap[current].elem);
> +               }
> +               elem->index = INVALID_INDEX;
> +               pq_assert_heap(this);
> +       }
> +}
> +
> +void
> +pq_reset_element(priority_queue *this, pq_element *elem, pq_priority_t
> prio)
> +{
> +       assert(prio != INVALID_PRIORITY);
> +       if (odp_likely(is_active(elem))) {
> +               assert(prio >= elem->prio);
> +               elem->prio = prio;
> +               this->heap[elem->index].prio = prio;/* cache misses here! */
> +               pq_bubble_down(this, elem);
> +               pq_assert_heap(this);
> +       } else {
> +               pq_activate_element(this, elem, prio);
> +       }
> +}
> +
> +pq_priority_t pq_first_priority(const priority_queue *this)
> +{
> +       return this->num_elems != 0 ? this->heap[0].prio :
> INVALID_PRIORITY;
> +}
> +
> +pq_element *
> +pq_release_element(priority_queue *this, pq_priority_t threshold)
> +{
> +       if (odp_likely(this->num_elems != 0 &&
> +                      this->heap[0].prio <= threshold)) {
> +               pq_element *elem = this->heap[0].elem;
> +               /* Remove element from heap */
> +               pq_deactivate_element(this, elem);
> +               assert(elem->prio <= threshold);
> +               return elem;
> +       }
> +       return NULL;
> +}
> diff --git a/platform/linux-generic/odp_timer.c
> b/platform/linux-generic/odp_timer.c
> index 313c713..0e5071c 100644
> --- a/platform/linux-generic/odp_timer.c
> +++ b/platform/linux-generic/odp_timer.c
> @@ -4,428 +4,713 @@
>   * SPDX-License-Identifier:     BSD-3-Clause
>   */
>
> -#include <odp_timer.h>
> -#include <odp_timer_internal.h>
> -#include <odp_time.h>
> -#include <odp_buffer_pool_internal.h>
> -#include <odp_internal.h>
> -#include <odp_atomic.h>
> -#include <odp_spinlock.h>
> -#include <odp_sync.h>
> -#include <odp_debug.h>
> -
> -#include <signal.h>
> -#include <time.h>
> +/**
> + * @file
> + *
> + * ODP timer service
> + *
> + */
>
> +#include <assert.h>
> +#include <errno.h>
>  #include <string.h>
> -
> -#define NUM_TIMERS    1
> -#define MAX_TICKS     1024
> -#define MAX_RES       ODP_TIME_SEC
> -#define MIN_RES       (100*ODP_TIME_USEC)
> -
> -
> -typedef struct {
> -       odp_spinlock_t lock;
> -       timeout_t      *list;
> -} tick_t;
> -
> -typedef struct {
> -       int               allocated;
> -       volatile int      active;
> -       volatile uint64_t cur_tick;
> -       timer_t           timerid;
> -       odp_timer_t       timer_hdl;
> -       odp_buffer_pool_t pool;
> -       uint64_t          resolution_ns;
> -       uint64_t          max_ticks;
> -       tick_t            tick[MAX_TICKS];
> -
> -} timer_ring_t;
> -
> -typedef struct {
> -       odp_spinlock_t lock;
> -       int            num_timers;
> -       timer_ring_t   timer[NUM_TIMERS];
> -
> -} timer_global_t;
> -
> -/* Global */
> -static timer_global_t odp_timer;
> -
> -static void add_tmo(tick_t *tick, timeout_t *tmo)
> +#include <stdlib.h>
> +#include <time.h>
> +#include <signal.h>
> +#include "odp_std_types.h"
> +#include "odp_buffer.h"
> +#include "odp_buffer_pool.h"
> +#include "odp_queue.h"
> +#include "odp_hints.h"
> +#include "odp_sync.h"
> +#include "odp_ticketlock.h"
> +#include "odp_debug.h"
> +#include "odp_align.h"
> +#include "odp_shared_memory.h"
> +#include "odp_hints.h"
> +#include "odp_internal.h"
> +#include "odp_time.h"
> +#include "odp_timer.h"
> +#include "odp_timer_internal.h"
> +#include "odp_priority_queue_internal.h"
> +
>
> +/******************************************************************************
> + * Translation between timeout and timeout header
> + *****************************************************************************/
> +
> +static inline odp_timeout_hdr_t *odp_tmo_to_hdr(odp_timer_tmo_t tmo)
>  {
> -       odp_spinlock_lock(&tick->lock);
> -
> -       tmo->next  = tick->list;
> -       tick->list = tmo;
> +       odp_buffer_t buf = odp_buffer_from_timeout(tmo);
> +       odp_timeout_hdr_t *tmo_hdr = (odp_timeout_hdr_t
> *)odp_buf_to_hdr(buf);
> +       return tmo_hdr;
> +}
>
> -       odp_spinlock_unlock(&tick->lock);
>
> +/******************************************************************************
> + * odp_timer abstract datatype
> + *****************************************************************************/
> +
> +typedef struct odp_timer_s {
> +       pq_element pqelem;/* Base class */
> +       uint64_t req_tmo;/* Requested timeout tick */
> +       odp_buffer_t tmo_buf;/* ODP_BUFFER_INVALID if timeout enqueued */
> +       odp_queue_t queue;/* ODP_QUEUE_INVALID if timer is free */
> +       uint32_t tag;/* Reusing tag as next pointer/index when timer is free */
> +       uint16_t gencnt;/* Smaller to make place for user_buf flag */
> +       unsigned int user_buf:1; /* User-defined buffer? */
> +} odp_timer;
> +
> +/* Constructor */
> +static inline void odp_timer_con(odp_timer *this)
> +{
> +       pq_element_con(&this->pqelem);
> +       this->tmo_buf = ODP_BUFFER_INVALID;
> +       this->queue = ODP_QUEUE_INVALID;
> +       this->gencnt = 0;
>  }
>
> -static timeout_t *rem_tmo(tick_t *tick)
> +/* Destructor */
> +static inline void odp_timer_des(odp_timer *this)
>  {
> -       timeout_t *tmo;
> +       assert(this->tmo_buf == ODP_BUFFER_INVALID);
> +       assert(this->queue == ODP_QUEUE_INVALID);
> +       pq_element_des(&this->pqelem);
> +}
>
> -       odp_spinlock_lock(&tick->lock);
> +/* Setup when timer is allocated */
> +static void setup(odp_timer *this,
> +                 odp_queue_t _q,
> +                 void *_up,
> +                 odp_buffer_t _tmo)
> +{
> +       this->req_tmo = INVALID_PRIORITY;
> +       this->tmo_buf = _tmo;
> +       this->queue = _q;
> +       this->tag = 0;
> +       this->user_buf = false;
> +       /* Initialise constant fields of timeout event */
> +       odp_timeout_hdr_t *tmo_hdr =
> +               odp_tmo_to_hdr(odp_timeout_from_buffer(this->tmo_buf));
> +       tmo_hdr->gencnt = this->gencnt;
> +       tmo_hdr->timer = this;
> +       tmo_hdr->user_ptr = _up;
> +       /* tmo_hdr->tag set at expiration time */
> +       /* tmo_hdr->expiration set at expiration time */
> +       assert(this->queue != ODP_QUEUE_INVALID);
> +}
>
> -       tmo = tick->list;
> +/* Teardown when timer is freed */
> +static odp_buffer_t teardown(odp_timer *this)
> +{
> +       /* Increase generation count to make any pending timeout(s) orphaned */
> +       ++this->gencnt;
> +       odp_buffer_t buf = this->tmo_buf;
> +       this->tmo_buf = ODP_BUFFER_INVALID;
> +       this->queue = ODP_QUEUE_INVALID;
> +       return buf;
> +}
>
> -       if (tmo)
> -               tick->list = tmo->next;
> +static inline uint32_t get_next_free(odp_timer *this)
> +{
> +       assert(this->queue == ODP_QUEUE_INVALID);
> +       return this->tag;
> +}
>
> -       odp_spinlock_unlock(&tick->lock);
> +static inline void set_next_free(odp_timer *this, uint32_t nf)
> +{
> +       assert(this->queue == ODP_QUEUE_INVALID);
> +       this->tag = nf;
> +}
>
> -       if (tmo)
> -               tmo->next = NULL;
>
> +/******************************************************************************
> + * odp_timer_pool abstract datatype
> + * Includes alloc and free timer
> + *****************************************************************************/
> +
> +typedef struct odp_timer_pool_s {
> +       priority_queue pq;
> +       uint64_t cur_tick;/* Current tick value */
> +       uint64_t min_tick;/* Current expiration lower bound */
> +       uint64_t max_tick;/* Current expiration higher bound */
> +       bool shared;
> +       odp_ticketlock_t lock;
> +       const char *name;
> +       odp_buffer_pool_t buf_pool;
> +       uint64_t resolution_ns;
> +       uint64_t min_tmo_tck;
> +       uint64_t max_tmo_tck;
> +       odp_timer *timers;
> +       uint32_t num_alloc;/* Current number of allocated timers */
> +       uint32_t max_timers;/* Max number of timers */
> +       uint32_t first_free;/* 0..max_timers-1 => free timer */
> +       timer_t timerid;
> +       odp_timer_clk_src_t clk_src;
> +} odp_timer_pool;
> +
> +/* Forward declarations */
> +static void timer_init(odp_timer_pool *tp);
> +static void timer_exit(odp_timer_pool *tp);
> +
> +static void odp_timer_pool_con(odp_timer_pool *this,
> +                              const char *_n,
> +                              odp_buffer_pool_t _bp,
> +                              uint64_t _r,
> +                              uint64_t _mint,
> +                              uint64_t _maxt,
> +                              uint32_t _mt,
> +                              bool _s,
> +                              odp_timer_clk_src_t _cs)
> +{
> +       priority_queue_con(&this->pq, _mt);
> +       this->cur_tick = 0;
> +       this->shared = _s;
> +       this->name = strdup(_n);
> +       this->buf_pool = _bp;
> +       this->resolution_ns = _r;
> +       this->min_tmo_tck = odp_timer_ns_to_tick(this, _mint);
> +       this->max_tmo_tck = odp_timer_ns_to_tick(this, _maxt);
> +       this->min_tick = this->cur_tick + this->min_tmo_tck;
> +       this->max_tick = this->cur_tick + this->max_tmo_tck;
> +       this->num_alloc = 0;
> +       this->max_timers = _mt;
> +       this->first_free = 0;
> +       this->clk_src = _cs;
> +       this->timers = malloc(sizeof(odp_timer) * this->max_timers);
> +       if (this->timers == NULL)
> +               ODP_ABORT("%s: malloc failed\n", _n);
> +       uint32_t i;
> +       for (i = 0; i < this->max_timers; i++)
> +               odp_timer_con(&this->timers[i]);
> +       for (i = 0; i < this->max_timers; i++)
> +               set_next_free(&this->timers[i], i + 1);
> +       odp_ticketlock_init(&this->lock);
> +       if (this->clk_src == ODP_CLOCK_CPU)
> +               timer_init(this);
> +       /* Make sure timer pool initialisation is globally observable */
> +       /* before we return a pointer to it */
> +       odp_sync_stores();
> +}
>
> -       return tmo;
> +static odp_timer_pool *odp_timer_pool_new(
> +       const char *_n,
> +       odp_buffer_pool_t _bp,
> +       uint64_t _r,
> +       uint64_t _mint,
> +       uint64_t _maxt,
> +       uint32_t _mt,
> +       bool _s,
> +       odp_timer_clk_src_t _cs)
> +{
> +       odp_timer_pool *this = malloc(sizeof(odp_timer_pool));
> +       if (odp_unlikely(this == NULL))
> +               ODP_ABORT("%s: timer pool malloc failed\n", _n);
> +       odp_timer_pool_con(this, _n, _bp, _r, _mint, _maxt, _mt, _s, _cs);
> +       return this;
>  }
>
> -/**
> - * Search and delete tmo entry from timeout list
> - * return -1 : on error.. handle not in list
> - *             0 : success
> - */
> -static int find_and_del_tmo(timeout_t **tmo, odp_timer_tmo_t handle)
> +static void odp_timer_pool_des(odp_timer_pool *this)
>  {
> -       timeout_t *cur, *prev;
> -       prev = NULL;
> +       if (this->shared)
> +               odp_ticketlock_lock(&this->lock);
> +       if (this->num_alloc != 0) {
> +               /* It's a programming error to attempt to destroy a */
> +               /* timer pool which is still in use */
> +               ODP_ABORT("%s: timers in use\n", this->name);
> +       }
> +       if (this->clk_src == ODP_CLOCK_CPU)
> +               timer_exit(this);
> +       uint32_t i;
> +       for (i = 0; i < this->max_timers; i++)
> +               odp_timer_des(&this->timers[i]);
> +       free(this->timers);
> +       priority_queue_des(&this->pq);
> +       odp_sync_stores();
> +}
>
> -       for (cur = *tmo; cur != NULL; prev = cur, cur = cur->next) {
> -               if (cur->tmo_buf == handle) {
> -                       if (prev == NULL)
> -                               *tmo = cur->next;
> -                       else
> -                               prev->next = cur->next;
> +static void odp_timer_pool_del(odp_timer_pool *this)
> +{
> +       odp_timer_pool_des(this);
> +       free(this);
> +}
>
> -                       break;
> +static inline odp_timer *timer_alloc(odp_timer_pool *this,
> +                                    odp_queue_t queue,
> +                                    void *user_ptr,
> +                                    odp_buffer_t tmo_buf)
> +{
> +       odp_timer *tim = ODP_TIMER_INVALID;
> +       if (odp_likely(this->shared))
> +               odp_ticketlock_lock(&this->lock);
> +       if (odp_likely(this->num_alloc < this->max_timers)) {
> +               this->num_alloc++;
> +               /* Remove first unused timer from free list */
> +               assert(this->first_free != this->max_timers);
> +               tim = &this->timers[this->first_free];
> +               this->first_free = get_next_free(tim);
> +               /* Insert timer into priority queue */
> +               if (odp_unlikely(!pq_register_element(&this->pq,
> +                                                     &tim->pqelem))) {
> +                       /* Unexpected internal error */
> +                       abort();
>                 }
> +               /* Create timer */
> +               setup(tim, queue, user_ptr, tmo_buf);
> +       } else {
> +               errno = ENFILE; /* Reusing "file table overflow" */
>         }
> -
> -       if (!cur)
> -               /* couldn't find tmo in list */
> -               return -1;
> -
> -       /* application to free tmo_buf provided by absolute_tmo call */
> -       return 0;
> +       if (odp_likely(this->shared))
> +               odp_ticketlock_unlock(&this->lock);
> +       return tim;
>  }
>
> -int odp_timer_cancel_tmo(odp_timer_t timer_hdl, odp_timer_tmo_t tmo)
> +static inline void timer_free(odp_timer_pool *this, odp_timer *tim)
>  {
> -       int id;
> -       int tick_idx;
> -       timeout_t *cancel_tmo;
> -       odp_timeout_hdr_t *tmo_hdr;
> -       tick_t *tick;
> -
> -       /* get id */
> -       id = (int)timer_hdl - 1;
> -
> -       tmo_hdr = odp_timeout_hdr((odp_timeout_t) tmo);
> -       /* get tmo_buf to cancel */
> -       cancel_tmo = &tmo_hdr->meta;
> +       if (odp_likely(this->shared))
> +               odp_ticketlock_lock(&this->lock);
> +       if (odp_unlikely(tim->queue == ODP_QUEUE_INVALID))
> +               ODP_ABORT("Invalid timer %p\n", tim);
> +       /* Destroy timer */
> +       odp_buffer_t buf = teardown(tim);
> +       /* Remove timer from priority queue */
> +       pq_unregister_element(&this->pq, &tim->pqelem);
> +       /* Insert timer into free list */
> +       set_next_free(tim, this->first_free);
> +       this->first_free = tim - &this->timers[0];
> +       assert(this->num_alloc != 0);
> +       this->num_alloc--;
> +       if (odp_likely(this->shared))
> +               odp_ticketlock_unlock(&this->lock);
> +       if (buf != ODP_BUFFER_INVALID)
> +               odp_buffer_free(buf);
> +}
>
> -       tick_idx = cancel_tmo->tick;
> -       tick = &odp_timer.timer[id].tick[tick_idx];
>
> +/******************************************************************************
> + * Operations on timers
> + * reset/reset_w_buf/cancel timer, return timeout
> + *****************************************************************************/
>
> -       odp_spinlock_lock(&tick->lock);
> -       /* search and delete tmo from tick list */
> -       if (find_and_del_tmo(&tick->list, tmo) != 0) {
> -               odp_spinlock_unlock(&tick->lock);
> -               ODP_DBG("Couldn't find the tmo (%d) in tick list\n",
> (int)tmo);
> -               return -1;
> +static inline void timer_expire(odp_timer *tim)
> +{
> +       assert(tim->req_tmo != INVALID_PRIORITY);
> +       /* Timer expired, is there actually any timeout event */
> +       /* we can enqueue? */
> +       if (odp_likely(tim->tmo_buf != ODP_BUFFER_INVALID)) {
> +               /* Swap out timeout buffer */
> +               odp_buffer_t buf = tim->tmo_buf;
> +               tim->tmo_buf = ODP_BUFFER_INVALID;
> +               if (odp_likely(!tim->user_buf)) {
> +                       odp_timeout_hdr_t *tmo_hdr =
> +                               odp_tmo_to_hdr(odp_timeout_from_buffer(buf));
> +                       /* Copy tag and requested expiration tick from timer */
> +                       tmo_hdr->tag = tim->tag;
> +                       tmo_hdr->expiration = tim->req_tmo;
> +               }
> +               /* Else don't touch user-defined buffer */
> +               int rc = odp_queue_enq(tim->queue, buf);
> +               if (odp_unlikely(rc != 0))
> +                       ODP_ABORT("Failed to enqueue timeout buffer (%d)\n",
> +                                 rc);
> +               /* Mark timer as inactive */
> +               tim->req_tmo = INVALID_PRIORITY;
>         }
> -       odp_spinlock_unlock(&tick->lock);
> -
> -       return 0;
> +       /* No, timeout event already enqueued or unavailable */
> +       /* Keep timer active, odp_timer_return_tmo() will patch up */
>  }
>
> -static void notify_function(union sigval sigval)
> +static odp_timer_set_t timer_reset(odp_timer_pool *tp,
> +                                  odp_timer *tim,
> +                                  uint64_t abs_tck)
>  {
> -       uint64_t cur_tick;
> -       timeout_t *tmo;
> -       tick_t *tick;
> -       timer_ring_t *timer;
> +       assert(tim->user_buf == false);
> +       if (odp_unlikely(abs_tck < tp->min_tick))
> +               return ODP_TIMER_SET_TOOEARLY;
> +       if (odp_unlikely(abs_tck > tp->max_tick))
> +               return ODP_TIMER_SET_TOOLATE;
> +
> +       if (odp_likely(tp->shared))
> +               odp_ticketlock_lock(&tp->lock);
> +
> +       if (odp_unlikely(tim->queue == ODP_QUEUE_INVALID))
> +               ODP_ABORT("Invalid timer %p\n", tim);
> +       if (odp_unlikely(tim->user_buf))
> +               ODP_ABORT("Timer %p has user buffer\n", tim);
> +       /* Increase timer tag to make any pending timeout stale */
> +       tim->tag++;
> +       /* Save requested timeout */
> +       tim->req_tmo = abs_tck;
> +       /* Update timer position in priority queue */
> +       pq_reset_element(&tp->pq, &tim->pqelem, abs_tck);
> +
> +       if (odp_likely(tp->shared))
> +               odp_ticketlock_unlock(&tp->lock);
> +       return ODP_TIMER_SET_SUCCESS;
> +}
>
> -       timer = sigval.sival_ptr;
> +static odp_timer_set_t timer_reset_w_buf(odp_timer_pool *tp,
> +               odp_timer *tim,
> +               uint64_t abs_tck,
> +               odp_buffer_t user_buf)
> +{
> +       if (odp_unlikely(abs_tck < tp->min_tick))
> +               return ODP_TIMER_SET_TOOEARLY;
> +       if (odp_unlikely(abs_tck > tp->max_tick))
> +               return ODP_TIMER_SET_TOOLATE;
> +
> +       if (odp_likely(tp->shared))
> +               odp_ticketlock_lock(&tp->lock);
> +
> +       if (odp_unlikely(tim->queue == ODP_QUEUE_INVALID))
> +               ODP_ABORT("Invalid timer %p\n", tim);
> +       /* Increase timer tag to make any pending timeout stale */
> +       tim->tag++;
> +       /* Save requested timeout */
> +       tim->req_tmo = abs_tck;
> +       /* Set flag indicating presence of user defined buffer */
> +       tim->user_buf = true;
> +       /* Swap in new buffer, save any old buffer pointer */
> +       odp_buffer_t old_buf = tim->tmo_buf;
> +       tim->tmo_buf = user_buf;
> +       /* Update timer position in priority queue */
> +       pq_reset_element(&tp->pq, &tim->pqelem, abs_tck);
> +
> +       if (odp_likely(tp->shared))
> +               odp_ticketlock_unlock(&tp->lock);
> +
> +       /* Free old buffer if present */
> +       if (odp_unlikely(old_buf != ODP_BUFFER_INVALID))
> +               odp_buffer_free(old_buf);
> +       return ODP_TIMER_SET_SUCCESS;
> +}
>
> -       if (timer->active == 0) {
> -               ODP_DBG("Timer (%u) not active\n", timer->timer_hdl);
> -               return;
> +static inline void timer_cancel(odp_timer_pool *tp,
> +                               odp_timer *tim)
> +{
> +       odp_buffer_t old_buf = ODP_BUFFER_INVALID;
> +       if (odp_likely(tp->shared))
> +               odp_ticketlock_lock(&tp->lock);
> +
> +       if (odp_unlikely(tim->queue == ODP_QUEUE_INVALID))
> +               ODP_ABORT("Invalid timer %p\n", tim);
> +       if (odp_unlikely(tim->user_buf)) {
> +               /* Swap out old user buffer */
> +               old_buf = tim->tmo_buf;
> +               tim->tmo_buf = ODP_BUFFER_INVALID;
> +               /* tim->user_buf stays true */
>         }
> +       /* Else a normal timer (no user-defined buffer) */
> +       /* Increase timer tag to make any pending timeout stale */
> +       tim->tag++;
> +       /* Clear requested timeout, mark timer inactive */
> +       tim->req_tmo = INVALID_PRIORITY;
> +       /* Remove timer from the priority queue */
> +       pq_deactivate_element(&tp->pq, &tim->pqelem);
> +
> +       if (odp_likely(tp->shared))
> +               odp_ticketlock_unlock(&tp->lock);
> +       /* Free user-defined buffer if present */
> +       if (odp_unlikely(old_buf != ODP_BUFFER_INVALID))
> +               odp_buffer_free(old_buf);
> +}
>
> -       /* ODP_DBG("Tick\n"); */
> -
> -       cur_tick = timer->cur_tick++;
> -
> -       odp_sync_stores();
> +static inline void timer_return(odp_timer_pool *tp,
> +                               odp_timer *tim,
> +                               odp_timer_tmo_t tmo,
> +                               const odp_timeout_hdr_t *tmo_hdr)
> +{
> +       odp_buffer_t tmo_buf = odp_buffer_from_timeout(tmo);
> +       if (odp_likely(tp->shared))
> +               odp_ticketlock_lock(&tp->lock);
> +       if (odp_unlikely(tim->user_buf))
> +               ODP_ABORT("Timer %p has user-defined buffer\n", tim);
> +       if (odp_likely(tmo_hdr->gencnt == tim->gencnt)) {
> +               assert(tim->tmo_buf == ODP_BUFFER_INVALID);
> +               /* Save returned buffer for use when timer expires next time */
> +               tim->tmo_buf = tmo_buf;
> +               tmo_buf = ODP_BUFFER_INVALID;
> +               /* Check if timer is active and should have expired */
> +               if (odp_unlikely(tim->req_tmo != INVALID_PRIORITY &&
> +                                tim->req_tmo <= tp->cur_tick)) {
> +                       /* Expire timer now since we have restored the
> +                          timeout buffer */
> +                       timer_expire(tim);
> +               }
> +               /* Else timer inactive or expires in the future */
> +       }
> +       /* Else timeout orphaned, free buffer later */
> +       if (odp_likely(tp->shared))
> +               odp_ticketlock_unlock(&tp->lock);
> +       if (odp_unlikely(tmo_buf != ODP_BUFFER_INVALID))
> +               odp_buffer_free(tmo_buf);
> +}
>
> -       tick = &timer->tick[cur_tick % MAX_TICKS];
> +/* Non-public so not in odp_timer.h but externally visible, must declare
> + * somewhere */
> +unsigned odp_timer_pool_expire(odp_timer_pool_t tpid, uint64_t tick);
>
> -       while ((tmo = rem_tmo(tick)) != NULL) {
> -               odp_queue_t  queue;
> -               odp_buffer_t buf;
> +unsigned odp_timer_pool_expire(odp_timer_pool_t tpid, uint64_t tick)
> +{
> +       if (odp_likely(tpid->shared))
> +               odp_ticketlock_lock(&tpid->lock);
> +
> +       unsigned nexp = 0;
> +       odp_timer_t tim;
> +       tpid->cur_tick = tick;
> +       tpid->min_tick = tick + tpid->min_tmo_tck;
> +       tpid->max_tick = tick + tpid->max_tmo_tck;
> +       while ((tim = (odp_timer_t)pq_release_element(&tpid->pq, tick)) !=
> +              ODP_TIMER_INVALID) {
> +               assert(get_prio(&tim->pqelem) <= tick);
> +               timer_expire(tim);
> +               nexp++;
> +       }
>
> -               queue = tmo->queue;
> -               buf   = tmo->buf;
> +       if (odp_likely(tpid->shared))
> +               odp_ticketlock_unlock(&tpid->lock);
> +       return nexp;
> +}
>
> -               if (buf != tmo->tmo_buf)
> -                       odp_buffer_free(tmo->tmo_buf);
>
> +/******************************************************************************
> + * POSIX timer support
> + * Functions that use Linux/POSIX per-process timers and related facilities
> + *****************************************************************************/
>
> -               odp_queue_enq(queue, buf);
> -       }
> +static void timer_notify(sigval_t sigval)
> +{
> +       odp_timer_pool *tp = (odp_timer_pool *)sigval.sival_ptr;
> +       uint64_t new_tick = tp->cur_tick + 1;
> +       (void)odp_timer_pool_expire(tp, new_tick);
>  }
>
> -static void timer_start(timer_ring_t *timer)
> +static void timer_init(odp_timer_pool *tp)
>  {
>         struct sigevent   sigev;
>         struct itimerspec ispec;
>         uint64_t res, sec, nsec;
>
> -       ODP_DBG("\nTimer (%u) starts\n", timer->timer_hdl);
> +       ODP_DBG("Creating POSIX timer for timer pool %s, period %"
> +               PRIu64" ns\n", tp->name, tp->resolution_ns);
>
>         memset(&sigev, 0, sizeof(sigev));
>         memset(&ispec, 0, sizeof(ispec));
>
>         sigev.sigev_notify          = SIGEV_THREAD;
> -       sigev.sigev_notify_function = notify_function;
> -       sigev.sigev_value.sival_ptr = timer;
> +       sigev.sigev_notify_function = timer_notify;
> +       sigev.sigev_value.sival_ptr = tp;
>
> -       if (timer_create(CLOCK_MONOTONIC, &sigev, &timer->timerid)) {
> -               ODP_DBG("Timer create failed\n");
> -               return;
> -       }
> +       if (timer_create(CLOCK_MONOTONIC, &sigev, &tp->timerid))
> +               ODP_ABORT("timer_create() returned error %s\n",
> +                         strerror(errno));
>
> -       res  = timer->resolution_ns;
> +       res  = tp->resolution_ns;
>         sec  = res / ODP_TIME_SEC;
> -       nsec = res - sec*ODP_TIME_SEC;
> +       nsec = res - sec * ODP_TIME_SEC;
>
>         ispec.it_interval.tv_sec  = (time_t)sec;
>         ispec.it_interval.tv_nsec = (long)nsec;
>         ispec.it_value.tv_sec     = (time_t)sec;
>         ispec.it_value.tv_nsec    = (long)nsec;
>
> -       if (timer_settime(timer->timerid, 0, &ispec, NULL)) {
> -               ODP_DBG("Timer set failed\n");
> -               return;
> -       }
> -
> -       return;
> +       if (timer_settime(tp->timerid, 0, &ispec, NULL))
> +               ODP_ABORT("timer_settime() returned error %s\n",
> +                         strerror(errno));
>  }
>
> -int odp_timer_init_global(void)
> +static void timer_exit(odp_timer_pool *tp)
>  {
> -       ODP_DBG("Timer init ...");
> -
> -       memset(&odp_timer, 0, sizeof(timer_global_t));
> -
> -       odp_spinlock_init(&odp_timer.lock);
> -
> -       ODP_DBG("done\n");
> -
> -       return 0;
> +       if (timer_delete(tp->timerid) != 0)
> +               ODP_ABORT("timer_delete() returned error %s\n",
> +                         strerror(errno));
>  }
>
> -int odp_timer_disarm_all(void)
>
> +/******************************************************************************
> + * Public API functions
> + * Some parameter checks and error messages
> + * No modifications of internal state
> + *****************************************************************************/
> +odp_timer_pool_t
> +odp_timer_pool_create(const char *name,
> +                     odp_buffer_pool_t buf_pool,
> +                     uint64_t resolution_ns,
> +                     uint64_t min_timeout,
> +                     uint64_t max_timeout,
> +                     uint32_t num_timers,
> +                     bool shared,
> +                     odp_timer_clk_src_t clk_src)
>  {
> -       int timers;
> -       struct itimerspec ispec;
> -
> -       odp_spinlock_lock(&odp_timer.lock);
> -
> -       timers = odp_timer.num_timers;
> -
> -       ispec.it_interval.tv_sec  = 0;
> -       ispec.it_interval.tv_nsec = 0;
> -       ispec.it_value.tv_sec     = 0;
> -       ispec.it_value.tv_nsec    = 0;
> -
> -       for (; timers >= 0; timers--) {
> -               if (timer_settime(odp_timer.timer[timers].timerid,
> -                                 0, &ispec, NULL)) {
> -                       ODP_DBG("Timer reset failed\n");
> -                       odp_spinlock_unlock(&odp_timer.lock);
> -                       return -1;
> -               }
> -               odp_timer.num_timers--;
> -       }
> -
> -       odp_spinlock_unlock(&odp_timer.lock);
> -
> -       return 0;
> +       /* Verify that buffer pool can be used for timeouts */
> +       odp_buffer_t buf = odp_buffer_alloc(buf_pool);
> +       if (buf == ODP_BUFFER_INVALID)
> +               ODP_ABORT("%s: Failed to allocate buffer\n", name);
> +       if (odp_buffer_type(buf) != ODP_BUFFER_TYPE_TIMEOUT)
> +               ODP_ABORT("%s: Buffer pool wrong type\n", name);
> +       odp_buffer_free(buf);
> +       odp_timer_pool_t tp = odp_timer_pool_new(name, buf_pool, resolution_ns,
> +                             min_timeout, max_timeout, num_timers,
> +                             shared, clk_src);
> +       return tp;
>  }
>
> -odp_timer_t odp_timer_create(const char *name, odp_buffer_pool_t pool,
> -                            uint64_t resolution_ns, uint64_t min_ns,
> -                            uint64_t max_ns)
> +void odp_timer_pool_start(void)
>  {
> -       uint32_t id;
> -       timer_ring_t *timer;
> -       odp_timer_t timer_hdl;
> -       int i;
> -       uint64_t max_ticks;
> -       (void) name;
> -
> -       if (resolution_ns < MIN_RES)
> -               resolution_ns = MIN_RES;
> -
> -       if (resolution_ns > MAX_RES)
> -               resolution_ns = MAX_RES;
> -
> -       max_ticks = max_ns / resolution_ns;
> -
> -       if (max_ticks > MAX_TICKS) {
> -               ODP_DBG("Maximum timeout too long: %"PRIu64" ticks\n",
> -                       max_ticks);
> -               return ODP_TIMER_INVALID;
> -       }
> -
> -       if (min_ns < resolution_ns) {
> -               ODP_DBG("Min timeout %"PRIu64" ns < resolution %"PRIu64" ns\n",
> -                       min_ns, resolution_ns);
> -               return ODP_TIMER_INVALID;
> -       }
> -
> -       odp_spinlock_lock(&odp_timer.lock);
> -
> -       if (odp_timer.num_timers >= NUM_TIMERS) {
> -               odp_spinlock_unlock(&odp_timer.lock);
> -               ODP_DBG("All timers allocated\n");
> -               return ODP_TIMER_INVALID;
> -       }
> -
> -       for (id = 0; id < NUM_TIMERS; id++) {
> -               if (odp_timer.timer[id].allocated == 0)
> -                       break;
> -       }
> -
> -       timer = &odp_timer.timer[id];
> -       timer->allocated = 1;
> -       odp_timer.num_timers++;
> -
> -       odp_spinlock_unlock(&odp_timer.lock);
> -
> -       timer_hdl = id + 1;
> -
> -       timer->timer_hdl     = timer_hdl;
> -       timer->pool          = pool;
> -       timer->resolution_ns = resolution_ns;
> -       timer->max_ticks     = MAX_TICKS;
> -
> -       for (i = 0; i < MAX_TICKS; i++) {
> -               odp_spinlock_init(&timer->tick[i].lock);
> -               timer->tick[i].list = NULL;
> -       }
> -
> -       timer->active = 1;
> -       odp_sync_stores();
> -
> -       timer_start(timer);
> +       /* Nothing to do here, timer pools are started by the create call
> */
> +}
>
> -       return timer_hdl;
> +void odp_timer_pool_destroy(odp_timer_pool_t tpid)
> +{
> +       odp_timer_pool_del(tpid);
>  }
>
> -odp_timer_tmo_t odp_timer_absolute_tmo(odp_timer_t timer_hdl, uint64_t tmo_tick,
> -                                      odp_queue_t queue, odp_buffer_t buf)
> +uint64_t odp_timer_tick_to_ns(odp_timer_pool_t tpid, uint64_t ticks)
>  {
> -       int id;
> -       uint64_t tick;
> -       uint64_t cur_tick;
> -       timeout_t *new_tmo;
> -       odp_buffer_t tmo_buf;
> -       odp_timeout_hdr_t *tmo_hdr;
> -       timer_ring_t *timer;
> +       return ticks * tpid->resolution_ns;
> +}
>
> -       id = (int)timer_hdl - 1;
> -       timer = &odp_timer.timer[id];
> +uint64_t odp_timer_ns_to_tick(odp_timer_pool_t tpid, uint64_t ns)
> +{
> +       return (uint64_t)(ns / tpid->resolution_ns);
> +}
>
> -       cur_tick = timer->cur_tick;
> -       if (tmo_tick <= cur_tick) {
> -               ODP_DBG("timeout too close\n");
> -               return ODP_TIMER_TMO_INVALID;
> -       }
> +uint64_t odp_timer_current_tick(odp_timer_pool_t tpid)
> +{
> +       return tpid->cur_tick;
> +}
>
> -       if ((tmo_tick - cur_tick) > MAX_TICKS) {
> -               ODP_DBG("timeout too far: cur %"PRIu64" tmo %"PRIu64"\n",
> -                       cur_tick, tmo_tick);
> -               return ODP_TIMER_TMO_INVALID;
> +uintptr_t odp_timer_pool_query_conf(odp_timer_pool_t tpid,
> +                                   odp_timer_pool_conf_t item)
> +{
> +       switch (item) {
> +       case ODP_TIMER_NAME:
> +               return (uintptr_t)(tpid->name);
> +       case ODP_TIMER_RESOLUTION:
> +               return tpid->resolution_ns;
> +       case ODP_TIMER_MIN_TICKS:
> +               return tpid->min_tmo_tck;
> +       case ODP_TIMER_MAX_TICKS:
> +               return tpid->max_tmo_tck;
> +       case ODP_TIMER_NUM_TIMERS:
> +               return tpid->max_timers;
> +       case ODP_TIMER_SHARED:
> +               return tpid->shared;
> +       default:
> +               return 0;
>         }
> +}
>
> -       tick = tmo_tick % MAX_TICKS;
> -
> -       tmo_buf = odp_buffer_alloc(timer->pool);
> -       if (tmo_buf == ODP_BUFFER_INVALID) {
> -               ODP_DBG("tmo buffer alloc failed\n");
> -               return ODP_TIMER_TMO_INVALID;
> +odp_timer_t odp_timer_alloc(odp_timer_pool_t tpid,
> +                           odp_queue_t queue,
> +                           void *user_ptr)
> +{
> +       /* We check this because ODP_QUEUE_INVALID is used */
> +       /* to indicate a free timer */
> +       if (odp_unlikely(queue == ODP_QUEUE_INVALID))
> +               ODP_ABORT("%s: Invalid queue handle\n", tpid->name);
> +       odp_buffer_t tmo_buf = odp_buffer_alloc(tpid->buf_pool);
> +       if (odp_likely(tmo_buf != ODP_BUFFER_INVALID)) {
> +               odp_timer *tim = timer_alloc(tpid, queue, user_ptr, tmo_buf);
> +               if (odp_likely(tim != ODP_TIMER_INVALID)) {
> +                       /* Success */
> +                       assert(tim->queue != ODP_QUEUE_INVALID);
> +                       return tim;
> +               }
> +               odp_buffer_free(tmo_buf);
>         }
> +       /* Else failed to allocate timeout event */
> +       /* errno set by odp_buffer_alloc() or timer_alloc () */
> +       return ODP_TIMER_INVALID;
> +}
>
> -       tmo_hdr = odp_timeout_hdr((odp_timeout_t) tmo_buf);
> -       new_tmo = &tmo_hdr->meta;
> -
> -       new_tmo->timer_id = id;
> -       new_tmo->tick     = (int)tick;
> -       new_tmo->tmo_tick = tmo_tick;
> -       new_tmo->queue    = queue;
> -       new_tmo->tmo_buf  = tmo_buf;
> -
> -       if (buf != ODP_BUFFER_INVALID)
> -               new_tmo->buf = buf;
> -       else
> -               new_tmo->buf = tmo_buf;
> -
> -       add_tmo(&timer->tick[tick], new_tmo);
> -
> -       return tmo_buf;
> +void odp_timer_free(odp_timer_t tim)
> +{
> +       odp_timer_pool *tp = (odp_timer_pool *)get_pq(&tim->pqelem);
> +       timer_free(tp, tim);
>  }
>
> -uint64_t odp_timer_tick_to_ns(odp_timer_t timer_hdl, uint64_t ticks)
> +odp_timer_set_t odp_timer_set_abs_w_buf(odp_timer_t tim,
> +                                       uint64_t abs_tck,
> +                                       odp_buffer_t user_buf)
>  {
> -       uint32_t id;
> +       odp_timer_pool *tp = (odp_timer_pool *)get_pq(&tim->pqelem);
> +       odp_timer_set_t rc = timer_reset_w_buf(tp, tim, abs_tck, user_buf);
> +       return rc;
> +}
>
> -       id = timer_hdl - 1;
> -       return ticks * odp_timer.timer[id].resolution_ns;
> +odp_timer_set_t odp_timer_set_abs(odp_timer_t tim, uint64_t abs_tck)
> +{
> +       odp_timer_pool *tp = (odp_timer_pool *)get_pq(&tim->pqelem);
> +       odp_timer_set_t rc = timer_reset(tp, tim, abs_tck);
> +       return rc;
>  }
>
> -uint64_t odp_timer_ns_to_tick(odp_timer_t timer_hdl, uint64_t ns)
> +odp_timer_set_t odp_timer_set_rel_w_buf(odp_timer_t tim,
> +                                       uint64_t rel_tck,
> +                                       odp_buffer_t user_buf)
>  {
> -       uint32_t id;
> +       odp_timer_pool *tp = (odp_timer_pool *)get_pq(&tim->pqelem);
> +       odp_timer_set_t rc = timer_reset_w_buf(tp, tim, tp->cur_tick + rel_tck,
> +                                              user_buf);
> +       return rc;
> +}
>
> -       id = timer_hdl - 1;
> -       return ns / odp_timer.timer[id].resolution_ns;
> +odp_timer_set_t odp_timer_set_rel(odp_timer_t tim, uint64_t rel_tck)
> +{
> +       odp_timer_pool *tp = (odp_timer_pool *)get_pq(&tim->pqelem);
> +       odp_timer_set_t rc = timer_reset(tp, tim, tp->cur_tick + rel_tck);
> +       return rc;
>  }
>
> -uint64_t odp_timer_resolution(odp_timer_t timer_hdl)
> +void odp_timer_cancel(odp_timer_t tim)
>  {
> -       uint32_t id;
> +       odp_timer_pool *tp = (odp_timer_pool *)get_pq(&tim->pqelem);
> +       timer_cancel(tp, tim);
> +}
>
> -       id = timer_hdl - 1;
> -       return odp_timer.timer[id].resolution_ns;
> +void odp_timer_return_tmo(odp_timer_tmo_t tmo)
> +{
> +       const odp_timeout_hdr_t *tmo_hdr = odp_tmo_to_hdr(tmo);
> +       odp_timer *parent_tim = tmo_hdr->timer;
> +       odp_timer_pool *tp = (odp_timer_pool *)get_pq(&parent_tim->pqelem);
> +       timer_return(tp, parent_tim, tmo, tmo_hdr);
>  }
>
> -uint64_t odp_timer_maximum_tmo(odp_timer_t timer_hdl)
> +odp_timer_tmo_status_t odp_timer_tmo_status(odp_timer_tmo_t tmo)
>  {
> -       uint32_t id;
> +       const odp_timeout_hdr_t *tmo_hdr = odp_tmo_to_hdr(tmo);
> +       odp_timer *parent_tim = tmo_hdr->timer;
>
> -       id = timer_hdl - 1;
> -       return odp_timer.timer[id].max_ticks;
> +       if (odp_unlikely(tmo_hdr->gencnt != parent_tim->gencnt)) {
> +               /* Generation counters differ => timer has been freed */
> +               return ODP_TMO_ORPHAN;
> +       }
> +       /* Else generation counters match => parent timer exists */
> +
> +       if (odp_likely(parent_tim->tag == tmo_hdr->tag))
> +               return ODP_TMO_FRESH;
> +       else
> +               return ODP_TMO_STALE;
>  }
>
> -uint64_t odp_timer_current_tick(odp_timer_t timer_hdl)
> +odp_timer_t odp_timer_handle(odp_timer_tmo_t tmo)
>  {
> -       uint32_t id;
> +       odp_timeout_hdr_t *tmo_hdr = odp_tmo_to_hdr(tmo);
> +       odp_timer_t parent_tim = tmo_hdr->timer;
> +       if (odp_likely(tmo_hdr->gencnt == parent_tim->gencnt))
> +               return parent_tim;
> +       else
> +               return ODP_TIMER_INVALID;
> +}
>
> -       id = timer_hdl - 1;
> -       return odp_timer.timer[id].cur_tick;
> +uint64_t odp_timer_expiration(odp_timer_tmo_t tmo)
> +{
> +       odp_timeout_hdr_t *tmo_hdr = odp_tmo_to_hdr(tmo);
> +       return tmo_hdr->expiration;
>  }
>
> -odp_timeout_t odp_timeout_from_buffer(odp_buffer_t buf)
> +void *odp_timer_userptr(odp_timer_tmo_t tmo)
>  {
> -       return (odp_timeout_t) buf;
> +       odp_timeout_hdr_t *tmo_hdr = odp_tmo_to_hdr(tmo);
> +       return tmo_hdr->user_ptr;
>  }
>
> -uint64_t odp_timeout_tick(odp_timeout_t tmo)
> +int odp_timer_init_global(void)
>  {
> -       odp_timeout_hdr_t *tmo_hdr = odp_timeout_hdr(tmo);
> -       return tmo_hdr->meta.tmo_tick;
> +       return 0;
>  }
> diff --git a/test/api_test/odp_timer_ping.c b/test/api_test/odp_timer_ping.c
> index 7406a45..2617b5c 100644
> --- a/test/api_test/odp_timer_ping.c
> +++ b/test/api_test/odp_timer_ping.c
> @@ -20,6 +20,8 @@
>   *    Otherwise timeout may happen bcz of slow nw speed
>   */
>
> +#include <assert.h>
> +#include <stdlib.h>
>  #include <unistd.h>
>  #include <fcntl.h>
>  #include <errno.h>
> @@ -41,14 +43,15 @@
>  #define MSG_POOL_SIZE         (4*1024*1024)
>  #define BUF_SIZE               8
>  #define PING_CNT       10
> -#define PING_THRD      2       /* Send and Rx Ping thread */
> +#define PING_THRD      2       /* send_ping and rx_ping threads */
>
>  /* Nanoseconds */
>  #define RESUS  10000
>  #define MINUS  10000
>  #define MAXUS  10000000
>
> -static odp_timer_t test_timer_ping;
> +static odp_timer_pool_t tp;
> +static odp_timer_t test_timer_ping = ODP_TIMER_INVALID;
>  static odp_timer_tmo_t test_ping_tmo;
>
>  #define PKTSIZE      64
> @@ -128,15 +131,7 @@ static int listen_to_pingack(void)
>                                          (socklen_t *)&len);
>                         if (bytes > 0) {
>                                 /* pkt rxvd therefore cancel the timeout */
> -                               if (odp_timer_cancel_tmo(test_timer_ping,
> -                                                        test_ping_tmo) != 0) {
> -                                       ODP_ERR("cancel_tmo failed ..exiting listner thread\n");
> -                                       /* avoid exiting from here even if tmo
> -                                        * failed for current ping,
> -                                        * allow subsequent ping_rx request */
> -                                       err = -1;
> -
> -                               }
> +                               odp_timer_cancel(test_timer_ping);
>                                 /* cruel bad hack used for sender, listner ipc..
>                                  * euwww.. FIXME ..
>                                  */
> @@ -160,7 +155,6 @@ static int send_ping_request(struct sockaddr_in *addr)
>
>         uint64_t tick;
>         odp_queue_t queue;
> -       odp_buffer_t buf;
>
>         int err = 0;
>
> @@ -184,8 +178,16 @@ static int send_ping_request(struct sockaddr_in *addr)
>
>         /* get the ping queue */
>         queue = odp_queue_lookup("ping_timer_queue");
> +       test_timer_ping = odp_timer_alloc(tp, queue, NULL);
> +       if (test_timer_ping == ODP_TIMER_INVALID) {
> +               ODP_ERR("Failed to allocate timer.\n");
> +               err = -1;
> +               goto err;
> +       }
>
>         for (i = 0; i < PING_CNT; i++) {
> +               odp_buffer_t buf;
> +               odp_timer_tmo_t tmo;
>                 /* prepare icmp pkt */
>                 bzero(&pckt, sizeof(pckt));
>                 pckt.hdr.type = ICMP_ECHO;
> @@ -209,12 +211,10 @@ static int send_ping_request(struct sockaddr_in *addr)
>                 printf(" icmp_sent msg_cnt %d\n", i);
>
>                 /* arm the timer */
> -               tick = odp_timer_current_tick(test_timer_ping);
> +               tick = odp_timer_current_tick(tp);
>
>                 tick += 1000;
> -               test_ping_tmo = odp_timer_absolute_tmo(test_timer_ping, tick,
> -                                                      queue,
> -                                                      ODP_BUFFER_INVALID);
> +               odp_timer_set_abs(test_timer_ping, tick);
>                 /* wait for timeout event */
>                 while ((buf = odp_queue_deq(queue)) == ODP_BUFFER_INVALID) {
>                         /* flag true means ack rxvd.. a cruel hack as I
> @@ -229,17 +229,28 @@ static int send_ping_request(struct sockaddr_in *addr)
>                                 break;
>                         }
>                 }
> +               assert(odp_buffer_type(buf) == ODP_BUFFER_TYPE_TIMEOUT);
> +               tmo = odp_timeout_from_buffer(buf);
>
> -               /* free tmo_buf for timeout case */
> -               if (buf != ODP_BUFFER_INVALID) {
> -                       ODP_DBG(" timeout msg_cnt [%i] \n", i);
> +               switch (odp_timer_tmo_status(tmo)) {
> +               case ODP_TMO_FRESH:
> +                       ODP_DBG(" timeout msg_cnt [%i]\n", i);
>                         /* so to avoid seg fault commented */
> -                       odp_buffer_free(buf);
>                         err = -1;
> +                       break;
> +               case ODP_TMO_STALE:
> +                       /* Ignore stale timeouts */
> +                       break;
> +               case ODP_TMO_ORPHAN:
> +                       ODP_ERR("Received orphaned timeout!\n");
> +                       abort();
>                 }
> +               odp_timer_return_tmo(tmo);
>         }
>
>  err:
> +       if (test_timer_ping != ODP_TIMER_INVALID)
> +               odp_timer_free(test_timer_ping);
>         return err;
>  }
>
> @@ -340,9 +351,9 @@ int main(int argc ODP_UNUSED, char *argv[] ODP_UNUSED)
>         pool = odp_buffer_pool_create("msg_pool", pool_base, MSG_POOL_SIZE,
>                                       BUF_SIZE,
>                                       ODP_CACHE_LINE_SIZE,
> -                                     ODP_BUFFER_TYPE_RAW);
> +                                     ODP_BUFFER_TYPE_TIMEOUT);
>         if (pool == ODP_BUFFER_POOL_INVALID) {
> -               ODP_ERR("Pool create failed.\n");
> +               ODP_ERR("Buffer pool create failed.\n");
>                 return -1;
>         }
>
> @@ -357,15 +368,19 @@ int main(int argc ODP_UNUSED, char *argv[] ODP_UNUSED)
>                 return -1;
>         }
>
> -       test_timer_ping = odp_timer_create("ping_timer", pool,
> -                                          RESUS*ODP_TIME_USEC,
> -                                          MINUS*ODP_TIME_USEC,
> -                                          MAXUS*ODP_TIME_USEC);
> -
> -       if (test_timer_ping == ODP_TIMER_INVALID) {
> -               ODP_ERR("Timer create failed.\n");
> +       /*
> +        * Create timer pool
> +        */
> +       tp = odp_timer_pool_create("timer_pool", pool,
> +                                  RESUS*ODP_TIME_USEC,
> +                                  MINUS*ODP_TIME_USEC,
> +                                  MAXUS*ODP_TIME_USEC,
> +                                  1, false, ODP_CLOCK_CPU);
> +       if (tp == ODP_TIMER_POOL_INVALID) {
> +               ODP_ERR("Timer pool create failed.\n");
>                 return -1;
>         }
> +       odp_timer_pool_start();
>
>         odp_shm_print_all();
>
> --
> 1.9.1
>
>
> _______________________________________________
> lng-odp mailing list
> lng-odp@lists.linaro.org
> http://lists.linaro.org/mailman/listinfo/lng-odp
>
Ola Liljedahl Oct. 5, 2014, 3:58 p.m. UTC | #2
This is an existing program; I just updated it to use the new Timer API. If
there are no comments on this change itself, I don't think any complaints
here should stop the Timer API patch from being merged.

However, it is not a very good test program, more of a simple example
program, so it ought to be renamed to better describe what it is. We can
leave that for another patch since nothing has really changed due to this
patch. We could also use a real test program. I did write one for the
priority queue-based timer implementation, but it is only single-threaded
and thus not a good test program for ODP.
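For reference, the per-worker loop of a multi-threaded test could be
structured roughly like the sketch below. This is only an illustration
against the API in this patch; the PERIOD_NS and NUM_ITER knobs, the
worker_loop name and the thread/queue plumbing around it are assumptions,
not taken from any existing test.

/* Sketch of a per-worker loop for a multi-threaded timer test.
 * Assumed context: 'tp' is a shared timer pool and 'queue' is a scheduled
 * queue set up by the caller; PERIOD_NS and NUM_ITER are test parameters. */
static void worker_loop(odp_timer_pool_t tp, odp_queue_t queue)
{
        odp_timer_t tim = odp_timer_alloc(tp, queue, NULL);
        if (tim == ODP_TIMER_INVALID) {
                ODP_ERR("odp_timer_alloc() failed\n");
                return;
        }
        uint64_t period = odp_timer_ns_to_tick(tp, PERIOD_NS);
        int i;
        for (i = 0; i < NUM_ITER; i++) {
                /* Arm the timer relative to the current tick */
                if (odp_timer_set_rel(tim, period) != ODP_TIMER_SET_SUCCESS)
                        break;
                /* Wait for the next scheduled buffer */
                odp_buffer_t buf = odp_schedule_one(&queue, ODP_SCHED_WAIT);
                if (odp_buffer_type(buf) != ODP_BUFFER_TYPE_TIMEOUT) {
                        odp_buffer_free(buf);
                        continue;
                }
                odp_timer_tmo_t tmo = odp_timeout_from_buffer(buf);
                /* Fresh, stale and orphaned timeouts are all returned */
                odp_timer_return_tmo(tmo);
        }
        odp_timer_cancel(tim);
        odp_timer_free(tim);
}

Each worker owns one timer, so the interesting part is the contention on the
shared timer pool lock and on timeout delivery, which is exactly what the
current single-threaded program cannot exercise.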

On 3 October 2014 20:57, Mike Holmes <mike.holmes@linaro.org> wrote:

>
>
> On 2 October 2014 11:23, Ola Liljedahl <ola.liljedahl@linaro.org> wrote:
>
>> Signed-off-by: Ola Liljedahl <ola.liljedahl@linaro.org>
>> ---
>> Fixed review comments for v3 from Anders R.
>> * Example code snippets use @code/@endcode.
>> * Added some missing doxygen comments.
>> * Updated some comments.
>> * Reverted year in copyright notices.
>> * Added odp_likely() hint.
>> * Made some variables self-descriptive and removed redundant comments.
>> Changed to use ticket locks instead of spin locks (ticket locks are more
>> fair).
>> Changed to use ODP_ABORT() which has become available since the last
>> patch.
>>
>>  example/timer/odp_timer_test.c                     | 125 +--
>>
>
> Should this "test"  be under odp/test instead of odp/example, or should
> the "test " be renamed if it makes a good example ?
>
>  platform/linux-generic/Makefile.am                 |   1 +
>>  platform/linux-generic/include/api/odp_timer.h     | 570 +++++++++++--
>>  .../include/odp_priority_queue_internal.h          | 108 +++
>>  .../linux-generic/include/odp_timer_internal.h     |  71 +-
>>  platform/linux-generic/odp_priority_queue.c        | 283 +++++++
>>  platform/linux-generic/odp_timer.c                 | 923 ++++++++++++++-------
>>  test/api_test/odp_timer_ping.c                     |  73 +-
>>  8 files changed, 1648 insertions(+), 506 deletions(-)
>>  create mode 100644
>> platform/linux-generic/include/odp_priority_queue_internal.h
>>  create mode 100644 platform/linux-generic/odp_priority_queue.c
>>
>> diff --git a/example/timer/odp_timer_test.c b/example/timer/odp_timer_test.c
>> index 6e1715d..750d785 100644
>> --- a/example/timer/odp_timer_test.c
>> +++ b/example/timer/odp_timer_test.c
>> @@ -41,67 +41,89 @@ typedef struct {
>>  /** @private Barrier for test synchronisation */
>>  static odp_barrier_t test_barrier;
>>
>> -/** @private Timer handle*/
>> -static odp_timer_t test_timer;
>> +/** @private Timer pool handle */
>> +static odp_timer_pool_t tp;
>>
>>
>> +/** @private Timeout status ASCII strings */
>> +static const char *const status2str[] = {
>> +       "fresh", "stale", "orphaned"
>> +};
>> +
>>  /** @private test timeout */
>>  static void test_abs_timeouts(int thr, test_args_t *args)
>>  {
>> -       uint64_t tick;
>>         uint64_t period;
>>         uint64_t period_ns;
>>         odp_queue_t queue;
>> -       odp_buffer_t buf;
>> -       int num;
>> +       int remain = args->tmo_count;
>> +       odp_timer_t hdl;
>> +       uint64_t tick;
>>
>>         ODP_DBG("  [%i] test_timeouts\n", thr);
>>
>>         queue = odp_queue_lookup("timer_queue");
>>
>>         period_ns = args->period_us*ODP_TIME_USEC;
>> -       period    = odp_timer_ns_to_tick(test_timer, period_ns);
>> +       period    = odp_timer_ns_to_tick(tp, period_ns);
>>
>>         ODP_DBG("  [%i] period %"PRIu64" ticks,  %"PRIu64" ns\n", thr,
>>                 period, period_ns);
>>
>> -       tick = odp_timer_current_tick(test_timer);
>> -
>> -       ODP_DBG("  [%i] current tick %"PRIu64"\n", thr, tick);
>> -
>> -       tick += period;
>> +       ODP_DBG("  [%i] current tick %"PRIu64"\n", thr,
>> +               odp_timer_current_tick(tp));
>>
>> -       if (odp_timer_absolute_tmo(test_timer, tick, queue, ODP_BUFFER_INVALID)
>> -           == ODP_TIMER_TMO_INVALID){
>> -               ODP_DBG("Timeout request failed\n");
>> +       odp_timer_t test_timer;
>> +       test_timer = odp_timer_alloc(tp, queue, NULL);
>> +       if (test_timer == ODP_TIMER_INVALID) {
>> +               ODP_ERR("Failed to allocate timer\n");
>>                 return;
>>         }
>> +       tick = odp_timer_current_tick(tp);
>> +       hdl = test_timer;
>>
>> -       num = args->tmo_count;
>> -
>> -       while (1) {
>> -               odp_timeout_t tmo;
>> -
>> -               buf = odp_schedule_one(&queue, ODP_SCHED_WAIT);
>> -
>> -               tmo  = odp_timeout_from_buffer(buf);
>> -               tick = odp_timeout_tick(tmo);
>> -
>> -               ODP_DBG("  [%i] timeout, tick %"PRIu64"\n", thr, tick);
>> -
>> -               odp_buffer_free(buf);
>> -
>> -               num--;
>> -
>> -               if (num == 0)
>> -                       break;
>> +       while (remain != 0) {
>> +               odp_buffer_t buf;
>> +               odp_timer_tmo_t tmo;
>> +               odp_timer_tmo_status_t stat;
>> +               odp_timer_set_t rc;
>>
>>                 tick += period;
>> +               rc = odp_timer_set_abs(hdl, tick);
>> +               if (odp_unlikely(rc != ODP_TIMER_SET_SUCCESS)) {
>> +                       ODP_ERR("odp_timer_set_abs() failed (%u)\n", rc);
>> +                       abort();
>> +               }
>>
>> -               odp_timer_absolute_tmo(test_timer, tick,
>> -                                      queue, ODP_BUFFER_INVALID);
>> +               /* Get the next ready buffer/timeout */
>> +               buf = odp_schedule_one(&queue, ODP_SCHED_WAIT);
>> +               if (odp_unlikely(odp_buffer_type(buf) !=
>> +                                ODP_BUFFER_TYPE_TIMEOUT)) {
>> +                       ODP_ERR("Unexpected buffer type received\n");
>> +                       abort();
>> +               }
>> +               tmo = odp_timeout_from_buffer(buf);
>> +               stat = odp_timer_tmo_status(tmo);
>> +               tick = odp_timer_expiration(tmo);
>> +               hdl = odp_timer_handle(tmo);
>> +               ODP_DBG("  [%i] timeout, tick %"PRIu64", status %s\n",
>> +                       thr, tick, status2str[stat]);
>> +               /* if (stat == ODP_TMO_FRESH)  - do your thing! */
>> +               if (odp_likely(stat == ODP_TMO_ORPHAN)) {
>> +                       /* Some other thread freed the corresponding
>> +                          timer after the timeout was already
>> +                          enqueued */
>> +                       /* Timeout handle is invalid, use our own timer */
>> +                       hdl = test_timer;
>> +               }
>> +               /* Return timeout to timer manager, regardless of status */
>> +               odp_timer_return_tmo(tmo);
>> +               remain--;
>>         }
>>
>> +       odp_timer_cancel(test_timer);
>> +       odp_timer_free(test_timer);
>> +
>>         if (odp_queue_sched_type(queue) == ODP_SCHED_SYNC_ATOMIC)
>>                 odp_schedule_release_atomic();
>>  }
>> @@ -155,7 +177,6 @@ static void print_usage(void)
>>         printf("Options:\n");
>>         printf("  -c, --count <number>    core count, core IDs start from 1\n");
>>         printf("  -r, --resolution <us>   timeout resolution in usec\n");
>> -       printf("  -m, --min <us>          minimum timeout in usec\n");
>>         printf("  -x, --max <us>          maximum timeout in usec\n");
>>         printf("  -p, --period <us>       timeout period in usec\n");
>>         printf("  -t, --timeouts <count>  timeout repeat count\n");
>> @@ -190,14 +211,14 @@ static void parse_args(int argc, char *argv[], test_args_t *args)
>>         /* defaults */
>>         args->core_count    = 0; /* all cores */
>>         args->resolution_us = 10000;
>> -       args->min_us        = args->resolution_us;
>> +       args->min_us        = 0;
>>         args->max_us        = 10000000;
>>         args->period_us     = 1000000;
>>         args->tmo_count     = 30;
>>
>>         while (1) {
>>                 opt = getopt_long(argc, argv, "+c:r:m:x:p:t:h",
>> -                                longopts, &long_index);
>> +                                 longopts, &long_index);
>>
>>                 if (opt == -1)
>>                         break;  /* No more options */
>> @@ -321,10 +342,25 @@ int main(int argc, char *argv[])
>>                                       ODP_BUFFER_TYPE_TIMEOUT);
>>
>>         if (pool == ODP_BUFFER_POOL_INVALID) {
>> -               ODP_ERR("Pool create failed.\n");
>> +               ODP_ERR("Buffer pool create failed.\n");
>>                 return -1;
>>         }
>>
>> +       tp = odp_timer_pool_create("timer_pool", pool,
>> +                                  args.resolution_us*ODP_TIME_USEC,
>> +                                  args.min_us*ODP_TIME_USEC,
>> +                                  args.max_us*ODP_TIME_USEC,
>> +                                  num_workers, /* One timer per worker */
>> +                                  true,
>> +                                  ODP_CLOCK_CPU);
>> +       if (tp == ODP_TIMER_POOL_INVALID) {
>> +               ODP_ERR("Timer pool create failed.\n");
>> +               return -1;
>> +       }
>> +       odp_timer_pool_start();
>> +
>> +       odp_shm_print_all();
>> +
>>         /*
>>          * Create a queue for timer test
>>          */
>> @@ -340,19 +376,6 @@ int main(int argc, char *argv[])
>>                 return -1;
>>         }
>>
>> -       test_timer = odp_timer_create("test_timer", pool,
>> -                                     args.resolution_us*ODP_TIME_USEC,
>> -                                     args.min_us*ODP_TIME_USEC,
>> -                                     args.max_us*ODP_TIME_USEC);
>> -
>> -       if (test_timer == ODP_TIMER_INVALID) {
>> -               ODP_ERR("Timer create failed.\n");
>> -               return -1;
>> -       }
>> -
>> -
>> -       odp_shm_print_all();
>> -
>>         printf("CPU freq %"PRIu64" hz\n", odp_sys_cpu_hz());
>>         printf("Cycles vs nanoseconds:\n");
>>         ns = 0;
>> diff --git a/platform/linux-generic/Makefile.am b/platform/linux-generic/Makefile.am
>> index d076d50..71f923c 100644
>> --- a/platform/linux-generic/Makefile.am
>> +++ b/platform/linux-generic/Makefile.am
>> @@ -59,6 +59,7 @@ __LIB__libodp_la_SOURCES = \
>>                            odp_packet_flags.c \
>>                            odp_packet_io.c \
>>                            odp_packet_socket.c \
>> +                          odp_priority_queue.c \
>>                            odp_queue.c \
>>                            odp_ring.c \
>>                            odp_rwlock.c \
>> diff --git a/platform/linux-generic/include/api/odp_timer.h b/platform/linux-generic/include/api/odp_timer.h
>> index 01db839..82a1e05 100644
>> --- a/platform/linux-generic/include/api/odp_timer.h
>> +++ b/platform/linux-generic/include/api/odp_timer.h
>> @@ -8,9 +8,193 @@
>>  /**
>>   * @file
>>   *
>> - * ODP timer
>> + * ODP timer service
>>   */
>>
>> +/** Example #1 Retransmission timer (e.g. for reliable connections)
>> + @code
>> +
>> +//Create timer pool for reliable connections
>> +#define SEC 1000000000ULL //1s expressed in nanoseconds
>> +odp_timer_pool_t tcp_tpid =
>> +    odp_timer_pool_create("TCP",
>> +                         buffer_pool,
>> +                         1000000,//resolution 1ms
>> +                         0,//min tmo
>> +                         7200 * SEC,//max tmo length 2hours
>> +                         40000,//num_timers
>> +                         true,//shared
>> +                         ODP_CLOCK_CPU
>> +                        );
>> +if (tcp_tpid == ODP_TIMER_POOL_INVALID)
>> +{
>> +       //Failed to create timer pool => fatal error
>> +}
>> +
>> +
>> +//Setting up a new connection
>> +//Allocate retransmission timeout (identical for supervision timeout)
>> +//The user pointer points back to the connection context
>> +conn->ret_tim = odp_timer_alloc(tcp_tpid, queue, conn);
>> +//Check if all resources were successfully allocated
>> +if (conn->ret_tim == ODP_TIMER_INVALID)
>> +{
>> +       //Failed to allocate all resources for connection => tear down
>> +       //Destroy timeout
>> +       odp_timer_free(conn->ret_tim);
>> +       //Tear down connection
>> +       ...
>> +       return false;
>> +}
>> +//All necessary resources successfully allocated
>> +//Compute initial retransmission length in timer ticks
>> +conn->ret_len = odp_timer_ns_to_tick(tcp_tpid, 3 * SEC);//Per RFC1122
>> +//Arm the timer
>> +odp_timer_set_rel(conn->ret_tim, conn->ret_len);
>> +return true;
>> +
>> +
>> +//A packet for the connection has just been transmitted
>> +//Reset the retransmission timer
>> +odp_timer_set_rel(conn->ret_tim, conn->ret_len);
>> +
>> +
>> +//A retransmission timeout buffer for the connection has been received
>> +odp_timer_tmo_t tmo = odp_timeout_from_buffer(buf);
>> +odp_timer_tmo_status_t stat = odp_timer_tmo_status(tmo);
>> +//Check if timeout is fresh or stale, for stale timeouts we need to reset the
>> +//timer
>> +if (stat == ODP_TMO_FRESH) {
>> +       //Fresh timeout, last transmitted packet not acked in time =>
>> +       //retransmit
>> +       //Get connection from timeout event
>> +       conn = odp_timer_get_userptr(tmo);
>> +       //Retransmit last packet (e.g. TCP segment)
>> +       ...
>> +       //Re-arm timer using original delta value
>> +       odp_timer_set_rel(conn->ret_tim, conn->ret_len);
>> +} else if (stat == ODP_TMO_ORPHAN) {
>> +       odp_buffer_free(buf);
>> +       return;//Get out of here
>> +} // else stat == ODP_TMO_STALE, do nothing
>> +//Finished processing, return timeout
>> +odp_timer_return_tmo(tmo);
>> +
>> + @endcode
>> +*/
>> +
>> +/** Example #2 Periodic tick
>> + @code
>> +
>> +//Create timer pool for periodic ticks
>> +odp_timer_pool_t per_tpid =
>> +    odp_timer_pool_create("periodic-tick",
>> +                         buffer_pool,
>> +                         1,//resolution 1ns
>> +                         1,//minimum timeout length 1ns
>> +                         1000000000,//maximum timeout length 1s
>> +                         10,//num_timers
>> +                         false,//not shared
>> +                         ODP_CLOCK_CPU
>> +                        );
>> +if (per_tpid == ODP_TIMER_POOL_INVALID)
>> +{
>> +    //Failed to create timer pool => fatal error
>> +}
>> +
>> +
>> +//Allocate periodic timer
>> +tim_1733 = odp_timer_alloc(per_tpid, queue, NULL);
>> +//Check if all resources were successfully allocated
>> +if (tim_1733 == ODP_TIMER_INVALID)
>> +{
>> +       //Failed to allocate all resources => tear down
>> +       //Destroy timeout
>> +       odp_timer_free(tim_1733);
>> +       //Tear down other state
>> +       ...
>> +       return false;
>> +}
>> +//All necessary resources successfully allocated
>> +//Compute tick period in timer ticks
>> +period_1733 = odp_timer_ns_to_tick(per_tpid, 1000000000U / 1733U);//1733Hz
>> +//Compute when next tick should expire
>> +next_1733 = odp_timer_current_tick(per_tpid) + period_1733;
>> +//Arm the periodic timer
>> +odp_timer_set_abs(tim_1733, next_1733);
>> +return true;
>> +
>> +
>> +
>> +//A periodic timer timeout has been received
>> +odp_timer_tmo_t tmo = odp_timeout_from_buffer(buf);
>> +//Get status of timeout
>> +odp_timer_tmo_status_t stat = odp_timer_tmo_status(tmo);
>> +//We expect the timeout is always fresh since we are not calling set or cancel
>> +//on active or expired timers in this example
>> +assert(stat == ODP_TMO_FRESH);
>> +//Do processing driven by timeout *before*
>> +...
>> +do {
>> +       //Compute when the timer should expire next
>> +       next_1733 += period_1733;
>> +       //Check that this is in the future
>> +       if (odp_likely(next_1733 > odp_timer_current_tick(per_tpid)))
>> +               break;//Yes, done
>> +       //Else we missed a timeout
>> +       //Optionally attempt some recovery and/or logging of the problem
>> +       ...
>> +} while (0);
>> +//Re-arm periodic timer
>> +odp_timer_set_abs(tim_1733, next_1733);
>> +//Or do processing driven by timeout *after*
>> +...
>> +odp_timer_return_tmo(tmo);
>> +return;
>> +
>> + @endcode
>> +*/
>> +
>> +/** Example #3 Tear down of flow
>> + @code
>> +//ctx points to flow context data structure owned by application
>> +//Free the timer, cancelling any timeout
>> +odp_timer_free(ctx->timer);//Any enqueued timeout will be made invalid
>> +//Continue tearing down and eventually freeing context
>> +...
>> +return;
>> +
>> +//A timeout has been received, check status
>> +odp_timer_tmo_t tmo = odp_timeout_from_buffer(buf);
>> +switch (odp_timer_tmo_status(tmo))
>> +{
>> +    case ODP_TMO_FRESH :
>> +       //A flow has timed out, tear it down
>> +       //Find flow context from timeout
>> +       ctx = (context *)odp_timer_userptr(tmo);
>> +       //Free the supervision timer, any enqueued timeout will remain
>> +       odp_timer_free(ctx->tim);
>> +       //Free other flow related resources
>> +       ...
>> +       //Free the timeout buffer
>> +       odp_buffer_free(buf);
>> +       //Flow torn down
>> +       break;
>> +    case ODP_TMO_STALE :
>> +       //A stale timeout was received, return timeout and update timer
>> +       odp_timer_return_tmo(tmo);
>> +       break;
>> +    case ODP_TMO_ORPHAN :
>> +       //Orphaned timeout (from previously torn down flow)
>> +       //No corresponding timer or flow context
>> +       //Free the timeout buffer
>> +       odp_buffer_free(buf);
>> +       break;
>> +}
>> +
>> + @endcode
>> +*/
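
A general note on the three examples: it might help to also show the receive
side in one place. Below is a minimal dispatch-loop sketch as I read the API
(untested; odp_queue_deq()/odp_buffer_type() are the existing ODP buffer/queue
calls, handle_fresh_timeout() is a hypothetical application function):

static void handle_event(odp_queue_t queue)
{
        odp_buffer_t buf = odp_queue_deq(queue);
        if (buf == ODP_BUFFER_INVALID)
                return;//Nothing to do
        if (odp_buffer_type(buf) != ODP_BUFFER_TYPE_TIMEOUT) {
                //Not a timeout, handle packet or other buffer type
                return;
        }
        odp_timer_tmo_t tmo = odp_timeout_from_buffer(buf);
        switch (odp_timer_tmo_status(tmo)) {
        case ODP_TMO_FRESH:
                //Valid expiration, process it using the user pointer
                handle_fresh_timeout(odp_timer_userptr(tmo), tmo);
                odp_timer_return_tmo(tmo);//Recycle timeout to its timer
                break;
        case ODP_TMO_STALE:
                //Superseded by a later set/cancel, just return it
                odp_timer_return_tmo(tmo);
                break;
        case ODP_TMO_ORPHAN:
                //Parent timer already freed, free the buffer
                odp_buffer_free(buf);
                break;
        }
}
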
>> +
>>  #ifndef ODP_TIMER_H_
>>  #define ODP_TIMER_H_
>>
>> @@ -18,144 +202,408 @@
>>  extern "C" {
>>  #endif
>>
>> +#include <stdlib.h>
>>  #include <odp_std_types.h>
>>  #include <odp_buffer.h>
>>  #include <odp_buffer_pool.h>
>>  #include <odp_queue.h>
>>
>> +struct odp_timer_pool_s; /**< Forward declaration */
>> +
>> +/**
>> +* ODP timer pool handle (platform dependent)
>> +*/
>> +typedef struct odp_timer_pool_s *odp_timer_pool_t;
>> +
>> +/**
>> + * Invalid timer pool handle (platform dependent).
>> + */
>> +#define ODP_TIMER_POOL_INVALID NULL
>>
>>  /**
>> - * ODP timer handle
>> + * Clock sources for timers in timer pool.
>>   */
>> -typedef uint32_t odp_timer_t;
>> +typedef enum odp_timer_clk_src_e {
>> +       /** Use CPU clock as clock source for timers */
>> +       ODP_CLOCK_CPU,
>> +       /** Use external clock as clock source for timers */
>> +       ODP_CLOCK_EXT
>> +       /* Platform dependent which other clock sources exist */
>> +} odp_timer_clk_src_t;
>>
>> -/** Invalid timer */
>> -#define ODP_TIMER_INVALID 0
>> +struct odp_timer_s; /**< Forward declaration */
>>
>> +/**
>> +* ODP timer handle (platform dependent).
>> +*/
>> +typedef struct odp_timer_s *odp_timer_t;
>>
>>  /**
>> - * ODP timeout handle
>> + * Invalid timer handle (platform dependent).
>>   */
>> -typedef odp_buffer_t odp_timer_tmo_t;
>> -
>> -/** Invalid timeout */
>> -#define ODP_TIMER_TMO_INVALID 0
>> +#define ODP_TIMER_INVALID NULL
>>
>> +/**
>> + * Return values of timer set calls.
>> + */
>> +typedef enum odp_timer_set_e {
>> +       /** Timer set operation successful */
>> +       ODP_TIMER_SET_SUCCESS,
>> +       /** Timer set operation failed, expiration too early */
>> +       ODP_TIMER_SET_TOOEARLY,
>> +       /** Timer set operation failed, expiration too late */
>> +       ODP_TIMER_SET_TOOLATE
>> +} odp_timer_set_t;
>>
>>  /**
>> - * Timeout notification
>> + * Timeout event handle.
>>   */
>> -typedef odp_buffer_t odp_timeout_t;
>> +typedef odp_buffer_t odp_timer_tmo_t;
>>
>> +/**
>> + * Status of a timeout event.
>> + */
>> +typedef enum odp_timer_tmo_status_e {
>> +       /** Timeout is fresh, process it and return timeout */
>> +       ODP_TMO_FRESH,
>> +       /** Timer reset or cancelled, just return timeout  */
>> +       ODP_TMO_STALE,
>> +       /** Timer deleted, return or free timeout */
>> +       ODP_TMO_ORPHAN
>> +} odp_timer_tmo_status_t;
>>
>>  /**
>> - * Create a timer
>> + * Create a timer pool
>>   *
>> - * Creates a new timer with requested properties.
>> + * Create a new timer pool.
>>   *
>>   * @param name       Name
>> - * @param pool       Buffer pool for allocating timeout notifications
>> + * @param buf_pool   Buffer pool for allocating timeouts (and only timeouts)
>>   * @param resolution Timeout resolution in nanoseconds
>> - * @param min_tmo    Minimum timeout duration in nanoseconds
>> - * @param max_tmo    Maximum timeout duration in nanoseconds
>> + * @param min_tmo    Minimum relative timeout in nanoseconds
>> + * @param max_tmo    Maximum relative timeout in nanoseconds
>> + * @param num_timers Number of supported timers (minimum)
>> + * @param shared     Shared or private timer pool.
>> + *                Operations on shared timers will include the necessary
>> + *                mutual exclusion, operations on private timers may not
>> + *                (mutual exclusion is the responsibility of the caller).
>> + * @param clk_src    Clock source to use
>>   *
>> - * @return Timer handle if successful, otherwise ODP_TIMER_INVALID
>> + * @return Timer pool handle if successful, otherwise ODP_TIMER_POOL_INVALID
>> + *         and errno set
>>   */
>> -odp_timer_t odp_timer_create(const char *name, odp_buffer_pool_t pool,
>> -                            uint64_t resolution, uint64_t min_tmo,
>> -                            uint64_t max_tmo);
>> +odp_timer_pool_t
>> +odp_timer_pool_create(const char *name,
>> +                     odp_buffer_pool_t buf_pool,
>> +                     uint64_t resolution,
>> +                     uint64_t min_tmo,
>> +                     uint64_t max_tmo,
>> +                     uint32_t num_timers,
>> +                     bool shared,
>> +                     odp_timer_clk_src_t clk_src);
>> +
>> +/**
>> + * Start a timer pool
>> + *
>> + * Start all created timer pools, enabling the allocation of timers.
>> + * The purpose of this call is to coordinate the creation of multiple timer
>> + * pools that may use the same underlying HW resources.
>> + * This function may be called multiple times.
>> + */
>> +void odp_timer_pool_start(void);
>> +
>> +/**
>> + * Destroy a timer pool
>> + *
>> + * Destroy a timer pool, freeing all resources.
>> + * All timers must have been freed.
>> + *
>> + * @param tpid  Timer pool identifier
>> + */
>> +void odp_timer_pool_destroy(odp_timer_pool_t tpid);
>>
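
One thing the examples at the top of the file do not show is
odp_timer_pool_start(); maybe add a lifecycle sketch along these lines
(untested; buffer_pool is assumed to be a buffer pool reserved for timeouts):

//Create the pool(s) first...
odp_timer_pool_t tp = odp_timer_pool_create("worker-timers",
                                            buffer_pool,
                                            1000000,//resolution 1ms
                                            1000000,//min timeout 1ms
                                            600000000000ULL,//max timeout 10min
                                            1000,//num_timers
                                            true,//shared
                                            ODP_CLOCK_CPU);
if (tp == ODP_TIMER_POOL_INVALID) {
        //Fatal error, errno is set
}
//...then start all created pools before allocating any timers
odp_timer_pool_start();
//... allocate, set, receive and free timers ...
//Finally, once every timer has been freed
odp_timer_pool_destroy(tp);
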
>>  /**
>>   * Convert timer ticks to nanoseconds
>>   *
>> - * @param timer Timer
>> + * @param tpid  Timer pool identifier
>>   * @param ticks Timer ticks
>>   *
>>   * @return Nanoseconds
>>   */
>> -uint64_t odp_timer_tick_to_ns(odp_timer_t timer, uint64_t ticks);
>> +uint64_t odp_timer_tick_to_ns(odp_timer_pool_t tpid, uint64_t ticks);
>>
>>  /**
>>   * Convert nanoseconds to timer ticks
>>   *
>> - * @param timer Timer
>> + * @param tpid  Timer pool identifier
>>   * @param ns    Nanoseconds
>>   *
>>   * @return Timer ticks
>>   */
>> -uint64_t odp_timer_ns_to_tick(odp_timer_t timer, uint64_t ns);
>> +uint64_t odp_timer_ns_to_tick(odp_timer_pool_t tpid, uint64_t ns);
>>
>>  /**
>> - * Timer resolution in nanoseconds
>> + * Current tick value
>>   *
>> - * @param timer Timer
>> + * @param tpid Timer pool identifier
>>   *
>> - * @return Resolution in nanoseconds
>> + * @return Current time in timer ticks
>> + */
>> +uint64_t odp_timer_current_tick(odp_timer_pool_t tpid);
>> +
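
Perhaps also show how the tick conversions and the current tick combine into
an absolute deadline (sketch only; tpid and tim are assumed to be a valid pool
and timer):

//Expire 50 ms from now, expressed in ticks of this timer pool
uint64_t abs_tck = odp_timer_current_tick(tpid) +
                   odp_timer_ns_to_tick(tpid, 50000000ULL);
odp_timer_set_abs(tim, abs_tck);
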
>> +/**
>> + * ODP timer configurations
>>   */
>> -uint64_t odp_timer_resolution(odp_timer_t timer);
>> +
>> +typedef enum odp_timer_pool_conf_e {
>> +       ODP_TIMER_NAME,      /**< Return name of timer pool */
>> +       ODP_TIMER_RESOLUTION,/**< Return the timer resolution (in ns) */
>> +       ODP_TIMER_MIN_TICKS, /**< Return the min supported rel timeout (ticks)*/
>> +       ODP_TIMER_MAX_TICKS, /**< Return the max supported rel timeout (ticks)*/
>> +       ODP_TIMER_NUM_TIMERS,/**< Return number of supported timers */
>> +       ODP_TIMER_SHARED     /**< Return shared flag */
>> +} odp_timer_pool_conf_t;
>>
>>  /**
>> - * Maximum timeout in timer ticks
>> + * Query different timer pool configurations, e.g.
>> + *  Timer resolution in nanoseconds
>> + *  Maximum timeout in timer ticks
>> + *  Number of supported timers
>> + *  Shared or private timer pool
>>   *
>> - * @param timer Timer
>> + * @param tpid Timer pool identifier
>> + * @param item Configuration item being queried
>>   *
>> - * @return Maximum timeout in timer ticks
>> + * @return the requested piece of information or 0 for unknown item.
>>   */
>> -uint64_t odp_timer_maximum_tmo(odp_timer_t timer);
>> +uintptr_t odp_timer_pool_query_conf(odp_timer_pool_t tpid,
>> +                                   odp_timer_pool_conf_t item);
>>
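
Since everything is funnelled through uintptr_t, the expected casts could be
spelled out, e.g. (my reading only, untested; I assume ODP_TIMER_NAME returns
a pointer cast to uintptr_t):

uint64_t res_ns  = odp_timer_pool_query_conf(tpid, ODP_TIMER_RESOLUTION);
uint64_t max_tck = odp_timer_pool_query_conf(tpid, ODP_TIMER_MAX_TICKS);
bool shared      = odp_timer_pool_query_conf(tpid, ODP_TIMER_SHARED) != 0;
const char *name = (const char *)odp_timer_pool_query_conf(tpid, ODP_TIMER_NAME);
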
>>  /**
>> - * Current timer tick
>> + * Allocate a timer
>>   *
>> - * @param timer Timer
>> + * Create a timer (allocating all necessary resources e.g. timeout event) from
>> + * the timer pool.
>>   *
>> - * @return Current time in timer ticks
>> + * @param tpid     Timer pool identifier
>> + * @param queue    Destination queue for timeout notifications
>> + * @param user_ptr User defined pointer or NULL (copied to timeouts)
>> + *
>> + * @return Timer handle if successful, otherwise ODP_TIMER_INVALID and
>> + *        errno set.
>>   */
>> -uint64_t odp_timer_current_tick(odp_timer_t timer);
>> +odp_timer_t odp_timer_alloc(odp_timer_pool_t tpid,
>> +                           odp_queue_t queue,
>> +                           void *user_ptr);
>>
>>  /**
>> - * Request timeout with an absolute timer tick
>> + * Free a timer
>> + *
>> + * Free (destroy) a timer, freeing all associated resources (e.g. default
>> + * timeout event). An expired and enqueued timeout event will not be freed.
>> + * It is the responsibility of the application to free this timeout when it
>> + * is received.
>>   *
>> - * When tick reaches tmo_tick, the timer enqueues the timeout notification into
>> - * the destination queue.
>> + * @param tim      Timer handle
>> + */
>> +void odp_timer_free(odp_timer_t tim);
>> +
>> +/**
>> + * Set a timer (absolute time) with a user-defined timeout buffer
>>   *
>> - * @param timer    Timer
>> - * @param tmo_tick Absolute timer tick value which triggers the timeout
>> - * @param queue    Destination queue for the timeout notification
>> - * @param buf      User defined timeout notification buffer. When
>> - *                 ODP_BUFFER_INVALID, default timeout notification is used.
>> + * Set (arm) the timer to expire at a specific time. The user-defined
>> + * buffer will be enqueued when the timer expires.
>> + * Arming may fail (if the timer is in state EXPIRED), an earlier timeout
>> + * will then be received. odp_timer_tmo_status() must be used to check if
>> + * the received timeout is valid.
>>   *
>> - * @return Timeout handle if successful, otherwise ODP_TIMER_TMO_INVALID
>> + * Note: any invalid parameters will be treated as programming errors and will
>> + * cause the application to abort.
>> + *
>> + * @param tim      Timer
>> + * @param abs_tck  Expiration time in absolute timer ticks
>> + * @param user_buf The buffer to use as timeout event
>> + *
>> + * @return Success or failure code
>>   */
>> -odp_timer_tmo_t odp_timer_absolute_tmo(odp_timer_t timer, uint64_t tmo_tick,
>> -                                      odp_queue_t queue, odp_buffer_t buf);
>> +odp_timer_set_t odp_timer_set_abs_w_buf(odp_timer_t tim,
>> +                                       uint64_t abs_tck,
>> +                                       odp_buffer_t user_buf);
>>
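
The _w_buf variants are not covered by the examples at the top of the file; a
sketch of the intended use, as far as I can tell (untested; my_pool/tpid/tim
are assumed to exist, odp_buffer_alloc() is the regular buffer pool API):

//Arm the timer with an application-owned buffer as the timeout event
odp_buffer_t user_buf = odp_buffer_alloc(my_pool);
if (user_buf == ODP_BUFFER_INVALID)
        return;//No buffer available
//Fill in application data that should be delivered with the timeout here
uint64_t abs_tck = odp_timer_current_tick(tpid) +
                   odp_timer_ns_to_tick(tpid, 1000000ULL);//1 ms
if (odp_timer_set_abs_w_buf(tim, abs_tck, user_buf) != ODP_TIMER_SET_SUCCESS) {
        //Timer was not armed and the buffer was not consumed, free it
        odp_buffer_free(user_buf);
}
//On expiration user_buf is enqueued on the timer's queue as-is;
//odp_timer_return_tmo() is not used for user-defined buffers
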
>>  /**
>> - * Cancel a timeout
>> + * Set a timer with an absolute expiration time
>> + *
>> + * Set (arm) the timer to expire at a specific time.
>> + * Arming may fail (if the timer is in state EXPIRED), an earlier timeout
>> + * will then be received. odp_timer_tmo_status() must be used to check if
>> + * the received timeout is valid.
>> + *
>> + * Note: any invalid parameters will be treated as programming errors and will
>> + * cause the application to abort.
>>   *
>> - * @param timer Timer
>> - * @param tmo   Timeout to cancel
>> + * @param tim     Timer
>> + * @param abs_tck Expiration time in absolute timer ticks
>>   *
>> - * @return 0 if successful
>> + * @return Success or failure code
>>   */
>> -int odp_timer_cancel_tmo(odp_timer_t timer, odp_timer_tmo_t tmo);
>> +odp_timer_set_t odp_timer_set_abs(odp_timer_t tim, uint64_t abs_tck);
>>
>>  /**
>> - * Convert buffer handle to timeout handle
>> + * Set a timer with a relative expiration time and user-defined buffer.
>>   *
>> - * @param buf  Buffer handle
>> + * Set (arm) the timer to expire at a relative future time.
>> + * Arming may fail (if the timer is in state EXPIRED),
>> + * an earlier timeout will then be received. odp_timer_tmo_status() must
>> + * be used to check if the received timeout is valid.
>>   *
>> - * @return Timeout buffer handle
>> + * Note: any invalid parameters will be treated as programming errors and will
>> + * cause the application to abort.
>> + *
>> + * @param tim      Timer
>> + * @param rel_tck  Expiration time in timer ticks relative to current time of
>> + *                the timer pool the timer belongs to
>> + * @param user_buf The buffer to use as timeout event
>> + *
>> + * @return Success or failure code
>>   */
>> -odp_timeout_t odp_timeout_from_buffer(odp_buffer_t buf);
>> +odp_timer_set_t odp_timer_set_rel_w_buf(odp_timer_t tim,
>> +                                       uint64_t rel_tck,
>> +                                       odp_buffer_t user_buf);
>> +/**
>> + * Set a timer with a relative expiration time
>> + *
>> + * Set (arm) the timer to expire at a relative future time.
>> + * Arming may fail (if the timer is in state EXPIRED),
>> + * an earlier timeout will then be received. odp_timer_tmo_status() must
>> + * be used to check if the received timeout is valid.
>> + *
>> + * Note: any invalid parameters will be treated as programming errors and will
>> + * cause the application to abort.
>> + *
>> + * @param tim     Timer
>> + * @param rel_tck Expiration time in timer ticks relative to current time of
>> + *               the timer pool the timer belongs to
>> + *
>> + * @return Success or failure code
>> + */
>> +odp_timer_set_t odp_timer_set_rel(odp_timer_t tim, uint64_t rel_tck);
>>
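
It may also help to spell out how the return codes are meant to be handled
(sketch only, not from the patch):

switch (odp_timer_set_rel(tim, rel_tck)) {
case ODP_TIMER_SET_SUCCESS:
        break;//Timer armed
case ODP_TIMER_SET_TOOEARLY:
        //Closer than the pool's min_tmo; treat as already expired
        //or clamp rel_tck up to the minimum and retry
        break;
case ODP_TIMER_SET_TOOLATE:
        //Beyond the pool's max_tmo; re-arm in shorter steps
        break;
}
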
>>  /**
>> - * Return absolute timeout tick
>> + * Cancel a timer
>> + *
>> + * Cancel a timer, preventing future expiration and delivery.
>> + *
>> + * A timer that has already expired and been enqueued for delivery may be
>> + * impossible to cancel and will instead be delivered to the destination queue.
>> + * Use odp_timer_tmo_status() to check whether a received timeout is fresh or
>> + * stale (cancelled). Stale timeouts will automatically be recycled.
>> + *
>> + * Note: any invalid parameters will be treated as programming errors and will
>> + * cause the application to abort.
>> + *
>> + * @param tim    Timer handle
>> + */
>> +void odp_timer_cancel(odp_timer_t tim);
>> +
>> +/**
>> + * Translate from buffer to timeout
>> + *
>> + * Return the timeout handle that corresponds to the specified buffer handle.
>> + * The buffer must be of type ODP_BUFFER_TYPE_TIMEOUT.
>> + *
>> + * @param buf   Buffer handle to translate.
>> + *
>> + * @return      The corresponding timeout handle.
>> + */
>> +static inline odp_timer_tmo_t odp_timeout_from_buffer(odp_buffer_t buf)
>> +{
>> +       if (odp_unlikely(odp_buffer_type(buf) != ODP_BUFFER_TYPE_TIMEOUT)) {
>> +               ODP_ERR("Buffer type %u not timeout\n", buf);
>> +               abort();
>> +       }
>> +       /* In this implementation, timeout == buffer */
>> +       return (odp_timer_tmo_t)buf;
>> +}
>> +
>> +/**
>> + * Translate from timeout to buffer
>> + *
>> + * Return the buffer handle that corresponds to the specified timeout handle.
>> + *
>> + * @param tmo   Timeout handle to translate.
>> + *
>> + * @return      The corresponding buffer handle.
>> + */
>> +static inline odp_buffer_t odp_buffer_from_timeout(odp_timer_tmo_t tmo)
>> +{
>> +       /* In this implementation, buffer == timeout */
>> +       return (odp_buffer_t)tmo;
>> +}
>> +
>> +/**
>> + * Return timeout to timer
>> + *
>> + * Return a received timeout for reuse with the parent timer.
>> + * Note: odp_timer_return_tmo() must be called on all received timeouts!
>> + * (Excluding user defined timeout buffers).
>> + * The timeout must not be accessed after this call, the semantics is
>> + * equivalent to a free call.
>> + *
>> + * @param tmo    Timeout
>> + */
>> +void odp_timer_return_tmo(odp_timer_tmo_t tmo);
>> +
>> +/**
>> + * Return fresh/stale/orphan status of timeout.
>> + *
>> + * Check a received timeout for orphan status (i.e. parent timer freed) and
>> + * staleness (i.e. parent timer has been reset or cancelled after the timeout
>> + * expired and was enqueued).
>> + * If the timeout is fresh, it should be processed.
>> + * If the timeout is stale or orphaned, it should be ignored.
>> + * All timeouts must be returned using the odp_timer_return_tmo() call.
>> + *
>> + * @param tmo    Timeout
>> + *
>> + * @return One of ODP_TMO_FRESH, ODP_TMO_STALE or ODP_TMO_ORPHAN.
>> + */
>> +odp_timer_tmo_status_t odp_timer_tmo_status(odp_timer_tmo_t tmo);
>> +
>> +/**
>> + * Get timer handle
>> + *
>> + * Return Handle of parent timer.
>> + *
>> + * @param tmo   Timeout
>> + *
>> + * @return Timer handle or ODP_TIMER_INVALID for orphaned timeouts.
>> + *         Note that the parent timer could be freed by some other thread
>> + *         at any time and thus the timeout becomes orphaned.
>> + */
>> +odp_timer_t odp_timer_handle(odp_timer_tmo_t tmo);
>> +
>> +/**
>> + * Get expiration time
>> + *
>> + * Return (requested) expiration time of timeout.
>> + *
>> + * @param tmo   Timeout
>> + *
>> + * @return Expiration time
>> + */
>> +uint64_t odp_timer_expiration(odp_timer_tmo_t tmo);
>> +
>> +/**
>> + * Get user pointer
>> + *
>> + * Return User pointer of timer associated with timeout.
>> + * The user pointer is often used to point to some associated context.
>>   *
>> - * @param tmo Timeout buffer handle
>> + * @param tmo   Timeout
>>   *
>> - * @return Absolute timeout tick
>> + * @return User pointer
>>   */
>> -uint64_t odp_timeout_tick(odp_timeout_t tmo);
>> +void *odp_timer_userptr(odp_timer_tmo_t tmo);
>>
>>  #ifdef __cplusplus
>>  }
>> diff --git a/platform/linux-generic/include/odp_priority_queue_internal.h b/platform/linux-generic/include/odp_priority_queue_internal.h
>> new file mode 100644
>> index 0000000..7d7f3a2
>> --- /dev/null
>> +++ b/platform/linux-generic/include/odp_priority_queue_internal.h
>> @@ -0,0 +1,108 @@
>> +#ifndef _PRIORITY_QUEUE_H
>> +#define _PRIORITY_QUEUE_H
>> +
>> +#include <assert.h>
>> +#include <stddef.h>
>> +#include <stdint.h>
>> +#include <stdbool.h>
>> +#include <odp_align.h>
>> +
>> +#define INVALID_INDEX ~0U
>> +#define INVALID_PRIORITY ((pq_priority_t)~0ULL)
>> +
>> +typedef uint64_t pq_priority_t;
>> +
>> +struct heap_node;
>> +
>> +typedef struct priority_queue {
>> +       uint32_t max_elems;/* Number of elements in heap */
>> +       /* Number of registered elements (active + inactive) */
>> +       uint32_t reg_elems;
>> +       uint32_t num_elems;/* Number of active elements */
>> +       struct heap_node *heap;
>> +       struct heap_node *org_ptr;
>> +} priority_queue ODP_ALIGNED(sizeof(uint64_t));
>> +
>> +/* The user gets a pointer to this structure */
>> +typedef struct {
>> +       /* Set when pq_element registered with priority queue */
>> +       priority_queue *pq;
>> +       uint32_t index;/* Index into heap array */
>> +       pq_priority_t prio;
>> +} pq_element;
>> +
>> +/*** Operations on pq_element ***/
>> +
>> +static inline void pq_element_con(pq_element *this)
>> +{
>> +       this->pq = NULL;
>> +       this->index = INVALID_INDEX;
>> +       this->prio = 0U;
>> +}
>> +
>> +static inline void pq_element_des(pq_element *this)
>> +{
>> +       (void)this;
>> +       assert(this->index == INVALID_INDEX);
>> +}
>> +
>> +static inline priority_queue *get_pq(const pq_element *this)
>> +{
>> +       return this->pq;
>> +}
>> +
>> +static inline pq_priority_t get_prio(const pq_element *this)
>> +{
>> +       return this->prio;
>> +}
>> +
>> +static inline uint32_t get_index(const pq_element *this)
>> +{
>> +       return this->index;
>> +}
>> +
>> +static inline bool is_active(const pq_element *this)
>> +{
>> +       return this->index != INVALID_INDEX;
>> +}
>> +
>> +/*** Operations on priority_queue ***/
>> +
>> +extern uint32_t pq_smallest_child(priority_queue *, uint32_t, pq_priority_t);
>> +extern void pq_bubble_down(priority_queue *, pq_element *);
>> +extern void pq_bubble_up(priority_queue *, pq_element *);
>> +
>> +static inline bool valid_index(priority_queue *this, uint32_t idx)
>> +{
>> +       return idx < this->num_elems;
>> +}
>> +
>> +extern void priority_queue_con(priority_queue *, uint32_t _max_elems);
>> +extern void priority_queue_des(priority_queue *);
>> +
>> +/* Register pq_element with priority queue */
>> +/* Return false if priority queue full */
>> +extern bool pq_register_element(priority_queue *, pq_element *);
>> +
>> +/* Activate and add pq_element to priority queue */
>> +/* Element must be disarmed */
>> +extern void pq_activate_element(priority_queue *, pq_element *, pq_priority_t);
>> +
>> +/* Reset (increase) priority for pq_element */
>> +/* Element may be active or inactive (released) */
>> +extern void pq_reset_element(priority_queue *, pq_element *, pq_priority_t);
>> +
>> +/* Deactivate and remove element from priority queue */
>> +/* Element may be active or inactive (released) */
>> +extern void pq_deactivate_element(priority_queue *, pq_element *);
>> +
>> +/* Unregister pq_element */
>> +extern void pq_unregister_element(priority_queue *, pq_element *);
>> +
>> +/* Return priority of first element (lowest numerical value) */
>> +extern pq_priority_t pq_first_priority(const priority_queue *);
>> +
>> +/* Deactivate and return first element if its prio is <= threshold */
>> +extern pq_element *pq_release_element(priority_queue *, pq_priority_t thresh);
>> +
>> +#endif /* _PRIORITY_QUEUE_H */
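
Not part of the patch, but for other reviewers: this is my understanding of the
intended call sequence for this internal API, as a self-contained sketch (in
the timer code the priorities are expiration ticks):

static void pq_usage_sketch(void)
{
        priority_queue pq;
        pq_element elem;
        priority_queue_con(&pq, 16);         /* room for 16 elements */
        pq_element_con(&elem);
        if (!pq_register_element(&pq, &elem)) {
                priority_queue_des(&pq);
                return;                      /* priority queue full */
        }
        pq_reset_element(&pq, &elem, 100);   /* (re)arm at priority 100 */
        pq_element *e;
        while ((e = pq_release_element(&pq, 100)) != NULL)
                ;                            /* 'e' has expired, handle it */
        pq_unregister_element(&pq, &elem);
        pq_element_des(&elem);
        priority_queue_des(&pq);
}
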
>> diff --git a/platform/linux-generic/include/odp_timer_internal.h b/platform/linux-generic/include/odp_timer_internal.h
>> index ad28f53..461f28c 100644
>> --- a/platform/linux-generic/include/odp_timer_internal.h
>> +++ b/platform/linux-generic/include/odp_timer_internal.h
>> @@ -1,4 +1,4 @@
>> -/* Copyright (c) 2013, Linaro Limited
>> +/* Copyright (c) 2014, Linaro Limited
>>   * All rights reserved.
>>   *
>>   * SPDX-License-Identifier:     BSD-3-Clause
>> @@ -8,72 +8,51 @@
>>  /**
>>   * @file
>>   *
>> - * ODP timer timeout descriptor - implementation internal
>> + * ODP timeout descriptor - implementation internal
>>   */
>>
>>  #ifndef ODP_TIMER_INTERNAL_H_
>>  #define ODP_TIMER_INTERNAL_H_
>>
>> -#ifdef __cplusplus
>> -extern "C" {
>> -#endif
>> -
>> -#include <odp_std_types.h>
>> -#include <odp_queue.h>
>> -#include <odp_buffer.h>
>> +#include <odp_align.h>
>> +#include <odp_debug.h>
>>  #include <odp_buffer_internal.h>
>>  #include <odp_buffer_pool_internal.h>
>>  #include <odp_timer.h>
>>
>> -struct timeout_t;
>> -
>> -typedef struct timeout_t {
>> -       struct timeout_t *next;
>> -       int               timer_id;
>> -       int               tick;
>> -       uint64_t          tmo_tick;
>> -       odp_queue_t       queue;
>> -       odp_buffer_t      buf;
>> -       odp_buffer_t      tmo_buf;
>> -} timeout_t;
>> -
>> -
>> -struct odp_timeout_hdr_t;
>> -
>>  /**
>> - * Timeout notification header
>> + * Internal Timeout header
>>   */
>> -typedef struct odp_timeout_hdr_t {
>> +typedef struct {
>> +       /* common buffer header */
>>         odp_buffer_hdr_t buf_hdr;
>>
>> -       timeout_t meta;
>> -
>> -       uint8_t buf_data[];
>> +       /* Requested expiration time */
>> +       uint64_t expiration;
>> +       /* User ptr inherited from parent timer */
>> +       void *user_ptr;
>> +       /* Parent timer */
>> +       odp_timer_t timer;
>> +       /* Tag inherited from parent timer at time of expiration */
>> +       uint32_t tag;
>> +       /* Gen-cnt inherited from parent timer at time of creation */
>> +       uint16_t gencnt;
>> +       uint16_t pad;
>> +       uint8_t buf_data[0];
>>  } odp_timeout_hdr_t;
>>
>> -
>> -
>>  ODP_STATIC_ASSERT(sizeof(odp_timeout_hdr_t) ==
>> -          ODP_OFFSETOF(odp_timeout_hdr_t, buf_data),
>> -          "ODP_TIMEOUT_HDR_T__SIZE_ERR");
>> -
>> +                 ODP_OFFSETOF(odp_timeout_hdr_t, buf_data),
>> +                 "sizeof(odp_timeout_hdr_t) == ODP_OFFSETOF(odp_timeout_hdr_t, buf_data)");
>>  ODP_STATIC_ASSERT(sizeof(odp_timeout_hdr_t) % sizeof(uint64_t) == 0,
>> -          "ODP_TIMEOUT_HDR_T__SIZE_ERR2");
>> -
>> +                 "sizeof(odp_timeout_hdr_t) % sizeof(uint64_t) == 0");
>>
>>  /**
>> - * Return timeout header
>> + * Return the timeout header
>>   */
>> -static inline odp_timeout_hdr_t *odp_timeout_hdr(odp_timeout_t tmo)
>> +static inline odp_timeout_hdr_t *odp_timeout_hdr(odp_buffer_t buf)
>>  {
>> -       odp_buffer_hdr_t *buf_hdr = odp_buf_to_hdr((odp_buffer_t)tmo);
>> -       return (odp_timeout_hdr_t *)(uintptr_t)buf_hdr;
>> -}
>> -
>> -
>> -
>> -#ifdef __cplusplus
>> +       return (odp_timeout_hdr_t *)odp_buf_to_hdr(buf);
>>  }
>> -#endif
>>
>>  #endif
>> diff --git a/platform/linux-generic/odp_priority_queue.c b/platform/linux-generic/odp_priority_queue.c
>> new file mode 100644
>> index 0000000..b72c26f
>> --- /dev/null
>> +++ b/platform/linux-generic/odp_priority_queue.c
>> @@ -0,0 +1,283 @@
>> +#define NDEBUG /* Enabled by default by ODP build system */
>> +#include <assert.h>
>> +#include <unistd.h>
>> +#include <stdlib.h>
>> +#include <string.h>
>> +#include <strings.h>
>> +#include <odp_hints.h>
>> +#include <odp_align.h>
>> +#include <odp_debug.h>
>> +
>> +#include "odp_priority_queue_internal.h"
>> +
>> +
>> +#define NUM_CHILDREN 4
>> +#define CHILD(n) (NUM_CHILDREN * (n) + 1)
>> +#define PARENT(n) (((n) - 1) / NUM_CHILDREN)
>> +
>> +/* Internal nodes in the array */
>> +typedef struct heap_node {
>> +       pq_element *elem;
>> +       /* Copy of elem->prio so we avoid unnecessary dereferencing */
>> +       pq_priority_t prio;
>> +} heap_node;
>> +
>> +static void pq_assert_heap(priority_queue *this);
>> +
>> +#define ALIGNMENT(p) (1U << ((unsigned)ffs((int)p) - 1U))
>> +
>> +void priority_queue_con(priority_queue *this, uint32_t _max_elems)
>> +{
>> +       this->max_elems = _max_elems;
>> +       this->reg_elems = 0;
>> +       this->num_elems = 0;
>> +       this->org_ptr = malloc((_max_elems + 64 / sizeof(heap_node)) *
>> +                              sizeof(heap_node));
>> +       if (odp_unlikely(this->org_ptr == NULL)) {
>> +               ODP_ERR("malloc failed\n");
>> +               abort();
>> +       }
>> +       this->heap = this->org_ptr;
>> +       assert((size_t)&this->heap[1] % 8 == 0);
>> +       /* Increment base address until first child (index 1) is cache line */
>> +       /* aligned and thus all children (e.g. index 1-4) stored in the */
>> +       /* same cache line. We are not interested in the alignment of */
>> +       /* heap[0] as this is a lone node */
>> +       while ((size_t)&this->heap[1] % ODP_CACHE_LINE_SIZE != 0) {
>> +               /* Cast to ptr to struct member with the greatest alignment */
>> +               /* requirement */
>> +               this->heap = (heap_node *)((pq_priority_t *)this->heap + 1);
>> +       }
>> +       pq_assert_heap(this);
>> +}
>> +
>> +void priority_queue_des(priority_queue *this)
>> +{
>> +       pq_assert_heap(this);
>> +       free(this->org_ptr);
>> +}
>> +
>> +#ifndef NDEBUG
>> +static uint32_t
>> +pq_assert_elem(priority_queue *this, uint32_t index, bool recurse)
>> +{
>> +       uint32_t num = 1;
>> +       const pq_element *elem = this->heap[index].elem;
>> +       assert(elem->index == index);
>> +       assert(elem->prio == this->heap[index].prio);
>> +       uint32_t child = CHILD(index);
>> +       uint32_t i;
>> +       for (i = 0; i < NUM_CHILDREN; i++, child++) {
>> +               if (valid_index(this, child)) {
>> +                       assert(this->heap[child].elem != NULL);
>> +                       assert(this->heap[child].prio >= elem->prio);
>> +                       if (recurse)
>> +                               num += pq_assert_elem(this, child, recurse);
>> +               }
>> +       }
>> +       return num;
>> +}
>> +#endif
>> +
>> +static void
>> +pq_assert_heap(priority_queue *this)
>> +{
>> +       (void)this;
>> +#ifndef NDEBUG
>> +       uint32_t num = 0;
>> +       if (odp_likely(this->num_elems != 0)) {
>> +               assert(this->heap[0].elem != NULL);
>> +               num += pq_assert_elem(this, 0, true);
>> +       }
>> +       assert(num == this->num_elems);
>> +       unsigned i;
>> +       for (i = 0; i < this->num_elems; i++) {
>> +               assert(this->heap[i].elem != NULL);
>> +               assert(this->heap[i].prio != INVALID_PRIORITY);
>> +       }
>> +#endif
>> +}
>> +
>> +/* Bubble up to proper position */
>> +void
>> +pq_bubble_up(priority_queue *this, pq_element *elem)
>> +{
>> +       assert(this->heap[elem->index].elem == elem);
>> +       assert(this->heap[elem->index].prio == elem->prio);
>> +       uint32_t current = elem->index;
>> +       pq_priority_t prio = elem->prio;
>> +       assert(current == 0 || this->heap[PARENT(current)].elem != NULL);
>> +       /* Move up into proper position */
>> +       while (current != 0 && this->heap[PARENT(current)].prio > prio) {
>> +               uint32_t parent = PARENT(current);
>> +               assert(this->heap[parent].elem != NULL);
>> +               /* Swap current with parent */
>> +               /* 1) Move parent down */
>> +               this->heap[current].elem = this->heap[parent].elem;
>> +               this->heap[current].prio = this->heap[parent].prio;
>> +               this->heap[current].elem->index = current;
>> +               /* 2) Move current up to parent */
>> +               this->heap[parent].elem = elem;
>> +               this->heap[parent].prio = prio;
>> +               this->heap[parent].elem->index = parent;
>> +               /* Continue moving elem until it is in the right place */
>> +               current = parent;
>> +       }
>> +       pq_assert_heap(this);
>> +}
>> +
>> +/* Find the smallest child that is smaller than the specified priority */
>> +/* Very hot function, can we decrease the number of cache misses? */
>> +uint32_t pq_smallest_child(priority_queue *this,
>> +                          uint32_t index,
>> +                          pq_priority_t val)
>> +{
>> +       uint32_t smallest = index;
>> +       uint32_t child = CHILD(index);
>> +#if NUM_CHILDREN == 4
>> +       /* Unroll loop when all children exist */
>> +       if (odp_likely(valid_index(this, child + 3))) {
>> +               if (this->heap[child + 0].prio < val)
>> +                       val = this->heap[smallest = child + 0].prio;
>> +               if (this->heap[child + 1].prio < val)
>> +                       val = this->heap[smallest = child + 1].prio;
>> +               if (this->heap[child + 2].prio < val)
>> +                       val = this->heap[smallest = child + 2].prio;
>> +               if (this->heap[child + 3].prio < val)
>> +                       (void)this->heap[smallest = child + 3].prio;
>> +               return smallest;
>> +       }
>> +#endif
>> +       uint32_t i;
>> +       for (i = 0; i < NUM_CHILDREN; i++) {
>> +               if (odp_unlikely(!valid_index(this, child + i)))
>> +                       break;
>> +               if (this->heap[child + i].prio < val) {
>> +                       smallest = child + i;
>> +                       val = this->heap[smallest].prio;
>> +               }
>> +       }
>> +       return smallest;
>> +}
>> +
>> +/* Very hot function, can it be optimised? */
>> +void
>> +pq_bubble_down(priority_queue *this, pq_element *elem)
>> +{
>> +       assert(this->heap[elem->index].elem == elem);
>> +       assert(this->heap[elem->index].prio == elem->prio);
>> +       uint32_t current = elem->index;
>> +       pq_priority_t prio = elem->prio;
>> +       for (;;) {
>> +               uint32_t child = pq_smallest_child(this, current, prio);
>> +               if (current == child) {
>> +                       /* No smaller child, we are done */
>> +                       pq_assert_heap(this);
>> +                       return;
>> +               }
>> +               /* Element larger than smaller child, must move down */
>> +               assert(this->heap[child].elem != NULL);
>> +               /* 1) Move child up to current */
>> +               this->heap[current].elem = this->heap[child].elem;
>> +               this->heap[current].prio = this->heap[child].prio;
>> +               /* 2) Move current down to child */
>> +               this->heap[child].elem = elem;
>> +               this->heap[child].prio = prio;
>> +               this->heap[child].elem->index = child;
>> +
>> +               this->heap[current].elem->index = current; /* cache misses! */
>> +               /* Continue moving element until it is in the right place */
>> +               current = child;
>> +       }
>> +}
>> +
>> +bool
>> +pq_register_element(priority_queue *this, pq_element *elem)
>> +{
>> +       if (odp_likely(this->reg_elems < this->max_elems)) {
>> +               elem->pq = this;
>> +               this->reg_elems++;
>> +               return true;
>> +       }
>> +       return false;
>> +}
>> +
>> +void
>> +pq_unregister_element(priority_queue *this, pq_element *elem)
>> +{
>> +       assert(elem->pq == this);
>> +       if (is_active(elem))
>> +               pq_deactivate_element(this, elem);
>> +       this->reg_elems--;
>> +}
>> +
>> +void
>> +pq_activate_element(priority_queue *this, pq_element *elem, pq_priority_t prio)
>> +{
>> +       assert(elem->index == INVALID_INDEX);
>> +       /* Insert element at end */
>> +       uint32_t index = this->num_elems++;
>> +       this->heap[index].elem = elem;
>> +       this->heap[index].prio = prio;
>> +       elem->index = index;
>> +       elem->prio = prio;
>> +       pq_bubble_up(this, elem);
>> +}
>> +
>> +void
>> +pq_deactivate_element(priority_queue *this, pq_element *elem)
>> +{
>> +       assert(elem->pq == this);
>> +       if (odp_likely(is_active(elem))) {
>> +               /* Swap element with last element */
>> +               uint32_t current = elem->index;
>> +               uint32_t last = --this->num_elems;
>> +               if (odp_likely(last != current)) {
>> +                       /* Move last element to current */
>> +                       this->heap[current].elem = this->heap[last].elem;
>> +                       this->heap[current].prio = this->heap[last].prio;
>> +                       this->heap[current].elem->index = current;
>> +                       /* Move the old 'last' element to its proper place */
>> +                       if (this->heap[current].prio < elem->prio)
>> +                               pq_bubble_up(this, this->heap[current].elem);
>> +                       else
>> +                               pq_bubble_down(this, this->heap[current].elem);
>> +               }
>> +               elem->index = INVALID_INDEX;
>> +               pq_assert_heap(this);
>> +       }
>> +}
>> +
>> +void
>> +pq_reset_element(priority_queue *this, pq_element *elem, pq_priority_t prio)
>> +{
>> +       assert(prio != INVALID_PRIORITY);
>> +       if (odp_likely(is_active(elem))) {
>> +               assert(prio >= elem->prio);
>> +               elem->prio = prio;
>> +               this->heap[elem->index].prio = prio;/* cache misses here! */
>> +               pq_bubble_down(this, elem);
>> +               pq_assert_heap(this);
>> +       } else {
>> +               pq_activate_element(this, elem, prio);
>> +       }
>> +}
>> +
>> +pq_priority_t pq_first_priority(const priority_queue *this)
>> +{
>> +       return this->num_elems != 0 ? this->heap[0].prio : INVALID_PRIORITY;
>> +}
>> +
>> +pq_element *
>> +pq_release_element(priority_queue *this, pq_priority_t threshold)
>> +{
>> +       if (odp_likely(this->num_elems != 0 &&
>> +                      this->heap[0].prio <= threshold)) {
>> +               pq_element *elem = this->heap[0].elem;
>> +               /* Remove element from heap */
>> +               pq_deactivate_element(this, elem);
>> +               assert(elem->prio <= threshold);
>> +               return elem;
>> +       }
>> +       return NULL;
>> +}
>> diff --git a/platform/linux-generic/odp_timer.c b/platform/linux-generic/odp_timer.c
>> index 313c713..0e5071c 100644
>> --- a/platform/linux-generic/odp_timer.c
>> +++ b/platform/linux-generic/odp_timer.c
>> @@ -4,428 +4,713 @@
>>   * SPDX-License-Identifier:     BSD-3-Clause
>>   */
>>
>> -#include <odp_timer.h>
>> -#include <odp_timer_internal.h>
>> -#include <odp_time.h>
>> -#include <odp_buffer_pool_internal.h>
>> -#include <odp_internal.h>
>> -#include <odp_atomic.h>
>> -#include <odp_spinlock.h>
>> -#include <odp_sync.h>
>> -#include <odp_debug.h>
>> -
>> -#include <signal.h>
>> -#include <time.h>
>> +/**
>> + * @file
>> + *
>> + * ODP timer service
>> + *
>> + */
>>
>> +#include <assert.h>
>> +#include <errno.h>
>>  #include <string.h>
>> -
>> -#define NUM_TIMERS    1
>> -#define MAX_TICKS     1024
>> -#define MAX_RES       ODP_TIME_SEC
>> -#define MIN_RES       (100*ODP_TIME_USEC)
>> -
>> -
>> -typedef struct {
>> -       odp_spinlock_t lock;
>> -       timeout_t      *list;
>> -} tick_t;
>> -
>> -typedef struct {
>> -       int               allocated;
>> -       volatile int      active;
>> -       volatile uint64_t cur_tick;
>> -       timer_t           timerid;
>> -       odp_timer_t       timer_hdl;
>> -       odp_buffer_pool_t pool;
>> -       uint64_t          resolution_ns;
>> -       uint64_t          max_ticks;
>> -       tick_t            tick[MAX_TICKS];
>> -
>> -} timer_ring_t;
>> -
>> -typedef struct {
>> -       odp_spinlock_t lock;
>> -       int            num_timers;
>> -       timer_ring_t   timer[NUM_TIMERS];
>> -
>> -} timer_global_t;
>> -
>> -/* Global */
>> -static timer_global_t odp_timer;
>> -
>> -static void add_tmo(tick_t *tick, timeout_t *tmo)
>> +#include <stdlib.h>
>> +#include <time.h>
>> +#include <signal.h>
>> +#include "odp_std_types.h"
>> +#include "odp_buffer.h"
>> +#include "odp_buffer_pool.h"
>> +#include "odp_queue.h"
>> +#include "odp_hints.h"
>> +#include "odp_sync.h"
>> +#include "odp_ticketlock.h"
>> +#include "odp_debug.h"
>> +#include "odp_align.h"
>> +#include "odp_shared_memory.h"
>> +#include "odp_hints.h"
>> +#include "odp_internal.h"
>> +#include "odp_time.h"
>> +#include "odp_timer.h"
>> +#include "odp_timer_internal.h"
>> +#include "odp_priority_queue_internal.h"
>> +
>>
>> +/******************************************************************************
>> + * Translation between timeout and timeout header
>> + *****************************************************************************/
>> +
>> +static inline odp_timeout_hdr_t *odp_tmo_to_hdr(odp_timer_tmo_t tmo)
>>  {
>> -       odp_spinlock_lock(&tick->lock);
>> -
>> -       tmo->next  = tick->list;
>> -       tick->list = tmo;
>> +       odp_buffer_t buf = odp_buffer_from_timeout(tmo);
>> +       odp_timeout_hdr_t *tmo_hdr = (odp_timeout_hdr_t *)odp_buf_to_hdr(buf);
>> +       return tmo_hdr;
>> +}
>>
>> -       odp_spinlock_unlock(&tick->lock);
>>
>> +/******************************************************************************
>> + * odp_timer abstract datatype
>> + *****************************************************************************/
>> +
>> +typedef struct odp_timer_s {
>> +       pq_element pqelem;/* Base class */
>> +       uint64_t req_tmo;/* Requested timeout tick */
>> +       odp_buffer_t tmo_buf;/* ODP_BUFFER_INVALID if timeout enqueued */
>> +       odp_queue_t queue;/* ODP_QUEUE_INVALID if timer is free */
>> +       uint32_t tag;/* Reusing tag as next pointer/index when timer is free */
>> +       uint16_t gencnt;/* Smaller to make place for user_buf flag */
>> +       unsigned int user_buf:1; /* User-defined buffer? */
>> +} odp_timer;
>> +
>> +/* Constructor */
>> +static inline void odp_timer_con(odp_timer *this)
>> +{
>> +       pq_element_con(&this->pqelem);
>> +       this->tmo_buf = ODP_BUFFER_INVALID;
>> +       this->queue = ODP_QUEUE_INVALID;
>> +       this->gencnt = 0;
>>  }
>>
>> -static timeout_t *rem_tmo(tick_t *tick)
>> +/* Destructor */
>> +static inline void odp_timer_des(odp_timer *this)
>>  {
>> -       timeout_t *tmo;
>> +       assert(this->tmo_buf == ODP_BUFFER_INVALID);
>> +       assert(this->queue == ODP_QUEUE_INVALID);
>> +       pq_element_des(&this->pqelem);
>> +}
>>
>> -       odp_spinlock_lock(&tick->lock);
>> +/* Setup when timer is allocated */
>> +static void setup(odp_timer *this,
>> +                 odp_queue_t _q,
>> +                 void *_up,
>> +                 odp_buffer_t _tmo)
>> +{
>> +       this->req_tmo = INVALID_PRIORITY;
>> +       this->tmo_buf = _tmo;
>> +       this->queue = _q;
>> +       this->tag = 0;
>> +       this->user_buf = false;
>> +       /* Initialise constant fields of timeout event */
>> +       odp_timeout_hdr_t *tmo_hdr =
>> +               odp_tmo_to_hdr(odp_timeout_from_buffer(this->tmo_buf));
>> +       tmo_hdr->gencnt = this->gencnt;
>> +       tmo_hdr->timer = this;
>> +       tmo_hdr->user_ptr = _up;
>> +       /* tmo_hdr->tag set at expiration time */
>> +       /* tmo_hdr->expiration set at expiration time */
>> +       assert(this->queue != ODP_QUEUE_INVALID);
>> +}
>>
>> -       tmo = tick->list;
>> +/* Teardown when timer is freed */
>> +static odp_buffer_t teardown(odp_timer *this)
>> +{
>> +       /* Increase generation count to make any pending timeout(s) orphaned */
>> +       ++this->gencnt;
>> +       odp_buffer_t buf = this->tmo_buf;
>> +       this->tmo_buf = ODP_BUFFER_INVALID;
>> +       this->queue = ODP_QUEUE_INVALID;
>> +       return buf;
>> +}
>>
>> -       if (tmo)
>> -               tick->list = tmo->next;
>> +static inline uint32_t get_next_free(odp_timer *this)
>> +{
>> +       assert(this->queue == ODP_QUEUE_INVALID);
>> +       return this->tag;
>> +}
>>
>> -       odp_spinlock_unlock(&tick->lock);
>> +static inline void set_next_free(odp_timer *this, uint32_t nf)
>> +{
>> +       assert(this->queue == ODP_QUEUE_INVALID);
>> +       this->tag = nf;
>> +}
>>
>> -       if (tmo)
>> -               tmo->next = NULL;
>>
>> +/******************************************************************************
>> + * odp_timer_pool abstract datatype
>> + * Includes alloc and free timer
>> + *****************************************************************************/
>> +
>> +typedef struct odp_timer_pool_s {
>> +       priority_queue pq;
>> +       uint64_t cur_tick;/* Current tick value */
>> +       uint64_t min_tick;/* Current expiration lower bound */
>> +       uint64_t max_tick;/* Current expiration higher bound */
>> +       bool shared;
>> +       odp_ticketlock_t lock;
>> +       const char *name;
>> +       odp_buffer_pool_t buf_pool;
>> +       uint64_t resolution_ns;
>> +       uint64_t min_tmo_tck;
>> +       uint64_t max_tmo_tck;
>> +       odp_timer *timers;
>> +       uint32_t num_alloc;/* Current number of allocated timers */
>> +       uint32_t max_timers;/* Max number of timers */
>> +       uint32_t first_free;/* 0..max_timers-1 => free timer */
>> +       timer_t timerid;
>> +       odp_timer_clk_src_t clk_src;
>> +} odp_timer_pool;
>> +
>> +/* Forward declarations */
>> +static void timer_init(odp_timer_pool *tp);
>> +static void timer_exit(odp_timer_pool *tp);
>> +
>> +static void odp_timer_pool_con(odp_timer_pool *this,
>> +                              const char *_n,
>> +                              odp_buffer_pool_t _bp,
>> +                              uint64_t _r,
>> +                              uint64_t _mint,
>> +                              uint64_t _maxt,
>> +                              uint32_t _mt,
>> +                              bool _s,
>> +                              odp_timer_clk_src_t _cs)
>> +{
>> +       priority_queue_con(&this->pq, _mt);
>> +       this->cur_tick = 0;
>> +       this->shared = _s;
>> +       this->name = strdup(_n);
>> +       this->buf_pool = _bp;
>> +       this->resolution_ns = _r;
>> +       this->min_tmo_tck = odp_timer_ns_to_tick(this, _mint);
>> +       this->max_tmo_tck = odp_timer_ns_to_tick(this, _maxt);
>> +       this->min_tick = this->cur_tick + this->min_tmo_tck;
>> +       this->max_tick = this->cur_tick + this->max_tmo_tck;
>> +       this->num_alloc = 0;
>> +       this->max_timers = _mt;
>> +       this->first_free = 0;
>> +       this->clk_src = _cs;
>> +       this->timers = malloc(sizeof(odp_timer) * this->max_timers);
>> +       if (this->timers == NULL)
>> +               ODP_ABORT("%s: malloc failed\n", _n);
>> +       uint32_t i;
>> +       for (i = 0; i < this->max_timers; i++)
>> +               odp_timer_con(&this->timers[i]);
>> +       for (i = 0; i < this->max_timers; i++)
>> +               set_next_free(&this->timers[i], i + 1);
>> +       odp_ticketlock_init(&this->lock);
>> +       if (this->clk_src == ODP_CLOCK_CPU)
>> +               timer_init(this);
>> +       /* Make sure timer pool initialisation is globally observable */
>> +       /* before we return a pointer to it */
>> +       odp_sync_stores();
>> +}
>>
>> -       return tmo;
>> +static odp_timer_pool *odp_timer_pool_new(
>> +       const char *_n,
>> +       odp_buffer_pool_t _bp,
>> +       uint64_t _r,
>> +       uint64_t _mint,
>> +       uint64_t _maxt,
>> +       uint32_t _mt,
>> +       bool _s,
>> +       odp_timer_clk_src_t _cs)
>> +{
>> +       odp_timer_pool *this = malloc(sizeof(odp_timer_pool));
>> +       if (odp_unlikely(this == NULL))
>> +               ODP_ABORT("%s: timer pool malloc failed\n", _n);
>> +       odp_timer_pool_con(this, _n, _bp, _r, _mint, _maxt, _mt, _s, _cs);
>> +       return this;
>>  }
>>
>> -/**
>> - * Search and delete tmo entry from timeout list
>> - * return -1 : on error.. handle not in list
>> - *             0 : success
>> - */
>> -static int find_and_del_tmo(timeout_t **tmo, odp_timer_tmo_t handle)
>> +static void odp_timer_pool_des(odp_timer_pool *this)
>>  {
>> -       timeout_t *cur, *prev;
>> -       prev = NULL;
>> +       if (this->shared)
>> +               odp_ticketlock_lock(&this->lock);
>> +       if (this->num_alloc != 0) {
>> +               /* It's a programming error to attempt to destroy a */
>> +               /* timer pool which is still in use */
>> +               ODP_ABORT("%s: timers in use\n", this->name);
>> +       }
>> +       if (this->clk_src == ODP_CLOCK_CPU)
>> +               timer_exit(this);
>> +       uint32_t i;
>> +       for (i = 0; i < this->max_timers; i++)
>> +               odp_timer_des(&this->timers[i]);
>> +       free(this->timers);
>> +       priority_queue_des(&this->pq);
>> +       odp_sync_stores();
>> +}
>>
>> -       for (cur = *tmo; cur != NULL; prev = cur, cur = cur->next) {
>> -               if (cur->tmo_buf == handle) {
>> -                       if (prev == NULL)
>> -                               *tmo = cur->next;
>> -                       else
>> -                               prev->next = cur->next;
>> +static void odp_timer_pool_del(odp_timer_pool *this)
>> +{
>> +       odp_timer_pool_des(this);
>> +       free(this);
>> +}
>>
>> -                       break;
>> +static inline odp_timer *timer_alloc(odp_timer_pool *this,
>> +                                    odp_queue_t queue,
>> +                                    void *user_ptr,
>> +                                    odp_buffer_t tmo_buf)
>> +{
>> +       odp_timer *tim = ODP_TIMER_INVALID;
>> +       if (odp_likely(this->shared))
>> +               odp_ticketlock_lock(&this->lock);
>> +       if (odp_likely(this->num_alloc < this->max_timers)) {
>> +               this->num_alloc++;
>> +               /* Remove first unused timer from free list */
>> +               assert(this->first_free != this->max_timers);
>> +               tim = &this->timers[this->first_free];
>> +               this->first_free = get_next_free(tim);
>> +               /* Insert timer into priority queue */
>> +               if (odp_unlikely(!pq_register_element(&this->pq,
>> +                                                     &tim->pqelem))) {
>> +                       /* Unexpected internal error */
>> +                       abort();
>>                 }
>> +               /* Create timer */
>> +               setup(tim, queue, user_ptr, tmo_buf);
>> +       } else {
>> +               errno = ENFILE; /* Reusing 'file table overflow' */
>>         }
>> -
>> -       if (!cur)
>> -               /* couldn't find tmo in list */
>> -               return -1;
>> -
>> -       /* application to free tmo_buf provided by absolute_tmo call */
>> -       return 0;
>> +       if (odp_likely(this->shared))
>> +               odp_ticketlock_unlock(&this->lock);
>> +       return tim;
>>  }
>>
>> -int odp_timer_cancel_tmo(odp_timer_t timer_hdl, odp_timer_tmo_t tmo)
>> +static inline void timer_free(odp_timer_pool *this, odp_timer *tim)
>>  {
>> -       int id;
>> -       int tick_idx;
>> -       timeout_t *cancel_tmo;
>> -       odp_timeout_hdr_t *tmo_hdr;
>> -       tick_t *tick;
>> -
>> -       /* get id */
>> -       id = (int)timer_hdl - 1;
>> -
>> -       tmo_hdr = odp_timeout_hdr((odp_timeout_t) tmo);
>> -       /* get tmo_buf to cancel */
>> -       cancel_tmo = &tmo_hdr->meta;
>> +       if (odp_likely(this->shared))
>> +               odp_ticketlock_lock(&this->lock);
>> +       if (odp_unlikely(tim->queue == ODP_QUEUE_INVALID))
>> +               ODP_ABORT("Invalid timer %p\n", tim);
>> +       /* Destroy timer */
>> +       odp_buffer_t buf = teardown(tim);
>> +       /* Remove timer from priority queue */
>> +       pq_unregister_element(&this->pq, &tim->pqelem);
>> +       /* Insert timer into free list */
>> +       set_next_free(tim, this->first_free);
>> +       /* Pointer difference already yields the element index */
>> +       this->first_free = tim - &this->timers[0];
>> +       assert(this->num_alloc != 0);
>> +       this->num_alloc--;
>> +       if (odp_likely(this->shared))
>> +               odp_ticketlock_unlock(&this->lock);
>> +       if (buf != ODP_BUFFER_INVALID)
>> +               odp_buffer_free(buf);
>> +}
>>
>> -       tick_idx = cancel_tmo->tick;
>> -       tick = &odp_timer.timer[id].tick[tick_idx];
>>
>> +/******************************************************************************
>> + * Operations on timers
>> + * reset/reset_w_buf/cancel timer, return timeout
>> + *****************************************************************************/
>>
>> -       odp_spinlock_lock(&tick->lock);
>> -       /* search and delete tmo from tick list */
>> -       if (find_and_del_tmo(&tick->list, tmo) != 0) {
>> -               odp_spinlock_unlock(&tick->lock);
>> -               ODP_DBG("Couldn't find the tmo (%d) in tick list\n",
>> (int)tmo);
>> -               return -1;
>> +static inline void timer_expire(odp_timer *tim)
>> +{
>> +       assert(tim->req_tmo != INVALID_PRIORITY);
>> +       /* Timer expired, is there actually any timeout event */
>> +       /* we can enqueue? */
>> +       if (odp_likely(tim->tmo_buf != ODP_BUFFER_INVALID)) {
>> +               /* Swap out timeout buffer */
>> +               odp_buffer_t buf = tim->tmo_buf;
>> +               tim->tmo_buf = ODP_BUFFER_INVALID;
>> +               if (odp_likely(!tim->user_buf)) {
>> +                       odp_timeout_hdr_t *tmo_hdr =
>> +                               odp_tmo_to_hdr(odp_timeout_from_buffer(buf));
>> +                       /* Copy tag and requested expiration tick from timer */
>> +                       tmo_hdr->tag = tim->tag;
>> +                       tmo_hdr->expiration = tim->req_tmo;
>> +               }
>> +               /* Else don't touch user-defined buffer */
>> +               int rc = odp_queue_enq(tim->queue, buf);
>> +               if (odp_unlikely(rc != 0))
>> +                       ODP_ABORT("Failed to enqueue timeout buffer (%d)\n",
>> +                                 rc);
>> +               /* Mark timer as inactive */
>> +               tim->req_tmo = INVALID_PRIORITY;
>>         }
>> -       odp_spinlock_unlock(&tick->lock);
>> -
>> -       return 0;
>> +       /* No, timeout event already enqueued or unavailable */
>> +       /* Keep timer active, odp_timer_return_tmo() will patch up */
>>  }
>>
>> -static void notify_function(union sigval sigval)
>> +static odp_timer_set_t timer_reset(odp_timer_pool *tp,
>> +                                  odp_timer *tim,
>> +                                  uint64_t abs_tck)
>>  {
>> -       uint64_t cur_tick;
>> -       timeout_t *tmo;
>> -       tick_t *tick;
>> -       timer_ring_t *timer;
>> +       assert(tim->user_buf == false);
>> +       if (odp_unlikely(abs_tck < tp->min_tick))
>> +               return ODP_TIMER_SET_TOOEARLY;
>> +       if (odp_unlikely(abs_tck > tp->max_tick))
>> +               return ODP_TIMER_SET_TOOLATE;
>> +
>> +       if (odp_likely(tp->shared))
>> +               odp_ticketlock_lock(&tp->lock);
>> +
>> +       if (odp_unlikely(tim->queue == ODP_QUEUE_INVALID))
>> +               ODP_ABORT("Invalid timer %p\n", tim);
>> +       if (odp_unlikely(tim->user_buf))
>> +               ODP_ABORT("Timer %p has user buffer\n", tim);
>> +       /* Increase timer tag to make any pending timeout stale */
>> +       tim->tag++;
>> +       /* Save requested timeout */
>> +       tim->req_tmo = abs_tck;
>> +       /* Update timer position in priority queue */
>> +       pq_reset_element(&tp->pq, &tim->pqelem, abs_tck);
>> +
>> +       if (odp_likely(tp->shared))
>> +               odp_ticketlock_unlock(&tp->lock);
>> +       return ODP_TIMER_SET_SUCCESS;
>> +}
>>
>> -       timer = sigval.sival_ptr;
>> +static odp_timer_set_t timer_reset_w_buf(odp_timer_pool *tp,
>> +               odp_timer *tim,
>> +               uint64_t abs_tck,
>> +               odp_buffer_t user_buf)
>> +{
>> +       if (odp_unlikely(abs_tck < tp->min_tick))
>> +               return ODP_TIMER_SET_TOOEARLY;
>> +       if (odp_unlikely(abs_tck > tp->max_tick))
>> +               return ODP_TIMER_SET_TOOLATE;
>> +
>> +       if (odp_likely(tp->shared))
>> +               odp_ticketlock_lock(&tp->lock);
>> +
>> +       if (odp_unlikely(tim->queue == ODP_QUEUE_INVALID))
>> +               ODP_ABORT("Invalid timer %p\n", tim);
>> +       /* Increase timer tag to make any pending timeout stale */
>> +       tim->tag++;
>> +       /* Save requested timeout */
>> +       tim->req_tmo = abs_tck;
>> +       /* Set flag indicating presence of user defined buffer */
>> +       tim->user_buf = true;
>> +       /* Swap in new buffer, save any old buffer pointer */
>> +       odp_buffer_t old_buf = tim->tmo_buf;
>> +       tim->tmo_buf = user_buf;
>> +       /* Update timer position in priority queue */
>> +       pq_reset_element(&tp->pq, &tim->pqelem, abs_tck);
>> +
>> +       if (odp_likely(tp->shared))
>> +               odp_ticketlock_unlock(&tp->lock);
>> +
>> +       /* Free old buffer if present */
>> +       if (odp_unlikely(old_buf != ODP_BUFFER_INVALID))
>> +               odp_buffer_free(old_buf);
>> +       return ODP_TIMER_SET_SUCCESS;
>> +}
>>
>> -       if (timer->active == 0) {
>> -               ODP_DBG("Timer (%u) not active\n", timer->timer_hdl);
>> -               return;
>> +static inline void timer_cancel(odp_timer_pool *tp,
>> +                               odp_timer *tim)
>> +{
>> +       odp_buffer_t old_buf = ODP_BUFFER_INVALID;
>> +       if (odp_likely(tp->shared))
>> +               odp_ticketlock_lock(&tp->lock);
>> +
>> +       if (odp_unlikely(tim->queue == ODP_QUEUE_INVALID))
>> +               ODP_ABORT("Invalid timer %p\n", tim);
>> +       if (odp_unlikely(tim->user_buf)) {
>> +               /* Swap out old user buffer */
>> +               old_buf = tim->tmo_buf;
>> +               tim->tmo_buf = ODP_BUFFER_INVALID;
>> +               /* tim->user_buf stays true */
>>         }
>> +       /* Else a normal timer (no user-defined buffer) */
>> +       /* Increase timer tag to make any pending timeout stale */
>> +       tim->tag++;
>> +       /* Clear requested timeout, mark timer inactive */
>> +       tim->req_tmo = INVALID_PRIORITY;
>> +       /* Remove timer from the priority queue */
>> +       pq_deactivate_element(&tp->pq, &tim->pqelem);
>> +
>> +       if (odp_likely(tp->shared))
>> +               odp_ticketlock_unlock(&tp->lock);
>> +       /* Free user-defined buffer if present */
>> +       if (odp_unlikely(old_buf != ODP_BUFFER_INVALID))
>> +               odp_buffer_free(old_buf);
>> +}
>>
>> -       /* ODP_DBG("Tick\n"); */
>> -
>> -       cur_tick = timer->cur_tick++;
>> -
>> -       odp_sync_stores();
>> +static inline void timer_return(odp_timer_pool *tp,
>> +                               odp_timer *tim,
>> +                               odp_timer_tmo_t tmo,
>> +                               const odp_timeout_hdr_t *tmo_hdr)
>> +{
>> +       odp_buffer_t tmo_buf = odp_buffer_from_timeout(tmo);
>> +       if (odp_likely(tp->shared))
>> +               odp_ticketlock_lock(&tp->lock);
>> +       if (odp_unlikely(tim->user_buf))
>> +               ODP_ABORT("Timer %p has user-defined buffer\n", tim);
>> +       if (odp_likely(tmo_hdr->gencnt == tim->gencnt)) {
>> +               assert(tim->tmo_buf == ODP_BUFFER_INVALID);
>> +               /* Save returned buffer for use when timer expires next
>> time */
>> +               tim->tmo_buf = tmo_buf;
>> +               tmo_buf = ODP_BUFFER_INVALID;
>> +               /* Check if timer is active and should have expired */
>> +               if (odp_unlikely(tim->req_tmo != INVALID_PRIORITY &&
>> +                                tim->req_tmo <= tp->cur_tick)) {
>> +                       /* Expire timer now since we have restored the
>> timeout
>> +                          buffer */
>> +                       timer_expire(tim);
>> +               }
>> +               /* Else timer inactive or expires in the future */
>> +       }
>> +       /* Else timeout orphaned, free buffer later */
>> +       if (odp_likely(tp->shared))
>> +               odp_ticketlock_unlock(&tp->lock);
>> +       if (odp_unlikely(tmo_buf != ODP_BUFFER_INVALID))
>> +               odp_buffer_free(tmo_buf);
>> +}
>>
>> -       tick = &timer->tick[cur_tick % MAX_TICKS];
>> +/* Non-public so not in odp_timer.h but externally visible, must declare
>> + * somewhere */
>> +unsigned odp_timer_pool_expire(odp_timer_pool_t tpid, uint64_t tick);
>>
>> -       while ((tmo = rem_tmo(tick)) != NULL) {
>> -               odp_queue_t  queue;
>> -               odp_buffer_t buf;
>> +unsigned odp_timer_pool_expire(odp_timer_pool_t tpid, uint64_t tick)
>> +{
>> +       if (odp_likely(tpid->shared))
>> +               odp_ticketlock_lock(&tpid->lock);
>> +
>> +       unsigned nexp = 0;
>> +       odp_timer_t tim;
>> +       tpid->cur_tick = tick;
>> +       tpid->min_tick = tick + tpid->min_tmo_tck;
>> +       tpid->max_tick = tick + tpid->max_tmo_tck;
>> +       while ((tim = (odp_timer_t)pq_release_element(&tpid->pq, tick)) !=
>> +              ODP_TIMER_INVALID) {
>> +               assert(get_prio(&tim->pqelem) <= tick);
>> +               timer_expire(tim);
>> +               nexp++;
>> +       }
>>
>> -               queue = tmo->queue;
>> -               buf   = tmo->buf;
>> +       if (odp_likely(tpid->shared))
>> +               odp_ticketlock_unlock(&tpid->lock);
>> +       return nexp;
>> +}
>>
>> -               if (buf != tmo->tmo_buf)
>> -                       odp_buffer_free(tmo->tmo_buf);
>>
>> +/******************************************************************************
>> + * POSIX timer support
>> + * Functions that use Linux/POSIX per-process timers and related
>> facilities
>> +
>> *****************************************************************************/
>>
>> -               odp_queue_enq(queue, buf);
>> -       }
>> +static void timer_notify(sigval_t sigval)
>> +{
>> +       odp_timer_pool *tp = (odp_timer_pool *)sigval.sival_ptr;
>> +       uint64_t new_tick = tp->cur_tick + 1;
>> +       (void)odp_timer_pool_expire(tp, new_tick);
>>  }
>>
>> -static void timer_start(timer_ring_t *timer)
>> +static void timer_init(odp_timer_pool *tp)
>>  {
>>         struct sigevent   sigev;
>>         struct itimerspec ispec;
>>         uint64_t res, sec, nsec;
>>
>> -       ODP_DBG("\nTimer (%u) starts\n", timer->timer_hdl);
>> +       ODP_DBG("Creating POSIX timer for timer pool %s, period %"
>> +               PRIu64" ns\n", tp->name, tp->resolution_ns);
>>
>>         memset(&sigev, 0, sizeof(sigev));
>>         memset(&ispec, 0, sizeof(ispec));
>>
>>         sigev.sigev_notify          = SIGEV_THREAD;
>> -       sigev.sigev_notify_function = notify_function;
>> -       sigev.sigev_value.sival_ptr = timer;
>> +       sigev.sigev_notify_function = timer_notify;
>> +       sigev.sigev_value.sival_ptr = tp;
>>
>> -       if (timer_create(CLOCK_MONOTONIC, &sigev, &timer->timerid)) {
>> -               ODP_DBG("Timer create failed\n");
>> -               return;
>> -       }
>> +       if (timer_create(CLOCK_MONOTONIC, &sigev, &tp->timerid))
>> +               ODP_ABORT("timer_create() returned error %s\n",
>> +                         strerror(errno));
>>
>> -       res  = timer->resolution_ns;
>> +       res  = tp->resolution_ns;
>>         sec  = res / ODP_TIME_SEC;
>> -       nsec = res - sec*ODP_TIME_SEC;
>> +       nsec = res - sec * ODP_TIME_SEC;
>>
>>         ispec.it_interval.tv_sec  = (time_t)sec;
>>         ispec.it_interval.tv_nsec = (long)nsec;
>>         ispec.it_value.tv_sec     = (time_t)sec;
>>         ispec.it_value.tv_nsec    = (long)nsec;
>>
>> -       if (timer_settime(timer->timerid, 0, &ispec, NULL)) {
>> -               ODP_DBG("Timer set failed\n");
>> -               return;
>> -       }
>> -
>> -       return;
>> +       if (timer_settime(tp->timerid, 0, &ispec, NULL))
>> +               ODP_ABORT("timer_settime() returned error %s\n",
>> +                         strerror(errno));
>>  }
>>
>> -int odp_timer_init_global(void)
>> +static void timer_exit(odp_timer_pool *tp)
>>  {
>> -       ODP_DBG("Timer init ...");
>> -
>> -       memset(&odp_timer, 0, sizeof(timer_global_t));
>> -
>> -       odp_spinlock_init(&odp_timer.lock);
>> -
>> -       ODP_DBG("done\n");
>> -
>> -       return 0;
>> +       if (timer_delete(tp->timerid) != 0)
>> +               ODP_ABORT("timer_delete() returned error %s\n",
>> +                         strerror(errno));
>>  }
>>
>> -int odp_timer_disarm_all(void)
>>
>> +/******************************************************************************
>> + * Public API functions
>> + * Some parameter checks and error messages
>> + * No modifications of internal state
>> +
>> *****************************************************************************/
>> +odp_timer_pool_t
>> +odp_timer_pool_create(const char *name,
>> +                     odp_buffer_pool_t buf_pool,
>> +                     uint64_t resolution_ns,
>> +                     uint64_t min_timeout,
>> +                     uint64_t max_timeout,
>> +                     uint32_t num_timers,
>> +                     bool shared,
>> +                     odp_timer_clk_src_t clk_src)
>>  {
>> -       int timers;
>> -       struct itimerspec ispec;
>> -
>> -       odp_spinlock_lock(&odp_timer.lock);
>> -
>> -       timers = odp_timer.num_timers;
>> -
>> -       ispec.it_interval.tv_sec  = 0;
>> -       ispec.it_interval.tv_nsec = 0;
>> -       ispec.it_value.tv_sec     = 0;
>> -       ispec.it_value.tv_nsec    = 0;
>> -
>> -       for (; timers >= 0; timers--) {
>> -               if (timer_settime(odp_timer.timer[timers].timerid,
>> -                                 0, &ispec, NULL)) {
>> -                       ODP_DBG("Timer reset failed\n");
>> -                       odp_spinlock_unlock(&odp_timer.lock);
>> -                       return -1;
>> -               }
>> -               odp_timer.num_timers--;
>> -       }
>> -
>> -       odp_spinlock_unlock(&odp_timer.lock);
>> -
>> -       return 0;
>> +       /* Verify that buffer pool can be used for timeouts */
>> +       odp_buffer_t buf = odp_buffer_alloc(buf_pool);
>> +       if (buf == ODP_BUFFER_INVALID)
>> +               ODP_ABORT("%s: Failed to allocate buffer\n", name);
>> +       if (odp_buffer_type(buf) != ODP_BUFFER_TYPE_TIMEOUT)
>> +               ODP_ABORT("%s: Buffer pool wrong type\n", name);
>> +       odp_buffer_free(buf);
>> +       odp_timer_pool_t tp = odp_timer_pool_new(name, buf_pool,
>> resolution_ns,
>> +                             min_timeout, max_timeout, num_timers,
>> +                             shared, clk_src);
>> +       return tp;
>>  }
>>
>> -odp_timer_t odp_timer_create(const char *name, odp_buffer_pool_t pool,
>> -                            uint64_t resolution_ns, uint64_t min_ns,
>> -                            uint64_t max_ns)
>> +void odp_timer_pool_start(void)
>>  {
>> -       uint32_t id;
>> -       timer_ring_t *timer;
>> -       odp_timer_t timer_hdl;
>> -       int i;
>> -       uint64_t max_ticks;
>> -       (void) name;
>> -
>> -       if (resolution_ns < MIN_RES)
>> -               resolution_ns = MIN_RES;
>> -
>> -       if (resolution_ns > MAX_RES)
>> -               resolution_ns = MAX_RES;
>> -
>> -       max_ticks = max_ns / resolution_ns;
>> -
>> -       if (max_ticks > MAX_TICKS) {
>> -               ODP_DBG("Maximum timeout too long: %"PRIu64" ticks\n",
>> -                       max_ticks);
>> -               return ODP_TIMER_INVALID;
>> -       }
>> -
>> -       if (min_ns < resolution_ns) {
>> -               ODP_DBG("Min timeout %"PRIu64" ns < resolution %"PRIu64"
>> ns\n",
>> -                       min_ns, resolution_ns);
>> -               return ODP_TIMER_INVALID;
>> -       }
>> -
>> -       odp_spinlock_lock(&odp_timer.lock);
>> -
>> -       if (odp_timer.num_timers >= NUM_TIMERS) {
>> -               odp_spinlock_unlock(&odp_timer.lock);
>> -               ODP_DBG("All timers allocated\n");
>> -               return ODP_TIMER_INVALID;
>> -       }
>> -
>> -       for (id = 0; id < NUM_TIMERS; id++) {
>> -               if (odp_timer.timer[id].allocated == 0)
>> -                       break;
>> -       }
>> -
>> -       timer = &odp_timer.timer[id];
>> -       timer->allocated = 1;
>> -       odp_timer.num_timers++;
>> -
>> -       odp_spinlock_unlock(&odp_timer.lock);
>> -
>> -       timer_hdl = id + 1;
>> -
>> -       timer->timer_hdl     = timer_hdl;
>> -       timer->pool          = pool;
>> -       timer->resolution_ns = resolution_ns;
>> -       timer->max_ticks     = MAX_TICKS;
>> -
>> -       for (i = 0; i < MAX_TICKS; i++) {
>> -               odp_spinlock_init(&timer->tick[i].lock);
>> -               timer->tick[i].list = NULL;
>> -       }
>> -
>> -       timer->active = 1;
>> -       odp_sync_stores();
>> -
>> -       timer_start(timer);
>> +       /* Nothing to do here, timer pools are started by the create call
>> */
>> +}
>>
>> -       return timer_hdl;
>> +void odp_timer_pool_destroy(odp_timer_pool_t tpid)
>> +{
>> +       odp_timer_pool_del(tpid);
>>  }
>>
>> -odp_timer_tmo_t odp_timer_absolute_tmo(odp_timer_t timer_hdl, uint64_t
>> tmo_tick,
>> -                                      odp_queue_t queue, odp_buffer_t
>> buf)
>> +uint64_t odp_timer_tick_to_ns(odp_timer_pool_t tpid, uint64_t ticks)
>>  {
>> -       int id;
>> -       uint64_t tick;
>> -       uint64_t cur_tick;
>> -       timeout_t *new_tmo;
>> -       odp_buffer_t tmo_buf;
>> -       odp_timeout_hdr_t *tmo_hdr;
>> -       timer_ring_t *timer;
>> +       return ticks * tpid->resolution_ns;
>> +}
>>
>> -       id = (int)timer_hdl - 1;
>> -       timer = &odp_timer.timer[id];
>> +uint64_t odp_timer_ns_to_tick(odp_timer_pool_t tpid, uint64_t ns)
>> +{
>> +       return (uint64_t)(ns / tpid->resolution_ns);
>> +}
>>
>> -       cur_tick = timer->cur_tick;
>> -       if (tmo_tick <= cur_tick) {
>> -               ODP_DBG("timeout too close\n");
>> -               return ODP_TIMER_TMO_INVALID;
>> -       }
>> +uint64_t odp_timer_current_tick(odp_timer_pool_t tpid)
>> +{
>> +       return tpid->cur_tick;
>> +}
>>
>> -       if ((tmo_tick - cur_tick) > MAX_TICKS) {
>> -               ODP_DBG("timeout too far: cur %"PRIu64" tmo %"PRIu64"\n",
>> -                       cur_tick, tmo_tick);
>> -               return ODP_TIMER_TMO_INVALID;
>> +uintptr_t odp_timer_pool_query_conf(odp_timer_pool_t tpid,
>> +                                   odp_timer_pool_conf_t item)
>> +{
>> +       switch (item) {
>> +       case ODP_TIMER_NAME:
>> +               return (uintptr_t)(tpid->name);
>> +       case ODP_TIMER_RESOLUTION:
>> +               return tpid->resolution_ns;
>> +       case ODP_TIMER_MIN_TICKS:
>> +               return tpid->min_tmo_tck;
>> +       case ODP_TIMER_MAX_TICKS:
>> +               return tpid->max_tmo_tck;
>> +       case ODP_TIMER_NUM_TIMERS:
>> +               return tpid->max_timers;
>> +       case ODP_TIMER_SHARED:
>> +               return tpid->shared;
>> +       default:
>> +               return 0;
>>         }
>> +}
>>
>> -       tick = tmo_tick % MAX_TICKS;
>> -
>> -       tmo_buf = odp_buffer_alloc(timer->pool);
>> -       if (tmo_buf == ODP_BUFFER_INVALID) {
>> -               ODP_DBG("tmo buffer alloc failed\n");
>> -               return ODP_TIMER_TMO_INVALID;
>> +odp_timer_t odp_timer_alloc(odp_timer_pool_t tpid,
>> +                           odp_queue_t queue,
>> +                           void *user_ptr)
>> +{
>> +       /* We check this because ODP_QUEUE_INVALID is used */
>> +       /* to indicate a free timer */
>> +       if (odp_unlikely(queue == ODP_QUEUE_INVALID))
>> +               ODP_ABORT("%s: Invalid queue handle\n", tpid->name);
>> +       odp_buffer_t tmo_buf = odp_buffer_alloc(tpid->buf_pool);
>> +       if (odp_likely(tmo_buf != ODP_BUFFER_INVALID)) {
>> +               odp_timer *tim = timer_alloc(tpid, queue, user_ptr,
>> tmo_buf);
>> +               if (odp_likely(tim != ODP_TIMER_INVALID)) {
>> +                       /* Success */
>> +                       assert(tim->queue != ODP_QUEUE_INVALID);
>> +                       return tim;
>> +               }
>> +               odp_buffer_free(tmo_buf);
>>         }
>> +       /* Else failed to allocate timeout event */
>> +       /* errno set by odp_buffer_alloc() or timer_alloc() */
>> +       return ODP_TIMER_INVALID;
>> +}
>>
>> -       tmo_hdr = odp_timeout_hdr((odp_timeout_t) tmo_buf);
>> -       new_tmo = &tmo_hdr->meta;
>> -
>> -       new_tmo->timer_id = id;
>> -       new_tmo->tick     = (int)tick;
>> -       new_tmo->tmo_tick = tmo_tick;
>> -       new_tmo->queue    = queue;
>> -       new_tmo->tmo_buf  = tmo_buf;
>> -
>> -       if (buf != ODP_BUFFER_INVALID)
>> -               new_tmo->buf = buf;
>> -       else
>> -               new_tmo->buf = tmo_buf;
>> -
>> -       add_tmo(&timer->tick[tick], new_tmo);
>> -
>> -       return tmo_buf;
>> +void odp_timer_free(odp_timer_t tim)
>> +{
>> +       odp_timer_pool *tp = (odp_timer_pool *)get_pq(&tim->pqelem);
>> +       timer_free(tp, tim);
>>  }
>>
>> -uint64_t odp_timer_tick_to_ns(odp_timer_t timer_hdl, uint64_t ticks)
>> +odp_timer_set_t odp_timer_set_abs_w_buf(odp_timer_t tim,
>> +                                       uint64_t abs_tck,
>> +                                       odp_buffer_t user_buf)
>>  {
>> -       uint32_t id;
>> +       odp_timer_pool *tp = (odp_timer_pool *)get_pq(&tim->pqelem);
>> +       odp_timer_set_t rc = timer_reset_w_buf(tp, tim, abs_tck,
>> user_buf);
>> +       return rc;
>> +}
>>
>> -       id = timer_hdl - 1;
>> -       return ticks * odp_timer.timer[id].resolution_ns;
>> +odp_timer_set_t odp_timer_set_abs(odp_timer_t tim, uint64_t abs_tck)
>> +{
>> +       odp_timer_pool *tp = (odp_timer_pool *)get_pq(&tim->pqelem);
>> +       odp_timer_set_t rc = timer_reset(tp, tim, abs_tck);
>> +       return rc;
>>  }
>>
>> -uint64_t odp_timer_ns_to_tick(odp_timer_t timer_hdl, uint64_t ns)
>> +odp_timer_set_t odp_timer_set_rel_w_buf(odp_timer_t tim,
>> +                                       uint64_t rel_tck,
>> +                                       odp_buffer_t user_buf)
>>  {
>> -       uint32_t id;
>> +       odp_timer_pool *tp = (odp_timer_pool *)get_pq(&tim->pqelem);
>> +       odp_timer_set_t rc = timer_reset_w_buf(tp, tim, tp->cur_tick +
>> rel_tck,
>> +                                              user_buf);
>> +       return rc;
>> +}
>>
>> -       id = timer_hdl - 1;
>> -       return ns / odp_timer.timer[id].resolution_ns;
>> +odp_timer_set_t odp_timer_set_rel(odp_timer_t tim, uint64_t rel_tck)
>> +{
>> +       odp_timer_pool *tp = (odp_timer_pool *)get_pq(&tim->pqelem);
>> +       odp_timer_set_t rc = timer_reset(tp, tim, tp->cur_tick + rel_tck);
>> +       return rc;
>>  }
>>
>> -uint64_t odp_timer_resolution(odp_timer_t timer_hdl)
>> +void odp_timer_cancel(odp_timer_t tim)
>>  {
>> -       uint32_t id;
>> +       odp_timer_pool *tp = (odp_timer_pool *)get_pq(&tim->pqelem);
>> +       timer_cancel(tp, tim);
>> +}
>>
>> -       id = timer_hdl - 1;
>> -       return odp_timer.timer[id].resolution_ns;
>> +void odp_timer_return_tmo(odp_timer_tmo_t tmo)
>> +{
>> +       const odp_timeout_hdr_t *tmo_hdr = odp_tmo_to_hdr(tmo);
>> +       odp_timer *parent_tim = tmo_hdr->timer;
>> +       odp_timer_pool *tp = (odp_timer_pool
>> *)get_pq(&parent_tim->pqelem);
>> +       timer_return(tp, parent_tim, tmo, tmo_hdr);
>>  }
>>
>> -uint64_t odp_timer_maximum_tmo(odp_timer_t timer_hdl)
>> +odp_timer_tmo_status_t odp_timer_tmo_status(odp_timer_tmo_t tmo)
>>  {
>> -       uint32_t id;
>> +       const odp_timeout_hdr_t *tmo_hdr = odp_tmo_to_hdr(tmo);
>> +       odp_timer *parent_tim = tmo_hdr->timer;
>>
>> -       id = timer_hdl - 1;
>> -       return odp_timer.timer[id].max_ticks;
>> +       if (odp_unlikely(tmo_hdr->gencnt != parent_tim->gencnt)) {
>> +               /* Generation counters differ => timer has been freed */
>> +               return ODP_TMO_ORPHAN;
>> +       }
>> +       /* Else generation counters match => parent timer exists */
>> +
>> +       if (odp_likely(parent_tim->tag == tmo_hdr->tag))
>> +               return ODP_TMO_FRESH;
>> +       else
>> +               return ODP_TMO_STALE;
>>  }
>>
>> -uint64_t odp_timer_current_tick(odp_timer_t timer_hdl)
>> +odp_timer_t odp_timer_handle(odp_timer_tmo_t tmo)
>>  {
>> -       uint32_t id;
>> +       odp_timeout_hdr_t *tmo_hdr = odp_tmo_to_hdr(tmo);
>> +       odp_timer_t parent_tim = tmo_hdr->timer;
>> +       if (odp_likely(tmo_hdr->gencnt == parent_tim->gencnt))
>> +               return parent_tim;
>> +       else
>> +               return ODP_TIMER_INVALID;
>> +}
>>
>> -       id = timer_hdl - 1;
>> -       return odp_timer.timer[id].cur_tick;
>> +uint64_t odp_timer_expiration(odp_timer_tmo_t tmo)
>> +{
>> +       odp_timeout_hdr_t *tmo_hdr = odp_tmo_to_hdr(tmo);
>> +       return tmo_hdr->expiration;
>>  }
>>
>> -odp_timeout_t odp_timeout_from_buffer(odp_buffer_t buf)
>> +void *odp_timer_userptr(odp_timer_tmo_t tmo)
>>  {
>> -       return (odp_timeout_t) buf;
>> +       odp_timeout_hdr_t *tmo_hdr = odp_tmo_to_hdr(tmo);
>> +       return tmo_hdr->user_ptr;
>>  }
>>
>> -uint64_t odp_timeout_tick(odp_timeout_t tmo)
>> +int odp_timer_init_global(void)
>>  {
>> -       odp_timeout_hdr_t *tmo_hdr = odp_timeout_hdr(tmo);
>> -       return tmo_hdr->meta.tmo_tick;
>> +       return 0;
>>  }
>> diff --git a/test/api_test/odp_timer_ping.c
>> b/test/api_test/odp_timer_ping.c
>> index 7406a45..2617b5c 100644
>> --- a/test/api_test/odp_timer_ping.c
>> +++ b/test/api_test/odp_timer_ping.c
>> @@ -20,6 +20,8 @@
>>   *    Otherwise timeout may happen bcz of slow nw speed
>>   */
>>
>> +#include <assert.h>
>> +#include <stdlib.h>
>>  #include <unistd.h>
>>  #include <fcntl.h>
>>  #include <errno.h>
>> @@ -41,14 +43,15 @@
>>  #define MSG_POOL_SIZE         (4*1024*1024)
>>  #define BUF_SIZE               8
>>  #define PING_CNT       10
>> -#define PING_THRD      2       /* Send and Rx Ping thread */
>> +#define PING_THRD      2       /* send_ping and rx_ping threads */
>>
>>  /* Nanoseconds */
>>  #define RESUS  10000
>>  #define MINUS  10000
>>  #define MAXUS  10000000
>>
>> -static odp_timer_t test_timer_ping;
>> +static odp_timer_pool_t tp;
>> +static odp_timer_t test_timer_ping = ODP_TIMER_INVALID;
>>  static odp_timer_tmo_t test_ping_tmo;
>>
>>  #define PKTSIZE      64
>> @@ -128,15 +131,7 @@ static int listen_to_pingack(void)
>>                                          (socklen_t *)&len);
>>                         if (bytes > 0) {
>>                                 /* pkt rxvd therefore cancel the timeout
>> */
>> -                               if (odp_timer_cancel_tmo(test_timer_ping,
>> -                                                        test_ping_tmo)
>> != 0) {
>> -                                       ODP_ERR("cancel_tmo failed
>> ..exiting listner thread\n");
>> -                                       /* avoid exiting from here even
>> if tmo
>> -                                        * failed for current ping,
>> -                                        * allow subsequent ping_rx
>> request */
>> -                                       err = -1;
>> -
>> -                               }
>> +                               odp_timer_cancel(test_timer_ping);
>>                                 /* cruel bad hack used for sender,
>> listner ipc..
>>                                  * euwww.. FIXME ..
>>                                  */
>> @@ -160,7 +155,6 @@ static int send_ping_request(struct sockaddr_in *addr)
>>
>>         uint64_t tick;
>>         odp_queue_t queue;
>> -       odp_buffer_t buf;
>>
>>         int err = 0;
>>
>> @@ -184,8 +178,16 @@ static int send_ping_request(struct sockaddr_in
>> *addr)
>>
>>         /* get the ping queue */
>>         queue = odp_queue_lookup("ping_timer_queue");
>> +       test_timer_ping = odp_timer_alloc(tp, queue, NULL);
>> +       if (test_timer_ping == ODP_TIMER_INVALID) {
>> +               ODP_ERR("Failed to allocate timer.\n");
>> +               err = -1;
>> +               goto err;
>> +       }
>>
>>         for (i = 0; i < PING_CNT; i++) {
>> +               odp_buffer_t buf;
>> +               odp_timer_tmo_t tmo;
>>                 /* prepare icmp pkt */
>>                 bzero(&pckt, sizeof(pckt));
>>                 pckt.hdr.type = ICMP_ECHO;
>> @@ -209,12 +211,10 @@ static int send_ping_request(struct sockaddr_in
>> *addr)
>>                 printf(" icmp_sent msg_cnt %d\n", i);
>>
>>                 /* arm the timer */
>> -               tick = odp_timer_current_tick(test_timer_ping);
>> +               tick = odp_timer_current_tick(tp);
>>
>>                 tick += 1000;
>> -               test_ping_tmo = odp_timer_absolute_tmo(test_timer_ping,
>> tick,
>> -                                                      queue,
>> -
>> ODP_BUFFER_INVALID);
>> +               odp_timer_set_abs(test_timer_ping, tick);
>>                 /* wait for timeout event */
>>                 while ((buf = odp_queue_deq(queue)) ==
>> ODP_BUFFER_INVALID) {
>>                         /* flag true means ack rxvd.. a cruel hack as I
>> @@ -229,17 +229,28 @@ static int send_ping_request(struct sockaddr_in
>> *addr)
>>                                 break;
>>                         }
>>                 }
>> +               assert(odp_buffer_type(buf) == ODP_BUFFER_TYPE_TIMEOUT);
>> +               tmo = odp_timeout_from_buffer(buf);
>>
>> -               /* free tmo_buf for timeout case */
>> -               if (buf != ODP_BUFFER_INVALID) {
>> -                       ODP_DBG(" timeout msg_cnt [%i] \n", i);
>> +               switch (odp_timer_tmo_status(tmo)) {
>> +               case ODP_TMO_FRESH:
>> +                       ODP_DBG(" timeout msg_cnt [%i]\n", i);
>>                         /* so to avoid seg fault commented */
>> -                       odp_buffer_free(buf);
>>                         err = -1;
>> +                       break;
>> +               case ODP_TMO_STALE:
>> +                       /* Ignore stale timeouts */
>> +                       break;
>> +               case ODP_TMO_ORPHAN:
>> +                       ODP_ERR("Received orphaned timeout!\n");
>> +                       abort();
>>                 }
>> +               odp_timer_return_tmo(tmo);
>>         }
>>
>>  err:
>> +       if (test_timer_ping != ODP_TIMER_INVALID)
>> +               odp_timer_free(test_timer_ping);
>>         return err;
>>  }
>>
>> @@ -340,9 +351,9 @@ int main(int argc ODP_UNUSED, char *argv[] ODP_UNUSED)
>>         pool = odp_buffer_pool_create("msg_pool", pool_base,
>> MSG_POOL_SIZE,
>>                                       BUF_SIZE,
>>                                       ODP_CACHE_LINE_SIZE,
>> -                                     ODP_BUFFER_TYPE_RAW);
>> +                                     ODP_BUFFER_TYPE_TIMEOUT);
>>         if (pool == ODP_BUFFER_POOL_INVALID) {
>> -               ODP_ERR("Pool create failed.\n");
>> +               ODP_ERR("Buffer pool create failed.\n");
>>                 return -1;
>>         }
>>
>> @@ -357,15 +368,19 @@ int main(int argc ODP_UNUSED, char *argv[]
>> ODP_UNUSED)
>>                 return -1;
>>         }
>>
>> -       test_timer_ping = odp_timer_create("ping_timer", pool,
>> -                                          RESUS*ODP_TIME_USEC,
>> -                                          MINUS*ODP_TIME_USEC,
>> -                                          MAXUS*ODP_TIME_USEC);
>> -
>> -       if (test_timer_ping == ODP_TIMER_INVALID) {
>> -               ODP_ERR("Timer create failed.\n");
>> +       /*
>> +        * Create timer pool
>> +        */
>> +       tp = odp_timer_pool_create("timer_pool", pool,
>> +                                  RESUS*ODP_TIME_USEC,
>> +                                  MINUS*ODP_TIME_USEC,
>> +                                  MAXUS*ODP_TIME_USEC,
>> +                                  1, false, ODP_CLOCK_CPU);
>> +       if (tp == ODP_TIMER_POOL_INVALID) {
>> +               ODP_ERR("Timer pool create failed.\n");
>>                 return -1;
>>         }
>> +       odp_timer_pool_start();
>>
>>         odp_shm_print_all();
>>
>> --
>> 1.9.1
>>
>>
>>
>
>
>
> --
> *Mike Holmes*
> Linaro  Sr Technical Manager
> LNG - ODP
>
Maxim Uvarov Oct. 5, 2014, 6:33 p.m. UTC | #3
On 10/05/2014 07:58 PM, Ola Liljedahl wrote:
> This is an existing program, I just updated it to use the new Timer 
> API. If there are no comments regarding this change, I don't think any 
> complaints here should stop the Timer API patch from being merged.
>
Thanks Ola. Petri, Anders please add sign-offs if patch is good to go.

> However, it is not a very good test program, more of a simple example
> program. So it ought to be renamed to better describe what it is. We
> can leave that for another patch as nothing has really changed due to
> this patch. We could use a real test program as well. I did write one
> for the timer implementation using a priority queue, but it is only
> single-threaded and so not a good test program for ODP.

I'm fine with that. Does anybody have objections?

Thanks,
Maxim.
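
For reference, a minimal single-threaded loop of the kind Ola mentions could
look roughly like the sketch below. It only uses the public calls from the
patch; the surrounding setup (ODP init, a timeout buffer pool, the timer pool
tp and a plain pollable queue tmo_queue) is assumed, and NUM_TMO and the 1 ms
period are illustrative values, not anything taken from the patch.

#define NUM_TMO 10 /* illustrative repeat count */

static void single_thread_timer_test(odp_timer_pool_t tp, odp_queue_t tmo_queue)
{
	/* One timer; timeouts are delivered to tmo_queue */
	odp_timer_t tim = odp_timer_alloc(tp, tmo_queue, NULL);
	if (tim == ODP_TIMER_INVALID) {
		ODP_ERR("odp_timer_alloc() failed\n");
		return;
	}

	uint64_t period = odp_timer_ns_to_tick(tp, 1000 * ODP_TIME_USEC);
	uint64_t tick = odp_timer_current_tick(tp) + period;
	int remain = NUM_TMO;

	while (remain != 0) {
		odp_buffer_t buf;
		odp_timer_tmo_t tmo;

		if (odp_timer_set_abs(tim, tick) != ODP_TIMER_SET_SUCCESS) {
			ODP_ERR("odp_timer_set_abs() failed\n");
			break;
		}

		/* Single-threaded, so poll the queue instead of the scheduler */
		while ((buf = odp_queue_deq(tmo_queue)) == ODP_BUFFER_INVALID)
			;

		if (odp_buffer_type(buf) != ODP_BUFFER_TYPE_TIMEOUT) {
			ODP_ERR("Unexpected buffer type\n");
			odp_buffer_free(buf);
			break;
		}
		tmo = odp_timeout_from_buffer(buf);
		/* No concurrent reset/cancel/free, so the timeout should be fresh */
		if (odp_timer_tmo_status(tmo) != ODP_TMO_FRESH)
			ODP_ERR("Unexpected non-fresh timeout\n");
		/* Always hand the timeout event back to the timer manager */
		odp_timer_return_tmo(tmo);

		tick += period;
		remain--;
	}

	odp_timer_cancel(tim);
	odp_timer_free(tim);
}

A multi-threaded variant of the same loop (several workers sharing a scheduled
queue, plus concurrent cancel/free to exercise the stale and orphan paths) is
what a proper ODP test would still need on top of this.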

>
> On 3 October 2014 20:57, Mike Holmes <mike.holmes@linaro.org> wrote:
>
>
>
>     On 2 October 2014 11:23, Ola Liljedahl <ola.liljedahl@linaro.org> wrote:
>
>         Signed-off-by: Ola Liljedahl <ola.liljedahl@linaro.org>
>         ---
>         Fixed review comments for v3 from Anders R.
>         * Example code snippets use @code/@endcode.
>         * Added some missing doxygen comments.
>         * Updated some comments.
>         * Reverted year in copyright notices.
>         * Added odp_likely() hint.
>         * Made some variables self-descriptive and removed redundant
>         comments.
>         Changed to use ticket locks instead of spin locks (ticket
>         locks are more fair).
>         Changed to use ODP_ABORT() which has become available since
>         the last patch.
>
>          example/timer/odp_timer_test.c  | 125 +--
>
>
>     Should this "test"  be under odp/test instead of odp/example, or
>     should the "test " be renamed if it makes a good example ?
>
>          platform/linux-generic/Makefile.am  |   1 +
>          platform/linux-generic/include/api/odp_timer.h  | 570
>         +++++++++++--
>          .../include/odp_priority_queue_internal.h | 108 +++
>          .../linux-generic/include/odp_timer_internal.h  |  71 +-
>          platform/linux-generic/odp_priority_queue.c | 283 +++++++
>          platform/linux-generic/odp_timer.c  | 923 ++++++++++++++-------
>          test/api_test/odp_timer_ping.c  |  73 +-
>          8 files changed, 1648 insertions(+), 506 deletions(-)
>          create mode 100644
>         platform/linux-generic/include/odp_priority_queue_internal.h
>          create mode 100644 platform/linux-generic/odp_priority_queue.c
>
>         diff --git a/example/timer/odp_timer_test.c
>         b/example/timer/odp_timer_test.c
>         index 6e1715d..750d785 100644
>         --- a/example/timer/odp_timer_test.c
>         +++ b/example/timer/odp_timer_test.c
>         @@ -41,67 +41,89 @@ typedef struct {
>          /** @private Barrier for test synchronisation */
>          static odp_barrier_t test_barrier;
>
>         -/** @private Timer handle*/
>         -static odp_timer_t test_timer;
>         +/** @private Timer pool handle */
>         +static odp_timer_pool_t tp;
>
>
>         +/** @private Timeout status ASCII strings */
>         +static const char *const status2str[] = {
>         +       "fresh", "stale", "orphaned"
>         +};
>         +
>          /** @private test timeout */
>          static void test_abs_timeouts(int thr, test_args_t *args)
>          {
>         -       uint64_t tick;
>                 uint64_t period;
>                 uint64_t period_ns;
>                 odp_queue_t queue;
>         -       odp_buffer_t buf;
>         -       int num;
>         +       int remain = args->tmo_count;
>         +       odp_timer_t hdl;
>         +       uint64_t tick;
>
>                 ODP_DBG("  [%i] test_timeouts\n", thr);
>
>                 queue = odp_queue_lookup("timer_queue");
>
>                 period_ns = args->period_us*ODP_TIME_USEC;
>         -       period    = odp_timer_ns_to_tick(test_timer, period_ns);
>         +       period    = odp_timer_ns_to_tick(tp, period_ns);
>
>                 ODP_DBG("  [%i] period %"PRIu64" ticks, %"PRIu64"
>         ns\n", thr,
>                         period, period_ns);
>
>         -       tick = odp_timer_current_tick(test_timer);
>         -
>         -       ODP_DBG("  [%i] current tick %"PRIu64"\n", thr, tick);
>         -
>         -       tick += period;
>         +       ODP_DBG("  [%i] current tick %"PRIu64"\n", thr,
>         +               odp_timer_current_tick(tp));
>
>         -       if (odp_timer_absolute_tmo(test_timer, tick, queue,
>         ODP_BUFFER_INVALID)
>         -           == ODP_TIMER_TMO_INVALID){
>         -               ODP_DBG("Timeout request failed\n");
>         +       odp_timer_t test_timer;
>         +       test_timer = odp_timer_alloc(tp, queue, NULL);
>         +       if (test_timer == ODP_TIMER_INVALID) {
>         +               ODP_ERR("Failed to allocate timer\n");
>                         return;
>                 }
>         +       tick = odp_timer_current_tick(tp);
>         +       hdl = test_timer;
>
>         -       num = args->tmo_count;
>         -
>         -       while (1) {
>         -               odp_timeout_t tmo;
>         -
>         -               buf = odp_schedule_one(&queue, ODP_SCHED_WAIT);
>         -
>         -               tmo  = odp_timeout_from_buffer(buf);
>         -               tick = odp_timeout_tick(tmo);
>         -
>         -               ODP_DBG("  [%i] timeout, tick %"PRIu64"\n",
>         thr, tick);
>         -
>         -               odp_buffer_free(buf);
>         -
>         -               num--;
>         -
>         -               if (num == 0)
>         -                       break;
>         +       while (remain != 0) {
>         +               odp_buffer_t buf;
>         +               odp_timer_tmo_t tmo;
>         +               odp_timer_tmo_status_t stat;
>         +               odp_timer_set_t rc;
>
>                         tick += period;
>         +               rc = odp_timer_set_abs(hdl, tick);
>         +               if (odp_unlikely(rc != ODP_TIMER_SET_SUCCESS)) {
>         +                       ODP_ERR("odp_timer_set_abs() failed
>         (%u)\n", rc);
>         +                       abort();
>         +               }
>
>         -               odp_timer_absolute_tmo(test_timer, tick,
>         -                                      queue, ODP_BUFFER_INVALID);
>         +               /* Get the next ready buffer/timeout */
>         +               buf = odp_schedule_one(&queue, ODP_SCHED_WAIT);
>         +               if (odp_unlikely(odp_buffer_type(buf) !=
>         + ODP_BUFFER_TYPE_TIMEOUT)) {
>         +                       ODP_ERR("Unexpected buffer type
>         received\n");
>         +                       abort();
>         +               }
>         +               tmo = odp_timeout_from_buffer(buf);
>         +               stat = odp_timer_tmo_status(tmo);
>         +               tick = odp_timer_expiration(tmo);
>         +               hdl = odp_timer_handle(tmo);
>         +               ODP_DBG("  [%i] timeout, tick %"PRIu64",
>         status %s\n",
>         +                       thr, tick, status2str[stat]);
>         +               /* if (stat == ODP_TMO_FRESH)  - do your thing! */
>         +               if (odp_likely(stat == ODP_TMO_ORPHAN)) {
>         +                       /* Some other thread freed the
>         corresponding
>         +                          timer after the timeout was already
>         +                          enqueued */
>         +                       /* Timeout handle is invalid, use our
>         own timer */
>         +                       hdl = test_timer;
>         +               }
>         +               /* Return timeout to timer manager, regardless
>         of status */
>         +               odp_timer_return_tmo(tmo);
>         +               remain--;
>                 }
>
>         +       odp_timer_cancel(test_timer);
>         +       odp_timer_free(test_timer);
>         +
>                 if (odp_queue_sched_type(queue) == ODP_SCHED_SYNC_ATOMIC)
>                         odp_schedule_release_atomic();
>          }
>         @@ -155,7 +177,6 @@ static void print_usage(void)
>                 printf("Options:\n");
>                 printf("  -c, --count <number>    core count, core IDs
>         start from 1\n");
>                 printf("  -r, --resolution <us>  timeout resolution in
>         usec\n");
>         -       printf("  -m, --min <us> minimum timeout in usec\n");
>                 printf("  -x, --max <us> maximum timeout in usec\n");
>                 printf("  -p, --period <us>  timeout period in usec\n");
>                 printf("  -t, --timeouts <count> timeout repeat count\n");
>         @@ -190,14 +211,14 @@ static void parse_args(int argc, char
>         *argv[], test_args_t *args)
>                 /* defaults */
>                 args->core_count    = 0; /* all cores */
>                 args->resolution_us = 10000;
>         -       args->min_us        = args->resolution_us;
>         +       args->min_us        = 0;
>                 args->max_us        = 10000000;
>                 args->period_us     = 1000000;
>                 args->tmo_count     = 30;
>
>                 while (1) {
>                         opt = getopt_long(argc, argv, "+c:r:m:x:p:t:h",
>         -                                longopts, &long_index);
>         +                                 longopts, &long_index);
>
>                         if (opt == -1)
>                                 break;  /* No more options */
>         @@ -321,10 +342,25 @@ int main(int argc, char *argv[])
>         ODP_BUFFER_TYPE_TIMEOUT);
>
>                 if (pool == ODP_BUFFER_POOL_INVALID) {
>         -               ODP_ERR("Pool create failed.\n");
>         +               ODP_ERR("Buffer pool create failed.\n");
>                         return -1;
>                 }
>
>         +       tp = odp_timer_pool_create("timer_pool", pool,
>         + args.resolution_us*ODP_TIME_USEC,
>         + args.min_us*ODP_TIME_USEC,
>         + args.max_us*ODP_TIME_USEC,
>         +                                  num_workers, /* One timer
>         per worker */
>         +                                  true,
>         +                                  ODP_CLOCK_CPU);
>         +       if (tp == ODP_TIMER_POOL_INVALID) {
>         +               ODP_ERR("Timer pool create failed.\n");
>         +               return -1;
>         +       }
>         +       odp_timer_pool_start();
>         +
>         +       odp_shm_print_all();
>         +
>                 /*
>                  * Create a queue for timer test
>                  */
>         @@ -340,19 +376,6 @@ int main(int argc, char *argv[])
>                         return -1;
>                 }
>
>         -       test_timer = odp_timer_create("test_timer", pool,
>         -  args.resolution_us*ODP_TIME_USEC,
>         -  args.min_us*ODP_TIME_USEC,
>         -  args.max_us*ODP_TIME_USEC);
>         -
>         -       if (test_timer == ODP_TIMER_INVALID) {
>         -               ODP_ERR("Timer create failed.\n");
>         -               return -1;
>         -       }
>         -
>         -
>         -       odp_shm_print_all();
>         -
>                 printf("CPU freq %"PRIu64" hz\n", odp_sys_cpu_hz());
>                 printf("Cycles vs nanoseconds:\n");
>                 ns = 0;
>         diff --git a/platform/linux-generic/Makefile.am
>         b/platform/linux-generic/Makefile.am
>         index d076d50..71f923c 100644
>         --- a/platform/linux-generic/Makefile.am
>         +++ b/platform/linux-generic/Makefile.am
>         @@ -59,6 +59,7 @@ __LIB__libodp_la_SOURCES = \
>                                    odp_packet_flags.c \
>                                    odp_packet_io.c \
>                                    odp_packet_socket.c \
>         +                          odp_priority_queue.c \
>                                    odp_queue.c \
>                                    odp_ring.c \
>                                    odp_rwlock.c \
>         diff --git a/platform/linux-generic/include/api/odp_timer.h
>         b/platform/linux-generic/include/api/odp_timer.h
>         index 01db839..82a1e05 100644
>         --- a/platform/linux-generic/include/api/odp_timer.h
>         +++ b/platform/linux-generic/include/api/odp_timer.h
>         @@ -8,9 +8,193 @@
>          /**
>           * @file
>           *
>         - * ODP timer
>         + * ODP timer service
>           */
>
>         +/** Example #1 Retransmission timer (e.g. for reliable
>         connections)
>         + @code
>         +
>         +//Create timer pool for reliable connections
>         +#define SEC 1000000000ULL //1s expressed in nanoseconds
>         +odp_timer_pool_t tcp_tpid =
>         +    odp_timer_pool_create("TCP",
>         +                         buffer_pool,
>         +                         1000000,//resolution 1ms
>         +                         0,//min tmo
>         +                         7200 * SEC,//max tmo length 2hours
>         +                         40000,//num_timers
>         +                         true,//shared
>         +                         ODP_CLOCK_CPU
>         +                        );
>         +if (tcp_tpid == ODP_TIMER_POOL_INVALID)
>         +{
>         +       //Failed to create timer pool => fatal error
>         +}
>         +
>         +
>         +//Setting up a new connection
>         +//Allocate retransmission timeout (identical for supervision
>         timeout)
>         +//The user pointer points back to the connection context
>         +conn->ret_tim = odp_timer_alloc(tcp_tpid, queue, conn);
>         +//Check if all resources were successfully allocated
>         +if (conn->ret_tim == ODP_TIMER_INVALID)
>         +{
>         +       //Failed to allocate all resources for connection =>
>         tear down
>         +       //Destroy timeout
>         +       odp_timer_free(conn->ret_tim);
>         +       //Tear down connection
>         +       ...
>         +       return false;
>         +}
>         +//All necessary resources successfully allocated
>         +//Compute initial retransmission length in timer ticks
>         +conn->ret_len = odp_timer_ns_to_tick(tcp_tpid, 3 * SEC);//Per
>         RFC1122
>         +//Arm the timer
>         +odp_timer_set_rel(conn->ret_tim, conn->ret_len);
>         +return true;
>         +
>         +
>         +//A packet for the connection has just been transmitted
>         +//Reset the retransmission timer
>         +odp_timer_set_rel(conn->ret_tim, conn->ret_len);
>         +
>         +
>         +//A retransmission timeout buffer for the connection has been
>         received
>         +odp_timer_tmo_t tmo = odp_timeout_from_buffer(buf);
>         +odp_timer_tmo_status_t stat = odp_timer_tmo_status(tmo);
>         +//Check if timeout is fresh or stale, for stale timeouts we
>         need to reset the
>         +//timer
>         +if (stat == ODP_TMO_FRESH) {
>         +       //Fresh timeout, last transmitted packet not acked in
>         time =>
>         +         retransmit
>         +       //Get connection from timeout event
>         +       conn = odp_timer_get_userptr(tmo);
>         +       //Retransmit last packet (e.g. TCP segment)
>         +       ...
>         +       //Re-arm timer using original delta value
>         +       odp_timer_set_rel(conn->ret_tim, conn->ret_len);
>         +} else if (stat == ODP_TMO_ORPHAN) {
>         +       odp_buffer_free(buf);
>         +       return;//Get out of here
>         +} // else stat == ODP_TMO_STALE, do nothing
>         +//Finished processing, return timeout
>         +odp_timer_return_tmo(tmo);
>         +
>         + @endcode
>         +*/
>         +
>         +/** Example #2 Periodic tick
>         + @code
>         +
>         +//Create timer pool for periodic ticks
>         +odp_timer_pool_t per_tpid =
>         +    odp_timer_pool_create("periodic-tick",
>         +                         buffer_pool,
>         +                         1,//resolution 1ns
>         +                         1,//minimum timeout length 1ns
>         +                         1000000000,//maximum timeout length 1s
>         +                         10,//num_timers
>         +                         false,//not shared
>         +                         ODP_CLOCK_CPU
>         +                        );
>         +if (per_tpid == ODP_TIMER_POOL_INVALID)
>         +{
>         +    //Failed to create timer pool => fatal error
>         +}
>         +
>         +
>         +//Allocate periodic timer
>         +tim_1733 = odp_timer_alloc(per_tpid, queue, NULL);
>         +//Check if all resources were successfully allocated
>         +if (tim_1733 == ODP_TIMER_INVALID)
>         +{
>         +       //Failed to allocate all resources => tear down
>         +       //Destroy timeout
>         +       odp_timer_free(tim_1733);
>         +       //Tear down other state
>         +       ...
>         +       return false;
>         +}
>         +//All necessary resources successfully allocated
>         +//Compute tick period in timer ticks
>         +period_1733 = odp_timer_ns_to_tick(per_tpid, 1000000000U /
>         1733U);//1733Hz
>         +//Compute when next tick should expire
>         +next_1733 = odp_timer_current_tick(per_tpid) + period_1733;
>         +//Arm the periodic timer
>         +odp_timer_set_abs(tim_1733, next_1733);
>         +return true;
>         +
>         +
>         +
>         +//A periodic timer timeout has been received
>         +odp_timer_tmo_t tmo = odp_timeout_from_buffer(buf);
>         +//Get status of timeout
>         +odp_timer_tmo_status_t stat = odp_timer_tmo_status(tmo);
>         +//We expect the timeout is always fresh since we are not
>         calling set or cancel
>         +on active or expired timers in this example
>         +assert(stat == ODP_TMO_FRESH);
>         +//Do processing driven by timeout *before*
>         +...
>         +do {
>         +       //Compute when the timer should expire next
>         +       next_1733 += period_1733;
>         +       //Check that this is in the future
>         +       if (likely(next_1733 > odp_timer_current_tick(per_tpid)))
>         +       break;//Yes, done
>         +       //Else we missed a timeout
>         +       //Optionally attempt some recovery and/or logging of
>         the problem
>         +       ...
>         +} while (0);
>         +//Re-arm periodic timer
>         +odp_timer_set_abs(tim_1733, next_1733);
>         +//Or do processing driven by timeout *after*
>         +...
>         +odp_timer_return_tmo(tmo);
>         +return;
>         +
>         + @endcode
>         +*/
>         +
>         +/** Example #3 Tear down of flow
>         + @code
>         +//ctx points to flow context data structure owned by application
>         +//Free the timer, cancelling any timeout
>         +odp_timer_free(ctx->timer);//Any enqueued timeout will be
>         made invalid
>         +//Continue tearing down and eventually freeing context
>         +...
>         +return;
>         +
>         +//A timeout has been received, check status
>         +odp_timer_tmo_t tmo = odp_timeout_from_buffer(buf);
>         +switch (odp_timer_tmo_status(tmo))
>         +{
>         +    case ODP_TMO_FRESH :
>         +       //A flow has timed out, tear it down
>         +       //Find flow context from timeout
>         +       ctx = (context *)odp_timer_get_userptr(tmo);
>         +       //Free the supervision timer, any enqueued timeout
>         will remain
>         +       odp_timer_free(ctx->tim);
>         +       //Free other flow related resources
>         +       ...
>         +       //Free the timeout buffer
>         +       odp_buffer_free(buf);
>         +       //Flow torn down
>         +       break;
>         +    case ODP_TMO_STALE :
>         +       //A stale timeout was received, return timeout and
>         update timer
>         +       odp_timer_return_tmo(tmo);
>         +       break;
>         +    case ODP_TMO_ORPHAN :
>         +       //Orphaned timeout (from previously torn down flow)
>         +       //No corresponding timer or flow context
>         +       //Free the timeout buffer
>         +       odp_buffer_free(buf);
>         +       break;
>         +}
>         +
>         + @endcode
>         +*/
>         +
>          #ifndef ODP_TIMER_H_
>          #define ODP_TIMER_H_
>
>         @@ -18,144 +202,408 @@
>          extern "C" {
>          #endif
>
>         +#include <stdlib.h>
>          #include <odp_std_types.h>
>          #include <odp_buffer.h>
>          #include <odp_buffer_pool.h>
>          #include <odp_queue.h>
>
>         +struct odp_timer_pool_s; /**< Forward declaration */
>         +
>         +/**
>         +* ODP timer pool handle (platform dependent)
>         +*/
>         +typedef struct odp_timer_pool_s *odp_timer_pool_t;
>         +
>         +/**
>         + * Invalid timer pool handle (platform dependent).
>         + */
>         +#define ODP_TIMER_POOL_INVALID NULL
>
>          /**
>         - * ODP timer handle
>         + * Clock sources for timers in timer pool.
>           */
>         -typedef uint32_t odp_timer_t;
>         +typedef enum odp_timer_clk_src_e {
>         +       /** Use CPU clock as clock source for timers */
>         +       ODP_CLOCK_CPU,
>         +       /** Use external clock as clock source for timers */
>         +       ODP_CLOCK_EXT
>         +       /* Platform dependent which other clock sources exist */
>         +} odp_timer_clk_src_t;
>
>         -/** Invalid timer */
>         -#define ODP_TIMER_INVALID 0
>         +struct odp_timer_s; /**< Forward declaration */
>
>         +/**
>         +* ODP timer handle (platform dependent).
>         +*/
>         +typedef struct odp_timer_s *odp_timer_t;
>
>          /**
>         - * ODP timeout handle
>         + * Invalid timer handle (platform dependent).
>           */
>         -typedef odp_buffer_t odp_timer_tmo_t;
>         -
>         -/** Invalid timeout */
>         -#define ODP_TIMER_TMO_INVALID 0
>         +#define ODP_TIMER_INVALID NULL
>
>         +/**
>         + * Return values of timer set calls.
>         + */
>         +typedef enum odp_timer_set_e {
>         +       /** Timer set operation successful */
>         +       ODP_TIMER_SET_SUCCESS,
>         +       /** Timer set operation failed, expiration too early */
>         +       ODP_TIMER_SET_TOOEARLY,
>         +       /** Timer set operation failed, expiration too late */
>         +       ODP_TIMER_SET_TOOLATE
>         +} odp_timer_set_t;
>
>          /**
>         - * Timeout notification
>         + * Timeout event handle.
>           */
>         -typedef odp_buffer_t odp_timeout_t;
>         +typedef odp_buffer_t odp_timer_tmo_t;
>
>         +/**
>         + * Status of a timeout event.
>         + */
>         +typedef enum odp_timer_tmo_status_e {
>         +       /** Timeout is fresh, process it and return timeout */
>         +       ODP_TMO_FRESH,
>         +       /** Timer reset or cancelled, just return timeout  */
>         +       ODP_TMO_STALE,
>         +       /** Timer deleted, return or free timeout */
>         +       ODP_TMO_ORPHAN
>         +} odp_timer_tmo_status_t;
>
>          /**
>         - * Create a timer
>         + * Create a timer pool
>           *
>         - * Creates a new timer with requested properties.
>         + * Create a new timer pool.
>           *
>           * @param name       Name
>         - * @param pool       Buffer pool for allocating timeout
>         notifications
>         + * @param buf_pool   Buffer pool for allocating timeouts (and
>         only timeouts)
>           * @param resolution Timeout resolution in nanoseconds
>         - * @param min_tmo    Minimum timeout duration in nanoseconds
>         - * @param max_tmo    Maximum timeout duration in nanoseconds
>         + * @param min_tmo    Minimum relative timeout in nanoseconds
>         + * @param max_tmo    Maximum relative timeout in nanoseconds
>         + * @param num_timers Number of supported timers (minimum)
>         + * @param shared     Shared or private timer pool.
>         + *                Operations on shared timers will include
>         the necessary
>         + *                mutual exclusion, operations on private
>         timers may not
>         + *                (mutual exclusion is the responsibility of
>         the caller).
>         + * @param clk_src    Clock source to use
>           *
>         - * @return Timer handle if successful, otherwise
>         ODP_TIMER_INVALID
>         + * @return Timer pool handle if successful, otherwise
>         ODP_TIMER_POOL_INVALID
>         + * and errno set
>           */
>         -odp_timer_t odp_timer_create(const char *name,
>         odp_buffer_pool_t pool,
>         -                            uint64_t resolution, uint64_t
>         min_tmo,
>         -                            uint64_t max_tmo);
>         +odp_timer_pool_t
>         +odp_timer_pool_create(const char *name,
>         +                     odp_buffer_pool_t buf_pool,
>         +                     uint64_t resolution,
>         +                     uint64_t min_tmo,
>         +                     uint64_t max_tmo,
>         +                     uint32_t num_timers,
>         +                     bool shared,
>         +                     odp_timer_clk_src_t clk_src);
>         +
>         +/**
>         + * Start a timer pool
>         + *
>         + * Start all created timer pools, enabling the allocation of
>         timers.
>         + * The purpose of this call is to coordinate the creation of
>         multiple timer
>         + * pools that may use the same underlying HW resources.
>         + * This function may be called multiple times.
>         + */
>         +void odp_timer_pool_start(void);
>         +
>         +/**
>         + * Destroy a timer pool
>         + *
>         + * Destroy a timer pool, freeing all resources.
>         + * All timers must have been freed.
>         + *
>         + * @param tpid  Timer pool identifier
>         + */
>         +void odp_timer_pool_destroy(odp_timer_pool_t tpid);
>
>          /**
>           * Convert timer ticks to nanoseconds
>           *
>         - * @param timer Timer
>         + * @param tpid  Timer pool identifier
>           * @param ticks Timer ticks
>           *
>           * @return Nanoseconds
>           */
>         -uint64_t odp_timer_tick_to_ns(odp_timer_t timer, uint64_t ticks);
>         +uint64_t odp_timer_tick_to_ns(odp_timer_pool_t tpid, uint64_t
>         ticks);
>
>          /**
>           * Convert nanoseconds to timer ticks
>           *
>         - * @param timer Timer
>         + * @param tpid  Timer pool identifier
>           * @param ns    Nanoseconds
>           *
>           * @return Timer ticks
>           */
>         -uint64_t odp_timer_ns_to_tick(odp_timer_t timer, uint64_t ns);
>         +uint64_t odp_timer_ns_to_tick(odp_timer_pool_t tpid, uint64_t
>         ns);
>
>          /**
>         - * Timer resolution in nanoseconds
>         + * Current tick value
>           *
>         - * @param timer Timer
>         + * @param tpid Timer pool identifier
>           *
>         - * @return Resolution in nanoseconds
>         + * @return Current time in timer ticks
>         + */
>         +uint64_t odp_timer_current_tick(odp_timer_pool_t tpid);
>         +
>         +/**
>         + * ODP timer configurations
>           */
>         -uint64_t odp_timer_resolution(odp_timer_t timer);
>         +
>         +typedef enum odp_timer_pool_conf_e {
>         +       ODP_TIMER_NAME,      /**< Return name of timer pool */
>         +       ODP_TIMER_RESOLUTION,/**< Return the timer resolution
>         (in ns) */
>         +       ODP_TIMER_MIN_TICKS, /**< Return the min supported rel
>         timeout (ticks)*/
>         +       ODP_TIMER_MAX_TICKS, /**< Return the max supported rel
>         timeout (ticks)*/
>         +       ODP_TIMER_NUM_TIMERS,/**< Return number of supported
>         timers */
>         +       ODP_TIMER_SHARED     /**< Return shared flag */
>         +} odp_timer_pool_conf_t;
>
>          /**
>         - * Maximum timeout in timer ticks
>         + * Query different timer pool configurations, e.g.
>         + *  Timer resolution in nanoseconds
>         + *  Maximum timeout in timer ticks
>         + *  Number of supported timers
>         + *  Shared or private timer pool
>           *
>         - * @param timer Timer
>         + * @param tpid Timer pool identifier
>         + * @param item Configuration item being queried
>           *
>         - * @return Maximum timeout in timer ticks
>         + * @return the requested piece of information or 0 for
>         unknown item.
>           */
>         -uint64_t odp_timer_maximum_tmo(odp_timer_t timer);
>         +uintptr_t odp_timer_pool_query_conf(odp_timer_pool_t tpid,
>         +  odp_timer_pool_conf_t item);
>
>          /**
>         - * Current timer tick
>         + * Allocate a timer
>           *
>         - * @param timer Timer
>         + * Create a timer (allocating all necessary resources e.g.
>         timeout event) from
>         + * the timer pool.
>           *
>         - * @return Current time in timer ticks
>         + * @param tpid     Timer pool identifier
>         + * @param queue    Destination queue for timeout notifications
>         + * @param user_ptr User defined pointer or NULL (copied to
>         timeouts)
>         + *
>         + * @return Timer handle if successful, otherwise
>         ODP_TIMER_INVALID and
>         + *        errno set.
>           */
>         -uint64_t odp_timer_current_tick(odp_timer_t timer);
>         +odp_timer_t odp_timer_alloc(odp_timer_pool_t tpid,
>         +                           odp_queue_t queue,
>         +                           void *user_ptr);
>
>          /**
>         - * Request timeout with an absolute timer tick
>         + * Free a timer
>         + *
>         + * Free (destroy) a timer, freeing all associated resources
>         (e.g. default
>         + * timeout event). An expired and enqueued timeout event will
>         not be freed.
>         + * It is the responsibility of the application to free this
>         timeout when it
>         + * is received.
>           *
>         - * When tick reaches tmo_tick, the timer enqueues the timeout
>         notification into
>         - * the destination queue.
>         + * @param tim      Timer handle
>         + */
>         +void odp_timer_free(odp_timer_t tim);
>         +
>         +/**
>         + * Set a timer (absolute time) with a user-defined timeout buffer
>           *
>         - * @param timer    Timer
>         - * @param tmo_tick Absolute timer tick value which triggers
>         the timeout
>         - * @param queue    Destination queue for the timeout notification
>         - * @param buf      User defined timeout notification buffer. When
>         - *                 ODP_BUFFER_INVALID, default timeout
>         notification is used.
>         + * Set (arm) the timer to expire at specific time. The
>         user-defined
>         + * buffer will be enqueued when the timer expires.
>         + * Arming may fail (if the timer is in state EXPIRED); an
>         earlier timeout
>         + * will then be received. odp_timer_tmo_status() must be used
>         to check if
>         + * the received timeout is valid.
>           *
>         - * @return Timeout handle if successful, otherwise
>         ODP_TIMER_TMO_INVALID
>         + * Note: any invalid parameters will be treated as
>         programming errors and will
>         + * cause the application to abort.
>         + *
>         + * @param tim      Timer
>         + * @param abs_tck  Expiration time in absolute timer ticks
>         + * @param user_buf The buffer to use as timeout event
>         + *
>         + * @return Success or failure code
>           */
>         -odp_timer_tmo_t odp_timer_absolute_tmo(odp_timer_t timer,
>         uint64_t tmo_tick,
>         -                                      odp_queue_t queue,
>         odp_buffer_t buf);
>         +odp_timer_set_t odp_timer_set_abs_w_buf(odp_timer_t tim,
>         +                                       uint64_t abs_tck,
>         +                                       odp_buffer_t user_buf);
>
>          /**
>         - * Cancel a timeout
>         + * Set a timer with an absolute expiration time
>         + *
>         + * Set (arm) the timer to expire at a specific time.
>         + * Arming may fail (if the timer is in state EXPIRED); an
>         earlier timeout
>         + * will then be received. odp_timer_tmo_status() must be used
>         to check if
>         + * the received timeout is valid.
>         + *
>         + * Note: any invalid parameters will be treated as
>         programming errors and will
>         + * cause the application to abort.
>           *
>         - * @param timer Timer
>         - * @param tmo   Timeout to cancel
>         + * @param tim     Timer
>         + * @param abs_tck Expiration time in absolute timer ticks
>           *
>         - * @return 0 if successful
>         + * @return Success or failure code
>           */
>         -int odp_timer_cancel_tmo(odp_timer_t timer, odp_timer_tmo_t tmo);
>         +odp_timer_set_t odp_timer_set_abs(odp_timer_t tim, uint64_t
>         abs_tck);
>
>          /**
>         - * Convert buffer handle to timeout handle
>         + * Set a timer with a relative expiration time and
>         user-defined buffer.
>           *
>         - * @param buf  Buffer handle
>         + * Set (arm) the timer to expire at a relative future time.
>         + * Arming may fail (if the timer is in state EXPIRED);
>         + * an earlier timeout will then be received.
>         odp_timer_tmo_status() must
>         + * be used to check if the received timeout is valid.
>           *
>         - * @return Timeout buffer handle
>         + * Note: any invalid parameters will be treated as
>         programming errors and will
>         + * cause the application to abort.
>         + *
>         + * @param tim      Timer
>         + * @param rel_tck  Expiration time in timer ticks relative to
>         current time of
>         + *                the timer pool the timer belongs to
>         + * @param user_buf The buffer to use as timeout event
>         + *
>         + * @return Success or failure code
>           */
>         -odp_timeout_t odp_timeout_from_buffer(odp_buffer_t buf);
>         +odp_timer_set_t odp_timer_set_rel_w_buf(odp_timer_t tim,
>         +                                       uint64_t rel_tck,
>         +                                       odp_buffer_t user_buf);
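
The user-buffer variant would then let an application-prepared buffer be
delivered at expiration, roughly like this (names are mine and the tick
count is assumed to already be within the pool's supported range):

#include <stdint.h>
#include <odp_timer.h>
#include <odp_buffer.h>

/* Sketch: deliver the pre-built buffer 'notif_buf' when the timer
 * fires, 'delay_tck' ticks from now */
static int arm_with_user_buf(odp_timer_t tim, uint64_t delay_tck,
                             odp_buffer_t notif_buf)
{
        odp_timer_set_t rc = odp_timer_set_rel_w_buf(tim, delay_tck, notif_buf);

        if (rc != ODP_TIMER_SET_SUCCESS)
                return -1; /* e.g. too early/late for this pool */
        return 0;
}
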
>         +/**
>         + * Set a timer with a relative expiration time
>         + *
>         + * Set (arm) the timer to expire at a relative future time.
>         + * Arming may fail (if the timer is in state EXPIRED);
>         + * an earlier timeout will then be received.
>         odp_timer_tmo_status() must
>         + * be used to check if the received timeout is valid.
>         + *
>         + * Note: any invalid parameters will be treated as
>         programming errors and will
>         + * cause the application to abort.
>         + *
>         + * @param tim     Timer
>         + * @param rel_tck Expiration time in timer ticks relative to
>         current time of
>         + *               the timer pool the timer belongs to
>         + *
>         + * @return Success or failure code
>         + */
>         +odp_timer_set_t odp_timer_set_rel(odp_timer_t tim, uint64_t
>         rel_tck);
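
And the plain relative set with the return codes checked. I am assuming
odp_timer_ns_to_tick() takes the pool handle and a nanosecond count, based
on how it is used in odp_timer.c further down:

#include <stdint.h>
#include <odp_timer.h>
#include <odp_debug.h>

/* Sketch: (re)arm a timer 'period_ns' from now.
 * Assumption: odp_timer_ns_to_tick(tp, ns) converts a nanosecond delta
 * to ticks for this pool (signature inferred from odp_timer.c). */
static int arm_rel(odp_timer_pool_t tp, odp_timer_t tim, uint64_t period_ns)
{
        uint64_t rel_tck = odp_timer_ns_to_tick(tp, period_ns);

        switch (odp_timer_set_rel(tim, rel_tck)) {
        case ODP_TIMER_SET_SUCCESS:
                return 0;
        case ODP_TIMER_SET_TOOEARLY:
        case ODP_TIMER_SET_TOOLATE:
                ODP_ERR("timeout outside the pool's supported range\n");
                return -1;
        default:
                return -1;
        }
}
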
>
>          /**
>         - * Return absolute timeout tick
>         + * Cancel a timer
>         + *
>         + * Cancel a timer, preventing future expiration and delivery.
>         + *
>         + * A timer that has already expired and been enqueued for
>         delivery may be
>         + * impossible to cancel and will instead be delivered to the
>         destination queue.
>         + * Use odp_timer_tmo_status() to check whether a received
>         timeout is fresh or
>         + * stale (cancelled). Stale timeouts will automatically be
>         recycled.
>         + *
>         + * Note: any invalid parameters will be treated as
>         programming errors and will
>         + * cause the application to abort.
>         + *
>         + * @param tim    Timer handle
>         + */
>         +void odp_timer_cancel(odp_timer_t tim);
>         +
>         +/**
>         + * Translate from buffer to timeout
>         + *
>         + * Return the timeout handle that corresponds to the
>         specified buffer handle.
>         + * The buffer must be of type ODP_BUFFER_TYPE_TIMEOUT.
>         + *
>         + * @param buf   Buffer handle to translate.
>         + *
>         + * @return      The corresponding timeout handle.
>         + */
>         +static inline odp_timer_tmo_t
>         odp_timeout_from_buffer(odp_buffer_t buf)
>         +{
>         +       if (odp_unlikely(odp_buffer_type(buf) !=
>         ODP_BUFFER_TYPE_TIMEOUT)) {
>         +               ODP_ERR("Buffer type %u not timeout\n", buf);
>         +               abort();
>         +       }
>         +       /* In this implementation, timeout == buffer */
>         +       return (odp_timer_tmo_t)buf;
>         +}
>         +
>         +/**
>         + * Translate from timeout to buffer
>         + *
>         + * Return the buffer handle that corresponds to the specified
>         timeout handle.
>         + *
>         + * @param tmo   Timeout handle to translate.
>         + *
>         + * @return      The corresponding buffer handle.
>         + */
>         +static inline odp_buffer_t
>         odp_buffer_from_timeout(odp_timer_tmo_t tmo)
>         +{
>         +       /* In this implementation, buffer == timeout */
>         +       return (odp_buffer_t)tmo;
>         +}
>         +
>         +/**
>         + * Return timeout to timer
>         + *
>         + * Return a received timeout for reuse with the parent timer.
>         + * Note: odp_timer_return_tmo() must be called on all
>         received timeouts!
>         + * (Excluding user-defined timeout buffers).
>         + * The timeout must not be accessed after this call, the
>         semantics is
>         + * equivalent to a free call.
>         + *
>         + * @param tmo    Timeout
>         + */
>         +void odp_timer_return_tmo(odp_timer_tmo_t tmo);
>         +
>         +/**
>         + * Return fresh/stale/orphan status of timeout.
>         + *
>         + * Check a received timeout for orphan status (i.e. parent timer
>         freed) and
>         + * staleness (i.e. parent timer has been reset or cancelled
>         after the timeout
>         + * expired and was enqueued).
>         + * If the timeout is fresh, it should be processed.
>         + * If the timeout is stale or orphaned, it should be ignored.
>         + * All timeouts must be returned using the
>         odp_timer_return_tmo() call.
>         + *
>         + * @param tmo    Timeout
>         + *
>         + * @return One of ODP_TMO_FRESH, ODP_TMO_STALE or ODP_TMO_ORPHAN.
>         + */
>         +odp_timer_tmo_status_t odp_timer_tmo_status(odp_timer_tmo_t tmo);
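
For my own understanding, the receive side would then look roughly like the
sketch below; process_expiration() is an application-defined placeholder and
the dequeue/schedule step is outside this patch. If this matches the intent,
a snippet along these lines might be worth adding to the doxygen with
@code/@endcode:

#include <stdint.h>
#include <odp_buffer.h>
#include <odp_timer.h>

/* Application-defined handler, placeholder for this sketch */
void process_expiration(void *user_ptr, uint64_t exp_tck);

/* Sketch: handle one buffer received from a timer's destination queue */
static void handle_buf(odp_buffer_t buf)
{
        odp_timer_tmo_t tmo;

        if (odp_buffer_type(buf) != ODP_BUFFER_TYPE_TIMEOUT)
                return; /* not a timeout, e.g. a packet; handled elsewhere */

        tmo = odp_timeout_from_buffer(buf);
        switch (odp_timer_tmo_status(tmo)) {
        case ODP_TMO_FRESH:
                /* Valid expiration, act on it */
                process_expiration(odp_timer_userptr(tmo),
                                   odp_timer_expiration(tmo));
                break;
        case ODP_TMO_STALE:   /* timer was reset/cancelled after expiring */
        case ODP_TMO_ORPHAN:  /* parent timer has been freed */
        default:
                break;        /* nothing to process, just hand it back */
        }
        /* Mandatory for every received timeout (user-defined buffers excepted) */
        odp_timer_return_tmo(tmo);
}
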
>         +
>         +/**
>         + * Get timer handle
>         + *
>         + * Return the handle of the parent timer.
>         + *
>         + * @param tmo   Timeout
>         + *
>         + * @return Timer handle or ODP_TIMER_INVALID for orphaned
>         timeouts.
>         + *         Note that the parent timer could be freed by some
>         other thread
>         + *         at any time and thus the timeout becomes orphaned.
>         + */
>         +odp_timer_t odp_timer_handle(odp_timer_tmo_t tmo);
>         +
>         +/**
>         + * Get expiration time
>         + *
>         + * Return (requested) expiration time of timeout.
>         + *
>         + * @param tmo   Timeout
>         + *
>         + * @return Expiration time
>         + */
>         +uint64_t odp_timer_expiration(odp_timer_tmo_t tmo);
>         +
>         +/**
>         + * Get user pointer
>         + *
>         + * Return the user pointer of the timer associated with the timeout.
>         + * The user pointer is often used to point to some associated
>         context.
>           *
>         - * @param tmo Timeout buffer handle
>         + * @param tmo   Timeout
>           *
>         - * @return Absolute timeout tick
>         + * @return User pointer
>           */
>         -uint64_t odp_timeout_tick(odp_timeout_t tmo);
>         +void *odp_timer_userptr(odp_timer_tmo_t tmo);
>
>          #ifdef __cplusplus
>          }
>         diff --git
>         a/platform/linux-generic/include/odp_priority_queue_internal.h
>         b/platform/linux-generic/include/odp_priority_queue_internal.h
>         new file mode 100644
>         index 0000000..7d7f3a2
>         --- /dev/null
>         +++ b/platform/linux-generic/include/odp_priority_queue_internal.h
>         @@ -0,0 +1,108 @@
>         +#ifndef _PRIORITY_QUEUE_H
>         +#define _PRIORITY_QUEUE_H
>         +
>         +#include <assert.h>
>         +#include <stddef.h>
>         +#include <stdint.h>
>         +#include <stdbool.h>
>         +#include <odp_align.h>
>         +
>         +#define INVALID_INDEX ~0U
>         +#define INVALID_PRIORITY ((pq_priority_t)~0ULL)
>         +
>         +typedef uint64_t pq_priority_t;
>         +
>         +struct heap_node;
>         +
>         +typedef struct priority_queue {
>         +       uint32_t max_elems;/* Number of elements in heap */
>         +       /* Number of registered elements (active + inactive) */
>         +       uint32_t reg_elems;
>         +       uint32_t num_elems;/* Number of active elements */
>         +       struct heap_node *heap;
>         +       struct heap_node *org_ptr;
>         +} priority_queue ODP_ALIGNED(sizeof(uint64_t));
>         +
>         +/* The user gets a pointer to this structure */
>         +typedef struct {
>         +       /* Set when pq_element registered with priority queue */
>         +       priority_queue *pq;
>         +       uint32_t index;/* Index into heap array */
>         +       pq_priority_t prio;
>         +} pq_element;
>         +
>         +/*** Operations on pq_element ***/
>         +
>         +static inline void pq_element_con(pq_element *this)
>         +{
>         +       this->pq = NULL;
>         +       this->index = INVALID_INDEX;
>         +       this->prio = 0U;
>         +}
>         +
>         +static inline void pq_element_des(pq_element *this)
>         +{
>         +       (void)this;
>         +       assert(this->index == INVALID_INDEX);
>         +}
>         +
>         +static inline priority_queue *get_pq(const pq_element *this)
>         +{
>         +       return this->pq;
>         +}
>         +
>         +static inline pq_priority_t get_prio(const pq_element *this)
>         +{
>         +       return this->prio;
>         +}
>         +
>         +static inline uint32_t get_index(const pq_element *this)
>         +{
>         +       return this->index;
>         +}
>         +
>         +static inline bool is_active(const pq_element *this)
>         +{
>         +       return this->index != INVALID_INDEX;
>         +}
>         +
>         +/*** Operations on priority_queue ***/
>         +
>         +extern uint32_t pq_smallest_child(priority_queue *, uint32_t,
>         pq_priority_t);
>         +extern void pq_bubble_down(priority_queue *, pq_element *);
>         +extern void pq_bubble_up(priority_queue *, pq_element *);
>         +
>         +static inline bool valid_index(priority_queue *this, uint32_t
>         idx)
>         +{
>         +       return idx < this->num_elems;
>         +}
>         +
>         +extern void priority_queue_con(priority_queue *, uint32_t
>         _max_elems);
>         +extern void priority_queue_des(priority_queue *);
>         +
>         +/* Register pq_element with priority queue */
>         +/* Return false if priority queue full */
>         +extern bool pq_register_element(priority_queue *, pq_element *);
>         +
>         +/* Activate and add pq_element to priority queue */
>         +/* Element must be disarmed */
>         +extern void pq_activate_element(priority_queue *, pq_element
>         *, pq_priority_t);
>         +
>         +/* Reset (increase) priority for pq_element */
>         +/* Element may be active or inactive (released) */
>         +extern void pq_reset_element(priority_queue *, pq_element *,
>         pq_priority_t);
>         +
>         +/* Deactivate and remove element from priority queue */
>         +/* Element may be active or inactive (released) */
>         +extern void pq_deactivate_element(priority_queue *,
>         pq_element *);
>         +
>         +/* Unregister pq_element */
>         +extern void pq_unregister_element(priority_queue *,
>         pq_element *);
>         +
>         +/* Return priority of first element (lowest numerical value) */
>         +extern pq_priority_t pq_first_priority(const priority_queue *);
>         +
>         +/* Deactivate and return first element if its prio is <=
>         threshold */
>         +extern pq_element *pq_release_element(priority_queue *,
>         pq_priority_t thresh);
>         +
>         +#endif /* _PRIORITY_QUEUE_H */
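
To check my reading of this internal API, a standalone sketch of the
intended call sequence; struct item and demo() are mine. The timer code
below embeds pq_element as the first member of odp_timer_s in the same way,
so the pointer returned by pq_release_element() can be cast back to the
containing object:

#include <inttypes.h>
#include <stdio.h>
#include "odp_priority_queue_internal.h"

/* Illustration only: an object embedding pq_element as its first member,
 * mirroring how odp_timer_s below uses it as a base class */
struct item {
        pq_element elem; /* first, so a pq_element * can be cast back */
        const char *name;
};

static void demo(void)
{
        priority_queue pq;
        struct item a, b;
        pq_element *e;

        priority_queue_con(&pq, 8);             /* capacity: 8 elements */
        pq_element_con(&a.elem);
        a.name = "a";
        pq_element_con(&b.elem);
        b.name = "b";
        if (!pq_register_element(&pq, &a.elem) ||
            !pq_register_element(&pq, &b.elem))
                return;                         /* full (cleanup omitted) */
        pq_activate_element(&pq, &a.elem, 30);  /* prio = expiration tick */
        pq_activate_element(&pq, &b.elem, 10);

        /* Release everything due at tick <= 20: only 'b' qualifies */
        while ((e = pq_release_element(&pq, 20)) != NULL)
                printf("released %s (prio %" PRIu64 ")\n",
                       ((struct item *)e)->name, get_prio(e));

        pq_unregister_element(&pq, &a.elem);
        pq_unregister_element(&pq, &b.elem);
        pq_element_des(&a.elem);
        pq_element_des(&b.elem);
        priority_queue_des(&pq);
}
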
>         diff --git
>         a/platform/linux-generic/include/odp_timer_internal.h
>         b/platform/linux-generic/include/odp_timer_internal.h
>         index ad28f53..461f28c 100644
>         --- a/platform/linux-generic/include/odp_timer_internal.h
>         +++ b/platform/linux-generic/include/odp_timer_internal.h
>         @@ -1,4 +1,4 @@
>         -/* Copyright (c) 2013, Linaro Limited
>         +/* Copyright (c) 2014, Linaro Limited
>           * All rights reserved.
>           *
>           * SPDX-License-Identifier:     BSD-3-Clause
>         @@ -8,72 +8,51 @@
>          /**
>           * @file
>           *
>         - * ODP timer timeout descriptor - implementation internal
>         + * ODP timeout descriptor - implementation internal
>           */
>
>          #ifndef ODP_TIMER_INTERNAL_H_
>          #define ODP_TIMER_INTERNAL_H_
>
>         -#ifdef __cplusplus
>         -extern "C" {
>         -#endif
>         -
>         -#include <odp_std_types.h>
>         -#include <odp_queue.h>
>         -#include <odp_buffer.h>
>         +#include <odp_align.h>
>         +#include <odp_debug.h>
>          #include <odp_buffer_internal.h>
>          #include <odp_buffer_pool_internal.h>
>          #include <odp_timer.h>
>
>         -struct timeout_t;
>         -
>         -typedef struct timeout_t {
>         -       struct timeout_t *next;
>         -       int               timer_id;
>         -       int               tick;
>         -       uint64_t          tmo_tick;
>         -       odp_queue_t       queue;
>         -       odp_buffer_t      buf;
>         -       odp_buffer_t      tmo_buf;
>         -} timeout_t;
>         -
>         -
>         -struct odp_timeout_hdr_t;
>         -
>          /**
>         - * Timeout notification header
>         + * Internal Timeout header
>           */
>         -typedef struct odp_timeout_hdr_t {
>         +typedef struct {
>         +       /* common buffer header */
>                 odp_buffer_hdr_t buf_hdr;
>
>         -       timeout_t meta;
>         -
>         -       uint8_t buf_data[];
>         +       /* Requested expiration time */
>         +       uint64_t expiration;
>         +       /* User ptr inherited from parent timer */
>         +       void *user_ptr;
>         +       /* Parent timer */
>         +       odp_timer_t timer;
>         +       /* Tag inherited from parent timer at time of
>         expiration */
>         +       uint32_t tag;
>         +       /* Gen-cnt inherited from parent timer at time of
>         creation */
>         +       uint16_t gencnt;
>         +       uint16_t pad;
>         +       uint8_t buf_data[0];
>          } odp_timeout_hdr_t;
>
>         -
>         -
>          ODP_STATIC_ASSERT(sizeof(odp_timeout_hdr_t) ==
>         -          ODP_OFFSETOF(odp_timeout_hdr_t, buf_data),
>         -          "ODP_TIMEOUT_HDR_T__SIZE_ERR");
>         -
>         +                 ODP_OFFSETOF(odp_timeout_hdr_t, buf_data),
>         +                 "sizeof(odp_timeout_hdr_t) ==
>         ODP_OFFSETOF(odp_timeout_hdr_t, buf_data)");
>          ODP_STATIC_ASSERT(sizeof(odp_timeout_hdr_t) %
>         sizeof(uint64_t) == 0,
>         -          "ODP_TIMEOUT_HDR_T__SIZE_ERR2");
>         -
>         +                 "sizeof(odp_timeout_hdr_t) %
>         sizeof(uint64_t) == 0");
>
>          /**
>         - * Return timeout header
>         + * Return the timeout header
>           */
>         -static inline odp_timeout_hdr_t
>         *odp_timeout_hdr(odp_timeout_t tmo)
>         +static inline odp_timeout_hdr_t *odp_timeout_hdr(odp_buffer_t
>         buf)
>          {
>         -       odp_buffer_hdr_t *buf_hdr =
>         odp_buf_to_hdr((odp_buffer_t)tmo);
>         -       return (odp_timeout_hdr_t *)(uintptr_t)buf_hdr;
>         -}
>         -
>         -
>         -
>         -#ifdef __cplusplus
>         +       return (odp_timeout_hdr_t *)odp_buf_to_hdr(buf);
>          }
>         -#endif
>
>          #endif
>         diff --git a/platform/linux-generic/odp_priority_queue.c
>         b/platform/linux-generic/odp_priority_queue.c
>         new file mode 100644
>         index 0000000..b72c26f
>         --- /dev/null
>         +++ b/platform/linux-generic/odp_priority_queue.c
>         @@ -0,0 +1,283 @@
>         +#define NDEBUG /* Enabled by default by ODP build system */
>         +#include <assert.h>
>         +#include <unistd.h>
>         +#include <stdlib.h>
>         +#include <string.h>
>         +#include <strings.h>
>         +#include <odp_hints.h>
>         +#include <odp_align.h>
>         +#include <odp_debug.h>
>         +
>         +#include "odp_priority_queue_internal.h"
>         +
>         +
>         +#define NUM_CHILDREN 4
>         +#define CHILD(n) (NUM_CHILDREN * (n) + 1)
>         +#define PARENT(n) (((n) - 1) / NUM_CHILDREN)
>         +
>         +/* Internal nodes in the array */
>         +typedef struct heap_node {
>         +       pq_element *elem;
>         +       /* Copy of elem->prio so we avoid unnecessary
>         dereferencing */
>         +       pq_priority_t prio;
>         +} heap_node;
>         +
>         +static void pq_assert_heap(priority_queue *this);
>         +
>         +#define ALIGNMENT(p) (1U << ((unsigned)ffs((int)p) - 1U))
>         +
>         +void priority_queue_con(priority_queue *this, uint32_t
>         _max_elems)
>         +{
>         +       this->max_elems = _max_elems;
>         +       this->reg_elems = 0;
>         +       this->num_elems = 0;
>         +       this->org_ptr = malloc((_max_elems + 64 /
>         sizeof(heap_node)) *
>         +                              sizeof(heap_node));
>         +       if (odp_unlikely(this->org_ptr == NULL)) {
>         +               ODP_ERR("malloc failed\n");
>         +               abort();
>         +       }
>         +       this->heap = this->org_ptr;
>         +       assert((size_t)&this->heap[1] % 8 == 0);
>         +       /* Increment base address until first child (index 1)
>         is cache line */
>         +       /* aligned and thus all children (e.g. index 1-4)
>         stored in the */
>         +       /* same cache line. We are not interested in the
>         alignment of */
>         +       /* heap[0] as this is a lone node */
>         +       while ((size_t)&this->heap[1] % ODP_CACHE_LINE_SIZE !=
>         0) {
>         +               /* Cast to ptr to struct member with the
>         greatest alignment */
>         +               /* requirement */
>         +               this->heap = (heap_node *)((pq_priority_t
>         *)this->heap + 1);
>         +       }
>         +       pq_assert_heap(this);
>         +}
>         +
>         +void priority_queue_des(priority_queue *this)
>         +{
>         +       pq_assert_heap(this);
>         +       free(this->org_ptr);
>         +}
>         +
>         +#ifndef NDEBUG
>         +static uint32_t
>         +pq_assert_elem(priority_queue *this, uint32_t index, bool
>         recurse)
>         +{
>         +       uint32_t num = 1;
>         +       const pq_element *elem = this->heap[index].elem;
>         +       assert(elem->index == index);
>         +       assert(elem->prio == this->heap[index].prio);
>         +       uint32_t child = CHILD(index);
>         +       uint32_t i;
>         +       for (i = 0; i < NUM_CHILDREN; i++, child++) {
>         +               if (valid_index(this, child)) {
>         +  assert(this->heap[child].elem != NULL);
>         +  assert(this->heap[child].prio >= elem->prio);
>         +                       if (recurse)
>         +                               num += pq_assert_elem(this,
>         child, recurse);
>         +               }
>         +       }
>         +       return num;
>         +}
>         +#endif
>         +
>         +static void
>         +pq_assert_heap(priority_queue *this)
>         +{
>         +       (void)this;
>         +#ifndef NDEBUG
>         +       uint32_t num = 0;
>         +       if (odp_likely(this->num_elems != 0)) {
>         +               assert(this->heap[0].elem != NULL);
>         +               num += pq_assert_elem(this, 0, true);
>         +       }
>         +       assert(num == this->num_elems);
>         +       unsigned i;
>         +       for (i = 0; i < this->num_elems; i++) {
>         +               assert(this->heap[i].elem != NULL);
>         +               assert(this->heap[i].prio != INVALID_PRIORITY);
>         +       }
>         +#endif
>         +}
>         +
>         +/* Bubble up to proper position */
>         +void
>         +pq_bubble_up(priority_queue *this, pq_element *elem)
>         +{
>         +       assert(this->heap[elem->index].elem == elem);
>         +       assert(this->heap[elem->index].prio == elem->prio);
>         +       uint32_t current = elem->index;
>         +       pq_priority_t prio = elem->prio;
>         +       assert(current == 0 ||
>         this->heap[PARENT(current)].elem != NULL);
>         +       /* Move up into proper position */
>         +       while (current != 0 &&
>         this->heap[PARENT(current)].prio > prio) {
>         +               uint32_t parent = PARENT(current);
>         +               assert(this->heap[parent].elem != NULL);
>         +               /* Swap current with parent */
>         +               /* 1) Move parent down */
>         +               this->heap[current].elem =
>         this->heap[parent].elem;
>         +               this->heap[current].prio =
>         this->heap[parent].prio;
>         +  this->heap[current].elem->index = current;
>         +               /* 2) Move current up to parent */
>         +               this->heap[parent].elem = elem;
>         +               this->heap[parent].prio = prio;
>         +               this->heap[parent].elem->index = parent;
>         +               /* Continue moving elem until it is in the
>         right place */
>         +               current = parent;
>         +       }
>         +       pq_assert_heap(this);
>         +}
>         +
>         +/* Find the smallest child that is smaller than the specified
>         priority */
>         +/* Very hot function, can we decrease the number of cache
>         misses? */
>         +uint32_t pq_smallest_child(priority_queue *this,
>         +                          uint32_t index,
>         +                          pq_priority_t val)
>         +{
>         +       uint32_t smallest = index;
>         +       uint32_t child = CHILD(index);
>         +#if NUM_CHILDREN == 4
>         +       /* Unroll loop when all children exist */
>         +       if (odp_likely(valid_index(this, child + 3))) {
>         +               if (this->heap[child + 0].prio < val)
>         +                       val = this->heap[smallest = child +
>         0].prio;
>         +               if (this->heap[child + 1].prio < val)
>         +                       val = this->heap[smallest = child +
>         1].prio;
>         +               if (this->heap[child + 2].prio < val)
>         +                       val = this->heap[smallest = child +
>         2].prio;
>         +               if (this->heap[child + 3].prio < val)
>         +                       (void)this->heap[smallest = child +
>         3].prio;
>         +               return smallest;
>         +       }
>         +#endif
>         +       uint32_t i;
>         +       for (i = 0; i < NUM_CHILDREN; i++) {
>         +               if (odp_unlikely(!valid_index(this, child + i)))
>         +                       break;
>         +               if (this->heap[child + i].prio < val) {
>         +                       smallest = child + i;
>         +                       val = this->heap[smallest].prio;
>         +               }
>         +       }
>         +       return smallest;
>         +}
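
Side note on the unrolled branch above: it relies on the four children of
node n occupying the consecutive slots CHILD(n)..CHILD(n)+3, which the index
macros do guarantee. A throwaway check (macros copied from the top of this
file):

#include <assert.h>
#include <stdint.h>

#define NUM_CHILDREN 4
#define CHILD(n) (NUM_CHILDREN * (n) + 1)
#define PARENT(n) (((n) - 1) / NUM_CHILDREN)

/* Throwaway check: PARENT() inverts CHILD() for all four children */
static void check_heap_indexing(void)
{
        uint32_t n, i;

        for (n = 0; n < 1000; n++)
                for (i = 0; i < NUM_CHILDREN; i++)
                        assert(PARENT(CHILD(n) + i) == n);
}
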
>         +
>         +/* Very hot function, can it be optimised? */
>         +void
>         +pq_bubble_down(priority_queue *this, pq_element *elem)
>         +{
>         +       assert(this->heap[elem->index].elem == elem);
>         +       assert(this->heap[elem->index].prio == elem->prio);
>         +       uint32_t current = elem->index;
>         +       pq_priority_t prio = elem->prio;
>         +       for (;;) {
>         +               uint32_t child = pq_smallest_child(this,
>         current, prio);
>         +               if (current == child) {
>         +                       /* No smaller child, we are done */
>         +                       pq_assert_heap(this);
>         +                       return;
>         +               }
>         +               /* Element larger than smaller child, must
>         move down */
>         +               assert(this->heap[child].elem != NULL);
>         +               /* 1) Move child up to current */
>         +               this->heap[current].elem = this->heap[child].elem;
>         +               this->heap[current].prio = this->heap[child].prio;
>         +               /* 2) Move current down to child */
>         +               this->heap[child].elem = elem;
>         +               this->heap[child].prio = prio;
>         +               this->heap[child].elem->index = child;
>         +
>         +  this->heap[current].elem->index = current; /* cache misses! */
>         +               /* Continue moving element until it is in the
>         right place */
>         +               current = child;
>         +       }
>         +}
>         +
>         +bool
>         +pq_register_element(priority_queue *this, pq_element *elem)
>         +{
>         +       if (odp_likely(this->reg_elems < this->max_elems)) {
>         +               elem->pq = this;
>         +               this->reg_elems++;
>         +               return true;
>         +       }
>         +       return false;
>         +}
>         +
>         +void
>         +pq_unregister_element(priority_queue *this, pq_element *elem)
>         +{
>         +       assert(elem->pq == this);
>         +       if (is_active(elem))
>         +               pq_deactivate_element(this, elem);
>         +       this->reg_elems--;
>         +}
>         +
>         +void
>         +pq_activate_element(priority_queue *this, pq_element *elem,
>         pq_priority_t prio)
>         +{
>         +       assert(elem->index == INVALID_INDEX);
>         +       /* Insert element at end */
>         +       uint32_t index = this->num_elems++;
>         +       this->heap[index].elem = elem;
>         +       this->heap[index].prio = prio;
>         +       elem->index = index;
>         +       elem->prio = prio;
>         +       pq_bubble_up(this, elem);
>         +}
>         +
>         +void
>         +pq_deactivate_element(priority_queue *this, pq_element *elem)
>         +{
>         +       assert(elem->pq == this);
>         +       if (odp_likely(is_active(elem))) {
>         +               /* Swap element with last element */
>         +               uint32_t current = elem->index;
>         +               uint32_t last = --this->num_elems;
>         +               if (odp_likely(last != current)) {
>         +                       /* Move last element to current */
>         +                       this->heap[current].elem =
>         this->heap[last].elem;
>         +                       this->heap[current].prio =
>         this->heap[last].prio;
>         +  this->heap[current].elem->index = current;
>         +                       /* Bubble down old 'last' element to
>         its proper place*/
>         +                       if (this->heap[current].prio < elem->prio)
>         +                               pq_bubble_up(this,
>         this->heap[current].elem);
>         +                       else
>         +                               pq_bubble_down(this,
>         this->heap[current].elem);
>         +               }
>         +               elem->index = INVALID_INDEX;
>         +               pq_assert_heap(this);
>         +       }
>         +}
>         +
>         +void
>         +pq_reset_element(priority_queue *this, pq_element *elem,
>         pq_priority_t prio)
>         +{
>         +       assert(prio != INVALID_PRIORITY);
>         +       if (odp_likely(is_active(elem))) {
>         +               assert(prio >= elem->prio);
>         +               elem->prio = prio;
>         +               this->heap[elem->index].prio = prio;/* cache
>         misses here! */
>         +               pq_bubble_down(this, elem);
>         +               pq_assert_heap(this);
>         +       } else {
>         +               pq_activate_element(this, elem, prio);
>         +       }
>         +}
>         +
>         +pq_priority_t pq_first_priority(const priority_queue *this)
>         +{
>         +       return this->num_elems != 0 ? this->heap[0].prio :
>         INVALID_PRIORITY;
>         +}
>         +
>         +pq_element *
>         +pq_release_element(priority_queue *this, pq_priority_t threshold)
>         +{
>         +       if (odp_likely(this->num_elems != 0 &&
>         +                      this->heap[0].prio <= threshold)) {
>         +               pq_element *elem = this->heap[0].elem;
>         +               /* Remove element from heap */
>         +               pq_deactivate_element(this, elem);
>         +               assert(elem->prio <= threshold);
>         +               return elem;
>         +       }
>         +       return NULL;
>         +}
>         diff --git a/platform/linux-generic/odp_timer.c
>         b/platform/linux-generic/odp_timer.c
>         index 313c713..0e5071c 100644
>         --- a/platform/linux-generic/odp_timer.c
>         +++ b/platform/linux-generic/odp_timer.c
>         @@ -4,428 +4,713 @@
>           * SPDX-License-Identifier:     BSD-3-Clause
>           */
>
>         -#include <odp_timer.h>
>         -#include <odp_timer_internal.h>
>         -#include <odp_time.h>
>         -#include <odp_buffer_pool_internal.h>
>         -#include <odp_internal.h>
>         -#include <odp_atomic.h>
>         -#include <odp_spinlock.h>
>         -#include <odp_sync.h>
>         -#include <odp_debug.h>
>         -
>         -#include <signal.h>
>         -#include <time.h>
>         +/**
>         + * @file
>         + *
>         + * ODP timer service
>         + *
>         + */
>
>         +#include <assert.h>
>         +#include <errno.h>
>          #include <string.h>
>         -
>         -#define NUM_TIMERS    1
>         -#define MAX_TICKS     1024
>         -#define MAX_RES       ODP_TIME_SEC
>         -#define MIN_RES       (100*ODP_TIME_USEC)
>         -
>         -
>         -typedef struct {
>         -       odp_spinlock_t lock;
>         -       timeout_t      *list;
>         -} tick_t;
>         -
>         -typedef struct {
>         -       int               allocated;
>         -       volatile int      active;
>         -       volatile uint64_t cur_tick;
>         -       timer_t           timerid;
>         -       odp_timer_t       timer_hdl;
>         -       odp_buffer_pool_t pool;
>         -       uint64_t          resolution_ns;
>         -       uint64_t          max_ticks;
>         -       tick_t            tick[MAX_TICKS];
>         -
>         -} timer_ring_t;
>         -
>         -typedef struct {
>         -       odp_spinlock_t lock;
>         -       int            num_timers;
>         -       timer_ring_t   timer[NUM_TIMERS];
>         -
>         -} timer_global_t;
>         -
>         -/* Global */
>         -static timer_global_t odp_timer;
>         -
>         -static void add_tmo(tick_t *tick, timeout_t *tmo)
>         +#include <stdlib.h>
>         +#include <time.h>
>         +#include <signal.h>
>         +#include "odp_std_types.h"
>         +#include "odp_buffer.h"
>         +#include "odp_buffer_pool.h"
>         +#include "odp_queue.h"
>         +#include "odp_hints.h"
>         +#include "odp_sync.h"
>         +#include "odp_ticketlock.h"
>         +#include "odp_debug.h"
>         +#include "odp_align.h"
>         +#include "odp_shared_memory.h"
>         +#include "odp_hints.h"
>         +#include "odp_internal.h"
>         +#include "odp_time.h"
>         +#include "odp_timer.h"
>         +#include "odp_timer_internal.h"
>         +#include "odp_priority_queue_internal.h"
>         +
>         +/******************************************************************************
>         + * Translation between timeout and timeout header
>         +
>         *****************************************************************************/
>         +
>         +static inline odp_timeout_hdr_t
>         *odp_tmo_to_hdr(odp_timer_tmo_t tmo)
>          {
>         -       odp_spinlock_lock(&tick->lock);
>         -
>         -       tmo->next  = tick->list;
>         -       tick->list = tmo;
>         +       odp_buffer_t buf = odp_buffer_from_timeout(tmo);
>         +       odp_timeout_hdr_t *tmo_hdr = (odp_timeout_hdr_t
>         *)odp_buf_to_hdr(buf);
>         +       return tmo_hdr;
>         +}
>
>         -       odp_spinlock_unlock(&tick->lock);
>         +/******************************************************************************
>         + * odp_timer abstract datatype
>         +
>         *****************************************************************************/
>         +
>         +typedef struct odp_timer_s {
>         +       pq_element pqelem;/* Base class */
>         +       uint64_t req_tmo;/* Requested timeout tick */
>         +       odp_buffer_t tmo_buf;/* ODP_BUFFER_INVALID if timeout
>         enqueued */
>         +       odp_queue_t queue;/* ODP_QUEUE_INVALID if timer is free */
>         +       uint32_t tag;/* Reusing tag as next pointer/index when
>         timer is free */
>         +       uint16_t gencnt;/* Smaller to make place for user_buf
>         flag */
>         +       unsigned int user_buf:1; /* User-defined buffer? */
>         +} odp_timer;
>         +
>         +/* Constructor */
>         +static inline void odp_timer_con(odp_timer *this)
>         +{
>         +       pq_element_con(&this->pqelem);
>         +       this->tmo_buf = ODP_BUFFER_INVALID;
>         +       this->queue = ODP_QUEUE_INVALID;
>         +       this->gencnt = 0;
>          }
>
>         -static timeout_t *rem_tmo(tick_t *tick)
>         +/* Destructor */
>         +static inline void odp_timer_des(odp_timer *this)
>          {
>         -       timeout_t *tmo;
>         +       assert(this->tmo_buf == ODP_BUFFER_INVALID);
>         +       assert(this->queue == ODP_QUEUE_INVALID);
>         +       pq_element_des(&this->pqelem);
>         +}
>
>         -       odp_spinlock_lock(&tick->lock);
>         +/* Setup when timer is allocated */
>         +static void setup(odp_timer *this,
>         +                 odp_queue_t _q,
>         +                 void *_up,
>         +                 odp_buffer_t _tmo)
>         +{
>         +       this->req_tmo = INVALID_PRIORITY;
>         +       this->tmo_buf = _tmo;
>         +       this->queue = _q;
>         +       this->tag = 0;
>         +       this->user_buf = false;
>         +       /* Initialise constant fields of timeout event */
>         +       odp_timeout_hdr_t *tmo_hdr =
>         +  odp_tmo_to_hdr(odp_timeout_from_buffer(this->tmo_buf));
>         +       tmo_hdr->gencnt = this->gencnt;
>         +       tmo_hdr->timer = this;
>         +       tmo_hdr->user_ptr = _up;
>         +       /* tmo_hdr->tag set at expiration time */
>         +       /* tmo_hdr->expiration set at expiration time */
>         +       assert(this->queue != ODP_QUEUE_INVALID);
>         +}
>
>         -       tmo = tick->list;
>         +/* Teardown when timer is freed */
>         +static odp_buffer_t teardown(odp_timer *this)
>         +{
>         +       /* Increase generation count to make any pending
>         timeout(s) orphaned */
>         +       ++this->gencnt;
>         +       odp_buffer_t buf = this->tmo_buf;
>         +       this->tmo_buf = ODP_BUFFER_INVALID;
>         +       this->queue = ODP_QUEUE_INVALID;
>         +       return buf;
>         +}
>
>         -       if (tmo)
>         -               tick->list = tmo->next;
>         +static inline uint32_t get_next_free(odp_timer *this)
>         +{
>         +       assert(this->queue == ODP_QUEUE_INVALID);
>         +       return this->tag;
>         +}
>
>         -       odp_spinlock_unlock(&tick->lock);
>         +static inline void set_next_free(odp_timer *this, uint32_t nf)
>         +{
>         +       assert(this->queue == ODP_QUEUE_INVALID);
>         +       this->tag = nf;
>         +}
>
>         -       if (tmo)
>         -               tmo->next = NULL;
>         +/******************************************************************************
>         + * odp_timer_pool abstract datatype
>         + * Includes alloc and free timer
>         +
>         *****************************************************************************/
>         +
>         +typedef struct odp_timer_pool_s {
>         +       priority_queue pq;
>         +       uint64_t cur_tick;/* Current tick value */
>         +       uint64_t min_tick;/* Current expiration lower bound */
>         +       uint64_t max_tick;/* Current expiration higher bound */
>         +       bool shared;
>         +       odp_ticketlock_t lock;
>         +       const char *name;
>         +       odp_buffer_pool_t buf_pool;
>         +       uint64_t resolution_ns;
>         +       uint64_t min_tmo_tck;
>         +       uint64_t max_tmo_tck;
>         +       odp_timer *timers;
>         +       uint32_t num_alloc;/* Current number of allocated
>         timers */
>         +       uint32_t max_timers;/* Max number of timers */
>         +       uint32_t first_free;/* 0..max_timers-1 => free timer */
>         +       timer_t timerid;
>         +       odp_timer_clk_src_t clk_src;
>         +} odp_timer_pool;
>         +
>         +/* Forward declarations */
>         +static void timer_init(odp_timer_pool *tp);
>         +static void timer_exit(odp_timer_pool *tp);
>         +
>         +static void odp_timer_pool_con(odp_timer_pool *this,
>         +                              const char *_n,
>         +                              odp_buffer_pool_t _bp,
>         +                              uint64_t _r,
>         +                              uint64_t _mint,
>         +                              uint64_t _maxt,
>         +                              uint32_t _mt,
>         +                              bool _s,
>         +                              odp_timer_clk_src_t _cs)
>         +{
>         +       priority_queue_con(&this->pq, _mt);
>         +       this->cur_tick = 0;
>         +       this->shared = _s;
>         +       this->name = strdup(_n);
>         +       this->buf_pool = _bp;
>         +       this->resolution_ns = _r;
>         +       this->min_tmo_tck = odp_timer_ns_to_tick(this, _mint);
>         +       this->max_tmo_tck = odp_timer_ns_to_tick(this, _maxt);
>         +       this->min_tick = this->cur_tick + this->min_tmo_tck;
>         +       this->max_tick = this->cur_tick + this->max_tmo_tck;
>         +       this->num_alloc = 0;
>         +       this->max_timers = _mt;
>         +       this->first_free = 0;
>         +       this->clk_src = _cs;
>         +       this->timers = malloc(sizeof(odp_timer) *
>         this->max_timers);
>         +       if (this->timers == NULL)
>         +               ODP_ABORT("%s: malloc failed\n", _n);
>         +       uint32_t i;
>         +       for (i = 0; i < this->max_timers; i++)
>         +  odp_timer_con(&this->timers[i]);
>         +       for (i = 0; i < this->max_timers; i++)
>         +  set_next_free(&this->timers[i], i + 1);
>         +       odp_ticketlock_init(&this->lock);
>         +       if (this->clk_src == ODP_CLOCK_CPU)
>         +               timer_init(this);
>         +       /* Make sure timer pool initialisation is globally
>         observable */
>         +       /* before we return a pointer to it */
>         +       odp_sync_stores();
>         +}
>
>         -       return tmo;
>         +static odp_timer_pool *odp_timer_pool_new(
>         +       const char *_n,
>         +       odp_buffer_pool_t _bp,
>         +       uint64_t _r,
>         +       uint64_t _mint,
>         +       uint64_t _maxt,
>         +       uint32_t _mt,
>         +       bool _s,
>         +       odp_timer_clk_src_t _cs)
>         +{
>         +       odp_timer_pool *this = malloc(sizeof(odp_timer_pool));
>         +       if (odp_unlikely(this == NULL))
>         +               ODP_ABORT("%s: timer pool malloc failed\n", _n);
>         +       odp_timer_pool_con(this, _n, _bp, _r, _mint, _maxt,
>         _mt, _s, _cs);
>         +       return this;
>          }
>
>         -/**
>         - * Search and delete tmo entry from timeout list
>         - * return -1 : on error.. handle not in list
>         - *             0 : success
>         - */
>         -static int find_and_del_tmo(timeout_t **tmo, odp_timer_tmo_t
>         handle)
>         +static void odp_timer_pool_des(odp_timer_pool *this)
>          {
>         -       timeout_t *cur, *prev;
>         -       prev = NULL;
>         +       if (this->shared)
>         +  odp_ticketlock_lock(&this->lock);
>         +       if (this->num_alloc != 0) {
>         +               /* It's a programming error to attempt to
>         destroy a */
>         +               /* timer pool which is still in use */
>         +               ODP_ABORT("%s: timers in use\n", this->name);
>         +       }
>         +       if (this->clk_src == ODP_CLOCK_CPU)
>         +               timer_exit(this);
>         +       uint32_t i;
>         +       for (i = 0; i < this->max_timers; i++)
>         +  odp_timer_des(&this->timers[i]);
>         +       free(this->timers);
>         +       priority_queue_des(&this->pq);
>         +       odp_sync_stores();
>         +}
>
>         -       for (cur = *tmo; cur != NULL; prev = cur, cur =
>         cur->next) {
>         -               if (cur->tmo_buf == handle) {
>         -                       if (prev == NULL)
>         -                               *tmo = cur->next;
>         -                       else
>         -                               prev->next = cur->next;
>         +static void odp_timer_pool_del(odp_timer_pool *this)
>         +{
>         +       odp_timer_pool_des(this);
>         +       free(this);
>         +}
>
>         -                       break;
>         +static inline odp_timer *timer_alloc(odp_timer_pool *this,
>         +                                    odp_queue_t queue,
>         +                                    void *user_ptr,
>         +                                    odp_buffer_t tmo_buf)
>         +{
>         +       odp_timer *tim = ODP_TIMER_INVALID;
>         +       if (odp_likely(this->shared))
>         +  odp_ticketlock_lock(&this->lock);
>         +       if (odp_likely(this->num_alloc < this->max_timers)) {
>         +               this->num_alloc++;
>         +               /* Remove first unused timer from free list */
>         +               assert(this->first_free != this->max_timers);
>         +               tim = &this->timers[this->first_free];
>         +               this->first_free = get_next_free(tim);
>         +               /* Insert timer into priority queue */
>         +               if (odp_unlikely(!pq_register_element(&this->pq,
>         +  &tim->pqelem))) {
>         +                       /* Unexpected internal error */
>         +                       abort();
>                         }
>         +               /* Create timer */
>         +               setup(tim, queue, user_ptr, tmo_buf);
>         +       } else {
>         +               errno = ENFILE; /* Reusing file table overflow */
>                 }
>         -
>         -       if (!cur)
>         -               /* couldn't find tmo in list */
>         -               return -1;
>         -
>         -       /* application to free tmo_buf provided by
>         absolute_tmo call */
>         -       return 0;
>         +       if (odp_likely(this->shared))
>         +  odp_ticketlock_unlock(&this->lock);
>         +       return tim;
>          }
>
>         -int odp_timer_cancel_tmo(odp_timer_t timer_hdl,
>         odp_timer_tmo_t tmo)
>         +static inline void timer_free(odp_timer_pool *this, odp_timer
>         *tim)
>          {
>         -       int id;
>         -       int tick_idx;
>         -       timeout_t *cancel_tmo;
>         -       odp_timeout_hdr_t *tmo_hdr;
>         -       tick_t *tick;
>         -
>         -       /* get id */
>         -       id = (int)timer_hdl - 1;
>         -
>         -       tmo_hdr = odp_timeout_hdr((odp_timeout_t) tmo);
>         -       /* get tmo_buf to cancel */
>         -       cancel_tmo = &tmo_hdr->meta;
>         +       if (odp_likely(this->shared))
>         +  odp_ticketlock_lock(&this->lock);
>         +       if (odp_unlikely(tim->queue == ODP_QUEUE_INVALID))
>         +               ODP_ABORT("Invalid timer %p\n", tim);
>         +       /* Destroy timer */
>         +       odp_buffer_t buf = teardown(tim);
>         +       /* Remove timer from priority queue */
>         +       pq_unregister_element(&this->pq, &tim->pqelem);
>         +       /* Insert timer into free list */
>         +       set_next_free(tim, this->first_free);
>         +       this->first_free = (tim - &this->timers[0]) /
>         sizeof(this->timers[0]);
>         +       assert(this->num_alloc != 0);
>         +       this->num_alloc--;
>         +       if (odp_likely(this->shared))
>         +  odp_ticketlock_unlock(&this->lock);
>         +       if (buf != ODP_BUFFER_INVALID)
>         +               odp_buffer_free(buf);
>         +}
>
>         -       tick_idx = cancel_tmo->tick;
>         -       tick = &odp_timer.timer[id].tick[tick_idx];
>         +/******************************************************************************
>         + * Operations on timers
>         + * reset/reset_w_buf/cancel timer, return timeout
>         +
>         *****************************************************************************/
>
>         -       odp_spinlock_lock(&tick->lock);
>         -       /* search and delete tmo from tick list */
>         -       if (find_and_del_tmo(&tick->list, tmo) != 0) {
>         -  odp_spinlock_unlock(&tick->lock);
>         -               ODP_DBG("Couldn't find the tmo (%d) in tick
>         list\n", (int)tmo);
>         -               return -1;
>         +static inline void timer_expire(odp_timer *tim)
>         +{
>         +       assert(tim->req_tmo != INVALID_PRIORITY);
>         +       /* Timer expired, is there actually any timeout event */
>         +       /* we can enqueue? */
>         +       if (odp_likely(tim->tmo_buf != ODP_BUFFER_INVALID)) {
>         +               /* Swap out timeout buffer */
>         +               odp_buffer_t buf = tim->tmo_buf;
>         +               tim->tmo_buf = ODP_BUFFER_INVALID;
>         +               if (odp_likely(!tim->user_buf)) {
>         +                       odp_timeout_hdr_t *tmo_hdr =
>         +  odp_tmo_to_hdr(odp_timeout_from_buffer(buf));
>         +                       /* Copy tag and requested expiration
>         tick from timer */
>         +                       tmo_hdr->tag = tim->tag;
>         +                       tmo_hdr->expiration = tim->req_tmo;
>         +               }
>         +               /* Else don't touch user-defined buffer */
>         +               int rc = odp_queue_enq(tim->queue, buf);
>         +               if (odp_unlikely(rc != 0))
>         +                       ODP_ABORT("Failed to enqueue timeout
>         buffer (%d)\n",
>         +                                 rc);
>         +               /* Mark timer as inactive */
>         +               tim->req_tmo = INVALID_PRIORITY;
>                 }
>         -       odp_spinlock_unlock(&tick->lock);
>         -
>         -       return 0;
>         +       /* No, timeout event already enqueued or unavailable */
>         +       /* Keep timer active, odp_timer_return_tmo() will
>         patch up */
>          }
>
>         -static void notify_function(union sigval sigval)
>         +static odp_timer_set_t timer_reset(odp_timer_pool *tp,
>         +                                  odp_timer *tim,
>         +                                  uint64_t abs_tck)
>          {
>         -       uint64_t cur_tick;
>         -       timeout_t *tmo;
>         -       tick_t *tick;
>         -       timer_ring_t *timer;
>         +       assert(tim->user_buf == false);
>         +       if (odp_unlikely(abs_tck < tp->min_tick))
>         +               return ODP_TIMER_SET_TOOEARLY;
>         +       if (odp_unlikely(abs_tck > tp->max_tick))
>         +               return ODP_TIMER_SET_TOOLATE;
>         +
>         +       if (odp_likely(tp->shared))
>         +  odp_ticketlock_lock(&tp->lock);
>         +
>         +       if (odp_unlikely(tim->queue == ODP_QUEUE_INVALID))
>         +               ODP_ABORT("Invalid timer %p\n", tim);
>         +       if (odp_unlikely(tim->user_buf))
>         +               ODP_ABORT("Timer %p has user buffer\n", tim);
>         +       /* Increase timer tag to make any pending timeout stale */
>         +       tim->tag++;
>         +       /* Save requested timeout */
>         +       tim->req_tmo = abs_tck;
>         +       /* Update timer position in priority queue */
>         +       pq_reset_element(&tp->pq, &tim->pqelem, abs_tck);
>         +
>         +       if (odp_likely(tp->shared))
>         +  odp_ticketlock_unlock(&tp->lock);
>         +       return ODP_TIMER_SET_SUCCESS;
>         +}
>
>         -       timer = sigval.sival_ptr;
>         +static odp_timer_set_t timer_reset_w_buf(odp_timer_pool *tp,
>         +               odp_timer *tim,
>         +               uint64_t abs_tck,
>         +               odp_buffer_t user_buf)
>         +{
>         +       if (odp_unlikely(abs_tck < tp->min_tick))
>         +               return ODP_TIMER_SET_TOOEARLY;
>         +       if (odp_unlikely(abs_tck > tp->max_tick))
>         +               return ODP_TIMER_SET_TOOLATE;
>         +
>         +       if (odp_likely(tp->shared))
>         +  odp_ticketlock_lock(&tp->lock);
>         +
>         +       if (odp_unlikely(tim->queue == ODP_QUEUE_INVALID))
>         +               ODP_ABORT("Invalid timer %p\n", tim);
>         +       /* Increase timer tag to make any pending timeout stale */
>         +       tim->tag++;
>         +       /* Save requested timeout */
>         +       tim->req_tmo = abs_tck;
>         +       /* Set flag indicating presence of user defined buffer */
>         +       tim->user_buf = true;
>         +       /* Swap in new buffer, save any old buffer pointer */
>         +       odp_buffer_t old_buf = tim->tmo_buf;
>         +       tim->tmo_buf = user_buf;
>         +       /* Update timer position in priority queue */
>         +       pq_reset_element(&tp->pq, &tim->pqelem, abs_tck);
>         +
>         +       if (odp_likely(tp->shared))
>         +  odp_ticketlock_unlock(&tp->lock);
>         +
>         +       /* Free old buffer if present */
>         +       if (odp_unlikely(old_buf != ODP_BUFFER_INVALID))
>         +               odp_buffer_free(old_buf);
>         +       return ODP_TIMER_SET_SUCCESS;
>         +}
>
>         -       if (timer->active == 0) {
>         -               ODP_DBG("Timer (%u) not active\n",
>         timer->timer_hdl);
>         -               return;
>         +static inline void timer_cancel(odp_timer_pool *tp,
>         +                               odp_timer *tim)
>         +{
>         +       odp_buffer_t old_buf = ODP_BUFFER_INVALID;
>         +       if (odp_likely(tp->shared))
>         +  odp_ticketlock_lock(&tp->lock);
>         +
>         +       if (odp_unlikely(tim->queue == ODP_QUEUE_INVALID))
>         +               ODP_ABORT("Invalid timer %p\n", tim);
>         +       if (odp_unlikely(tim->user_buf)) {
>         +               /* Swap out old user buffer */
>         +               old_buf = tim->tmo_buf;
>         +               tim->tmo_buf = ODP_BUFFER_INVALID;
>         +               /* tim->user_buf stays true */
>                 }
>         +       /* Else a normal timer (no user-defined buffer) */
>         +       /* Increase timer tag to make any pending timeout stale */
>         +       tim->tag++;
>         +       /* Clear requested timeout, mark timer inactive */
>         +       tim->req_tmo = INVALID_PRIORITY;
>         +       /* Remove timer from the priority queue */
>         +       pq_deactivate_element(&tp->pq, &tim->pqelem);
>         +
>         +       if (odp_likely(tp->shared))
>         +  odp_ticketlock_unlock(&tp->lock);
>         +       /* Free user-defined buffer if present */
>         +       if (odp_unlikely(old_buf != ODP_BUFFER_INVALID))
>         +               odp_buffer_free(old_buf);
>         +}
>
>         -       /* ODP_DBG("Tick\n"); */
>         -
>         -       cur_tick = timer->cur_tick++;
>         -
>         -       odp_sync_stores();
>         +static inline void timer_return(odp_timer_pool *tp,
>         +                               odp_timer *tim,
>         +                               odp_timer_tmo_t tmo,
>         +                               const odp_timeout_hdr_t *tmo_hdr)
>         +{
>         +       odp_buffer_t tmo_buf = odp_buffer_from_timeout(tmo);
>         +       if (odp_likely(tp->shared))
>         +  odp_ticketlock_lock(&tp->lock);
>         +       if (odp_unlikely(tim->user_buf))
>         +               ODP_ABORT("Timer %p has user-defined
>         buffer\n", tim);
>         +       if (odp_likely(tmo_hdr->gencnt == tim->gencnt)) {
>         +               assert(tim->tmo_buf == ODP_BUFFER_INVALID);
>         +               /* Save returned buffer for use when timer
>         expires next time */
>         +               tim->tmo_buf = tmo_buf;
>         +               tmo_buf = ODP_BUFFER_INVALID;
>         +               /* Check if timer is active and should have expired */
>         +               if (odp_unlikely(tim->req_tmo != INVALID_PRIORITY &&
>         +                                tim->req_tmo <= tp->cur_tick)) {
>         +                       /* Expire timer now since we have restored the timeout
>         +                          buffer */
>         +                       timer_expire(tim);
>         +               }
>         +               /* Else timer inactive or expires in the future */
>         +       }
>         +       /* Else timeout orphaned, free buffer later */
>         +       if (odp_likely(tp->shared))
>         +               odp_ticketlock_unlock(&tp->lock);
>         +       if (odp_unlikely(tmo_buf != ODP_BUFFER_INVALID))
>         +               odp_buffer_free(tmo_buf);
>         +}
>
>         -       tick = &timer->tick[cur_tick % MAX_TICKS];
>         +/* Non-public so not in odp_timer.h but externally visible, must declare
>         + * somewhere */
>         +unsigned odp_timer_pool_expire(odp_timer_pool_t tpid, uint64_t tick);
>
>         -       while ((tmo = rem_tmo(tick)) != NULL) {
>         -               odp_queue_t  queue;
>         -               odp_buffer_t buf;
>         +unsigned odp_timer_pool_expire(odp_timer_pool_t tpid, uint64_t tick)
>         +{
>         +       if (odp_likely(tpid->shared))
>         +               odp_ticketlock_lock(&tpid->lock);
>         +
>         +       unsigned nexp = 0;
>         +       odp_timer_t tim;
>         +       tpid->cur_tick = tick;
>         +       tpid->min_tick = tick + tpid->min_tmo_tck;
>         +       tpid->max_tick = tick + tpid->max_tmo_tck;
>         +       while ((tim = (odp_timer_t)pq_release_element(&tpid->pq, tick)) !=
>         +              ODP_TIMER_INVALID) {
>         +               assert(get_prio(&tim->pqelem) <= tick);
>         +               timer_expire(tim);
>         +               nexp++;
>         +       }
>
>         -               queue = tmo->queue;
>         -               buf   = tmo->buf;
>         +       if (odp_likely(tpid->shared))
>         +               odp_ticketlock_unlock(&tpid->lock);
>         +       return nexp;
>         +}
>
>         -               if (buf != tmo->tmo_buf)
>         -                       odp_buffer_free(tmo->tmo_buf);
>         +/******************************************************************************
>         + * POSIX timer support
>         + * Functions that use Linux/POSIX per-process timers and related facilities
>         + *****************************************************************************/
>
>         -               odp_queue_enq(queue, buf);
>         -       }
>         +static void timer_notify(sigval_t sigval)
>         +{
>         +       odp_timer_pool *tp = (odp_timer_pool *)sigval.sival_ptr;
>         +       uint64_t new_tick = tp->cur_tick + 1;
>         +       (void)odp_timer_pool_expire(tp, new_tick);
>          }
>
>         -static void timer_start(timer_ring_t *timer)
>         +static void timer_init(odp_timer_pool *tp)
>          {
>                 struct sigevent   sigev;
>                 struct itimerspec ispec;
>                 uint64_t res, sec, nsec;
>
>         -       ODP_DBG("\nTimer (%u) starts\n", timer->timer_hdl);
>         +       ODP_DBG("Creating POSIX timer for timer pool %s, period %"
>         +               PRIu64" ns\n", tp->name, tp->resolution_ns);
>
>                 memset(&sigev, 0, sizeof(sigev));
>                 memset(&ispec, 0, sizeof(ispec));
>
>                 sigev.sigev_notify          = SIGEV_THREAD;
>         -       sigev.sigev_notify_function = notify_function;
>         -       sigev.sigev_value.sival_ptr = timer;
>         +       sigev.sigev_notify_function = timer_notify;
>         +       sigev.sigev_value.sival_ptr = tp;
>
>         -       if (timer_create(CLOCK_MONOTONIC, &sigev, &timer->timerid)) {
>         -               ODP_DBG("Timer create failed\n");
>         -               return;
>         -       }
>         +       if (timer_create(CLOCK_MONOTONIC, &sigev, &tp->timerid))
>         +               ODP_ABORT("timer_create() returned error %s\n",
>         +                         strerror(errno));
>
>         -       res  = timer->resolution_ns;
>         +       res  = tp->resolution_ns;
>                 sec  = res / ODP_TIME_SEC;
>         -       nsec = res - sec*ODP_TIME_SEC;
>         +       nsec = res - sec * ODP_TIME_SEC;
>
>                 ispec.it_interval.tv_sec  = (time_t)sec;
>                 ispec.it_interval.tv_nsec = (long)nsec;
>                 ispec.it_value.tv_sec     = (time_t)sec;
>                 ispec.it_value.tv_nsec    = (long)nsec;
>
>         -       if (timer_settime(timer->timerid, 0, &ispec, NULL)) {
>         -               ODP_DBG("Timer set failed\n");
>         -               return;
>         -       }
>         -
>         -       return;
>         +       if (timer_settime(&tp->timerid, 0, &ispec, NULL))
>         +               ODP_ABORT("timer_settime() returned error %s\n",
>         +                         strerror(errno));
>          }
>
>         -int odp_timer_init_global(void)
>         +static void timer_exit(odp_timer_pool *tp)
>          {
>         -       ODP_DBG("Timer init ...");
>         -
>         -       memset(&odp_timer, 0, sizeof(timer_global_t));
>         -
>         -       odp_spinlock_init(&odp_timer.lock);
>         -
>         -       ODP_DBG("done\n");
>         -
>         -       return 0;
>         +       if (timer_delete(tp->timerid) != 0)
>         +               ODP_ABORT("timer_delete() returned error %s\n",
>         +                         strerror(errno));
>          }
>
>         -int odp_timer_disarm_all(void)
>         +/******************************************************************************
>         + * Public API functions
>         + * Some parameter checks and error messages
>         + * No modifications of internal state
>         + *****************************************************************************/
>         +odp_timer_pool_t
>         +odp_timer_pool_create(const char *name,
>         +                     odp_buffer_pool_t buf_pool,
>         +                     uint64_t resolution_ns,
>         +                     uint64_t min_timeout,
>         +                     uint64_t max_timeout,
>         +                     uint32_t num_timers,
>         +                     bool shared,
>         +                     odp_timer_clk_src_t clk_src)
>          {
>         -       int timers;
>         -       struct itimerspec ispec;
>         -
>         -       odp_spinlock_lock(&odp_timer.lock);
>         -
>         -       timers = odp_timer.num_timers;
>         -
>         -       ispec.it_interval.tv_sec  = 0;
>         -       ispec.it_interval.tv_nsec = 0;
>         -       ispec.it_value.tv_sec     = 0;
>         -       ispec.it_value.tv_nsec    = 0;
>         -
>         -       for (; timers >= 0; timers--) {
>         -               if (timer_settime(odp_timer.timer[timers].timerid,
>         -                                 0, &ispec, NULL)) {
>         -                       ODP_DBG("Timer reset failed\n");
>         -                       odp_spinlock_unlock(&odp_timer.lock);
>         -                       return -1;
>         -               }
>         -               odp_timer.num_timers--;
>         -       }
>         -
>         -       odp_spinlock_unlock(&odp_timer.lock);
>         -
>         -       return 0;
>         +       /* Verify that buffer pool can be used for timeouts */
>         +       odp_buffer_t buf = odp_buffer_alloc(buf_pool);
>         +       if (buf == ODP_BUFFER_INVALID)
>         +               ODP_ABORT("%s: Failed to allocate buffer\n", name);
>         +       if (odp_buffer_type(buf) != ODP_BUFFER_TYPE_TIMEOUT)
>         +               ODP_ABORT("%s: Buffer pool wrong type\n", name);
>         +       odp_buffer_free(buf);
>         +       odp_timer_pool_t tp = odp_timer_pool_new(name, buf_pool, resolution_ns,
>         +                             min_timeout, max_timeout, num_timers,
>         +                             shared, clk_src);
>         +       return tp;
>          }
>
>         -odp_timer_t odp_timer_create(const char *name, odp_buffer_pool_t pool,
>         -                            uint64_t resolution_ns, uint64_t min_ns,
>         -                            uint64_t max_ns)
>         +void odp_timer_pool_start(void)
>          {
>         -       uint32_t id;
>         -       timer_ring_t *timer;
>         -       odp_timer_t timer_hdl;
>         -       int i;
>         -       uint64_t max_ticks;
>         -       (void) name;
>         -
>         -       if (resolution_ns < MIN_RES)
>         -               resolution_ns = MIN_RES;
>         -
>         -       if (resolution_ns > MAX_RES)
>         -               resolution_ns = MAX_RES;
>         -
>         -       max_ticks = max_ns / resolution_ns;
>         -
>         -       if (max_ticks > MAX_TICKS) {
>         -               ODP_DBG("Maximum timeout too long: %"PRIu64" ticks\n",
>         -                       max_ticks);
>         -               return ODP_TIMER_INVALID;
>         -       }
>         -
>         -       if (min_ns < resolution_ns) {
>         -               ODP_DBG("Min timeout %"PRIu64" ns < resolution %"PRIu64" ns\n",
>         -                       min_ns, resolution_ns);
>         -               return ODP_TIMER_INVALID;
>         -       }
>         -
>         -       odp_spinlock_lock(&odp_timer.lock);
>         -
>         -       if (odp_timer.num_timers >= NUM_TIMERS) {
>         -               odp_spinlock_unlock(&odp_timer.lock);
>         -               ODP_DBG("All timers allocated\n");
>         -               return ODP_TIMER_INVALID;
>         -       }
>         -
>         -       for (id = 0; id < NUM_TIMERS; id++) {
>         -               if (odp_timer.timer[id].allocated == 0)
>         -                       break;
>         -       }
>         -
>         -       timer = &odp_timer.timer[id];
>         -       timer->allocated = 1;
>         -       odp_timer.num_timers++;
>         -
>         -       odp_spinlock_unlock(&odp_timer.lock);
>         -
>         -       timer_hdl = id + 1;
>         -
>         -       timer->timer_hdl     = timer_hdl;
>         -       timer->pool          = pool;
>         -       timer->resolution_ns = resolution_ns;
>         -       timer->max_ticks     = MAX_TICKS;
>         -
>         -       for (i = 0; i < MAX_TICKS; i++) {
>         -               odp_spinlock_init(&timer->tick[i].lock);
>         -               timer->tick[i].list = NULL;
>         -       }
>         -
>         -       timer->active = 1;
>         -       odp_sync_stores();
>         -
>         -       timer_start(timer);
>         +       /* Nothing to do here, timer pools are started by the create call */
>         +}
>
>         -       return timer_hdl;
>         +void odp_timer_pool_destroy(odp_timer_pool_t tpid)
>         +{
>         +       odp_timer_pool_del(tpid);
>          }
>
>         -odp_timer_tmo_t odp_timer_absolute_tmo(odp_timer_t timer_hdl, uint64_t tmo_tick,
>         -                                      odp_queue_t queue, odp_buffer_t buf)
>         +uint64_t odp_timer_tick_to_ns(odp_timer_pool_t tpid, uint64_t ticks)
>          {
>         -       int id;
>         -       uint64_t tick;
>         -       uint64_t cur_tick;
>         -       timeout_t *new_tmo;
>         -       odp_buffer_t tmo_buf;
>         -       odp_timeout_hdr_t *tmo_hdr;
>         -       timer_ring_t *timer;
>         +       return ticks * tpid->resolution_ns;
>         +}
>
>         -       id = (int)timer_hdl - 1;
>         -       timer = &odp_timer.timer[id];
>         +uint64_t odp_timer_ns_to_tick(odp_timer_pool_t tpid, uint64_t ns)
>         +{
>         +       return (uint64_t)(ns / tpid->resolution_ns);
>         +}
>
>         -       cur_tick = timer->cur_tick;
>         -       if (tmo_tick <= cur_tick) {
>         -               ODP_DBG("timeout too close\n");
>         -               return ODP_TIMER_TMO_INVALID;
>         -       }
>         +uint64_t odp_timer_current_tick(odp_timer_pool_t tpid)
>         +{
>         +       return tpid->cur_tick;
>         +}
>
>         -       if ((tmo_tick - cur_tick) > MAX_TICKS) {
>         -               ODP_DBG("timeout too far: cur %"PRIu64" tmo %"PRIu64"\n",
>         -                       cur_tick, tmo_tick);
>         -               return ODP_TIMER_TMO_INVALID;
>         +uintptr_t odp_timer_pool_query_conf(odp_timer_pool_t tpid,
>         +                                    odp_timer_pool_conf_t item)
>         +{
>         +       switch (item) {
>         +       case ODP_TIMER_NAME:
>         +               return (uintptr_t)(tpid->name);
>         +       case ODP_TIMER_RESOLUTION:
>         +               return tpid->resolution_ns;
>         +       case ODP_TIMER_MIN_TICKS:
>         +               return tpid->min_tmo_tck;
>         +       case ODP_TIMER_MAX_TICKS:
>         +               return tpid->max_tmo_tck;
>         +       case ODP_TIMER_NUM_TIMERS:
>         +               return tpid->max_timers;
>         +       case ODP_TIMER_SHARED:
>         +               return tpid->shared;
>         +       default:
>         +               return 0;
>                 }
>         +}
>
>         -       tick = tmo_tick % MAX_TICKS;
>         -
>         -       tmo_buf = odp_buffer_alloc(timer->pool);
>         -       if (tmo_buf == ODP_BUFFER_INVALID) {
>         -               ODP_DBG("tmo buffer alloc failed\n");
>         -               return ODP_TIMER_TMO_INVALID;
>         +odp_timer_t odp_timer_alloc(odp_timer_pool_t tpid,
>         +                           odp_queue_t queue,
>         +                           void *user_ptr)
>         +{
>         +       /* We check this because ODP_QUEUE_INVALID is used */
>         +       /* to indicate a free timer */
>         +       if (odp_unlikely(queue == ODP_QUEUE_INVALID))
>         +               ODP_ABORT("%s: Invalid queue handle\n", tpid->name);
>         +       odp_buffer_t tmo_buf = odp_buffer_alloc(tpid->buf_pool);
>         +       if (odp_likely(tmo_buf != ODP_BUFFER_INVALID)) {
>         +               odp_timer *tim = timer_alloc(tpid, queue, user_ptr, tmo_buf);
>         +               if (odp_likely(tim != ODP_TIMER_INVALID)) {
>         +                       /* Success */
>         +                       assert(tim->queue != ODP_QUEUE_INVALID);
>         +                       return tim;
>         +               }
>         +               odp_buffer_free(tmo_buf);
>                 }
>         +       /* Else failed to allocate timeout event */
>         +       /* errno set by odp_buffer_alloc() or timer_alloc () */
>         +       return ODP_TIMER_INVALID;
>         +}
>
>         -       tmo_hdr = odp_timeout_hdr((odp_timeout_t) tmo_buf);
>         -       new_tmo = &tmo_hdr->meta;
>         -
>         -       new_tmo->timer_id = id;
>         -       new_tmo->tick     = (int)tick;
>         -       new_tmo->tmo_tick = tmo_tick;
>         -       new_tmo->queue    = queue;
>         -       new_tmo->tmo_buf  = tmo_buf;
>         -
>         -       if (buf != ODP_BUFFER_INVALID)
>         -               new_tmo->buf = buf;
>         -       else
>         -               new_tmo->buf = tmo_buf;
>         -
>         -       add_tmo(&timer->tick[tick], new_tmo);
>         -
>         -       return tmo_buf;
>         +void odp_timer_free(odp_timer_t tim)
>         +{
>         +       odp_timer_pool *tp = (odp_timer_pool *)get_pq(&tim->pqelem);
>         +       timer_free(tp, tim);
>          }
>
>         -uint64_t odp_timer_tick_to_ns(odp_timer_t timer_hdl, uint64_t ticks)
>         +odp_timer_set_t odp_timer_set_abs_w_buf(odp_timer_t tim,
>         +                                       uint64_t abs_tck,
>         +                                       odp_buffer_t user_buf)
>          {
>         -       uint32_t id;
>         +       odp_timer_pool *tp = (odp_timer_pool *)get_pq(&tim->pqelem);
>         +       odp_timer_set_t rc = timer_reset_w_buf(tp, tim, abs_tck, user_buf);
>         +       return rc;
>         +}
>
>         -       id = timer_hdl - 1;
>         -       return ticks * odp_timer.timer[id].resolution_ns;
>         +odp_timer_set_t odp_timer_set_abs(odp_timer_t tim, uint64_t abs_tck)
>         +{
>         +       odp_timer_pool *tp = (odp_timer_pool *)get_pq(&tim->pqelem);
>         +       odp_timer_set_t rc = timer_reset(tp, tim, abs_tck);
>         +       return rc;
>          }
>
>         -uint64_t odp_timer_ns_to_tick(odp_timer_t timer_hdl, uint64_t ns)
>         +odp_timer_set_t odp_timer_set_rel_w_buf(odp_timer_t tim,
>         +                                       uint64_t rel_tck,
>         +                                       odp_buffer_t user_buf)
>          {
>         -       uint32_t id;
>         +       odp_timer_pool *tp = (odp_timer_pool *)get_pq(&tim->pqelem);
>         +       odp_timer_set_t rc = timer_reset_w_buf(tp, tim, tp->cur_tick + rel_tck,
>         +                                              user_buf);
>         +       return rc;
>         +}
>
>         -       id = timer_hdl - 1;
>         -       return ns / odp_timer.timer[id].resolution_ns;
>         +odp_timer_set_t odp_timer_set_rel(odp_timer_t tim, uint64_t rel_tck)
>         +{
>         +       odp_timer_pool *tp = (odp_timer_pool *)get_pq(&tim->pqelem);
>         +       odp_timer_set_t rc = timer_reset(tp, tim, tp->cur_tick + rel_tck);
>         +       return rc;
>          }
>
>         -uint64_t odp_timer_resolution(odp_timer_t timer_hdl)
>         +void odp_timer_cancel(odp_timer_t tim)
>          {
>         -       uint32_t id;
>         +       odp_timer_pool *tp = (odp_timer_pool *)get_pq(&tim->pqelem);
>         +       timer_cancel(tp, tim);
>         +}
>
>         -       id = timer_hdl - 1;
>         -       return odp_timer.timer[id].resolution_ns;
>         +void odp_timer_return_tmo(odp_timer_tmo_t tmo)
>         +{
>         +       const odp_timeout_hdr_t *tmo_hdr = odp_tmo_to_hdr(tmo);
>         +       odp_timer *parent_tim = tmo_hdr->timer;
>         +       odp_timer_pool *tp = (odp_timer_pool *)get_pq(&parent_tim->pqelem);
>         +       timer_return(tp, parent_tim, tmo, tmo_hdr);
>          }
>
>         -uint64_t odp_timer_maximum_tmo(odp_timer_t timer_hdl)
>         +odp_timer_tmo_status_t odp_timer_tmo_status(odp_timer_tmo_t tmo)
>          {
>         -       uint32_t id;
>         +       const odp_timeout_hdr_t *tmo_hdr = odp_tmo_to_hdr(tmo);
>         +       odp_timer *parent_tim = tmo_hdr->timer;
>
>         -       id = timer_hdl - 1;
>         -       return odp_timer.timer[id].max_ticks;
>         +       if (odp_unlikely(tmo_hdr->gencnt != parent_tim->gencnt)) {
>         +               /* Generation counters differ => timer has been freed */
>         +               return ODP_TMO_ORPHAN;
>         +       }
>         +       /* Else generation counters match => parent timer exists */
>         +
>         +       if (odp_likely(parent_tim->tag == tmo_hdr->tag))
>         +               return ODP_TMO_FRESH;
>         +       else
>         +               return ODP_TMO_STALE;
>          }
>
>         -uint64_t odp_timer_current_tick(odp_timer_t timer_hdl)
>         +odp_timer_t odp_timer_handle(odp_timer_tmo_t tmo)
>          {
>         -       uint32_t id;
>         +       odp_timeout_hdr_t *tmo_hdr = odp_tmo_to_hdr(tmo);
>         +       odp_timer_t parent_tim = tmo_hdr->timer;
>         +       if (odp_likely(tmo_hdr->gencnt == parent_tim->gencnt))
>         +               return parent_tim;
>         +       else
>         +               return ODP_TIMER_INVALID;
>         +}
>
>         -       id = timer_hdl - 1;
>         -       return odp_timer.timer[id].cur_tick;
>         +uint64_t odp_timer_expiration(odp_timer_tmo_t tmo)
>         +{
>         +       odp_timeout_hdr_t *tmo_hdr = odp_tmo_to_hdr(tmo);
>         +       return tmo_hdr->expiration;
>          }
>
>         -odp_timeout_t odp_timeout_from_buffer(odp_buffer_t buf)
>         +void *odp_timer_userptr(odp_timer_tmo_t tmo)
>          {
>         -       return (odp_timeout_t) buf;
>         +       odp_timeout_hdr_t *tmo_hdr = odp_tmo_to_hdr(tmo);
>         +       return tmo_hdr->user_ptr;
>          }
>
>         -uint64_t odp_timeout_tick(odp_timeout_t tmo)
>         +int odp_timer_init_global(void)
>          {
>         -       odp_timeout_hdr_t *tmo_hdr = odp_timeout_hdr(tmo);
>         -       return tmo_hdr->meta.tmo_tick;
>         +       return 0;
>          }
>         diff --git a/test/api_test/odp_timer_ping.c b/test/api_test/odp_timer_ping.c
>         index 7406a45..2617b5c 100644
>         --- a/test/api_test/odp_timer_ping.c
>         +++ b/test/api_test/odp_timer_ping.c
>         @@ -20,6 +20,8 @@
>           *    Otherwise timeout may happen bcz of slow nw speed
>           */
>
>         +#include <assert.h>
>         +#include <stdlib.h>
>          #include <unistd.h>
>          #include <fcntl.h>
>          #include <errno.h>
>         @@ -41,14 +43,15 @@
>          #define MSG_POOL_SIZE         (4*1024*1024)
>          #define BUF_SIZE               8
>          #define PING_CNT       10
>         -#define PING_THRD      2       /* Send and Rx Ping thread */
>         +#define PING_THRD      2       /* send_ping and rx_ping threads */
>
>          /* Nanoseconds */
>          #define RESUS  10000
>          #define MINUS  10000
>          #define MAXUS  10000000
>
>         -static odp_timer_t test_timer_ping;
>         +static odp_timer_pool_t tp;
>         +static odp_timer_t test_timer_ping = ODP_TIMER_INVALID;
>          static odp_timer_tmo_t test_ping_tmo;
>
>          #define PKTSIZE      64
>         @@ -128,15 +131,7 @@ static int listen_to_pingack(void)
>                                                  (socklen_t *)&len);
>                                 if (bytes > 0) {
>                                                 /* pkt rxvd therefore cancel the timeout */
>         -                               if (odp_timer_cancel_tmo(test_timer_ping,
>         -                                                        test_ping_tmo) != 0) {
>         -                                       ODP_ERR("cancel_tmo failed ..exiting listner thread\n");
>         -                                       /* avoid exiting from here even if tmo
>         -                                        * failed for current ping,
>         -                                        * allow subsequent ping_rx request */
>         -                                       err = -1;
>         -
>         -                               }
>         +                               odp_timer_cancel(test_timer_ping);
>                                                 /* cruel bad hack used for sender, listner ipc..
>                                          * euwww.. FIXME ..
>                                          */
>         @@ -160,7 +155,6 @@ static int send_ping_request(struct sockaddr_in *addr)
>
>                 uint64_t tick;
>                 odp_queue_t queue;
>         -       odp_buffer_t buf;
>
>                 int err = 0;
>
>         @@ -184,8 +178,16 @@ static int send_ping_request(struct sockaddr_in *addr)
>
>                 /* get the ping queue */
>                 queue = odp_queue_lookup("ping_timer_queue");
>         +       test_timer_ping = odp_timer_alloc(tp, queue, NULL);
>         +       if (test_timer_ping == ODP_TIMER_INVALID) {
>         +               ODP_ERR("Failed to allocate timer.\n");
>         +               err = -1;
>         +               goto err;
>         +       }
>
>                 for (i = 0; i < PING_CNT; i++) {
>         +               odp_buffer_t buf;
>         +               odp_timer_tmo_t tmo;
>                         /* prepare icmp pkt */
>                         bzero(&pckt, sizeof(pckt));
>                         pckt.hdr.type = ICMP_ECHO;
>         @@ -209,12 +211,10 @@ static int send_ping_request(struct sockaddr_in *addr)
>                         printf(" icmp_sent msg_cnt %d\n", i);
>
>                         /* arm the timer */
>         -               tick = odp_timer_current_tick(test_timer_ping);
>         +               tick = odp_timer_current_tick(tp);
>
>                         tick += 1000;
>         -               test_ping_tmo = odp_timer_absolute_tmo(test_timer_ping, tick,
>         -                                                      queue,
>         -                                                      ODP_BUFFER_INVALID);
>         +               odp_timer_set_abs(test_timer_ping, tick);
>                         /* wait for timeout event */
>                         while ((buf = odp_queue_deq(queue)) == ODP_BUFFER_INVALID) {
>                                 /* flag true means ack rxvd.. a cruel hack as I
>         @@ -229,17 +229,28 @@ static int send_ping_request(struct sockaddr_in *addr)
>                                         break;
>                                 }
>                         }
>         +               assert(odp_buffer_type(buf) == ODP_BUFFER_TYPE_TIMEOUT);
>         +               tmo = odp_timeout_from_buffer(buf);
>
>         -               /* free tmo_buf for timeout case */
>         -               if (buf != ODP_BUFFER_INVALID) {
>         -                       ODP_DBG(" timeout msg_cnt [%i] \n", i);
>         +               switch (odp_timer_tmo_status(tmo)) {
>         +               case ODP_TMO_FRESH:
>         +                       ODP_DBG(" timeout msg_cnt [%i]\n", i);
>                                 /* so to avoid seg fault commented */
>         -                       odp_buffer_free(buf);
>                                 err = -1;
>         +                       break;
>         +               case ODP_TMO_STALE:
>         +                       /* Ignore stale timeouts */
>         +                       break;
>         +               case ODP_TMO_ORPHAN:
>         +                       ODP_ERR("Received orphaned timeout!\n");
>         +                       abort();
>                         }
>         +               odp_timer_return_tmo(tmo);
>                 }
>
>          err:
>         +       if (test_timer_ping != ODP_TIMER_INVALID)
>         +               odp_timer_free(test_timer_ping);
>                 return err;
>          }
>
>         @@ -340,9 +351,9 @@ int main(int argc ODP_UNUSED, char *argv[] ODP_UNUSED)
>                 pool = odp_buffer_pool_create("msg_pool", pool_base, MSG_POOL_SIZE,
>                                               BUF_SIZE, ODP_CACHE_LINE_SIZE,
>         -                                     ODP_BUFFER_TYPE_RAW);
>         +                                     ODP_BUFFER_TYPE_TIMEOUT);
>                 if (pool == ODP_BUFFER_POOL_INVALID) {
>         -               ODP_ERR("Pool create failed.\n");
>         +               ODP_ERR("Buffer pool create failed.\n");
>                         return -1;
>                 }
>
>         @@ -357,15 +368,19 @@ int main(int argc ODP_UNUSED, char *argv[] ODP_UNUSED)
>                         return -1;
>                 }
>
>         -       test_timer_ping = odp_timer_create("ping_timer", pool,
>         -                                          RESUS*ODP_TIME_USEC,
>         -                                          MINUS*ODP_TIME_USEC,
>         -                                          MAXUS*ODP_TIME_USEC);
>         -
>         -       if (test_timer_ping == ODP_TIMER_INVALID) {
>         -               ODP_ERR("Timer create failed.\n");
>         +       /*
>         +        * Create timer pool
>         +        */
>         +       tp = odp_timer_pool_create("timer_pool", pool,
>         +                                  RESUS*ODP_TIME_USEC,
>         +                                  MINUS*ODP_TIME_USEC,
>         +                                  MAXUS*ODP_TIME_USEC,
>         +                                  1, false, ODP_CLOCK_CPU);
>         +       if (tp == ODP_TIMER_POOL_INVALID) {
>         +               ODP_ERR("Timer pool create failed.\n");
>                         return -1;
>                 }
>         +       odp_timer_pool_start();
>
>                 odp_shm_print_all();
>
>         --
>         1.9.1
>
>
>         _______________________________________________
>         lng-odp mailing list
>         lng-odp@lists.linaro.org <mailto:lng-odp@lists.linaro.org>
>         http://lists.linaro.org/mailman/listinfo/lng-odp
>
>
>
>
>     -- 
>     *Mike Holmes*
>     Linaro  Sr Technical Manager
>     LNG - ODP
>
>
>
>
> _______________________________________________
> lng-odp mailing list
> lng-odp@lists.linaro.org
> http://lists.linaro.org/mailman/listinfo/lng-odp
Gilad Ben-Yossef Oct. 6, 2014, 6:22 a.m. UTC | #4
Another one of my stupid questions, I'm afraid.  :-)
If we have a timer implemented as an event pushed to a queue that can be scheduled like any other queue (which is a good thing, I think), why do our schedule APIs need a timeout?
I mean, if you want a timeout, just add a scheduled timer queue and send yourself timeout events. That's how I would implement the schedule timeouts internally anyway (running a native timer on a core that does packet processing stops it from enjoying full Linux NOHZ CPU isolation, so we really don't want timers there...). A rough sketch of what I mean is below.
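For concreteness, a minimal untested sketch of that model, using the v4 API from this patch (tp is an already-created timer pool; wakeup_q, work_q and timeout_ns are made-up placeholders):

        /* One private timer per worker; its timeouts land on a scheduled queue */
        odp_timer_t wakeup = odp_timer_alloc(tp, wakeup_q, NULL);

        /* Instead of passing a timeout argument to the scheduler: */
        odp_timer_set_rel(wakeup, odp_timer_ns_to_tick(tp, timeout_ns));
        odp_buffer_t buf = odp_schedule_one(&work_q, ODP_SCHED_WAIT);
        if (odp_buffer_type(buf) == ODP_BUFFER_TYPE_TIMEOUT) {
                odp_timer_tmo_t tmo = odp_timeout_from_buffer(buf);
                if (odp_timer_tmo_status(tmo) == ODP_TMO_FRESH) {
                        /* this is the "schedule timed out" case */
                }
                odp_timer_return_tmo(tmo);
        } else {
                /* Real work arrived in time; disarm the wakeup timer */
                odp_timer_cancel(wakeup);
                /* ... process buf ... */
        }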
Anything I've missed?
Thanks,
Gilad
Gilad Ben-Yossef
Software Architect
EZchip Technologies Ltd.
37 Israel Pollak Ave, Kiryat Gat 82025 ,Israel
Tel: +972-4-959-6666 ext. 576, Fax: +972-8-681-1483 
Mobile: +972-52-826-0388, US Mobile: +1-973-826-0388
Email: giladb@ezchip.com, Web: http://www.ezchip.com

"Ethernet always wins."
        — Andy Bechtolsheim


> -----Original Message-----

> From: lng-odp-bounces@lists.linaro.org [mailto:lng-odp-

> bounces@lists.linaro.org] On Behalf Of Ola Liljedahl

> Sent: Thursday, October 02, 2014 6:23 PM

> To: lng-odp@lists.linaro.org

> Subject: [lng-odp] [PATCHv4] Timer API and and priority queue-based

> implementation

> 

> Signed-off-by: Ola Liljedahl <ola.liljedahl@linaro.org>

> ---

> Fixed review comments for v3 from Anders R.

> * Example code snippets use @code/@endcode.

> * Added some missing doxygen comments.

> * Updated some comments.

> * Reverted year in copyright notices.

> * Added odp_likely() hint.

> * Made some variables self-descriptive and removed redundant comments.

> Changed to use ticket locks instead of spin locks (ticket locks are more

> fair).

> Changed to use ODP_ABORT() which has become available since the last

> patch.

> 

>  example/timer/odp_timer_test.c                     | 125 +--

>  platform/linux-generic/Makefile.am                 |   1 +

>  platform/linux-generic/include/api/odp_timer.h     | 570 +++++++++++--

>  .../include/odp_priority_queue_internal.h          | 108 +++

>  .../linux-generic/include/odp_timer_internal.h     |  71 +-

>  platform/linux-generic/odp_priority_queue.c        | 283 +++++++

>  platform/linux-generic/odp_timer.c                 | 923 ++++++++++++++-

> ------

>  test/api_test/odp_timer_ping.c                     |  73 +-

>  8 files changed, 1648 insertions(+), 506 deletions(-)

>  create mode 100644 platform/linux-

> generic/include/odp_priority_queue_internal.h

>  create mode 100644 platform/linux-generic/odp_priority_queue.c

> 

> diff --git a/example/timer/odp_timer_test.c

> b/example/timer/odp_timer_test.c

> index 6e1715d..750d785 100644

> --- a/example/timer/odp_timer_test.c

> +++ b/example/timer/odp_timer_test.c

> @@ -41,67 +41,89 @@ typedef struct {

>  /** @private Barrier for test synchronisation */

>  static odp_barrier_t test_barrier;

> 

> -/** @private Timer handle*/

> -static odp_timer_t test_timer;

> +/** @private Timer pool handle */

> +static odp_timer_pool_t tp;

> 

> 

> +/** @private Timeout status ASCII strings */

> +static const char *const status2str[] = {

> +	"fresh", "stale", "orphaned"

> +};

> +

>  /** @private test timeout */

>  static void test_abs_timeouts(int thr, test_args_t *args)

>  {

> -	uint64_t tick;

>  	uint64_t period;

>  	uint64_t period_ns;

>  	odp_queue_t queue;

> -	odp_buffer_t buf;

> -	int num;

> +	int remain = args->tmo_count;

> +	odp_timer_t hdl;

> +	uint64_t tick;

> 

>  	ODP_DBG("  [%i] test_timeouts\n", thr);

> 

>  	queue = odp_queue_lookup("timer_queue");

> 

>  	period_ns = args->period_us*ODP_TIME_USEC;

> -	period    = odp_timer_ns_to_tick(test_timer, period_ns);

> +	period    = odp_timer_ns_to_tick(tp, period_ns);

> 

>  	ODP_DBG("  [%i] period %"PRIu64" ticks,  %"PRIu64" ns\n", thr,

>  		period, period_ns);

> 

> -	tick = odp_timer_current_tick(test_timer);

> -

> -	ODP_DBG("  [%i] current tick %"PRIu64"\n", thr, tick);

> -

> -	tick += period;

> +	ODP_DBG("  [%i] current tick %"PRIu64"\n", thr,

> +		odp_timer_current_tick(tp));

> 

> -	if (odp_timer_absolute_tmo(test_timer, tick, queue,

> ODP_BUFFER_INVALID)

> -	    == ODP_TIMER_TMO_INVALID){

> -		ODP_DBG("Timeout request failed\n");

> +	odp_timer_t test_timer;

> +	test_timer = odp_timer_alloc(tp, queue, NULL);

> +	if (test_timer == ODP_TIMER_INVALID) {

> +		ODP_ERR("Failed to allocate timer\n");

>  		return;

>  	}

> +	tick = odp_timer_current_tick(tp);

> +	hdl = test_timer;

> 

> -	num = args->tmo_count;

> -

> -	while (1) {

> -		odp_timeout_t tmo;

> -

> -		buf = odp_schedule_one(&queue, ODP_SCHED_WAIT);

> -

> -		tmo  = odp_timeout_from_buffer(buf);

> -		tick = odp_timeout_tick(tmo);

> -

> -		ODP_DBG("  [%i] timeout, tick %"PRIu64"\n", thr, tick);

> -

> -		odp_buffer_free(buf);

> -

> -		num--;

> -

> -		if (num == 0)

> -			break;

> +	while (remain != 0) {

> +		odp_buffer_t buf;

> +		odp_timer_tmo_t tmo;

> +		odp_timer_tmo_status_t stat;

> +		odp_timer_set_t rc;

> 

>  		tick += period;

> +		rc = odp_timer_set_abs(hdl, tick);

> +		if (odp_unlikely(rc != ODP_TIMER_SET_SUCCESS)) {

> +			ODP_ERR("odp_timer_set_abs() failed (%u)\n", rc);

> +			abort();

> +		}

> 

> -		odp_timer_absolute_tmo(test_timer, tick,

> -				       queue, ODP_BUFFER_INVALID);

> +		/* Get the next ready buffer/timeout */

> +		buf = odp_schedule_one(&queue, ODP_SCHED_WAIT);

> +		if (odp_unlikely(odp_buffer_type(buf) !=

> +				 ODP_BUFFER_TYPE_TIMEOUT)) {

> +			ODP_ERR("Unexpected buffer type received\n");

> +			abort();

> +		}

> +		tmo = odp_timeout_from_buffer(buf);

> +		stat = odp_timer_tmo_status(tmo);

> +		tick = odp_timer_expiration(tmo);

> +		hdl = odp_timer_handle(tmo);

> +		ODP_DBG("  [%i] timeout, tick %"PRIu64", status %s\n",

> +			thr, tick, status2str[stat]);

> +		/* if (stat == ODP_TMO_FRESH)  - do your thing! */

> +		if (odp_likely(stat == ODP_TMO_ORPHAN)) {

> +			/* Some other thread freed the corresponding

> +			   timer after the timeout was already

> +			   enqueued */

> +			/* Timeout handle is invalid, use our own timer */

> +			hdl = test_timer;

> +		}

> +		/* Return timeout to timer manager, regardless of status */

> +		odp_timer_return_tmo(tmo);

> +		remain--;

>  	}

> 

> +	odp_timer_cancel(test_timer);

> +	odp_timer_free(test_timer);

> +

>  	if (odp_queue_sched_type(queue) == ODP_SCHED_SYNC_ATOMIC)

>  		odp_schedule_release_atomic();

>  }

> @@ -155,7 +177,6 @@ static void print_usage(void)

>  	printf("Options:\n");

>  	printf("  -c, --count <number>    core count, core IDs start from

> 1\n");

>  	printf("  -r, --resolution <us>   timeout resolution in usec\n");

> -	printf("  -m, --min <us>          minimum timeout in usec\n");

>  	printf("  -x, --max <us>          maximum timeout in usec\n");

>  	printf("  -p, --period <us>       timeout period in usec\n");

>  	printf("  -t, --timeouts <count>  timeout repeat count\n");

> @@ -190,14 +211,14 @@ static void parse_args(int argc, char *argv[],

> test_args_t *args)

>  	/* defaults */

>  	args->core_count    = 0; /* all cores */

>  	args->resolution_us = 10000;

> -	args->min_us        = args->resolution_us;

> +	args->min_us        = 0;

>  	args->max_us        = 10000000;

>  	args->period_us     = 1000000;

>  	args->tmo_count     = 30;

> 

>  	while (1) {

>  		opt = getopt_long(argc, argv, "+c:r:m:x:p:t:h",

> -				 longopts, &long_index);

> +				  longopts, &long_index);

> 

>  		if (opt == -1)

>  			break;	/* No more options */

> @@ -321,10 +342,25 @@ int main(int argc, char *argv[])

>  				      ODP_BUFFER_TYPE_TIMEOUT);

> 

>  	if (pool == ODP_BUFFER_POOL_INVALID) {

> -		ODP_ERR("Pool create failed.\n");

> +		ODP_ERR("Buffer pool create failed.\n");

>  		return -1;

>  	}

> 

> +	tp = odp_timer_pool_create("timer_pool", pool,

> +				   args.resolution_us*ODP_TIME_USEC,

> +				   args.min_us*ODP_TIME_USEC,

> +				   args.max_us*ODP_TIME_USEC,

> +				   num_workers, /* One timer per worker */

> +				   true,

> +				   ODP_CLOCK_CPU);

> +	if (tp == ODP_TIMER_POOL_INVALID) {

> +		ODP_ERR("Timer pool create failed.\n");

> +		return -1;

> +	}

> +	odp_timer_pool_start();

> +

> +	odp_shm_print_all();

> +

>  	/*

>  	 * Create a queue for timer test

>  	 */

> @@ -340,19 +376,6 @@ int main(int argc, char *argv[])

>  		return -1;

>  	}

> 

> -	test_timer = odp_timer_create("test_timer", pool,

> -				      args.resolution_us*ODP_TIME_USEC,

> -				      args.min_us*ODP_TIME_USEC,

> -				      args.max_us*ODP_TIME_USEC);

> -

> -	if (test_timer == ODP_TIMER_INVALID) {

> -		ODP_ERR("Timer create failed.\n");

> -		return -1;

> -	}

> -

> -

> -	odp_shm_print_all();

> -

>  	printf("CPU freq %"PRIu64" hz\n", odp_sys_cpu_hz());

>  	printf("Cycles vs nanoseconds:\n");

>  	ns = 0;

> diff --git a/platform/linux-generic/Makefile.am b/platform/linux-

> generic/Makefile.am

> index d076d50..71f923c 100644

> --- a/platform/linux-generic/Makefile.am

> +++ b/platform/linux-generic/Makefile.am

> @@ -59,6 +59,7 @@ __LIB__libodp_la_SOURCES = \

>  			   odp_packet_flags.c \

>  			   odp_packet_io.c \

>  			   odp_packet_socket.c \

> +			   odp_priority_queue.c \

>  			   odp_queue.c \

>  			   odp_ring.c \

>  			   odp_rwlock.c \

> diff --git a/platform/linux-generic/include/api/odp_timer.h

> b/platform/linux-generic/include/api/odp_timer.h

> index 01db839..82a1e05 100644

> --- a/platform/linux-generic/include/api/odp_timer.h

> +++ b/platform/linux-generic/include/api/odp_timer.h

> @@ -8,9 +8,193 @@

>  /**

>   * @file

>   *

> - * ODP timer

> + * ODP timer service

>   */

> 

> +/** Example #1 Retransmission timer (e.g. for reliable connections)

> + @code

> +

> +//Create timer pool for reliable connections

> +#define SEC 1000000000ULL //1s expressed in nanoseconds

> +odp_timer_pool_t tcp_tpid =

> +    odp_timer_pool_create("TCP",

> +			  buffer_pool,

> +			  1000000,//resolution 1ms

> +			  0,//min tmo

> +			  7200 * SEC,//max tmo length 2hours

> +			  40000,//num_timers

> +			  true,//shared

> +			  ODP_CLOCK_CPU

> +			 );

> +if (tcp_tpid == ODP_TIMER_POOL_INVALID)

> +{

> +	//Failed to create timer pool => fatal error

> +}

> +

> +

> +//Setting up a new connection

> +//Allocate retransmission timeout (identical for supervision timeout)

> +//The user pointer points back to the connection context

> +conn->ret_tim = odp_timer_alloc(tcp_tpid, queue, conn);

> +//Check if all resources were successfully allocated

> +if (conn->ret_tim == ODP_TIMER_INVALID)

> +{

> +	//Failed to allocate all resources for connection => tear down

> +	//Destroy timeout

> +	odp_timer_free(conn->ret_tim);

> +	//Tear down connection

> +	...

> +	return false;

> +}

> +//All necessary resources successfully allocated

> +//Compute initial retransmission length in timer ticks

> +conn->ret_len = odp_timer_ns_to_tick(tcp_tpid, 3 * SEC);//Per RFC1122

> +//Arm the timer

> +odp_timer_set_rel(conn->ret_tim, conn->ret_len);

> +return true;

> +

> +

> +//A packet for the connection has just been transmitted

> +//Reset the retransmission timer

> +odp_timer_set_rel(conn->ret_tim, conn->ret_len);

> +

> +

> +//A retransmission timeout buffer for the connection has been received

> +odp_timer_tmo_t tmo = odp_timeout_from_buffer(buf);

> +odp_timer_tmo_status_t stat = odp_timer_tmo_status(tmo);

> +//Check if timeout is fresh or stale, for stale timeouts we need to

> reset the

> +//timer

> +if (stat == ODP_TMO_FRESH) {

> +	//Fresh timeout, last transmitted packet not acked in time =>

> +	  retransmit

> +	//Get connection from timeout event

> +	conn = odp_timer_get_userptr(tmo);

> +	//Retransmit last packet (e.g. TCP segment)

> +	...

> +	//Re-arm timer using original delta value

> +	odp_timer_set_rel(conn->ret_tim, conn->ret_len);

> +} else if (stat == ODP_TMO_ORPHAN) {

> +	odp_free_buffer(buf);

> +	return;//Get out of here

> +} // else stat == ODP_TMO_STALE, do nothing

> +//Finished processing, return timeout

> +odp_timer_return_tmo(tmo);

> +

> + @endcode

> +*/

> +

> +/** Example #2 Periodic tick

> + @code

> +

> +//Create timer pool for periodic ticks

> +odp_timer_pool_t per_tpid =

> +    odp_timer_pool_create("periodic-tick",

> +			  buffer_pool,

> +			  1,//resolution 1ns

> +			  1,//minimum timeout length 1ns

> +			  1000000000,//maximum timeout length 1s

> +			  10,//num_timers

> +			  false,//not shared

> +			  ODP_CLOCK_CPU

> +			 );

> +if (per_tpid == ODP_TIMER_POOL_INVALID)

> +{

> +    //Failed to create timer pool => fatal error

> +}

> +

> +

> +//Allocate periodic timer

> +tim_1733 = odp_timer_alloc(per_tpid, queue, NULL);

> +//Check if all resources were successfully allocated

> +if (tim_1733 == ODP_TIMER_INVALID)

> +{

> +	//Failed to allocate all resources => tear down

> +	//Destroy timeout

> +	odp_timer_free(tim_1733);

> +	//Tear down other state

> +	...

> +	return false;

> +}

> +//All necessary resources successfully allocated

> +//Compute tick period in timer ticks

> +period_1733 = odp_timer_ns_to_tick(per_tpid, 1000000000U /

> 1733U);//1733Hz

> +//Compute when next tick should expire

> +next_1733 = odp_timer_current_tick(per_tpid) + period_1733;

> +//Arm the periodic timer

> +odp_timer_set_abs(tim_1733, next_1733);

> +return true;

> +

> +

> +

> +//A periodic timer timeout has been received

> +odp_timer_tmo_t tmo = odp_timeout_from_buffer(buf);

> +//Get status of timeout

> +odp_timer_tmo_status_t stat = odp_timer_tmo_status(tmo);

> +//We expect the timeout is always fresh since we are not calling set or

> cancel

> +on active or expired timers in this example

> +assert(stat == ODP_TMO_FRESH);

> +//Do processing driven by timeout *before*

> +...

> +do {

> +	//Compute when the timer should expire next

> +	next_1733 += period_1733;

> +	//Check that this is in the future

> +	if (likely(next_1733 > odp_timer_current_tick(per_tpid))

> +	break;//Yes, done

> +	//Else we missed a timeout

> +	//Optionally attempt some recovery and/or logging of the problem

> +	...

> +} while (0);

> +//Re-arm periodic timer

> +odp_timer_set_abs(tim_1733, next_1733);

> +//Or do processing driven by timeout *after*

> +...

> +odp_timer_return_tmo(tmo);

> +return;

> +

> + @endcode

> +*/

> +

> +/** Example #3 Tear down of flow

> + @code

> +//ctx points to flow context data structure owned by application

> +//Free the timer, cancelling any timeout

> +odp_timer_free(ctx->timer);//Any enqueued timeout will be made invalid

> +//Continue tearing down and eventually freeing context

> +...

> +return;

> +

> +//A timeout has been received, check status

> +odp_timer_tmo_t tmo = odp_timeout_from_buffer(buf);

> +switch (odp_timer_tmo_status(tmo))

> +{

> +    case ODP_TMO_FRESH :

> +	//A flow has timed out, tear it down

> +	//Find flow context from timeout

> +	ctx = (context *)odp_timer_get_userptr(tmo);

> +	//Free the supervision timer, any enqueued timeout will remain

> +	odp_timer_free(ctx->tim);

> +	//Free other flow related resources

> +	...

> +	//Free the timeout buffer

> +	odp_buffer_free(buf);

> +	//Flow torn down

> +	break;

> +    case ODP_TMO_STALE :

> +	//A stale timeout was received, return timeout and update timer

> +	odp_timer_return_tmo(tmo);

> +	break;

> +    case ODP_TMO_ORPHAN :

> +	//Orphaned timeout (from previously torn down flow)

> +	//No corresponding timer or flow context

> +	//Free the timeout buffer

> +	odp_buffer_free(buf);

> +	break;

> +}

> +

> + @endcode

> +*/

> +

>  #ifndef ODP_TIMER_H_

>  #define ODP_TIMER_H_

> 

> @@ -18,144 +202,408 @@

>  extern "C" {

>  #endif

> 

> +#include <stdlib.h>

>  #include <odp_std_types.h>

>  #include <odp_buffer.h>

>  #include <odp_buffer_pool.h>

>  #include <odp_queue.h>

> 

> +struct odp_timer_pool_s; /**< Forward declaration */

> +

> +/**

> +* ODP timer pool handle (platform dependent)

> +*/

> +typedef struct odp_timer_pool_s *odp_timer_pool_t;

> +

> +/**

> + * Invalid timer pool handle (platform dependent).

> + */

> +#define ODP_TIMER_POOL_INVALID NULL

> 

>  /**

> - * ODP timer handle

> + * Clock sources for timers in timer pool.

>   */

> -typedef uint32_t odp_timer_t;

> +typedef enum odp_timer_clk_src_e {

> +	/** Use CPU clock as clock source for timers */

> +	ODP_CLOCK_CPU,

> +	/** Use external clock as clock source for timers */

> +	ODP_CLOCK_EXT

> +	/* Platform dependent which other clock sources exist */

> +} odp_timer_clk_src_t;

> 

> -/** Invalid timer */

> -#define ODP_TIMER_INVALID 0

> +struct odp_timer_s; /**< Forward declaration */

> 

> +/**

> +* ODP timer handle (platform dependent).

> +*/

> +typedef struct odp_timer_s *odp_timer_t;

> 

>  /**

> - * ODP timeout handle

> + * Invalid timer handle (platform dependent).

>   */

> -typedef odp_buffer_t odp_timer_tmo_t;

> -

> -/** Invalid timeout */

> -#define ODP_TIMER_TMO_INVALID 0

> +#define ODP_TIMER_INVALID NULL

> 

> +/**

> + * Return values of timer set calls.

> + */

> +typedef enum odp_timer_set_e {

> +	/** Timer set operation successful */

> +	ODP_TIMER_SET_SUCCESS,

> +	/** Timer set operation failed, expiration too early */

> +	ODP_TIMER_SET_TOOEARLY,

> +	/** Timer set operation failed, expiration too late */

> +	ODP_TIMER_SET_TOOLATE

> +} odp_timer_set_t;

> 

>  /**

> - * Timeout notification

> + * Timeout event handle.

>   */

> -typedef odp_buffer_t odp_timeout_t;

> +typedef odp_buffer_t odp_timer_tmo_t;

> 

> +/**

> + * Status of a timeout event.

> + */

> +typedef enum odp_timer_tmo_status_e {

> +	/** Timeout is fresh, process it and return timeout */

> +	ODP_TMO_FRESH,

> +	/** Timer reset or cancelled, just return timeout  */

> +	ODP_TMO_STALE,

> +	/** Timer deleted, return or free timeout */

> +	ODP_TMO_ORPHAN

> +} odp_timer_tmo_status_t;

> 

>  /**

> - * Create a timer

> + * Create a timer pool

>   *

> - * Creates a new timer with requested properties.

> + * Create a new timer pool.

>   *

>   * @param name       Name

> - * @param pool       Buffer pool for allocating timeout notifications

> + * @param buf_pool   Buffer pool for allocating timeouts (and only

> timeouts)

>   * @param resolution Timeout resolution in nanoseconds

> - * @param min_tmo    Minimum timeout duration in nanoseconds

> - * @param max_tmo    Maximum timeout duration in nanoseconds

> + * @param min_tmo    Minimum relative timeout in nanoseconds

> + * @param max_tmo    Maximum relative timeout in nanoseconds

> + * @param num_timers Number of supported timers (minimum)

> + * @param shared     Shared or private timer pool.

> + *		   Operations on shared timers will include the necessary

> + *		   mutual exclusion, operations on private timers may not

> + *		   (mutual exclusion is the responsibility of the caller).

> + * @param clk_src    Clock source to use

>   *

> - * @return Timer handle if successful, otherwise ODP_TIMER_INVALID

> + * @return Timer pool handle if successful, otherwise

> ODP_TIMER_POOL_INVALID

> + * and errno set

>   */

> -odp_timer_t odp_timer_create(const char *name, odp_buffer_pool_t pool,

> -			     uint64_t resolution, uint64_t min_tmo,

> -			     uint64_t max_tmo);

> +odp_timer_pool_t

> +odp_timer_pool_create(const char *name,

> +		      odp_buffer_pool_t buf_pool,

> +		      uint64_t resolution,

> +		      uint64_t min_tmo,

> +		      uint64_t max_tmo,

> +		      uint32_t num_timers,

> +		      bool shared,

> +		      odp_timer_clk_src_t clk_src);

> +

> +/**

> + * Start a timer pool

> + *

> + * Start all created timer pools, enabling the allocation of timers.

> + * The purpose of this call is to coordinate the creation of multiple

> timer

> + * pools that may use the same underlying HW resources.

> + * This function may be called multiple times.

> + */

> +void odp_timer_pool_start(void);

> +

> +/**

> + * Destroy a timer pool

> + *

> + * Destroy a timer pool, freeing all resources.

> + * All timers must have been freed.

> + *

> + * @param tpid  Timer pool identifier

> + */

> +void odp_timer_pool_destroy(odp_timer_pool_t tpid);

> 

>  /**

>   * Convert timer ticks to nanoseconds

>   *

> - * @param timer Timer

> + * @param tpid  Timer pool identifier

>   * @param ticks Timer ticks

>   *

>   * @return Nanoseconds

>   */

> -uint64_t odp_timer_tick_to_ns(odp_timer_t timer, uint64_t ticks);

> +uint64_t odp_timer_tick_to_ns(odp_timer_pool_t tpid, uint64_t ticks);

> 

>  /**

>   * Convert nanoseconds to timer ticks

>   *

> - * @param timer Timer

> + * @param tpid  Timer pool identifier

>   * @param ns    Nanoseconds

>   *

>   * @return Timer ticks

>   */

> -uint64_t odp_timer_ns_to_tick(odp_timer_t timer, uint64_t ns);

> +uint64_t odp_timer_ns_to_tick(odp_timer_pool_t tpid, uint64_t ns);

> 

>  /**

> - * Timer resolution in nanoseconds

> + * Current tick value

>   *

> - * @param timer Timer

> + * @param tpid Timer pool identifier

>   *

> - * @return Resolution in nanoseconds

> + * @return Current time in timer ticks

> + */

> +uint64_t odp_timer_current_tick(odp_timer_pool_t tpid);

> +

> +/**

> + * ODP timer configurations

>   */

> -uint64_t odp_timer_resolution(odp_timer_t timer);

> +

> +typedef enum odp_timer_pool_conf_e {

> +	ODP_TIMER_NAME,      /**< Return name of timer pool */

> +	ODP_TIMER_RESOLUTION,/**< Return the timer resolution (in ns) */

> +	ODP_TIMER_MIN_TICKS, /**< Return the min supported rel timeout

> (ticks)*/

> +	ODP_TIMER_MAX_TICKS, /**< Return the max supported rel timeout

> (ticks)*/

> +	ODP_TIMER_NUM_TIMERS,/**< Return number of supported timers */

> +	ODP_TIMER_SHARED     /**< Return shared flag */

> +} odp_timer_pool_conf_t;

> 

>  /**

> - * Maximum timeout in timer ticks

> + * Query different timer pool configurations, e.g.

> + *  Timer resolution in nanoseconds

> + *  Maximum timeout in timer ticks

> + *  Number of supported timers

> + *  Shared or private timer pool

>   *

> - * @param timer Timer

> + * @param tpid Timer pool identifier

> + * @param item Configuration item being queried

>   *

> - * @return Maximum timeout in timer ticks

> + * @return the requested piece of information or 0 for unknown item.

>   */

> -uint64_t odp_timer_maximum_tmo(odp_timer_t timer);

> +uintptr_t odp_timer_pool_query_conf(odp_timer_pool_t tpid,

> +				    odp_timer_pool_conf_t item);

> 

>  /**

> - * Current timer tick

> + * Allocate a timer

>   *

> - * @param timer Timer

> + * Create a timer (allocating all necessary resources e.g. timeout

> event) from

> + * the timer pool.

>   *

> - * @return Current time in timer ticks

> + * @param tpid     Timer pool identifier

> + * @param queue    Destination queue for timeout notifications

> + * @param user_ptr User defined pointer or NULL (copied to timeouts)

> + *

> + * @return Timer handle if successful, otherwise ODP_TIMER_INVALID and

> + *	   errno set.

>   */

> -uint64_t odp_timer_current_tick(odp_timer_t timer);

> +odp_timer_t odp_timer_alloc(odp_timer_pool_t tpid,

> +			    odp_queue_t queue,

> +			    void *user_ptr);

> 

>  /**

> - * Request timeout with an absolute timer tick

> + * Free a timer

> + *

> + * Free (destroy) a timer, freeing all associated resources (e.g.

> default

> + * timeout event). An expired and enqueued timeout event will not be

> freed.

> + * It is the responsibility of the application to free this timeout when

> it

> + * is received.

>   *

> - * When tick reaches tmo_tick, the timer enqueues the timeout

> notification into

> - * the destination queue.

> + * @param tim      Timer handle

> + */

> +void odp_timer_free(odp_timer_t tim);

> +

> +/**

> + * Set a timer (absolute time) with a user-defined timeout buffer

>   *

> - * @param timer    Timer

> - * @param tmo_tick Absolute timer tick value which triggers the timeout

> - * @param queue    Destination queue for the timeout notification

> - * @param buf      User defined timeout notification buffer. When

> - *                 ODP_BUFFER_INVALID, default timeout notification is

> used.

> + * Set (arm) the timer to expire at specific time. The user-defined

> + * buffer will be enqueued when the timer expires.

> + * Arming may fail (if the timer is in state EXPIRED), an earlier

> timeout

> + * will then be received. odp_timer_tmo_status() must be used to check

> if

> + * the received timeout is valid.

>   *

> - * @return Timeout handle if successful, otherwise ODP_TIMER_TMO_INVALID

> + * Note: any invalid parameters will be treated as programming errors

> and will

> + * cause the application to abort.

> + *

> + * @param tim      Timer

> + * @param abs_tck  Expiration time in absolute timer ticks

> + * @param user_buf The buffer to use as timeout event

> + *

> + * @return Success or failure code

>   */

> -odp_timer_tmo_t odp_timer_absolute_tmo(odp_timer_t timer, uint64_t

> tmo_tick,

> -				       odp_queue_t queue, odp_buffer_t buf);

> +odp_timer_set_t odp_timer_set_abs_w_buf(odp_timer_t tim,

> +					uint64_t abs_tck,

> +					odp_buffer_t user_buf);

> 

>  /**

> - * Cancel a timeout

> + * Set a timer with an absolute expiration time

> + *

> + * Set (arm) the timer to expire at a specific time.

> + * Arming may fail (if the timer is in state EXPIRED), an earlier

> timeout

> + * will then be received. odp_timer_tmo_status() must be used to check

> if

> + * the received timeout is valid.

> + *

> + * Note: any invalid parameters will be treated as programming errors

> and will

> + * cause the application to abort.

>   *

> - * @param timer Timer

> - * @param tmo   Timeout to cancel

> + * @param tim     Timer

> + * @param abs_tck Expiration time in absolute timer ticks

>   *

> - * @return 0 if successful

> + * @return Success or failure code

>   */

> -int odp_timer_cancel_tmo(odp_timer_t timer, odp_timer_tmo_t tmo);

> +odp_timer_set_t odp_timer_set_abs(odp_timer_t tim, uint64_t abs_tck);

> 

>  /**

> - * Convert buffer handle to timeout handle

> + * Set a timer with a relative expiration time and user-defined buffer.

>   *

> - * @param buf  Buffer handle

> + * Set (arm) the timer to expire at a relative future time.

> + * Arming may fail (if the timer is in state EXPIRED),

> + * an earlier timeout will then be received. odp_timer_tmo_status() must

> + * be used to check if the received timeout is valid.

>   *

> - * @return Timeout buffer handle

> + * Note: any invalid parameters will be treated as programming errors

> and will

> + * cause the application to abort.

> + *

> + * @param tim      Timer

> + * @param rel_tck  Expiration time in timer ticks relative to current

> time of

> + *		   the timer pool the timer belongs to

> + * @param user_buf The buffer to use as timeout event

> + *

> + * @return Success or failure code

>   */

> -odp_timeout_t odp_timeout_from_buffer(odp_buffer_t buf);

> +odp_timer_set_t odp_timer_set_rel_w_buf(odp_timer_t tim,

> +					uint64_t rel_tck,

> +					odp_buffer_t user_buf);

> +/**

> + * Set a timer with a relative expiration time

> + *

> + * Set (arm) the timer to expire at a relative future time.

> + * Arming may fail (if the timer is in state EXPIRED),

> + * an earlier timeout will then be received. odp_timer_tmo_status() must

> + * be used to check if the received timeout is valid.

> + *

> + * Note: any invalid parameters will be treated as programming errors

> and will

> + * cause the application to abort.

> + *

> + * @param tim     Timer

> + * @param rel_tck Expiration time in timer ticks relative to current

> time of

> + *		  the timer pool the timer belongs to

> + *

> + * @return Success or failure code

> + */

> +odp_timer_set_t odp_timer_set_rel(odp_timer_t tim, uint64_t rel_tck);

> 

>  /**

> - * Return absolute timeout tick

> + * Cancel a timer

> + *

> + * Cancel a timer, preventing future expiration and delivery.

> + *

> + * A timer that has already expired and been enqueued for delivery may

> be

> + * impossible to cancel and will instead be delivered to the destination

> queue.

> + * Use odp_timer_tmo_status() to check whether a received timeout is

> fresh or

> + * stale (cancelled). Stale timeouts will automatically be recycled.

> + *

> + * Note: any invalid parameters will be treated as programming errors

> and will

> + * cause the application to abort.

> + *

> + * @param tim    Timer handle

> + */

> +void odp_timer_cancel(odp_timer_t tim);

> +

> +/**

> + * Translate from buffer to timeout

> + *

> + * Return the timeout handle that corresponds to the specified buffer

> handle.

> + * The buffer must be of type ODP_BUFFER_TYPE_TIMEOUT.

> + *

> + * @param buf   Buffer handle to translate.

> + *

> + * @return      The corresponding timeout handle.

> + */

> +static inline odp_timer_tmo_t odp_timeout_from_buffer(odp_buffer_t buf)

> +{

> +	if (odp_unlikely(odp_buffer_type(buf) != ODP_BUFFER_TYPE_TIMEOUT))

> {

> +		ODP_ERR("Buffer %u not of type timeout\n", buf);

> +		abort();

> +	}

> +	/* In this implementation, timeout == buffer */

> +	return (odp_timer_tmo_t)buf;

> +}

> +

> +/**

> + * Translate from timeout to buffer

> + *

> + * Return the buffer handle that corresponds to the specified timeout

> handle.

> + *

> + * @param tmo   Timeout handle to translate.

> + *

> + * @return      The corresponding buffer handle.

> + */

> +static inline odp_buffer_t odp_buffer_from_timeout(odp_timer_tmo_t tmo)

> +{

> +	/* In this implementation, buffer == timeout */

> +	return (odp_buffer_t)tmo;

> +}

> +

> +/**

> + * Return timeout to timer

> + *

> + * Return a received timeout for reuse with the parent timer.

> + * Note: odp_timer_return_tmo() must be called on all received timeouts!

> + * (Excluding user defined timeout buffers).

> + * The timeout must not be accessed after this call; the semantics are

> + * equivalent to a free call.

> + *

> + * @param tmo    Timeout

> + */

> +void odp_timer_return_tmo(odp_timer_tmo_t tmo);

> +

> +/**

> + * Return fresh/stale/orphan status of timeout.

> + *

> + * Check a received timeout for orphan status (i.e. parent timer freed) and

> + * staleness (i.e. parent timer has been reset or cancelled after the

> timeout

> + * expired and was enqueued).

> + * If the timeout is fresh, it should be processed.

> + * If the timeout is stale or orphaned, it should be ignored.

> + * All timeouts must be returned using the odp_timer_return_tmo() call.

> + *

> + * @param tmo    Timeout

> + *

> + * @return One of ODP_TMO_FRESH, ODP_TMO_STALE or ODP_TMO_ORPHAN.

> + */

> +odp_timer_tmo_status_t odp_timer_tmo_status(odp_timer_tmo_t tmo);

> +

> +/**

> + * Get timer handle

> + *

> + * Return handle of parent timer.

> + *

> + * @param tmo   Timeout

> + *

> + * @return Timer handle or ODP_TIMER_INVALID for orphaned timeouts.

> + *         Note that the parent timer could be freed by some other

> thread

> + *         at any time and thus the timeout becomes orphaned.

> + */

> +odp_timer_t odp_timer_handle(odp_timer_tmo_t tmo);

> +

> +/**

> + * Get expiration time

> + *

> + * Return (requested) expiration time of timeout.

> + *

> + * @param tmo   Timeout

> + *

> + * @return Expiration time

> + */

> +uint64_t odp_timer_expiration(odp_timer_tmo_t tmo);

> +

> +/**

> + * Get user pointer

> + *

> + * Return user pointer of timer associated with timeout.

> + * The user pointer is often used to point to some associated context.

>   *

> - * @param tmo Timeout buffer handle

> + * @param tmo   Timeout

>   *

> - * @return Absolute timeout tick

> + * @return User pointer

>   */

> -uint64_t odp_timeout_tick(odp_timeout_t tmo);

> +void *odp_timer_userptr(odp_timer_tmo_t tmo);

> 

>  #ifdef __cplusplus

>  }
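
A usage sketch might help readers of the new header; this is how I read the
intended calling sequence for a plain (non user-buffer) timer. The names tp,
q, my_context and handle_expiration are made up for the example, everything
else is the API declared above:

static void arm_example(odp_timer_pool_t tp, odp_queue_t q, void *my_context)
{
	odp_timer_t tim = odp_timer_alloc(tp, q, my_context);
	if (odp_timer_set_rel(tim, odp_timer_ns_to_tick(tp, 50 * ODP_TIME_USEC))
	    != ODP_TIMER_SET_SUCCESS)
		ODP_ERR("failed to arm timer\n");
}

/* For each buffer of type ODP_BUFFER_TYPE_TIMEOUT received on 'q': */
static void handle_timeout(odp_buffer_t buf)
{
	odp_timer_tmo_t tmo = odp_timeout_from_buffer(buf);
	switch (odp_timer_tmo_status(tmo)) {
	case ODP_TMO_FRESH:
		handle_expiration(odp_timer_userptr(tmo)); /* process it */
		break;
	case ODP_TMO_STALE:	/* timer reset/cancelled after it expired */
	case ODP_TMO_ORPHAN:	/* parent timer already freed */
		break;		/* ignore */
	}
	odp_timer_return_tmo(tmo); /* required for all non user-buffer timeouts */
}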

> diff --git a/platform/linux-generic/include/odp_priority_queue_internal.h

> b/platform/linux-generic/include/odp_priority_queue_internal.h

> new file mode 100644

> index 0000000..7d7f3a2

> --- /dev/null

> +++ b/platform/linux-generic/include/odp_priority_queue_internal.h

> @@ -0,0 +1,108 @@

> +#ifndef _PRIORITY_QUEUE_H

> +#define _PRIORITY_QUEUE_H

> +

> +#include <assert.h>

> +#include <stddef.h>

> +#include <stdint.h>

> +#include <stdbool.h>

> +#include <odp_align.h>

> +

> +#define INVALID_INDEX ~0U

> +#define INVALID_PRIORITY ((pq_priority_t)~0ULL)

> +

> +typedef uint64_t pq_priority_t;

> +

> +struct heap_node;

> +

> +typedef struct priority_queue {

> +	uint32_t max_elems;/* Max number of elements in heap */

> +	/* Number of registered elements (active + inactive) */

> +	uint32_t reg_elems;

> +	uint32_t num_elems;/* Number of active elements */

> +	struct heap_node *heap;

> +	struct heap_node *org_ptr;

> +} priority_queue ODP_ALIGNED(sizeof(uint64_t));

> +

> +/* The user gets a pointer to this structure */

> +typedef struct {

> +	/* Set when pq_element registered with priority queue */

> +	priority_queue *pq;

> +	uint32_t index;/* Index into heap array */

> +	pq_priority_t prio;

> +} pq_element;

> +

> +/*** Operations on pq_element ***/

> +

> +static inline void pq_element_con(pq_element *this)

> +{

> +	this->pq = NULL;

> +	this->index = INVALID_INDEX;

> +	this->prio = 0U;

> +}

> +

> +static inline void pq_element_des(pq_element *this)

> +{

> +	(void)this;

> +	assert(this->index == INVALID_INDEX);

> +}

> +

> +static inline priority_queue *get_pq(const pq_element *this)

> +{

> +	return this->pq;

> +}

> +

> +static inline pq_priority_t get_prio(const pq_element *this)

> +{

> +	return this->prio;

> +}

> +

> +static inline uint32_t get_index(const pq_element *this)

> +{

> +	return this->index;

> +}

> +

> +static inline bool is_active(const pq_element *this)

> +{

> +	return this->index != INVALID_INDEX;

> +}

> +

> +/*** Operations on priority_queue ***/

> +

> +extern uint32_t pq_smallest_child(priority_queue *, uint32_t,

> pq_priority_t);

> +extern void pq_bubble_down(priority_queue *, pq_element *);

> +extern void pq_bubble_up(priority_queue *, pq_element *);

> +

> +static inline bool valid_index(priority_queue *this, uint32_t idx)

> +{

> +	return idx < this->num_elems;

> +}

> +

> +extern void priority_queue_con(priority_queue *, uint32_t _max_elems);

> +extern void priority_queue_des(priority_queue *);

> +

> +/* Register pq_element with priority queue */

> +/* Return false if priority queue full */

> +extern bool pq_register_element(priority_queue *, pq_element *);

> +

> +/* Activate and add pq_element to priority queue */

> +/* Element must be disarmed */

> +extern void pq_activate_element(priority_queue *, pq_element *,

> pq_priority_t);

> +

> +/* Reset (increase) priority for pq_element */

> +/* Element may be active or inactive (released) */

> +extern void pq_reset_element(priority_queue *, pq_element *,

> pq_priority_t);

> +

> +/* Deactivate and remove element from priority queue */

> +/* Element may be active or inactive (released) */

> +extern void pq_deactivate_element(priority_queue *, pq_element *);

> +

> +/* Unregister pq_element */

> +extern void pq_unregister_element(priority_queue *, pq_element *);

> +

> +/* Return priority of first element (lowest numerical value) */

> +extern pq_priority_t pq_first_priority(const priority_queue *);

> +

> +/* Deactivate and return first element if its prio is <= threshold */

> +extern pq_element *pq_release_element(priority_queue *, pq_priority_t

> thresh);

> +

> +#endif /* _PRIORITY_QUEUE_H */
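
For readers unfamiliar with the pattern: pq_element is meant to be embedded as
the first member of the user's own struct (odp_timer further down does exactly
this), so the priority queue is intrusive and needs no allocations of its own,
and the cast back from pq_element * is trivial. A minimal sketch, with my_item
and my_key as made-up names:

typedef struct {
	pq_element pqelem;	/* first member, so (my_item *) casts back work */
	uint64_t my_key;
} my_item;

static void pq_example(void)
{
	priority_queue pq;
	priority_queue_con(&pq, 128);

	my_item item;
	pq_element_con(&item.pqelem);
	item.my_key = 42;
	if (!pq_register_element(&pq, &item.pqelem)) {
		ODP_ERR("priority queue full\n");
		return;
	}
	pq_activate_element(&pq, &item.pqelem, item.my_key);

	/* Pop all elements with priority <= 1000 */
	pq_element *e;
	while ((e = pq_release_element(&pq, 1000)) != NULL) {
		my_item *it = (my_item *)e; /* valid: pqelem is first member */
		(void)it;
	}

	pq_unregister_element(&pq, &item.pqelem);
	priority_queue_des(&pq);
}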

> diff --git a/platform/linux-generic/include/odp_timer_internal.h

> b/platform/linux-generic/include/odp_timer_internal.h

> index ad28f53..461f28c 100644

> --- a/platform/linux-generic/include/odp_timer_internal.h

> +++ b/platform/linux-generic/include/odp_timer_internal.h

> @@ -1,4 +1,4 @@

> -/* Copyright (c) 2013, Linaro Limited

> +/* Copyright (c) 2014, Linaro Limited

>   * All rights reserved.

>   *

>   * SPDX-License-Identifier:     BSD-3-Clause

> @@ -8,72 +8,51 @@

>  /**

>   * @file

>   *

> - * ODP timer timeout descriptor - implementation internal

> + * ODP timeout descriptor - implementation internal

>   */

> 

>  #ifndef ODP_TIMER_INTERNAL_H_

>  #define ODP_TIMER_INTERNAL_H_

> 

> -#ifdef __cplusplus

> -extern "C" {

> -#endif

> -

> -#include <odp_std_types.h>

> -#include <odp_queue.h>

> -#include <odp_buffer.h>

> +#include <odp_align.h>

> +#include <odp_debug.h>

>  #include <odp_buffer_internal.h>

>  #include <odp_buffer_pool_internal.h>

>  #include <odp_timer.h>

> 

> -struct timeout_t;

> -

> -typedef struct timeout_t {

> -	struct timeout_t *next;

> -	int               timer_id;

> -	int               tick;

> -	uint64_t          tmo_tick;

> -	odp_queue_t       queue;

> -	odp_buffer_t      buf;

> -	odp_buffer_t      tmo_buf;

> -} timeout_t;

> -

> -

> -struct odp_timeout_hdr_t;

> -

>  /**

> - * Timeout notification header

> + * Internal Timeout header

>   */

> -typedef struct odp_timeout_hdr_t {

> +typedef struct {

> +	/* common buffer header */

>  	odp_buffer_hdr_t buf_hdr;

> 

> -	timeout_t meta;

> -

> -	uint8_t buf_data[];

> +	/* Requested expiration time */

> +	uint64_t expiration;

> +	/* User ptr inherited from parent timer */

> +	void *user_ptr;

> +	/* Parent timer */

> +	odp_timer_t timer;

> +	/* Tag inherited from parent timer at time of expiration */

> +	uint32_t tag;

> +	/* Gen-cnt inherited from parent timer at time of creation */

> +	uint16_t gencnt;

> +	uint16_t pad;

> +	uint8_t buf_data[0];

>  } odp_timeout_hdr_t;

> 

> -

> -

>  ODP_STATIC_ASSERT(sizeof(odp_timeout_hdr_t) ==

> -	   ODP_OFFSETOF(odp_timeout_hdr_t, buf_data),

> -	   "ODP_TIMEOUT_HDR_T__SIZE_ERR");

> -

> +		  ODP_OFFSETOF(odp_timeout_hdr_t, buf_data),

> +		  "sizeof(odp_timeout_hdr_t) ==

> ODP_OFFSETOF(odp_timeout_hdr_t, buf_data)");

>  ODP_STATIC_ASSERT(sizeof(odp_timeout_hdr_t) % sizeof(uint64_t) == 0,

> -	   "ODP_TIMEOUT_HDR_T__SIZE_ERR2");

> -

> +		  "sizeof(odp_timeout_hdr_t) % sizeof(uint64_t) == 0");

> 

>  /**

> - * Return timeout header

> + * Return the timeout header

>   */

> -static inline odp_timeout_hdr_t *odp_timeout_hdr(odp_timeout_t tmo)

> +static inline odp_timeout_hdr_t *odp_timeout_hdr(odp_buffer_t buf)

>  {

> -	odp_buffer_hdr_t *buf_hdr = odp_buf_to_hdr((odp_buffer_t)tmo);

> -	return (odp_timeout_hdr_t *)(uintptr_t)buf_hdr;

> -}

> -

> -

> -

> -#ifdef __cplusplus

> +	return (odp_timeout_hdr_t *)odp_buf_to_hdr(buf);

>  }

> -#endif

> 

>  #endif

> diff --git a/platform/linux-generic/odp_priority_queue.c

> b/platform/linux-generic/odp_priority_queue.c

> new file mode 100644

> index 0000000..b72c26f

> --- /dev/null

> +++ b/platform/linux-generic/odp_priority_queue.c

> @@ -0,0 +1,283 @@

> +#define NDEBUG /* Enabled by default by ODP build system */

> +#include <assert.h>

> +#include <unistd.h>

> +#include <stdlib.h>

> +#include <string.h>

> +#include <strings.h>

> +#include <odp_hints.h>

> +#include <odp_align.h>

> +#include <odp_debug.h>

> +

> +#include "odp_priority_queue_internal.h"

> +

> +

> +#define NUM_CHILDREN 4

> +#define CHILD(n) (NUM_CHILDREN * (n) + 1)

> +#define PARENT(n) (((n) - 1) / NUM_CHILDREN)

> +

> +/* Internal nodes in the array */

> +typedef struct heap_node {

> +	pq_element *elem;

> +	/* Copy of elem->prio so we avoid unnecessary dereferencing */

> +	pq_priority_t prio;

> +} heap_node;

> +

> +static void pq_assert_heap(priority_queue *this);

> +

> +#define ALIGNMENT(p) (1U << ((unsigned)ffs((int)p) - 1U))

> +

> +void priority_queue_con(priority_queue *this, uint32_t _max_elems)

> +{

> +	this->max_elems = _max_elems;

> +	this->reg_elems = 0;

> +	this->num_elems = 0;

> +	this->org_ptr = malloc((_max_elems + 64 / sizeof(heap_node)) *

> +			       sizeof(heap_node));

> +	if (odp_unlikely(this->org_ptr == NULL)) {

> +		ODP_ERR("malloc failed\n");

> +		abort();

> +	}

> +	this->heap = this->org_ptr;

> +	assert((size_t)&this->heap[1] % 8 == 0);

> +	/* Increment base address until first child (index 1) is cache line

> */

> +	/* aligned and thus all children (e.g. index 1-4) stored in the */

> +	/* same cache line. We are not interested in the alignment of */

> +	/* heap[0] as this is a lone node */

> +	while ((size_t)&this->heap[1] % ODP_CACHE_LINE_SIZE != 0) {

> +		/* Cast to ptr to struct member with the greatest alignment

> */

> +		/* requirement */

> +		this->heap = (heap_node *)((pq_priority_t *)this->heap + 1);

> +	}

> +	pq_assert_heap(this);

> +}
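
The alignment loop above relies on heap_node being 16 bytes (a pointer plus a
64-bit priority) and on 64-byte cache lines, so once heap[1] is cache line
aligned the four children of any node occupy exactly one line; the extra 64
bytes added to the malloc give the loop room to slide. A small sanity-check
sketch of that reasoning (not part of the patch, assumes an LP64 platform):

static void check_heap_layout(const priority_queue *pq)
{
	/* pointer + 64-bit priority => 16 bytes on LP64 */
	assert(sizeof(heap_node) == 2 * sizeof(uint64_t));
	/* priority_queue_con() slid the base until heap[1] was aligned */
	assert((uintptr_t)&pq->heap[1] % ODP_CACHE_LINE_SIZE == 0);
	/* children of node n are CHILD(n)..CHILD(n)+3, e.g. 1..4 for n = 0, */
	/* i.e. 4 * 16 = 64 bytes starting at a cache line boundary */
	assert(CHILD(0) == 1 && PARENT(4) == 0 && PARENT(5) == 1);
}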

> +

> +void priority_queue_des(priority_queue *this)

> +{

> +	pq_assert_heap(this);

> +	free(this->org_ptr);

> +}

> +

> +#ifndef NDEBUG

> +static uint32_t

> +pq_assert_elem(priority_queue *this, uint32_t index, bool recurse)

> +{

> +	uint32_t num = 1;

> +	const pq_element *elem = this->heap[index].elem;

> +	assert(elem->index == index);

> +	assert(elem->prio == this->heap[index].prio);

> +	uint32_t child = CHILD(index);

> +	uint32_t i;

> +	for (i = 0; i < NUM_CHILDREN; i++, child++) {

> +		if (valid_index(this, child)) {

> +			assert(this->heap[child].elem != NULL);

> +			assert(this->heap[child].prio >= elem->prio);

> +			if (recurse)

> +				num += pq_assert_elem(this, child, recurse);

> +		}

> +	}

> +	return num;

> +}

> +#endif

> +

> +static void

> +pq_assert_heap(priority_queue *this)

> +{

> +	(void)this;

> +#ifndef NDEBUG

> +	uint32_t num = 0;

> +	if (odp_likely(this->num_elems != 0)) {

> +		assert(this->heap[0].elem != NULL);

> +		num += pq_assert_elem(this, 0, true);

> +	}

> +	assert(num == this->num_elems);

> +	unsigned i;

> +	for (i = 0; i < this->num_elems; i++) {

> +		assert(this->heap[i].elem != NULL);

> +		assert(this->heap[i].prio != INVALID_PRIORITY);

> +	}

> +#endif

> +}

> +

> +/* Bubble up to proper position */

> +void

> +pq_bubble_up(priority_queue *this, pq_element *elem)

> +{

> +	assert(this->heap[elem->index].elem == elem);

> +	assert(this->heap[elem->index].prio == elem->prio);

> +	uint32_t current = elem->index;

> +	pq_priority_t prio = elem->prio;

> +	assert(current == 0 || this->heap[PARENT(current)].elem != NULL);

> +	/* Move up into proper position */

> +	while (current != 0 && this->heap[PARENT(current)].prio > prio) {

> +		uint32_t parent = PARENT(current);

> +		assert(this->heap[parent].elem != NULL);

> +		/* Swap current with parent */

> +		/* 1) Move parent down */

> +		this->heap[current].elem = this->heap[parent].elem;

> +		this->heap[current].prio = this->heap[parent].prio;

> +		this->heap[current].elem->index = current;

> +		/* 2) Move current up to parent */

> +		this->heap[parent].elem = elem;

> +		this->heap[parent].prio = prio;

> +		this->heap[parent].elem->index = parent;

> +		/* Continue moving elem until it is in the right place */

> +		current = parent;

> +	}

> +	pq_assert_heap(this);

> +}

> +

> +/* Find the smallest child that is smaller than the specified priority

> */

> +/* Very hot function, can we decrease the number of cache misses? */

> +uint32_t pq_smallest_child(priority_queue *this,

> +			   uint32_t index,

> +			   pq_priority_t val)

> +{

> +	uint32_t smallest = index;

> +	uint32_t child = CHILD(index);

> +#if NUM_CHILDREN == 4

> +	/* Unroll loop when all children exist */

> +	if (odp_likely(valid_index(this, child + 3))) {

> +		if (this->heap[child + 0].prio < val)

> +			val = this->heap[smallest = child + 0].prio;

> +		if (this->heap[child + 1].prio < val)

> +			val = this->heap[smallest = child + 1].prio;

> +		if (this->heap[child + 2].prio < val)

> +			val = this->heap[smallest = child + 2].prio;

> +		if (this->heap[child + 3].prio < val)

> +			(void)this->heap[smallest = child + 3].prio;

> +		return smallest;

> +	}

> +#endif

> +	uint32_t i;

> +	for (i = 0; i < NUM_CHILDREN; i++) {

> +		if (odp_unlikely(!valid_index(this, child + i)))

> +			break;

> +		if (this->heap[child + i].prio < val) {

> +			smallest = child + i;

> +			val = this->heap[smallest].prio;

> +		}

> +	}

> +	return smallest;

> +}

> +

> +/* Very hot function, can it be optimised? */

> +void

> +pq_bubble_down(priority_queue *this, pq_element *elem)

> +{

> +	assert(this->heap[elem->index].elem == elem);

> +	assert(this->heap[elem->index].prio == elem->prio);

> +	uint32_t current = elem->index;

> +	pq_priority_t prio = elem->prio;

> +	for (;;) {

> +		uint32_t child = pq_smallest_child(this, current, prio);

> +		if (current == child) {

> +			/* No smaller child, we are done */

> +			pq_assert_heap(this);

> +			return;

> +		}

> +		/* Element larger than smaller child, must move down */

> +		assert(this->heap[child].elem != NULL);

> +		/* 1) Move child up to current */

> +		this->heap[current].elem = this->heap[child].elem;

> +		this->heap[current].prio = this->heap[child].prio;

> +		/* 2) Move current down to child */

> +		this->heap[child].elem = elem;

> +		this->heap[child].prio = prio;

> +		this->heap[child].elem->index = child;

> +

> +		this->heap[current].elem->index = current; /* cache misses!

> */

> +		/* Continue moving element until it is in the right place */

> +		current = child;

> +	}

> +}

> +

> +bool

> +pq_register_element(priority_queue *this, pq_element *elem)

> +{

> +	if (odp_likely(this->reg_elems < this->max_elems)) {

> +		elem->pq = this;

> +		this->reg_elems++;

> +		return true;

> +	}

> +	return false;

> +}

> +

> +void

> +pq_unregister_element(priority_queue *this, pq_element *elem)

> +{

> +	assert(elem->pq == this);

> +	if (is_active(elem))

> +		pq_deactivate_element(this, elem);

> +	this->reg_elems--;

> +}

> +

> +void

> +pq_activate_element(priority_queue *this, pq_element *elem,

> pq_priority_t prio)

> +{

> +	assert(elem->index == INVALID_INDEX);

> +	/* Insert element at end */

> +	uint32_t index = this->num_elems++;

> +	this->heap[index].elem = elem;

> +	this->heap[index].prio = prio;

> +	elem->index = index;

> +	elem->prio = prio;

> +	pq_bubble_up(this, elem);

> +}

> +

> +void

> +pq_deactivate_element(priority_queue *this, pq_element *elem)

> +{

> +	assert(elem->pq == this);

> +	if (odp_likely(is_active(elem))) {

> +		/* Swap element with last element */

> +		uint32_t current = elem->index;

> +		uint32_t last = --this->num_elems;

> +		if (odp_likely(last != current)) {

> +			/* Move last element to current */

> +			this->heap[current].elem = this->heap[last].elem;

> +			this->heap[current].prio = this->heap[last].prio;

> +			this->heap[current].elem->index = current;

> +			/* Bubble down old 'last' element to its proper

> place*/

> +			if (this->heap[current].prio < elem->prio)

> +				pq_bubble_up(this, this->heap[current].elem);

> +			else

> +				pq_bubble_down(this, this->heap[current].elem);

> +		}

> +		elem->index = INVALID_INDEX;

> +		pq_assert_heap(this);

> +	}

> +}

> +

> +void

> +pq_reset_element(priority_queue *this, pq_element *elem, pq_priority_t

> prio)

> +{

> +	assert(prio != INVALID_PRIORITY);

> +	if (odp_likely(is_active(elem))) {

> +		assert(prio >= elem->prio);

> +		elem->prio = prio;

> +		this->heap[elem->index].prio = prio;/* cache misses here! */

> +		pq_bubble_down(this, elem);

> +		pq_assert_heap(this);

> +	} else {

> +		pq_activate_element(this, elem, prio);

> +	}

> +}

> +

> +pq_priority_t pq_first_priority(const priority_queue *this)

> +{

> +	return this->num_elems != 0 ? this->heap[0].prio :

> INVALID_PRIORITY;

> +}

> +

> +pq_element *

> +pq_release_element(priority_queue *this, pq_priority_t threshold)

> +{

> +	if (odp_likely(this->num_elems != 0 &&

> +		       this->heap[0].prio <= threshold)) {

> +		pq_element *elem = this->heap[0].elem;

> +		/* Remove element from heap */

> +		pq_deactivate_element(this, elem);

> +		assert(elem->prio <= threshold);

> +		return elem;

> +	}

> +	return NULL;

> +}

> diff --git a/platform/linux-generic/odp_timer.c b/platform/linux-

> generic/odp_timer.c

> index 313c713..0e5071c 100644

> --- a/platform/linux-generic/odp_timer.c

> +++ b/platform/linux-generic/odp_timer.c

> @@ -4,428 +4,713 @@

>   * SPDX-License-Identifier:     BSD-3-Clause

>   */

> 

> -#include <odp_timer.h>

> -#include <odp_timer_internal.h>

> -#include <odp_time.h>

> -#include <odp_buffer_pool_internal.h>

> -#include <odp_internal.h>

> -#include <odp_atomic.h>

> -#include <odp_spinlock.h>

> -#include <odp_sync.h>

> -#include <odp_debug.h>

> -

> -#include <signal.h>

> -#include <time.h>

> +/**

> + * @file

> + *

> + * ODP timer service

> + *

> + */

> 

> +#include <assert.h>

> +#include <errno.h>

>  #include <string.h>

> -

> -#define NUM_TIMERS    1

> -#define MAX_TICKS     1024

> -#define MAX_RES       ODP_TIME_SEC

> -#define MIN_RES       (100*ODP_TIME_USEC)

> -

> -

> -typedef struct {

> -	odp_spinlock_t lock;

> -	timeout_t      *list;

> -} tick_t;

> -

> -typedef struct {

> -	int               allocated;

> -	volatile int      active;

> -	volatile uint64_t cur_tick;

> -	timer_t           timerid;

> -	odp_timer_t       timer_hdl;

> -	odp_buffer_pool_t pool;

> -	uint64_t          resolution_ns;

> -	uint64_t          max_ticks;

> -	tick_t            tick[MAX_TICKS];

> -

> -} timer_ring_t;

> -

> -typedef struct {

> -	odp_spinlock_t lock;

> -	int            num_timers;

> -	timer_ring_t   timer[NUM_TIMERS];

> -

> -} timer_global_t;

> -

> -/* Global */

> -static timer_global_t odp_timer;

> -

> -static void add_tmo(tick_t *tick, timeout_t *tmo)

> +#include <stdlib.h>

> +#include <time.h>

> +#include <signal.h>

> +#include "odp_std_types.h"

> +#include "odp_buffer.h"

> +#include "odp_buffer_pool.h"

> +#include "odp_queue.h"

> +#include "odp_hints.h"

> +#include "odp_sync.h"

> +#include "odp_ticketlock.h"

> +#include "odp_debug.h"

> +#include "odp_align.h"

> +#include "odp_shared_memory.h"

> +#include "odp_hints.h"

> +#include "odp_internal.h"

> +#include "odp_time.h"

> +#include "odp_timer.h"

> +#include "odp_timer_internal.h"

> +#include "odp_priority_queue_internal.h"

> +

> +/***********************************************************************

> *******

> + * Translation between timeout and timeout header

> +

> *************************************************************************

> ****/

> +

> +static inline odp_timeout_hdr_t *odp_tmo_to_hdr(odp_timer_tmo_t tmo)

>  {

> -	odp_spinlock_lock(&tick->lock);

> -

> -	tmo->next  = tick->list;

> -	tick->list = tmo;

> +	odp_buffer_t buf = odp_buffer_from_timeout(tmo);

> +	odp_timeout_hdr_t *tmo_hdr = (odp_timeout_hdr_t

> *)odp_buf_to_hdr(buf);

> +	return tmo_hdr;

> +}

> 

> -	odp_spinlock_unlock(&tick->lock);

> +/***********************************************************************

> *******

> + * odp_timer abstract datatype

> +

> *************************************************************************

> ****/

> +

> +typedef struct odp_timer_s {

> +	pq_element pqelem;/* Base class */

> +	uint64_t req_tmo;/* Requested timeout tick */

> +	odp_buffer_t tmo_buf;/* ODP_BUFFER_INVALID if timeout enqueued */

> +	odp_queue_t queue;/* ODP_QUEUE_INVALID if timer is free */

> +	uint32_t tag;/* Reusing tag as next pointer/index when timer is

> free */

> +	uint16_t gencnt;/* Smaller to make place for user_buf flag */

> +	unsigned int user_buf:1; /* User-defined buffer? */

> +} odp_timer;

> +

> +/* Constructor */

> +static inline void odp_timer_con(odp_timer *this)

> +{

> +	pq_element_con(&this->pqelem);

> +	this->tmo_buf = ODP_BUFFER_INVALID;

> +	this->queue = ODP_QUEUE_INVALID;

> +	this->gencnt = 0;

>  }

> 

> -static timeout_t *rem_tmo(tick_t *tick)

> +/* Destructor */

> +static inline void odp_timer_des(odp_timer *this)

>  {

> -	timeout_t *tmo;

> +	assert(this->tmo_buf == ODP_BUFFER_INVALID);

> +	assert(this->queue == ODP_QUEUE_INVALID);

> +	pq_element_des(&this->pqelem);

> +}

> 

> -	odp_spinlock_lock(&tick->lock);

> +/* Setup when timer is allocated */

> +static void setup(odp_timer *this,

> +		  odp_queue_t _q,

> +		  void *_up,

> +		  odp_buffer_t _tmo)

> +{

> +	this->req_tmo = INVALID_PRIORITY;

> +	this->tmo_buf = _tmo;

> +	this->queue = _q;

> +	this->tag = 0;

> +	this->user_buf = false;

> +	/* Initialise constant fields of timeout event */

> +	odp_timeout_hdr_t *tmo_hdr =

> +		odp_tmo_to_hdr(odp_timeout_from_buffer(this->tmo_buf));

> +	tmo_hdr->gencnt = this->gencnt;

> +	tmo_hdr->timer = this;

> +	tmo_hdr->user_ptr = _up;

> +	/* tmo_hdr->tag set at expiration time */

> +	/* tmo_hdr->expiration set at expiration time */

> +	assert(this->queue != ODP_QUEUE_INVALID);

> +}

> 

> -	tmo = tick->list;

> +/* Teardown when timer is freed */

> +static odp_buffer_t teardown(odp_timer *this)

> +{

> +	/* Increase generation count to make any pending timeout(s)

> orphaned */

> +	++this->gencnt;

> +	odp_buffer_t buf = this->tmo_buf;

> +	this->tmo_buf = ODP_BUFFER_INVALID;

> +	this->queue = ODP_QUEUE_INVALID;

> +	return buf;

> +}

> 

> -	if (tmo)

> -		tick->list = tmo->next;

> +static inline uint32_t get_next_free(odp_timer *this)

> +{

> +	assert(this->queue == ODP_QUEUE_INVALID);

> +	return this->tag;

> +}

> 

> -	odp_spinlock_unlock(&tick->lock);

> +static inline void set_next_free(odp_timer *this, uint32_t nf)

> +{

> +	assert(this->queue == ODP_QUEUE_INVALID);

> +	this->tag = nf;

> +}

> 

> -	if (tmo)

> -		tmo->next = NULL;

> +/***********************************************************************

> *******

> + * odp_timer_pool abstract datatype

> + * Includes timer alloc and free

> +

> *************************************************************************

> ****/

> +

> +typedef struct odp_timer_pool_s {

> +	priority_queue pq;

> +	uint64_t cur_tick;/* Current tick value */

> +	uint64_t min_tick;/* Current expiration lower bound */

> +	uint64_t max_tick;/* Current expiration higher bound */

> +	bool shared;

> +	odp_ticketlock_t lock;

> +	const char *name;

> +	odp_buffer_pool_t buf_pool;

> +	uint64_t resolution_ns;

> +	uint64_t min_tmo_tck;

> +	uint64_t max_tmo_tck;

> +	odp_timer *timers;

> +	uint32_t num_alloc;/* Current number of allocated timers */

> +	uint32_t max_timers;/* Max number of timers */

> +	uint32_t first_free;/* 0..max_timers-1 => free timer */

> +	timer_t timerid;

> +	odp_timer_clk_src_t clk_src;

> +} odp_timer_pool;

> +

> +/* Forward declarations */

> +static void timer_init(odp_timer_pool *tp);

> +static void timer_exit(odp_timer_pool *tp);

> +

> +static void odp_timer_pool_con(odp_timer_pool *this,

> +			       const char *_n,

> +			       odp_buffer_pool_t _bp,

> +			       uint64_t _r,

> +			       uint64_t _mint,

> +			       uint64_t _maxt,

> +			       uint32_t _mt,

> +			       bool _s,

> +			       odp_timer_clk_src_t _cs)

> +{

> +	priority_queue_con(&this->pq, _mt);

> +	this->cur_tick = 0;

> +	this->shared = _s;

> +	this->name = strdup(_n);

> +	this->buf_pool = _bp;

> +	this->resolution_ns = _r;

> +	this->min_tmo_tck = odp_timer_ns_to_tick(this, _mint);

> +	this->max_tmo_tck = odp_timer_ns_to_tick(this, _maxt);

> +	this->min_tick = this->cur_tick + this->min_tmo_tck;

> +	this->max_tick = this->cur_tick + this->max_tmo_tck;

> +	this->num_alloc = 0;

> +	this->max_timers = _mt;

> +	this->first_free = 0;

> +	this->clk_src = _cs;

> +	this->timers = malloc(sizeof(odp_timer) * this->max_timers);

> +	if (this->timers == NULL)

> +		ODP_ABORT("%s: malloc failed\n", _n);

> +	uint32_t i;

> +	for (i = 0; i < this->max_timers; i++)

> +		odp_timer_con(&this->timers[i]);

> +	for (i = 0; i < this->max_timers; i++)

> +		set_next_free(&this->timers[i], i + 1);

> +	odp_ticketlock_init(&this->lock);

> +	if (this->clk_src == ODP_CLOCK_CPU)

> +		timer_init(this);

> +	/* Make sure timer pool initialisation is globally observable */

> +	/* before we return a pointer to it */

> +	odp_sync_stores();

> +}

> 

> -	return tmo;

> +static odp_timer_pool *odp_timer_pool_new(

> +	const char *_n,

> +	odp_buffer_pool_t _bp,

> +	uint64_t _r,

> +	uint64_t _mint,

> +	uint64_t _maxt,

> +	uint32_t _mt,

> +	bool _s,

> +	odp_timer_clk_src_t _cs)

> +{

> +	odp_timer_pool *this = malloc(sizeof(odp_timer_pool));

> +	if (odp_unlikely(this == NULL))

> +		ODP_ABORT("%s: timer pool malloc failed\n", _n);

> +	odp_timer_pool_con(this, _n, _bp, _r, _mint, _maxt, _mt, _s, _cs);

> +	return this;

>  }

> 

> -/**

> - * Search and delete tmo entry from timeout list

> - * return -1 : on error.. handle not in list

> - *		0 : success

> - */

> -static int find_and_del_tmo(timeout_t **tmo, odp_timer_tmo_t handle)

> +static void odp_timer_pool_des(odp_timer_pool *this)

>  {

> -	timeout_t *cur, *prev;

> -	prev = NULL;

> +	if (this->shared)

> +		odp_ticketlock_lock(&this->lock);

> +	if (this->num_alloc != 0) {

> +		/* It's a programming error to attempt to destroy a */

> +		/* timer pool which is still in use */

> +		ODP_ABORT("%s: timers in use\n", this->name);

> +	}

> +	if (this->clk_src == ODP_CLOCK_CPU)

> +		timer_exit(this);

> +	uint32_t i;

> +	for (i = 0; i < this->max_timers; i++)

> +		odp_timer_des(&this->timers[i]);

> +	free(this->timers);

> +	priority_queue_des(&this->pq);

> +	odp_sync_stores();

> +}

> 

> -	for (cur = *tmo; cur != NULL; prev = cur, cur = cur->next) {

> -		if (cur->tmo_buf == handle) {

> -			if (prev == NULL)

> -				*tmo = cur->next;

> -			else

> -				prev->next = cur->next;

> +static void odp_timer_pool_del(odp_timer_pool *this)

> +{

> +	odp_timer_pool_des(this);

> +	free(this);

> +}

> 

> -			break;

> +static inline odp_timer *timer_alloc(odp_timer_pool *this,

> +				     odp_queue_t queue,

> +				     void *user_ptr,

> +				     odp_buffer_t tmo_buf)

> +{

> +	odp_timer *tim = ODP_TIMER_INVALID;

> +	if (odp_likely(this->shared))

> +		odp_ticketlock_lock(&this->lock);

> +	if (odp_likely(this->num_alloc < this->max_timers)) {

> +		this->num_alloc++;

> +		/* Remove first unused timer from free list */

> +		assert(this->first_free != this->max_timers);

> +		tim = &this->timers[this->first_free];

> +		this->first_free = get_next_free(tim);

> +		/* Insert timer into priority queue */

> +		if (odp_unlikely(!pq_register_element(&this->pq,

> +						      &tim->pqelem))) {

> +			/* Unexpected internal error */

> +			abort();

>  		}

> +		/* Create timer */

> +		setup(tim, queue, user_ptr, tmo_buf);

> +	} else {

> +		errno = ENFILE; /* Reusing file table overflow */

>  	}

> -

> -	if (!cur)

> -		/* couldn't find tmo in list */

> -		return -1;

> -

> -	/* application to free tmo_buf provided by absolute_tmo call */

> -	return 0;

> +	if (odp_likely(this->shared))

> +		odp_ticketlock_unlock(&this->lock);

> +	return tim;

>  }

> 

> -int odp_timer_cancel_tmo(odp_timer_t timer_hdl, odp_timer_tmo_t tmo)

> +static inline void timer_free(odp_timer_pool *this, odp_timer *tim)

>  {

> -	int id;

> -	int tick_idx;

> -	timeout_t *cancel_tmo;

> -	odp_timeout_hdr_t *tmo_hdr;

> -	tick_t *tick;

> -

> -	/* get id */

> -	id = (int)timer_hdl - 1;

> -

> -	tmo_hdr = odp_timeout_hdr((odp_timeout_t) tmo);

> -	/* get tmo_buf to cancel */

> -	cancel_tmo = &tmo_hdr->meta;

> +	if (odp_likely(this->shared))

> +		odp_ticketlock_lock(&this->lock);

> +	if (odp_unlikely(tim->queue == ODP_QUEUE_INVALID))

> +		ODP_ABORT("Invalid timer %p\n", tim);

> +	/* Destroy timer */

> +	odp_buffer_t buf = teardown(tim);

> +	/* Remove timer from priority queue */

> +	pq_unregister_element(&this->pq, &tim->pqelem);

> +	/* Insert timer into free list */

> +	set_next_free(tim, this->first_free);

> +	this->first_free = tim - &this->timers[0];

> +	assert(this->num_alloc != 0);

> +	this->num_alloc--;

> +	if (odp_likely(this->shared))

> +		odp_ticketlock_unlock(&this->lock);

> +	if (buf != ODP_BUFFER_INVALID)

> +		odp_buffer_free(buf);

> +}

> 

> -	tick_idx = cancel_tmo->tick;

> -	tick = &odp_timer.timer[id].tick[tick_idx];

> +/***********************************************************************

> *******

> + * Operations on timers

> + * reset/reset_w_buf/cancel timer, return timeout

> +

> *************************************************************************

> ****/

> 

> -	odp_spinlock_lock(&tick->lock);

> -	/* search and delete tmo from tick list */

> -	if (find_and_del_tmo(&tick->list, tmo) != 0) {

> -		odp_spinlock_unlock(&tick->lock);

> -		ODP_DBG("Couldn't find the tmo (%d) in tick list\n",

> (int)tmo);

> -		return -1;

> +static inline void timer_expire(odp_timer *tim)

> +{

> +	assert(tim->req_tmo != INVALID_PRIORITY);

> +	/* Timer expired, is there actually any timeout event */

> +	/* we can enqueue? */

> +	if (odp_likely(tim->tmo_buf != ODP_BUFFER_INVALID)) {

> +		/* Swap out timeout buffer */

> +		odp_buffer_t buf = tim->tmo_buf;

> +		tim->tmo_buf = ODP_BUFFER_INVALID;

> +		if (odp_likely(!tim->user_buf)) {

> +			odp_timeout_hdr_t *tmo_hdr =

> +				odp_tmo_to_hdr(odp_timeout_from_buffer(buf));

> +			/* Copy tag and requested expiration tick from timer

> */

> +			tmo_hdr->tag = tim->tag;

> +			tmo_hdr->expiration = tim->req_tmo;

> +		}

> +		/* Else don't touch user-defined buffer */

> +		int rc = odp_queue_enq(tim->queue, buf);

> +		if (odp_unlikely(rc != 0))

> +			ODP_ABORT("Failed to enqueue timeout buffer (%d)\n",

> +				  rc);

> +		/* Mark timer as inactive */

> +		tim->req_tmo = INVALID_PRIORITY;

>  	}

> -	odp_spinlock_unlock(&tick->lock);

> -

> -	return 0;

> +	/* No, timeout event already enqueued or unavailable */

> +	/* Keep timer active, odp_timer_return_tmo() will patch up */

>  }

> 

> -static void notify_function(union sigval sigval)

> +static odp_timer_set_t timer_reset(odp_timer_pool *tp,

> +				   odp_timer *tim,

> +				   uint64_t abs_tck)

>  {

> -	uint64_t cur_tick;

> -	timeout_t *tmo;

> -	tick_t *tick;

> -	timer_ring_t *timer;

> +	assert(tim->user_buf == false);

> +	if (odp_unlikely(abs_tck < tp->min_tick))

> +		return ODP_TIMER_SET_TOOEARLY;

> +	if (odp_unlikely(abs_tck > tp->max_tick))

> +		return ODP_TIMER_SET_TOOLATE;

> +

> +	if (odp_likely(tp->shared))

> +		odp_ticketlock_lock(&tp->lock);

> +

> +	if (odp_unlikely(tim->queue == ODP_QUEUE_INVALID))

> +		ODP_ABORT("Invalid timer %p\n", tim);

> +	if (odp_unlikely(tim->user_buf))

> +		ODP_ABORT("Timer %p has user buffer\n", tim);

> +	/* Increase timer tag to make any pending timeout stale */

> +	tim->tag++;

> +	/* Save requested timeout */

> +	tim->req_tmo = abs_tck;

> +	/* Update timer position in priority queue */

> +	pq_reset_element(&tp->pq, &tim->pqelem, abs_tck);

> +

> +	if (odp_likely(tp->shared))

> +		odp_ticketlock_unlock(&tp->lock);

> +	return ODP_TIMER_SET_SUCCESS;

> +}

> 

> -	timer = sigval.sival_ptr;

> +static odp_timer_set_t timer_reset_w_buf(odp_timer_pool *tp,

> +		odp_timer *tim,

> +		uint64_t abs_tck,

> +		odp_buffer_t user_buf)

> +{

> +	if (odp_unlikely(abs_tck < tp->min_tick))

> +		return ODP_TIMER_SET_TOOEARLY;

> +	if (odp_unlikely(abs_tck > tp->max_tick))

> +		return ODP_TIMER_SET_TOOLATE;

> +

> +	if (odp_likely(tp->shared))

> +		odp_ticketlock_lock(&tp->lock);

> +

> +	if (odp_unlikely(tim->queue == ODP_QUEUE_INVALID))

> +		ODP_ABORT("Invalid timer %p\n", tim);

> +	/* Increase timer tag to make any pending timeout stale */

> +	tim->tag++;

> +	/* Save requested timeout */

> +	tim->req_tmo = abs_tck;

> +	/* Set flag indicating presence of user defined buffer */

> +	tim->user_buf = true;

> +	/* Swap in new buffer, save any old buffer pointer */

> +	odp_buffer_t old_buf = tim->tmo_buf;

> +	tim->tmo_buf = user_buf;

> +	/* Update timer position in priority queue */

> +	pq_reset_element(&tp->pq, &tim->pqelem, abs_tck);

> +

> +	if (odp_likely(tp->shared))

> +		odp_ticketlock_unlock(&tp->lock);

> +

> +	/* Free old buffer if present */

> +	if (odp_unlikely(old_buf != ODP_BUFFER_INVALID))

> +		odp_buffer_free(old_buf);

> +	return ODP_TIMER_SET_SUCCESS;

> +}
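
For reviewers wondering how the user-buffer variant differs in practice: the
buffer is delivered as-is on expiration, no tag/expiration is written into it
and it must not be passed to odp_timer_return_tmo(). A rough sketch (tp, q and
pkt_pool are made up; on a TOOEARLY/TOOLATE return the caller still owns the
buffer, as I read timer_reset_w_buf() above):

static void arm_with_user_buf(odp_timer_pool_t tp, odp_queue_t q,
			      odp_buffer_pool_t pkt_pool)
{
	odp_timer_t t = odp_timer_alloc(tp, q, NULL);
	odp_buffer_t user_buf = odp_buffer_alloc(pkt_pool);
	odp_timer_set_t rc =
		odp_timer_set_abs_w_buf(t, odp_timer_current_tick(tp) + 100,
					user_buf);
	if (rc != ODP_TIMER_SET_SUCCESS)
		odp_buffer_free(user_buf); /* arming failed, still ours */
}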

> 

> -	if (timer->active == 0) {

> -		ODP_DBG("Timer (%u) not active\n", timer->timer_hdl);

> -		return;

> +static inline void timer_cancel(odp_timer_pool *tp,

> +				odp_timer *tim)

> +{

> +	odp_buffer_t old_buf = ODP_BUFFER_INVALID;

> +	if (odp_likely(tp->shared))

> +		odp_ticketlock_lock(&tp->lock);

> +

> +	if (odp_unlikely(tim->queue == ODP_QUEUE_INVALID))

> +		ODP_ABORT("Invalid timer %p\n", tim);

> +	if (odp_unlikely(tim->user_buf)) {

> +		/* Swap out old user buffer */

> +		old_buf = tim->tmo_buf;

> +		tim->tmo_buf = ODP_BUFFER_INVALID;

> +		/* tim->user_buf stays true */

>  	}

> +	/* Else a normal timer (no user-defined buffer) */

> +	/* Increase timer tag to make any pending timeout stale */

> +	tim->tag++;

> +	/* Clear requested timeout, mark timer inactive */

> +	tim->req_tmo = INVALID_PRIORITY;

> +	/* Remove timer from the priority queue */

> +	pq_deactivate_element(&tp->pq, &tim->pqelem);

> +

> +	if (odp_likely(tp->shared))

> +		odp_ticketlock_unlock(&tp->lock);

> +	/* Free user-defined buffer if present */

> +	if (odp_unlikely(old_buf != ODP_BUFFER_INVALID))

> +		odp_buffer_free(old_buf);

> +}

> 

> -	/* ODP_DBG("Tick\n"); */

> -

> -	cur_tick = timer->cur_tick++;

> -

> -	odp_sync_stores();

> +static inline void timer_return(odp_timer_pool *tp,

> +				odp_timer *tim,

> +				odp_timer_tmo_t tmo,

> +				const odp_timeout_hdr_t *tmo_hdr)

> +{

> +	odp_buffer_t tmo_buf = odp_buffer_from_timeout(tmo);

> +	if (odp_likely(tp->shared))

> +		odp_ticketlock_lock(&tp->lock);

> +	if (odp_unlikely(tim->user_buf))

> +		ODP_ABORT("Timer %p has user-defined buffer\n", tim);

> +	if (odp_likely(tmo_hdr->gencnt == tim->gencnt)) {

> +		assert(tim->tmo_buf == ODP_BUFFER_INVALID);

> +		/* Save returned buffer for use when timer expires next time

> */

> +		tim->tmo_buf = tmo_buf;

> +		tmo_buf = ODP_BUFFER_INVALID;

> +		/* Check if timer is active and should have expired */

> +		if (odp_unlikely(tim->req_tmo != INVALID_PRIORITY &&

> +				 tim->req_tmo <= tp->cur_tick)) {

> +			/* Expire timer now since we have restored the timeout

> +			   buffer */

> +			timer_expire(tim);

> +		}

> +		/* Else timer inactive or expires in the future */

> +	}

> +	/* Else timeout orphaned, free buffer later */

> +	if (odp_likely(tp->shared))

> +		odp_ticketlock_unlock(&tp->lock);

> +	if (odp_unlikely(tmo_buf != ODP_BUFFER_INVALID))

> +		odp_buffer_free(tmo_buf);

> +}

> 

> -	tick = &timer->tick[cur_tick % MAX_TICKS];

> +/* Non-public so not in odp_timer.h but externally visible, must declare

> + * somewhere */

> +unsigned odp_timer_pool_expire(odp_timer_pool_t tpid, uint64_t tick);

> 

> -	while ((tmo = rem_tmo(tick)) != NULL) {

> -		odp_queue_t  queue;

> -		odp_buffer_t buf;

> +unsigned odp_timer_pool_expire(odp_timer_pool_t tpid, uint64_t tick)

> +{

> +	if (odp_likely(tpid->shared))

> +		odp_ticketlock_lock(&tpid->lock);

> +

> +	unsigned nexp = 0;

> +	odp_timer_t tim;

> +	tpid->cur_tick = tick;

> +	tpid->min_tick = tick + tpid->min_tmo_tck;

> +	tpid->max_tick = tick + tpid->max_tmo_tck;

> +	while ((tim = (odp_timer_t)pq_release_element(&tpid->pq, tick)) !=

> +	       ODP_TIMER_INVALID) {

> +		assert(get_prio(&tim->pqelem) <= tick);

> +		timer_expire(tim);

> +		nexp++;

> +	}

> 

> -		queue = tmo->queue;

> -		buf   = tmo->buf;

> +	if (odp_likely(tpid->shared))

> +		odp_ticketlock_unlock(&tpid->lock);

> +	return nexp;

> +}

> 

> -		if (buf != tmo->tmo_buf)

> -			odp_buffer_free(tmo->tmo_buf);

> +/***********************************************************************

> *******

> + * POSIX timer support

> + * Functions that use Linux/POSIX per-process timers and related

> facilities

> +

> *************************************************************************

> ****/

> 

> -		odp_queue_enq(queue, buf);

> -	}

> +static void timer_notify(sigval_t sigval)

> +{

> +	odp_timer_pool *tp = (odp_timer_pool *)sigval.sival_ptr;

> +	uint64_t new_tick = tp->cur_tick + 1;

> +	(void)odp_timer_pool_expire(tp, new_tick);

>  }

> 

> -static void timer_start(timer_ring_t *timer)

> +static void timer_init(odp_timer_pool *tp)

>  {

>  	struct sigevent   sigev;

>  	struct itimerspec ispec;

>  	uint64_t res, sec, nsec;

> 

> -	ODP_DBG("\nTimer (%u) starts\n", timer->timer_hdl);

> +	ODP_DBG("Creating POSIX timer for timer pool %s, period %"

> +		PRIu64" ns\n", tp->name, tp->resolution_ns);

> 

>  	memset(&sigev, 0, sizeof(sigev));

>  	memset(&ispec, 0, sizeof(ispec));

> 

>  	sigev.sigev_notify          = SIGEV_THREAD;

> -	sigev.sigev_notify_function = notify_function;

> -	sigev.sigev_value.sival_ptr = timer;

> +	sigev.sigev_notify_function = timer_notify;

> +	sigev.sigev_value.sival_ptr = tp;

> 

> -	if (timer_create(CLOCK_MONOTONIC, &sigev, &timer->timerid)) {

> -		ODP_DBG("Timer create failed\n");

> -		return;

> -	}

> +	if (timer_create(CLOCK_MONOTONIC, &sigev, &tp->timerid))

> +		ODP_ABORT("timer_create() returned error %s\n",

> +			  strerror(errno));

> 

> -	res  = timer->resolution_ns;

> +	res  = tp->resolution_ns;

>  	sec  = res / ODP_TIME_SEC;

> -	nsec = res - sec*ODP_TIME_SEC;

> +	nsec = res - sec * ODP_TIME_SEC;

> 

>  	ispec.it_interval.tv_sec  = (time_t)sec;

>  	ispec.it_interval.tv_nsec = (long)nsec;

>  	ispec.it_value.tv_sec     = (time_t)sec;

>  	ispec.it_value.tv_nsec    = (long)nsec;

> 

> -	if (timer_settime(timer->timerid, 0, &ispec, NULL)) {

> -		ODP_DBG("Timer set failed\n");

> -		return;

> -	}

> -

> -	return;

> +	if (timer_settime(tp->timerid, 0, &ispec, NULL))

> +		ODP_ABORT("timer_settime() returned error %s\n",

> +			  strerror(errno));

>  }

> 

> -int odp_timer_init_global(void)

> +static void timer_exit(odp_timer_pool *tp)

>  {

> -	ODP_DBG("Timer init ...");

> -

> -	memset(&odp_timer, 0, sizeof(timer_global_t));

> -

> -	odp_spinlock_init(&odp_timer.lock);

> -

> -	ODP_DBG("done\n");

> -

> -	return 0;

> +	if (timer_delete(tp->timerid) != 0)

> +		ODP_ABORT("timer_delete() returned error %s\n",

> +			  strerror(errno));

>  }

> 

> -int odp_timer_disarm_all(void)

> +/***********************************************************************

> *******

> + * Public API functions

> + * Some parameter checks and error messages

> + * No modifications of internal state

> +

> *************************************************************************

> ****/

> +odp_timer_pool_t

> +odp_timer_pool_create(const char *name,

> +		      odp_buffer_pool_t buf_pool,

> +		      uint64_t resolution_ns,

> +		      uint64_t min_timeout,

> +		      uint64_t max_timeout,

> +		      uint32_t num_timers,

> +		      bool shared,

> +		      odp_timer_clk_src_t clk_src)

>  {

> -	int timers;

> -	struct itimerspec ispec;

> -

> -	odp_spinlock_lock(&odp_timer.lock);

> -

> -	timers = odp_timer.num_timers;

> -

> -	ispec.it_interval.tv_sec  = 0;

> -	ispec.it_interval.tv_nsec = 0;

> -	ispec.it_value.tv_sec     = 0;

> -	ispec.it_value.tv_nsec    = 0;

> -

> -	for (; timers >= 0; timers--) {

> -		if (timer_settime(odp_timer.timer[timers].timerid,

> -				  0, &ispec, NULL)) {

> -			ODP_DBG("Timer reset failed\n");

> -			odp_spinlock_unlock(&odp_timer.lock);

> -			return -1;

> -		}

> -		odp_timer.num_timers--;

> -	}

> -

> -	odp_spinlock_unlock(&odp_timer.lock);

> -

> -	return 0;

> +	/* Verify that buffer pool can be used for timeouts */

> +	odp_buffer_t buf = odp_buffer_alloc(buf_pool);

> +	if (buf == ODP_BUFFER_INVALID)

> +		ODP_ABORT("%s: Failed to allocate buffer\n", name);

> +	if (odp_buffer_type(buf) != ODP_BUFFER_TYPE_TIMEOUT)

> +		ODP_ABORT("%s: Buffer pool wrong type\n", name);

> +	odp_buffer_free(buf);

> +	odp_timer_pool_t tp = odp_timer_pool_new(name, buf_pool,

> resolution_ns,

> +			      min_timeout, max_timeout, num_timers,

> +			      shared, clk_src);

> +	return tp;

>  }

> 

> -odp_timer_t odp_timer_create(const char *name, odp_buffer_pool_t pool,

> -			     uint64_t resolution_ns, uint64_t min_ns,

> -			     uint64_t max_ns)

> +void odp_timer_pool_start(void)

>  {

> -	uint32_t id;

> -	timer_ring_t *timer;

> -	odp_timer_t timer_hdl;

> -	int i;

> -	uint64_t max_ticks;

> -	(void) name;

> -

> -	if (resolution_ns < MIN_RES)

> -		resolution_ns = MIN_RES;

> -

> -	if (resolution_ns > MAX_RES)

> -		resolution_ns = MAX_RES;

> -

> -	max_ticks = max_ns / resolution_ns;

> -

> -	if (max_ticks > MAX_TICKS) {

> -		ODP_DBG("Maximum timeout too long: %"PRIu64" ticks\n",

> -			max_ticks);

> -		return ODP_TIMER_INVALID;

> -	}

> -

> -	if (min_ns < resolution_ns) {

> -		ODP_DBG("Min timeout %"PRIu64" ns < resolution %"PRIu64"

> ns\n",

> -			min_ns, resolution_ns);

> -		return ODP_TIMER_INVALID;

> -	}

> -

> -	odp_spinlock_lock(&odp_timer.lock);

> -

> -	if (odp_timer.num_timers >= NUM_TIMERS) {

> -		odp_spinlock_unlock(&odp_timer.lock);

> -		ODP_DBG("All timers allocated\n");

> -		return ODP_TIMER_INVALID;

> -	}

> -

> -	for (id = 0; id < NUM_TIMERS; id++) {

> -		if (odp_timer.timer[id].allocated == 0)

> -			break;

> -	}

> -

> -	timer = &odp_timer.timer[id];

> -	timer->allocated = 1;

> -	odp_timer.num_timers++;

> -

> -	odp_spinlock_unlock(&odp_timer.lock);

> -

> -	timer_hdl = id + 1;

> -

> -	timer->timer_hdl     = timer_hdl;

> -	timer->pool          = pool;

> -	timer->resolution_ns = resolution_ns;

> -	timer->max_ticks     = MAX_TICKS;

> -

> -	for (i = 0; i < MAX_TICKS; i++) {

> -		odp_spinlock_init(&timer->tick[i].lock);

> -		timer->tick[i].list = NULL;

> -	}

> -

> -	timer->active = 1;

> -	odp_sync_stores();

> -

> -	timer_start(timer);

> +	/* Nothing to do here, timer pools are started by the create call

> */

> +}

> 

> -	return timer_hdl;

> +void odp_timer_pool_destroy(odp_timer_pool_t tpid)

> +{

> +	odp_timer_pool_del(tpid);

>  }

> 

> -odp_timer_tmo_t odp_timer_absolute_tmo(odp_timer_t timer_hdl, uint64_t

> tmo_tick,

> -				       odp_queue_t queue, odp_buffer_t buf)

> +uint64_t odp_timer_tick_to_ns(odp_timer_pool_t tpid, uint64_t ticks)

>  {

> -	int id;

> -	uint64_t tick;

> -	uint64_t cur_tick;

> -	timeout_t *new_tmo;

> -	odp_buffer_t tmo_buf;

> -	odp_timeout_hdr_t *tmo_hdr;

> -	timer_ring_t *timer;

> +	return ticks * tpid->resolution_ns;

> +}

> 

> -	id = (int)timer_hdl - 1;

> -	timer = &odp_timer.timer[id];

> +uint64_t odp_timer_ns_to_tick(odp_timer_pool_t tpid, uint64_t ns)

> +{

> +	return (uint64_t)(ns / tpid->resolution_ns);

> +}

> 

> -	cur_tick = timer->cur_tick;

> -	if (tmo_tick <= cur_tick) {

> -		ODP_DBG("timeout too close\n");

> -		return ODP_TIMER_TMO_INVALID;

> -	}

> +uint64_t odp_timer_current_tick(odp_timer_pool_t tpid)

> +{

> +	return tpid->cur_tick;

> +}

> 

> -	if ((tmo_tick - cur_tick) > MAX_TICKS) {

> -		ODP_DBG("timeout too far: cur %"PRIu64" tmo %"PRIu64"\n",

> -			cur_tick, tmo_tick);

> -		return ODP_TIMER_TMO_INVALID;

> +uintptr_t odp_timer_pool_query_conf(odp_timer_pool_t tpid,

> +				    odp_timer_pool_conf_t item)

> +{

> +	switch (item) {

> +	case ODP_TIMER_NAME:

> +		return (uintptr_t)(tpid->name);

> +	case ODP_TIMER_RESOLUTION:

> +		return tpid->resolution_ns;

> +	case ODP_TIMER_MIN_TICKS:

> +		return tpid->min_tmo_tck;

> +	case ODP_TIMER_MAX_TICKS:

> +		return tpid->max_tmo_tck;

> +	case ODP_TIMER_NUM_TIMERS:

> +		return tpid->max_timers;

> +	case ODP_TIMER_SHARED:

> +		return tpid->shared;

> +	default:

> +		return 0;

>  	}

> +}

> 

> -	tick = tmo_tick % MAX_TICKS;

> -

> -	tmo_buf = odp_buffer_alloc(timer->pool);

> -	if (tmo_buf == ODP_BUFFER_INVALID) {

> -		ODP_DBG("tmo buffer alloc failed\n");

> -		return ODP_TIMER_TMO_INVALID;

> +odp_timer_t odp_timer_alloc(odp_timer_pool_t tpid,

> +			    odp_queue_t queue,

> +			    void *user_ptr)

> +{

> +	/* We check this because ODP_QUEUE_INVALID is used */

> +	/* to indicate a free timer */

> +	if (odp_unlikely(queue == ODP_QUEUE_INVALID))

> +		ODP_ABORT("%s: Invalid queue handle\n", tpid->name);

> +	odp_buffer_t tmo_buf = odp_buffer_alloc(tpid->buf_pool);

> +	if (odp_likely(tmo_buf != ODP_BUFFER_INVALID)) {

> +		odp_timer *tim = timer_alloc(tpid, queue, user_ptr,

> tmo_buf);

> +		if (odp_likely(tim != ODP_TIMER_INVALID)) {

> +			/* Success */

> +			assert(tim->queue != ODP_QUEUE_INVALID);

> +			return tim;

> +		}

> +		odp_buffer_free(tmo_buf);

>  	}

> +	/* Else failed to allocate timeout event */

> +	/* errno set by odp_buffer_alloc() or timer_alloc() */

> +	return ODP_TIMER_INVALID;

> +}

> 

> -	tmo_hdr = odp_timeout_hdr((odp_timeout_t) tmo_buf);

> -	new_tmo = &tmo_hdr->meta;

> -

> -	new_tmo->timer_id = id;

> -	new_tmo->tick     = (int)tick;

> -	new_tmo->tmo_tick = tmo_tick;

> -	new_tmo->queue    = queue;

> -	new_tmo->tmo_buf  = tmo_buf;

> -

> -	if (buf != ODP_BUFFER_INVALID)

> -		new_tmo->buf = buf;

> -	else

> -		new_tmo->buf = tmo_buf;

> -

> -	add_tmo(&timer->tick[tick], new_tmo);

> -

> -	return tmo_buf;

> +void odp_timer_free(odp_timer_t tim)

> +{

> +	odp_timer_pool *tp = (odp_timer_pool *)get_pq(&tim->pqelem);

> +	timer_free(tp, tim);

>  }

> 

> -uint64_t odp_timer_tick_to_ns(odp_timer_t timer_hdl, uint64_t ticks)

> +odp_timer_set_t odp_timer_set_abs_w_buf(odp_timer_t tim,

> +					uint64_t abs_tck,

> +					odp_buffer_t user_buf)

>  {

> -	uint32_t id;

> +	odp_timer_pool *tp = (odp_timer_pool *)get_pq(&tim->pqelem);

> +	odp_timer_set_t rc = timer_reset_w_buf(tp, tim, abs_tck, user_buf);

> +	return rc;

> +}

> 

> -	id = timer_hdl - 1;

> -	return ticks * odp_timer.timer[id].resolution_ns;

> +odp_timer_set_t odp_timer_set_abs(odp_timer_t tim, uint64_t abs_tck)

> +{

> +	odp_timer_pool *tp = (odp_timer_pool *)get_pq(&tim->pqelem);

> +	odp_timer_set_t rc = timer_reset(tp, tim, abs_tck);

> +	return rc;

>  }

> 

> -uint64_t odp_timer_ns_to_tick(odp_timer_t timer_hdl, uint64_t ns)

> +odp_timer_set_t odp_timer_set_rel_w_buf(odp_timer_t tim,

> +					uint64_t rel_tck,

> +					odp_buffer_t user_buf)

>  {

> -	uint32_t id;

> +	odp_timer_pool *tp = (odp_timer_pool *)get_pq(&tim->pqelem);

> +	odp_timer_set_t rc = timer_reset_w_buf(tp, tim, tp->cur_tick +

> rel_tck,

> +					       user_buf);

> +	return rc;

> +}

> 

> -	id = timer_hdl - 1;

> -	return ns / odp_timer.timer[id].resolution_ns;

> +odp_timer_set_t odp_timer_set_rel(odp_timer_t tim, uint64_t rel_tck)

> +{

> +	odp_timer_pool *tp = (odp_timer_pool *)get_pq(&tim->pqelem);

> +	odp_timer_set_t rc = timer_reset(tp, tim, tp->cur_tick + rel_tck);

> +	return rc;

>  }

> 

> -uint64_t odp_timer_resolution(odp_timer_t timer_hdl)

> +void odp_timer_cancel(odp_timer_t tim)

>  {

> -	uint32_t id;

> +	odp_timer_pool *tp = (odp_timer_pool *)get_pq(&tim->pqelem);

> +	timer_cancel(tp, tim);

> +}

> 

> -	id = timer_hdl - 1;

> -	return odp_timer.timer[id].resolution_ns;

> +void odp_timer_return_tmo(odp_timer_tmo_t tmo)

> +{

> +	const odp_timeout_hdr_t *tmo_hdr = odp_tmo_to_hdr(tmo);

> +	odp_timer *parent_tim = tmo_hdr->timer;

> +	odp_timer_pool *tp = (odp_timer_pool *)get_pq(&parent_tim->pqelem);

> +	timer_return(tp, parent_tim, tmo, tmo_hdr);

>  }

> 

> -uint64_t odp_timer_maximum_tmo(odp_timer_t timer_hdl)

> +odp_timer_tmo_status_t odp_timer_tmo_status(odp_timer_tmo_t tmo)

>  {

> -	uint32_t id;

> +	const odp_timeout_hdr_t *tmo_hdr = odp_tmo_to_hdr(tmo);

> +	odp_timer *parent_tim = tmo_hdr->timer;

> 

> -	id = timer_hdl - 1;

> -	return odp_timer.timer[id].max_ticks;

> +	if (odp_unlikely(tmo_hdr->gencnt != parent_tim->gencnt)) {

> +		/* Generation counters differ => timer has been freed */

> +		return ODP_TMO_ORPHAN;

> +	}

> +	/* Else generation counters match => parent timer exists */

> +

> +	if (odp_likely(parent_tim->tag == tmo_hdr->tag))

> +		return ODP_TMO_FRESH;

> +	else

> +		return ODP_TMO_STALE;

>  }
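
To spell out the gencnt/tag semantics above with a concrete sequence (tp and q
are assumed to exist already, the tick values are arbitrary):

static void tag_gencnt_example(odp_timer_pool_t tp, odp_queue_t q)
{
	odp_timer_t t = odp_timer_alloc(tp, q, NULL);
	odp_timer_set_abs(t, odp_timer_current_tick(tp) + 10);
	/* ...the timeout expires and is enqueued on 'q'... */
	odp_timer_set_abs(t, odp_timer_current_tick(tp) + 10); /* bumps tag */
	/* the already enqueued timeout now reports ODP_TMO_STALE */
	odp_timer_free(t); /* bumps gencnt */
	/* any timeout still in flight now reports ODP_TMO_ORPHAN */
}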

> 

> -uint64_t odp_timer_current_tick(odp_timer_t timer_hdl)

> +odp_timer_t odp_timer_handle(odp_timer_tmo_t tmo)

>  {

> -	uint32_t id;

> +	odp_timeout_hdr_t *tmo_hdr = odp_tmo_to_hdr(tmo);

> +	odp_timer_t parent_tim = tmo_hdr->timer;

> +	if (odp_likely(tmo_hdr->gencnt == parent_tim->gencnt))

> +		return parent_tim;

> +	else

> +		return ODP_TIMER_INVALID;

> +}

> 

> -	id = timer_hdl - 1;

> -	return odp_timer.timer[id].cur_tick;

> +uint64_t odp_timer_expiration(odp_timer_tmo_t tmo)

> +{

> +	odp_timeout_hdr_t *tmo_hdr = odp_tmo_to_hdr(tmo);

> +	return tmo_hdr->expiration;

>  }

> 

> -odp_timeout_t odp_timeout_from_buffer(odp_buffer_t buf)

> +void *odp_timer_userptr(odp_timer_tmo_t tmo)

>  {

> -	return (odp_timeout_t) buf;

> +	odp_timeout_hdr_t *tmo_hdr = odp_tmo_to_hdr(tmo);

> +	return tmo_hdr->user_ptr;

>  }

> 

> -uint64_t odp_timeout_tick(odp_timeout_t tmo)

> +int odp_timer_init_global(void)

>  {

> -	odp_timeout_hdr_t *tmo_hdr = odp_timeout_hdr(tmo);

> -	return tmo_hdr->meta.tmo_tick;

> +	return 0;

>  }
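
Since odp_timer_pool_expire() is the only entry point a tick source needs,
clock sources other than ODP_CLOCK_CPU could drive a pool the same way the
SIGEV_THREAD callback does. A sketch (my_tick_handler is made up, and the
one-tick-per-callback assumption is inherited from timer_notify() above):

static void my_tick_handler(odp_timer_pool_t tp)
{
	uint64_t next_tick = odp_timer_current_tick(tp) + 1;
	(void)odp_timer_pool_expire(tp, next_tick);
}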

> diff --git a/test/api_test/odp_timer_ping.c

> b/test/api_test/odp_timer_ping.c

> index 7406a45..2617b5c 100644
> --- a/test/api_test/odp_timer_ping.c
> +++ b/test/api_test/odp_timer_ping.c
> @@ -20,6 +20,8 @@
>   *    Otherwise timeout may happen bcz of slow nw speed
>   */
> 
> +#include <assert.h>
> +#include <stdlib.h>
>  #include <unistd.h>
>  #include <fcntl.h>
>  #include <errno.h>
> @@ -41,14 +43,15 @@
>  #define MSG_POOL_SIZE         (4*1024*1024)
>  #define BUF_SIZE		8
>  #define PING_CNT	10
> -#define PING_THRD	2	/* Send and Rx Ping thread */
> +#define PING_THRD	2	/* send_ping and rx_ping threads */
> 
>  /* Nanoseconds */
>  #define RESUS	10000
>  #define MINUS	10000
>  #define MAXUS	10000000
> 
> -static odp_timer_t test_timer_ping;
> +static odp_timer_pool_t tp;
> +static odp_timer_t test_timer_ping = ODP_TIMER_INVALID;
>  static odp_timer_tmo_t test_ping_tmo;
> 
>  #define PKTSIZE      64
> @@ -128,15 +131,7 @@ static int listen_to_pingack(void)
>  					 (socklen_t *)&len);
>  			if (bytes > 0) {
>  				/* pkt rxvd therefore cancel the timeout */
> -				if (odp_timer_cancel_tmo(test_timer_ping,
> -							 test_ping_tmo) != 0) {
> -					ODP_ERR("cancel_tmo failed ..exiting listner thread\n");
> -					/* avoid exiting from here even if tmo
> -					 * failed for current ping,
> -					 * allow subsequent ping_rx request */
> -					err = -1;
> -
> -				}
> +				odp_timer_cancel(test_timer_ping);
>  				/* cruel bad hack used for sender, listner ipc..
>  				 * euwww.. FIXME ..
>  				 */
> @@ -160,7 +155,6 @@ static int send_ping_request(struct sockaddr_in *addr)
> 
>  	uint64_t tick;
>  	odp_queue_t queue;
> -	odp_buffer_t buf;
> 
>  	int err = 0;
> 
> @@ -184,8 +178,16 @@ static int send_ping_request(struct sockaddr_in *addr)
> 
>  	/* get the ping queue */
>  	queue = odp_queue_lookup("ping_timer_queue");
> +	test_timer_ping = odp_timer_alloc(tp, queue, NULL);
> +	if (test_timer_ping == ODP_TIMER_INVALID) {
> +		ODP_ERR("Failed to allocate timer.\n");
> +		err = -1;
> +		goto err;
> +	}
> 
>  	for (i = 0; i < PING_CNT; i++) {
> +		odp_buffer_t buf;
> +		odp_timer_tmo_t tmo;
>  		/* prepare icmp pkt */
>  		bzero(&pckt, sizeof(pckt));
>  		pckt.hdr.type = ICMP_ECHO;
> @@ -209,12 +211,10 @@ static int send_ping_request(struct sockaddr_in *addr)
>  		printf(" icmp_sent msg_cnt %d\n", i);
> 
>  		/* arm the timer */
> -		tick = odp_timer_current_tick(test_timer_ping);
> +		tick = odp_timer_current_tick(tp);
> 
>  		tick += 1000;
> -		test_ping_tmo = odp_timer_absolute_tmo(test_timer_ping, tick,
> -						       queue,
> -						       ODP_BUFFER_INVALID);
> +		odp_timer_set_abs(test_timer_ping, tick);
>  		/* wait for timeout event */
>  		while ((buf = odp_queue_deq(queue)) == ODP_BUFFER_INVALID) {
>  			/* flag true means ack rxvd.. a cruel hack as I
> @@ -229,17 +229,28 @@ static int send_ping_request(struct sockaddr_in *addr)
>  				break;
>  			}
>  		}
> +		assert(odp_buffer_type(buf) == ODP_BUFFER_TYPE_TIMEOUT);
> +		tmo = odp_timeout_from_buffer(buf);
> 
> -		/* free tmo_buf for timeout case */
> -		if (buf != ODP_BUFFER_INVALID) {
> -			ODP_DBG(" timeout msg_cnt [%i] \n", i);
> +		switch (odp_timer_tmo_status(tmo)) {
> +		case ODP_TMO_FRESH:
> +			ODP_DBG(" timeout msg_cnt [%i]\n", i);
>  			/* so to avoid seg fault commented */
> -			odp_buffer_free(buf);
>  			err = -1;
> +			break;
> +		case ODP_TMO_STALE:
> +			/* Ignore stale timeouts */
> +			break;
> +		case ODP_TMO_ORPHAN:
> +			ODP_ERR("Received orphaned timeout!\n");
> +			abort();
>  		}
> +		odp_timer_return_tmo(tmo);
>  	}
> 
>  err:
> +	if (test_timer_ping != ODP_TIMER_INVALID)
> +		odp_timer_free(test_timer_ping);
>  	return err;
>  }
> 
> @@ -340,9 +351,9 @@ int main(int argc ODP_UNUSED, char *argv[] ODP_UNUSED)
>  	pool = odp_buffer_pool_create("msg_pool", pool_base, MSG_POOL_SIZE,
>  				      BUF_SIZE,
>  				      ODP_CACHE_LINE_SIZE,
> -				      ODP_BUFFER_TYPE_RAW);
> +				      ODP_BUFFER_TYPE_TIMEOUT);
>  	if (pool == ODP_BUFFER_POOL_INVALID) {
> -		ODP_ERR("Pool create failed.\n");
> +		ODP_ERR("Buffer pool create failed.\n");
>  		return -1;
>  	}
> 
> @@ -357,15 +368,19 @@ int main(int argc ODP_UNUSED, char *argv[] ODP_UNUSED)
>  		return -1;
>  	}
> 
> -	test_timer_ping = odp_timer_create("ping_timer", pool,
> -					   RESUS*ODP_TIME_USEC,
> -					   MINUS*ODP_TIME_USEC,
> -					   MAXUS*ODP_TIME_USEC);
> -
> -	if (test_timer_ping == ODP_TIMER_INVALID) {
> -		ODP_ERR("Timer create failed.\n");
> +	/*
> +	 * Create timer pool
> +	 */
> +	tp = odp_timer_pool_create("timer_pool", pool,
> +				   RESUS*ODP_TIME_USEC,
> +				   MINUS*ODP_TIME_USEC,
> +				   MAXUS*ODP_TIME_USEC,
> +				   1, false, ODP_CLOCK_CPU);
> +	if (tp == ODP_TIMER_POOL_INVALID) {
> +		ODP_ERR("Timer pool create failed.\n");
>  		return -1;
>  	}
> +	odp_timer_pool_start();
> 
>  	odp_shm_print_all();
> 
> --
> 1.9.1
> 
> 
> _______________________________________________
> lng-odp mailing list
> lng-odp@lists.linaro.org
> http://lists.linaro.org/mailman/listinfo/lng-odp
Ola Liljedahl Oct. 6, 2014, 9:36 a.m. UTC | #5
Gilad,

Your suggestion makes sense. This is how a purely event-driven application
would be designed.
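
For illustration, a rough sketch (not from the patch) of what such an
event-driven worker loop could look like with the proposed API;
handle_expired() and handle_packet() are hypothetical application functions:

/* Sketch only: dispatch loop for one worker thread. 'queue' is any
 * scheduled queue owned by this worker; handle_expired() and
 * handle_packet() are hypothetical application functions. */
static void worker_loop(odp_queue_t queue)
{
	for (;;) {
		odp_buffer_t buf = odp_schedule_one(&queue, ODP_SCHED_WAIT);

		if (odp_buffer_type(buf) == ODP_BUFFER_TYPE_TIMEOUT) {
			odp_timer_tmo_t tmo = odp_timeout_from_buffer(buf);

			/* Only fresh timeouts represent real expirations */
			if (odp_timer_tmo_status(tmo) == ODP_TMO_FRESH)
				handle_expired(odp_timer_userptr(tmo));
			/* Stale and orphaned timeouts are simply returned */
			odp_timer_return_tmo(tmo);
		} else {
			handle_packet(buf);
		}
	}
}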

Possibly the wait/timeout parameter to the odp_schedule calls is a legacy
from the time before there was a timer API in ODP. Maybe Petri can s(c)hed
some light on this.

I suspect there could be some performance benefits from specifying the
timeout as an explicit parameter. If the scheduling timeout is implemented
using a timer event facility (e.g. the ODP timer API), the application (or
the ODP implementation, if it uses the same design) would have to reset that
timer for every odp_schedule call; for a SW timer implementation this could
add serious overhead. With an explicit timeout parameter, the scheduler
implementation could instead read e.g. a cycle counter while (busy-)waiting
for events to become available. That overhead should be smaller and is
incurred only when the thread is idle and waiting for work.

The current API does not prevent an implementation from using timer events
internally, nor does it prevent an application from using the timer API for
its own timeouts. It does add a little bit of implementation complexity.
What is the best trade-off?
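
To make that last point concrete, here is a rough sketch (again, not part of
the patch) of how an application could emulate a per-call scheduling timeout
with the proposed timer API; note the set/cancel on every call, which is
exactly the overhead mentioned above:

/* Sketch only: 'tim' is a timer allocated with odp_timer_alloc() using
 * 'queue' as its destination queue, 'tp' is the timer pool it belongs to */
static odp_buffer_t schedule_with_tmo(odp_timer_pool_t tp, odp_timer_t tim,
				      odp_queue_t queue, uint64_t wait_ns)
{
	odp_buffer_t buf;

	/* The timer must be re-armed for every scheduling call */
	odp_timer_set_rel(tim, odp_timer_ns_to_tick(tp, wait_ns));
	/* Wait "forever"; the timeout event bounds the actual wait */
	buf = odp_schedule_one(&queue, ODP_SCHED_WAIT);
	/* Either real work or our timeout arrived; disarm the timer */
	odp_timer_cancel(tim);
	return buf;
}

The caller still has to check odp_buffer_type() on the returned buffer and,
for timeouts, check odp_timer_tmo_status() and hand them back with
odp_timer_return_tmo().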

-- Ola

On 6 October 2014 08:22, Gilad Ben Yossef <giladb@ezchip.com> wrote:

>
> Another one of my stupid questions, I'm afraid.  :-)
> If we have a timer implemented as an event pushed to a queue which can be
> scheduled like any other queue (which is a good thing, I think), why do our
> schedule APIs need a timeout?
> I mean, if you want a timeout, just add a scheduled timer queue and send
> yourself timeout events. That's how I would implement the schedule timeouts
> internally anyway (running a native timer on a core that does packet
> processing stops it from enjoying full Linux NOHZ CPU isolation, so we
> really don't want timers there...)
> Anything I've missed?
> Thanks,
> Gilad
> Gilad Ben-Yossef
> Software Architect
> EZchip Technologies Ltd.
> 37 Israel Pollak Ave, Kiryat Gat 82025 ,Israel
> Tel: +972-4-959-6666 ext. 576, Fax: +972-8-681-1483
> Mobile: +972-52-826-0388, US Mobile: +1-973-826-0388
> Email: giladb@ezchip.com, Web: http://www.ezchip.com
>
> "Ethernet always wins."
>         — Andy Bechtolsheim
>
>
> > -----Original Message-----
> > From: lng-odp-bounces@lists.linaro.org [mailto:lng-odp-
> > bounces@lists.linaro.org] On Behalf Of Ola Liljedahl
> > Sent: Thursday, October 02, 2014 6:23 PM
> > To: lng-odp@lists.linaro.org
> > Subject: [lng-odp] [PATCHv4] Timer API and and priority queue-based
> > implementation
> >
> > Signed-off-by: Ola Liljedahl <ola.liljedahl@linaro.org>
> > ---
> > Fixed review comments for v3 from Anders R.
> > * Example code snippets use @code/@endcode.
> > * Added some missing doxygen comments.
> > * Updated some comments.
> > * Reverted year in copyright notices.
> > * Added odp_likely() hint.
> > * Made some variables self-descriptive and removed redundant comments.
> > Changed to use ticket locks instead of spin locks (ticket locks are more
> > fair).
> > Changed to use ODP_ABORT() which has become available since the last
> > patch.
> >
> >  example/timer/odp_timer_test.c                     | 125 +--
> >  platform/linux-generic/Makefile.am                 |   1 +
> >  platform/linux-generic/include/api/odp_timer.h     | 570 +++++++++++--
> >  .../include/odp_priority_queue_internal.h          | 108 +++
> >  .../linux-generic/include/odp_timer_internal.h     |  71 +-
> >  platform/linux-generic/odp_priority_queue.c        | 283 +++++++
> >  platform/linux-generic/odp_timer.c                 | 923 ++++++++++++++-
> > ------
> >  test/api_test/odp_timer_ping.c                     |  73 +-
> >  8 files changed, 1648 insertions(+), 506 deletions(-)
> >  create mode 100644 platform/linux-
> > generic/include/odp_priority_queue_internal.h
> >  create mode 100644 platform/linux-generic/odp_priority_queue.c
> >
> > diff --git a/example/timer/odp_timer_test.c
> > b/example/timer/odp_timer_test.c
> > index 6e1715d..750d785 100644
> > --- a/example/timer/odp_timer_test.c
> > +++ b/example/timer/odp_timer_test.c
> > @@ -41,67 +41,89 @@ typedef struct {
> >  /** @private Barrier for test synchronisation */
> >  static odp_barrier_t test_barrier;
> >
> > -/** @private Timer handle*/
> > -static odp_timer_t test_timer;
> > +/** @private Timer pool handle */
> > +static odp_timer_pool_t tp;
> >
> >
> > +/** @private Timeout status ASCII strings */
> > +static const char *const status2str[] = {
> > +     "fresh", "stale", "orphaned"
> > +};
> > +
> >  /** @private test timeout */
> >  static void test_abs_timeouts(int thr, test_args_t *args)
> >  {
> > -     uint64_t tick;
> >       uint64_t period;
> >       uint64_t period_ns;
> >       odp_queue_t queue;
> > -     odp_buffer_t buf;
> > -     int num;
> > +     int remain = args->tmo_count;
> > +     odp_timer_t hdl;
> > +     uint64_t tick;
> >
> >       ODP_DBG("  [%i] test_timeouts\n", thr);
> >
> >       queue = odp_queue_lookup("timer_queue");
> >
> >       period_ns = args->period_us*ODP_TIME_USEC;
> > -     period    = odp_timer_ns_to_tick(test_timer, period_ns);
> > +     period    = odp_timer_ns_to_tick(tp, period_ns);
> >
> >       ODP_DBG("  [%i] period %"PRIu64" ticks,  %"PRIu64" ns\n", thr,
> >               period, period_ns);
> >
> > -     tick = odp_timer_current_tick(test_timer);
> > -
> > -     ODP_DBG("  [%i] current tick %"PRIu64"\n", thr, tick);
> > -
> > -     tick += period;
> > +     ODP_DBG("  [%i] current tick %"PRIu64"\n", thr,
> > +             odp_timer_current_tick(tp));
> >
> > -     if (odp_timer_absolute_tmo(test_timer, tick, queue,
> > ODP_BUFFER_INVALID)
> > -         == ODP_TIMER_TMO_INVALID){
> > -             ODP_DBG("Timeout request failed\n");
> > +     odp_timer_t test_timer;
> > +     test_timer = odp_timer_alloc(tp, queue, NULL);
> > +     if (test_timer == ODP_TIMER_INVALID) {
> > +             ODP_ERR("Failed to allocate timer\n");
> >               return;
> >       }
> > +     tick = odp_timer_current_tick(tp);
> > +     hdl = test_timer;
> >
> > -     num = args->tmo_count;
> > -
> > -     while (1) {
> > -             odp_timeout_t tmo;
> > -
> > -             buf = odp_schedule_one(&queue, ODP_SCHED_WAIT);
> > -
> > -             tmo  = odp_timeout_from_buffer(buf);
> > -             tick = odp_timeout_tick(tmo);
> > -
> > -             ODP_DBG("  [%i] timeout, tick %"PRIu64"\n", thr, tick);
> > -
> > -             odp_buffer_free(buf);
> > -
> > -             num--;
> > -
> > -             if (num == 0)
> > -                     break;
> > +     while (remain != 0) {
> > +             odp_buffer_t buf;
> > +             odp_timer_tmo_t tmo;
> > +             odp_timer_tmo_status_t stat;
> > +             odp_timer_set_t rc;
> >
> >               tick += period;
> > +             rc = odp_timer_set_abs(hdl, tick);
> > +             if (odp_unlikely(rc != ODP_TIMER_SET_SUCCESS)) {
> > +                     ODP_ERR("odp_timer_set_abs() failed (%u)\n", rc);
> > +                     abort();
> > +             }
> >
> > -             odp_timer_absolute_tmo(test_timer, tick,
> > -                                    queue, ODP_BUFFER_INVALID);
> > +             /* Get the next ready buffer/timeout */
> > +             buf = odp_schedule_one(&queue, ODP_SCHED_WAIT);
> > +             if (odp_unlikely(odp_buffer_type(buf) !=
> > +                              ODP_BUFFER_TYPE_TIMEOUT)) {
> > +                     ODP_ERR("Unexpected buffer type received\n");
> > +                     abort();
> > +             }
> > +             tmo = odp_timeout_from_buffer(buf);
> > +             stat = odp_timer_tmo_status(tmo);
> > +             tick = odp_timer_expiration(tmo);
> > +             hdl = odp_timer_handle(tmo);
> > +             ODP_DBG("  [%i] timeout, tick %"PRIu64", status %s\n",
> > +                     thr, tick, status2str[stat]);
> > +             /* if (stat == ODP_TMO_FRESH)  - do your thing! */
> > +             if (odp_likely(stat == ODP_TMO_ORPHAN)) {
> > +                     /* Some other thread freed the corresponding
> > +                        timer after the timeout was already
> > +                        enqueued */
> > +                     /* Timeout handle is invalid, use our own timer */
> > +                     hdl = test_timer;
> > +             }
> > +             /* Return timeout to timer manager, regardless of status */
> > +             odp_timer_return_tmo(tmo);
> > +             remain--;
> >       }
> >
> > +     odp_timer_cancel(test_timer);
> > +     odp_timer_free(test_timer);
> > +
> >       if (odp_queue_sched_type(queue) == ODP_SCHED_SYNC_ATOMIC)
> >               odp_schedule_release_atomic();
> >  }
> > @@ -155,7 +177,6 @@ static void print_usage(void)
> >       printf("Options:\n");
> >       printf("  -c, --count <number>    core count, core IDs start from
> > 1\n");
> >       printf("  -r, --resolution <us>   timeout resolution in usec\n");
> > -     printf("  -m, --min <us>          minimum timeout in usec\n");
> >       printf("  -x, --max <us>          maximum timeout in usec\n");
> >       printf("  -p, --period <us>       timeout period in usec\n");
> >       printf("  -t, --timeouts <count>  timeout repeat count\n");
> > @@ -190,14 +211,14 @@ static void parse_args(int argc, char *argv[],
> > test_args_t *args)
> >       /* defaults */
> >       args->core_count    = 0; /* all cores */
> >       args->resolution_us = 10000;
> > -     args->min_us        = args->resolution_us;
> > +     args->min_us        = 0;
> >       args->max_us        = 10000000;
> >       args->period_us     = 1000000;
> >       args->tmo_count     = 30;
> >
> >       while (1) {
> >               opt = getopt_long(argc, argv, "+c:r:m:x:p:t:h",
> > -                              longopts, &long_index);
> > +                               longopts, &long_index);
> >
> >               if (opt == -1)
> >                       break;  /* No more options */
> > @@ -321,10 +342,25 @@ int main(int argc, char *argv[])
> >                                     ODP_BUFFER_TYPE_TIMEOUT);
> >
> >       if (pool == ODP_BUFFER_POOL_INVALID) {
> > -             ODP_ERR("Pool create failed.\n");
> > +             ODP_ERR("Buffer pool create failed.\n");
> >               return -1;
> >       }
> >
> > +     tp = odp_timer_pool_create("timer_pool", pool,
> > +                                args.resolution_us*ODP_TIME_USEC,
> > +                                args.min_us*ODP_TIME_USEC,
> > +                                args.max_us*ODP_TIME_USEC,
> > +                                num_workers, /* One timer per worker */
> > +                                true,
> > +                                ODP_CLOCK_CPU);
> > +     if (tp == ODP_TIMER_POOL_INVALID) {
> > +             ODP_ERR("Timer pool create failed.\n");
> > +             return -1;
> > +     }
> > +     odp_timer_pool_start();
> > +
> > +     odp_shm_print_all();
> > +
> >       /*
> >        * Create a queue for timer test
> >        */
> > @@ -340,19 +376,6 @@ int main(int argc, char *argv[])
> >               return -1;
> >       }
> >
> > -     test_timer = odp_timer_create("test_timer", pool,
> > -                                   args.resolution_us*ODP_TIME_USEC,
> > -                                   args.min_us*ODP_TIME_USEC,
> > -                                   args.max_us*ODP_TIME_USEC);
> > -
> > -     if (test_timer == ODP_TIMER_INVALID) {
> > -             ODP_ERR("Timer create failed.\n");
> > -             return -1;
> > -     }
> > -
> > -
> > -     odp_shm_print_all();
> > -
> >       printf("CPU freq %"PRIu64" hz\n", odp_sys_cpu_hz());
> >       printf("Cycles vs nanoseconds:\n");
> >       ns = 0;
> > diff --git a/platform/linux-generic/Makefile.am b/platform/linux-
> > generic/Makefile.am
> > index d076d50..71f923c 100644
> > --- a/platform/linux-generic/Makefile.am
> > +++ b/platform/linux-generic/Makefile.am
> > @@ -59,6 +59,7 @@ __LIB__libodp_la_SOURCES = \
> >                          odp_packet_flags.c \
> >                          odp_packet_io.c \
> >                          odp_packet_socket.c \
> > +                        odp_priority_queue.c \
> >                          odp_queue.c \
> >                          odp_ring.c \
> >                          odp_rwlock.c \
> > diff --git a/platform/linux-generic/include/api/odp_timer.h
> > b/platform/linux-generic/include/api/odp_timer.h
> > index 01db839..82a1e05 100644
> > --- a/platform/linux-generic/include/api/odp_timer.h
> > +++ b/platform/linux-generic/include/api/odp_timer.h
> > @@ -8,9 +8,193 @@
> >  /**
> >   * @file
> >   *
> > - * ODP timer
> > + * ODP timer service
> >   */
> >
> > +/** Example #1 Retransmission timer (e.g. for reliable connections)
> > + @code
> > +
> > +//Create timer pool for reliable connections
> > +#define SEC 1000000000ULL //1s expressed in nanoseconds
> > +odp_timer_pool_t tcp_tpid =
> > +    odp_timer_pool_create("TCP",
> > +                       buffer_pool,
> > +                       1000000,//resolution 1ms
> > +                       0,//min tmo
> > +                       7200 * SEC,//max tmo length 2hours
> > +                       40000,//num_timers
> > +                       true,//shared
> > +                       ODP_CLOCK_CPU
> > +                      );
> > +if (tcp_tpid == ODP_TIMER_POOL_INVALID)
> > +{
> > +     //Failed to create timer pool => fatal error
> > +}
> > +
> > +
> > +//Setting up a new connection
> > +//Allocate retransmission timeout (identical for supervision timeout)
> > +//The user pointer points back to the connection context
> > +conn->ret_tim = odp_timer_alloc(tcp_tpid, queue, conn);
> > +//Check if all resources were successfully allocated
> > +if (conn->ret_tim == ODP_TIMER_INVALID)
> > +{
> > +     //Failed to allocate all resources for connection => tear down
> > +     //Destroy timeout
> > +     odp_timer_free(conn->ret_tim);
> > +     //Tear down connection
> > +     ...
> > +     return false;
> > +}
> > +//All necessary resources successfully allocated
> > +//Compute initial retransmission length in timer ticks
> > +conn->ret_len = odp_timer_ns_to_tick(tcp_tpid, 3 * SEC);//Per RFC1122
> > +//Arm the timer
> > +odp_timer_set_rel(conn->ret_tim, conn->ret_len);
> > +return true;
> > +
> > +
> > +//A packet for the connection has just been transmitted
> > +//Reset the retransmission timer
> > +odp_timer_set_rel(conn->ret_tim, conn->ret_len);
> > +
> > +
> > +//A retransmission timeout buffer for the connection has been received
> > +odp_timer_tmo_t tmo = odp_timeout_from_buffer(buf);
> > +odp_timer_tmo_status_t stat = odp_timer_tmo_status(tmo);
> > +//Check if timeout is fresh or stale, for stale timeouts we need to
> > reset the
> > +//timer
> > +if (stat == ODP_TMO_FRESH) {
> > +     //Fresh timeout, last transmitted packet not acked in time =>
> > +       retransmit
> > +     //Get connection from timeout event
> > +     conn = odp_timer_get_userptr(tmo);
> > +     //Retransmit last packet (e.g. TCP segment)
> > +     ...
> > +     //Re-arm timer using original delta value
> > +     odp_timer_set_rel(conn->ret_tim, conn->ret_len);
> > +} else if (stat == ODP_TMO_ORPHAN) {
> > +     odp_free_buffer(buf);
> > +     return;//Get out of here
> > +} // else stat == ODP_TMO_STALE, do nothing
> > +//Finished processing, return timeout
> > +odp_timer_return_tmo(tmo);
> > +
> > + @endcode
> > +*/
> > +
> > +/** Example #2 Periodic tick
> > + @code
> > +
> > +//Create timer pool for periodic ticks
> > +odp_timer_pool_t per_tpid =
> > +    odp_timer_pool_create("periodic-tick",
> > +                       buffer_pool,
> > +                       1,//resolution 1ns
> > +                       1,//minimum timeout length 1ns
> > +                       1000000000,//maximum timeout length 1s
> > +                       10,//num_timers
> > +                       false,//not shared
> > +                       ODP_CLOCK_CPU
> > +                      );
> > +if (per_tpid == ODP_TIMER_POOL_INVALID)
> > +{
> > +    //Failed to create timer pool => fatal error
> > +}
> > +
> > +
> > +//Allocate periodic timer
> > +tim_1733 = odp_timer_alloc(per_tpid, queue, NULL);
> > +//Check if all resources were successfully allocated
> > +if (tim_1733 == ODP_TIMER_INVALID)
> > +{
> > +     //Failed to allocate all resources => tear down
> > +     //Destroy timeout
> > +     odp_timer_free(tim_1733);
> > +     //Tear down other state
> > +     ...
> > +     return false;
> > +}
> > +//All necessary resources successfully allocated
> > +//Compute tick period in timer ticks
> > +period_1733 = odp_timer_ns_to_tick(per_tpid, 1000000000U /
> > 1733U);//1733Hz
> > +//Compute when next tick should expire
> > +next_1733 = odp_timer_current_tick(per_tpid) + period_1733;
> > +//Arm the periodic timer
> > +odp_timer_set_abs(tim_1733, next_1733);
> > +return true;
> > +
> > +
> > +
> > +//A periodic timer timeout has been received
> > +odp_timer_tmo_t tmo = odp_timeout_from_buffer(buf);
> > +//Get status of timeout
> > +odp_timer_tmo_status_t stat = odp_timer_tmo_status(tmo);
> > +//We expect the timeout is always fresh since we are not calling set or
> > cancel
> > +on active or expired timers in this example
> > +assert(stat == ODP_TMO_FRESH);
> > +//Do processing driven by timeout *before*
> > +...
> > +do {
> > +     //Compute when the timer should expire next
> > +     next_1733 += period_1733;
> > +     //Check that this is in the future
> > +     if (likely(next_1733 > odp_timer_current_tick(per_tpid)))
> > +     break;//Yes, done
> > +     //Else we missed a timeout
> > +     //Optionally attempt some recovery and/or logging of the problem
> > +     ...
> > +} while (0);
> > +//Re-arm periodic timer
> > +odp_timer_set_abs(tim_1733, next_1733);
> > +//Or do processing driven by timeout *after*
> > +...
> > +odp_timer_return_tmo(tmo);
> > +return;
> > +
> > + @endcode
> > +*/
> > +
> > +/** Example #3 Tear down of flow
> > + @code
> > +//ctx points to flow context data structure owned by application
> > +//Free the timer, cancelling any timeout
> > +odp_timer_free(ctx->timer);//Any enqueued timeout will be made invalid
> > +//Continue tearing down and eventually freeing context
> > +...
> > +return;
> > +
> > +//A timeout has been received, check status
> > +odp_timer_tmo_t tmo = odp_timeout_from_buffer(buf);
> > +switch (odp_timer_tmo_status(tmo))
> > +{
> > +    case ODP_TMO_FRESH :
> > +     //A flow has timed out, tear it down
> > +     //Find flow context from timeout
> > +     ctx = (context *)odp_timer_get_userptr(tmo);
> > +     //Free the supervision timer, any enqueued timeout will remain
> > +     odp_timer_free(ctx->tim);
> > +     //Free other flow related resources
> > +     ...
> > +     //Free the timeout buffer
> > +     odp_buffer_free(buf);
> > +     //Flow torn down
> > +     break;
> > +    case ODP_TMO_STALE :
> > +     //A stale timeout was received, return timeout and update timer
> > +     odp_timer_return_tmo(tmo);
> > +     break;
> > +    case ODP_TMO_ORPHAN :
> > +     //Orphaned timeout (from previously torn down flow)
> > +     //No corresponding timer or flow context
> > +     //Free the timeout buffer
> > +     odp_buffer_free(buf);
> > +     break;
> > +}
> > +
> > + @endcode
> > +*/
> > +
> >  #ifndef ODP_TIMER_H_
> >  #define ODP_TIMER_H_
> >
> > @@ -18,144 +202,408 @@
> >  extern "C" {
> >  #endif
> >
> > +#include <stdlib.h>
> >  #include <odp_std_types.h>
> >  #include <odp_buffer.h>
> >  #include <odp_buffer_pool.h>
> >  #include <odp_queue.h>
> >
> > +struct odp_timer_pool_s; /**< Forward declaration */
> > +
> > +/**
> > +* ODP timer pool handle (platform dependent)
> > +*/
> > +typedef struct odp_timer_pool_s *odp_timer_pool_t;
> > +
> > +/**
> > + * Invalid timer pool handle (platform dependent).
> > + */
> > +#define ODP_TIMER_POOL_INVALID NULL
> >
> >  /**
> > - * ODP timer handle
> > + * Clock sources for timers in timer pool.
> >   */
> > -typedef uint32_t odp_timer_t;
> > +typedef enum odp_timer_clk_src_e {
> > +     /** Use CPU clock as clock source for timers */
> > +     ODP_CLOCK_CPU,
> > +     /** Use external clock as clock source for timers */
> > +     ODP_CLOCK_EXT
> > +     /* Platform dependent which other clock sources exist */
> > +} odp_timer_clk_src_t;
> >
> > -/** Invalid timer */
> > -#define ODP_TIMER_INVALID 0
> > +struct odp_timer_s; /**< Forward declaration */
> >
> > +/**
> > +* ODP timer handle (platform dependent).
> > +*/
> > +typedef struct odp_timer_s *odp_timer_t;
> >
> >  /**
> > - * ODP timeout handle
> > + * Invalid timer handle (platform dependent).
> >   */
> > -typedef odp_buffer_t odp_timer_tmo_t;
> > -
> > -/** Invalid timeout */
> > -#define ODP_TIMER_TMO_INVALID 0
> > +#define ODP_TIMER_INVALID NULL
> >
> > +/**
> > + * Return values of timer set calls.
> > + */
> > +typedef enum odp_timer_set_e {
> > +     /** Timer set operation successful */
> > +     ODP_TIMER_SET_SUCCESS,
> > +     /** Timer set operation failed, expiration too early */
> > +     ODP_TIMER_SET_TOOEARLY,
> > +     /** Timer set operation failed, expiration too late */
> > +     ODP_TIMER_SET_TOOLATE
> > +} odp_timer_set_t;
> >
> >  /**
> > - * Timeout notification
> > + * Timeout event handle.
> >   */
> > -typedef odp_buffer_t odp_timeout_t;
> > +typedef odp_buffer_t odp_timer_tmo_t;
> >
> > +/**
> > + * Status of a timeout event.
> > + */
> > +typedef enum odp_timer_tmo_status_e {
> > +     /** Timeout is fresh, process it and return timeout */
> > +     ODP_TMO_FRESH,
> > +     /** Timer reset or cancelled, just return timeout  */
> > +     ODP_TMO_STALE,
> > +     /** Timer deleted, return or free timeout */
> > +     ODP_TMO_ORPHAN
> > +} odp_timer_tmo_status_t;
> >
> >  /**
> > - * Create a timer
> > + * Create a timer pool
> >   *
> > - * Creates a new timer with requested properties.
> > + * Create a new timer pool.
> >   *
> >   * @param name       Name
> > - * @param pool       Buffer pool for allocating timeout notifications
> > + * @param buf_pool   Buffer pool for allocating timeouts (and only
> > timeouts)
> >   * @param resolution Timeout resolution in nanoseconds
> > - * @param min_tmo    Minimum timeout duration in nanoseconds
> > - * @param max_tmo    Maximum timeout duration in nanoseconds
> > + * @param min_tmo    Minimum relative timeout in nanoseconds
> > + * @param max_tmo    Maximum relative timeout in nanoseconds
> > + * @param num_timers Number of supported timers (minimum)
> > + * @param shared     Shared or private timer pool.
> > + *              Operations on shared timers will include the necessary
> > + *              mutual exclusion, operations on private timers may not
> > + *              (mutual exclusion is the responsibility of the caller).
> > + * @param clk_src    Clock source to use
> >   *
> > - * @return Timer handle if successful, otherwise ODP_TIMER_INVALID
> > + * @return Timer pool handle if successful, otherwise
> > ODP_TIMER_POOL_INVALID
> > + * and errno set
> >   */
> > -odp_timer_t odp_timer_create(const char *name, odp_buffer_pool_t pool,
> > -                          uint64_t resolution, uint64_t min_tmo,
> > -                          uint64_t max_tmo);
> > +odp_timer_pool_t
> > +odp_timer_pool_create(const char *name,
> > +                   odp_buffer_pool_t buf_pool,
> > +                   uint64_t resolution,
> > +                   uint64_t min_tmo,
> > +                   uint64_t max_tmo,
> > +                   uint32_t num_timers,
> > +                   bool shared,
> > +                   odp_timer_clk_src_t clk_src);
> > +
> > +/**
> > + * Start a timer pool
> > + *
> > + * Start all created timer pools, enabling the allocation of timers.
> > + * The purpose of this call is to coordinate the creation of multiple
> > timer
> > + * pools that may use the same underlying HW resources.
> > + * This function may be called multiple times.
> > + */
> > +void odp_timer_pool_start(void);
> > +
> > +/**
> > + * Destroy a timer pool
> > + *
> > + * Destroy a timer pool, freeing all resources.
> > + * All timers must have been freed.
> > + *
> > + * @param tpid  Timer pool identifier
> > + */
> > +void odp_timer_pool_destroy(odp_timer_pool_t tpid);
> >
> >  /**
> >   * Convert timer ticks to nanoseconds
> >   *
> > - * @param timer Timer
> > + * @param tpid  Timer pool identifier
> >   * @param ticks Timer ticks
> >   *
> >   * @return Nanoseconds
> >   */
> > -uint64_t odp_timer_tick_to_ns(odp_timer_t timer, uint64_t ticks);
> > +uint64_t odp_timer_tick_to_ns(odp_timer_pool_t tpid, uint64_t ticks);
> >
> >  /**
> >   * Convert nanoseconds to timer ticks
> >   *
> > - * @param timer Timer
> > + * @param tpid  Timer pool identifier
> >   * @param ns    Nanoseconds
> >   *
> >   * @return Timer ticks
> >   */
> > -uint64_t odp_timer_ns_to_tick(odp_timer_t timer, uint64_t ns);
> > +uint64_t odp_timer_ns_to_tick(odp_timer_pool_t tpid, uint64_t ns);
> >
> >  /**
> > - * Timer resolution in nanoseconds
> > + * Current tick value
> >   *
> > - * @param timer Timer
> > + * @param tpid Timer pool identifier
> >   *
> > - * @return Resolution in nanoseconds
> > + * @return Current time in timer ticks
> > + */
> > +uint64_t odp_timer_current_tick(odp_timer_pool_t tpid);
> > +
> > +/**
> > + * ODP timer configurations
> >   */
> > -uint64_t odp_timer_resolution(odp_timer_t timer);
> > +
> > +typedef enum odp_timer_pool_conf_e {
> > +     ODP_TIMER_NAME,      /**< Return name of timer pool */
> > +     ODP_TIMER_RESOLUTION,/**< Return the timer resolution (in ns) */
> > +     ODP_TIMER_MIN_TICKS, /**< Return the min supported rel timeout
> > (ticks)*/
> > +     ODP_TIMER_MAX_TICKS, /**< Return the max supported rel timeout
> > (ticks)*/
> > +     ODP_TIMER_NUM_TIMERS,/**< Return number of supported timers */
> > +     ODP_TIMER_SHARED     /**< Return shared flag */
> > +} odp_timer_pool_conf_t;
> >
> >  /**
> > - * Maximum timeout in timer ticks
> > + * Query different timer pool configurations, e.g.
> > + *  Timer resolution in nanoseconds
> > + *  Maximum timeout in timer ticks
> > + *  Number of supported timers
> > + *  Shared or private timer pool
> >   *
> > - * @param timer Timer
> > + * @param tpid Timer pool identifier
> > + * @param item Configuration item being queried
> >   *
> > - * @return Maximum timeout in timer ticks
> > + * @return the requested piece of information or 0 for unknown item.
> >   */
> > -uint64_t odp_timer_maximum_tmo(odp_timer_t timer);
> > +uintptr_t odp_timer_pool_query_conf(odp_timer_pool_t tpid,
> > +                                 odp_timer_pool_conf_t item);
> >
> >  /**
> > - * Current timer tick
> > + * Allocate a timer
> >   *
> > - * @param timer Timer
> > + * Create a timer (allocating all necessary resources e.g. timeout
> > event) from
> > + * the timer pool.
> >   *
> > - * @return Current time in timer ticks
> > + * @param tpid     Timer pool identifier
> > + * @param queue    Destination queue for timeout notifications
> > + * @param user_ptr User defined pointer or NULL (copied to timeouts)
> > + *
> > + * @return Timer handle if successful, otherwise ODP_TIMER_INVALID and
> > + *      errno set.
> >   */
> > -uint64_t odp_timer_current_tick(odp_timer_t timer);
> > +odp_timer_t odp_timer_alloc(odp_timer_pool_t tpid,
> > +                         odp_queue_t queue,
> > +                         void *user_ptr);
> >
> >  /**
> > - * Request timeout with an absolute timer tick
> > + * Free a timer
> > + *
> > + * Free (destroy) a timer, freeing all associated resources (e.g.
> > default
> > + * timeout event). An expired and enqueued timeout event will not be
> > freed.
> > + * It is the responsibility of the application to free this timeout when
> > it
> > + * is received.
> >   *
> > - * When tick reaches tmo_tick, the timer enqueues the timeout
> > notification into
> > - * the destination queue.
> > + * @param tim      Timer handle
> > + */
> > +void odp_timer_free(odp_timer_t tim);
> > +
> > +/**
> > + * Set a timer (absolute time) with a user-defined timeout buffer
> >   *
> > - * @param timer    Timer
> > - * @param tmo_tick Absolute timer tick value which triggers the timeout
> > - * @param queue    Destination queue for the timeout notification
> > - * @param buf      User defined timeout notification buffer. When
> > - *                 ODP_BUFFER_INVALID, default timeout notification is
> > used.
> > + * Set (arm) the timer to expire at specific time. The user-defined
> > + * buffer will be enqueued when the timer expires.
> > + * Arming may fail (if the timer is in state EXPIRED), an earlier
> > timeout
> > + * will then be received. odp_timer_tmo_status() must be used to check
> > if
> > + * the received timeout is valid.
> >   *
> > - * @return Timeout handle if successful, otherwise ODP_TIMER_TMO_INVALID
> > + * Note: any invalid parameters will be treated as programming errors
> > and will
> > + * cause the application to abort.
> > + *
> > + * @param tim      Timer
> > + * @param abs_tck  Expiration time in absolute timer ticks
> > + * @param user_buf The buffer to use as timeout event
> > + *
> > + * @return Success or failure code
> >   */
> > -odp_timer_tmo_t odp_timer_absolute_tmo(odp_timer_t timer, uint64_t
> > tmo_tick,
> > -                                    odp_queue_t queue, odp_buffer_t
> buf);
> > +odp_timer_set_t odp_timer_set_abs_w_buf(odp_timer_t tim,
> > +                                     uint64_t abs_tck,
> > +                                     odp_buffer_t user_buf);
> >
> >  /**
> > - * Cancel a timeout
> > + * Set a timer with an absolute expiration time
> > + *
> > + * Set (arm) the timer to expire at a specific time.
> > + * Arming may fail (if the timer is in state EXPIRED), an earlier
> > timeout
> > + * will then be received. odp_timer_tmo_status() must be used to check
> > if
> > + * the received timeout is valid.
> > + *
> > + * Note: any invalid parameters will be treated as programming errors
> > and will
> > + * cause the application to abort.
> >   *
> > - * @param timer Timer
> > - * @param tmo   Timeout to cancel
> > + * @param tim     Timer
> > + * @param abs_tck Expiration time in absolute timer ticks
> >   *
> > - * @return 0 if successful
> > + * @return Success or failure code
> >   */
> > -int odp_timer_cancel_tmo(odp_timer_t timer, odp_timer_tmo_t tmo);
> > +odp_timer_set_t odp_timer_set_abs(odp_timer_t tim, uint64_t abs_tck);
> >
> >  /**
> > - * Convert buffer handle to timeout handle
> > + * Set a timer with a relative expiration time and user-defined buffer.
> >   *
> > - * @param buf  Buffer handle
> > + * Set (arm) the timer to expire at a relative future time.
> > + * Arming may fail (if the timer is in state EXPIRED),
> > + * an earlier timeout will then be received. odp_timer_tmo_status() must
> > + * be used to check if the received timeout is valid.
> >   *
> > - * @return Timeout buffer handle
> > + * Note: any invalid parameters will be treated as programming errors
> > and will
> > + * cause the application to abort.
> > + *
> > + * @param tim      Timer
> > + * @param rel_tck  Expiration time in timer ticks relative to current
> > time of
> > + *              the timer pool the timer belongs to
> > + * @param user_buf The buffer to use as timeout event
> > + *
> > + * @return Success or failure code
> >   */
> > -odp_timeout_t odp_timeout_from_buffer(odp_buffer_t buf);
> > +odp_timer_set_t odp_timer_set_rel_w_buf(odp_timer_t tim,
> > +                                     uint64_t rel_tck,
> > +                                     odp_buffer_t user_buf);
> > +/**
> > + * Set a timer with a relative expiration time
> > + *
> > + * Set (arm) the timer to expire at a relative future time.
> > + * Arming may fail (if the timer is in state EXPIRED),
> > + * an earlier timeout will then be received. odp_timer_tmo_status() must
> > + * be used to check if the received timeout is valid.
> > + *
> > + * Note: any invalid parameters will be treated as programming errors
> > and will
> > + * cause the application to abort.
> > + *
> > + * @param tim     Timer
> > + * @param rel_tck Expiration time in timer ticks relative to current
> > time of
> > + *             the timer pool the timer belongs to
> > + *
> > + * @return Success or failure code
> > + */
> > +odp_timer_set_t odp_timer_set_rel(odp_timer_t tim, uint64_t rel_tck);
> >
> >  /**
> > - * Return absolute timeout tick
> > + * Cancel a timer
> > + *
> > + * Cancel a timer, preventing future expiration and delivery.
> > + *
> > + * A timer that has already expired and been enqueued for delivery may
> > be
> > + * impossible to cancel and will instead be delivered to the destination
> > queue.
> > + * Use odp_timer_tmo_status() to check whether a received timeout is
> > fresh or
> > + * stale (cancelled). Stale timeouts will automatically be recycled.
> > + *
> > + * Note: any invalid parameters will be treated as programming errors
> > and will
> > + * cause the application to abort.
> > + *
> > + * @param tim    Timer handle
> > + */
> > +void odp_timer_cancel(odp_timer_t tim);
> > +
> > +/**
> > + * Translate from buffer to timeout
> > + *
> > + * Return the timeout handle that corresponds to the specified buffer
> > handle.
> > + * The buffer must be of type ODP_BUFFER_TYPE_TIMEOUT.
> > + *
> > + * @param buf   Buffer handle to translate.
> > + *
> > + * @return      The corresponding timeout handle.
> > + */
> > +static inline odp_timer_tmo_t odp_timeout_from_buffer(odp_buffer_t buf)
> > +{
> > +     if (odp_unlikely(odp_buffer_type(buf) != ODP_BUFFER_TYPE_TIMEOUT))
> > {
> > +             ODP_ERR("Buffer type %u not timeout\n", buf);
> > +             abort();
> > +     }
> > +     /* In this implementation, timeout == buffer */
> > +     return (odp_timer_tmo_t)buf;
> > +}
> > +
> > +/**
> > + * Translate from timeout to buffer
> > + *
> > + * Return the buffer handle that corresponds to the specified timeout
> > handle.
> > + *
> > + * @param tmo   Timeout handle to translate.
> > + *
> > + * @return      The corresponding buffer handle.
> > + */
> > +static inline odp_buffer_t odp_buffer_from_timeout(odp_timer_tmo_t tmo)
> > +{
> > +     /* In this implementation, buffer == timeout */
> > +     return (odp_buffer_t)tmo;
> > +}
> > +
> > +/**
> > + * Return timeout to timer
> > + *
> > + * Return a received timeout for reuse with the parent timer.
> > + * Note: odp_timer_return_tmo() must be called on all received timeouts!
> > + * (Excluding user defined timeout buffers).
> > + * The timeout must not be accessed after this call, the semantics is
> > + * equivalent to a free call.
> > + *
> > + * @param tmo    Timeout
> > + */
> > +void odp_timer_return_tmo(odp_timer_tmo_t tmo);
> > +
> > +/**
> > + * Return fresh/stale/orphan status of timeout.
> > + *
> > + * Check a received timeout for orphaness (i.e. parent timer freed) and
> > + * staleness (i.e. parent timer has been reset or cancelled after the
> > timeout
> > + * expired and was enqueued).
> > + * If the timeout is fresh, it should be processed.
> > + * If the timeout is stale or orphaned, it should be ignored.
> > + * All timeouts must be returned using the odp_timer_return_tmo() call.
> > + *
> > + * @param tmo    Timeout
> > + *
> > + * @return One of ODP_TMO_FRESH, ODP_TMO_STALE or ODP_TMO_ORPHAN.
> > + */
> > +odp_timer_tmo_status_t odp_timer_tmo_status(odp_timer_tmo_t tmo);
> > +
> > +/**
> > + * Get timer handle
> > + *
> > + * Return Handle of parent timer.
> > + *
> > + * @param tmo   Timeout
> > + *
> > + * @return Timer handle or ODP_TIMER_INVALID for orphaned timeouts.
> > + *         Note that the parent timer could be freed by some other
> > thread
> > + *         at any time and thus the timeout becomes orphaned.
> > + */
> > +odp_timer_t odp_timer_handle(odp_timer_tmo_t tmo);
> > +
> > +/**
> > + * Get expiration time
> > + *
> > + * Return (requested) expiration time of timeout.
> > + *
> > + * @param tmo   Timeout
> > + *
> > + * @return Expiration time
> > + */
> > +uint64_t odp_timer_expiration(odp_timer_tmo_t tmo);
> > +
> > +/**
> > + * Get user pointer
> > + *
> > + * Return User pointer of timer associated with timeout.
> > + * The user pointer is often used to point to some associated context.
> >   *
> > - * @param tmo Timeout buffer handle
> > + * @param tmo   Timeout
> >   *
> > - * @return Absolute timeout tick
> > + * @return User pointer
> >   */
> > -uint64_t odp_timeout_tick(odp_timeout_t tmo);
> > +void *odp_timer_userptr(odp_timer_tmo_t tmo);
> >
> >  #ifdef __cplusplus
> >  }
> > diff --git a/platform/linux-generic/include/odp_priority_queue_internal.h
> > b/platform/linux-generic/include/odp_priority_queue_internal.h
> > new file mode 100644
> > index 0000000..7d7f3a2
> > --- /dev/null
> > +++ b/platform/linux-generic/include/odp_priority_queue_internal.h
> > @@ -0,0 +1,108 @@
> > +#ifndef _PRIORITY_QUEUE_H
> > +#define _PRIORITY_QUEUE_H
> > +
> > +#include <assert.h>
> > +#include <stddef.h>
> > +#include <stdint.h>
> > +#include <stdbool.h>
> > +#include <odp_align.h>
> > +
> > +#define INVALID_INDEX ~0U
> > +#define INVALID_PRIORITY ((pq_priority_t)~0ULL)
> > +
> > +typedef uint64_t pq_priority_t;
> > +
> > +struct heap_node;
> > +
> > +typedef struct priority_queue {
> > +     uint32_t max_elems;/* Number of elements in heap */
> > +     /* Number of registered elements (active + inactive) */
> > +     uint32_t reg_elems;
> > +     uint32_t num_elems;/* Number of active elements */
> > +     struct heap_node *heap;
> > +     struct heap_node *org_ptr;
> > +} priority_queue ODP_ALIGNED(sizeof(uint64_t));
> > +
> > +/* The user gets a pointer to this structure */
> > +typedef struct {
> > +     /* Set when pq_element registered with priority queue */
> > +     priority_queue *pq;
> > +     uint32_t index;/* Index into heap array */
> > +     pq_priority_t prio;
> > +} pq_element;
> > +
> > +/*** Operations on pq_element ***/
> > +
> > +static inline void pq_element_con(pq_element *this)
> > +{
> > +     this->pq = NULL;
> > +     this->index = INVALID_INDEX;
> > +     this->prio = 0U;
> > +}
> > +
> > +static inline void pq_element_des(pq_element *this)
> > +{
> > +     (void)this;
> > +     assert(this->index == INVALID_INDEX);
> > +}
> > +
> > +static inline priority_queue *get_pq(const pq_element *this)
> > +{
> > +     return this->pq;
> > +}
> > +
> > +static inline pq_priority_t get_prio(const pq_element *this)
> > +{
> > +     return this->prio;
> > +}
> > +
> > +static inline uint32_t get_index(const pq_element *this)
> > +{
> > +     return this->index;
> > +}
> > +
> > +static inline bool is_active(const pq_element *this)
> > +{
> > +     return this->index != INVALID_INDEX;
> > +}
> > +
> > +/*** Operations on priority_queue ***/
> > +
> > +extern uint32_t pq_smallest_child(priority_queue *, uint32_t,
> > pq_priority_t);
> > +extern void pq_bubble_down(priority_queue *, pq_element *);
> > +extern void pq_bubble_up(priority_queue *, pq_element *);
> > +
> > +static inline bool valid_index(priority_queue *this, uint32_t idx)
> > +{
> > +     return idx < this->num_elems;
> > +}
> > +
> > +extern void priority_queue_con(priority_queue *, uint32_t _max_elems);
> > +extern void priority_queue_des(priority_queue *);
> > +
> > +/* Register pq_element with priority queue */
> > +/* Return false if priority queue full */
> > +extern bool pq_register_element(priority_queue *, pq_element *);
> > +
> > +/* Activate and add pq_element to priority queue */
> > +/* Element must be disarmed */
> > +extern void pq_activate_element(priority_queue *, pq_element *,
> > pq_priority_t);
> > +
> > +/* Reset (increase) priority for pq_element */
> > +/* Element may be active or inactive (released) */
> > +extern void pq_reset_element(priority_queue *, pq_element *,
> > pq_priority_t);
> > +
> > +/* Deactivate and remove element from priority queue */
> > +/* Element may be active or inactive (released) */
> > +extern void pq_deactivate_element(priority_queue *, pq_element *);
> > +
> > +/* Unregister pq_element */
> > +extern void pq_unregister_element(priority_queue *, pq_element *);
> > +
> > +/* Return priority of first element (lowest numerical value) */
> > +extern pq_priority_t pq_first_priority(const priority_queue *);
> > +
> > +/* Deactivate and return first element if it's prio is <= threshold */
> > +extern pq_element *pq_release_element(priority_queue *, pq_priority_t
> > thresh);
> > +
> > +#endif /* _PRIORITY_QUEUE_H */
> > diff --git a/platform/linux-generic/include/odp_timer_internal.h
> > b/platform/linux-generic/include/odp_timer_internal.h
> > index ad28f53..461f28c 100644
> > --- a/platform/linux-generic/include/odp_timer_internal.h
> > +++ b/platform/linux-generic/include/odp_timer_internal.h
> > @@ -1,4 +1,4 @@
> > -/* Copyright (c) 2013, Linaro Limited
> > +/* Copyright (c) 2014, Linaro Limited
> >   * All rights reserved.
> >   *
> >   * SPDX-License-Identifier:     BSD-3-Clause
> > @@ -8,72 +8,51 @@
> >  /**
> >   * @file
> >   *
> > - * ODP timer timeout descriptor - implementation internal
> > + * ODP timeout descriptor - implementation internal
> >   */
> >
> >  #ifndef ODP_TIMER_INTERNAL_H_
> >  #define ODP_TIMER_INTERNAL_H_
> >
> > -#ifdef __cplusplus
> > -extern "C" {
> > -#endif
> > -
> > -#include <odp_std_types.h>
> > -#include <odp_queue.h>
> > -#include <odp_buffer.h>
> > +#include <odp_align.h>
> > +#include <odp_debug.h>
> >  #include <odp_buffer_internal.h>
> >  #include <odp_buffer_pool_internal.h>
> >  #include <odp_timer.h>
> >
> > -struct timeout_t;
> > -
> > -typedef struct timeout_t {
> > -     struct timeout_t *next;
> > -     int               timer_id;
> > -     int               tick;
> > -     uint64_t          tmo_tick;
> > -     odp_queue_t       queue;
> > -     odp_buffer_t      buf;
> > -     odp_buffer_t      tmo_buf;
> > -} timeout_t;
> > -
> > -
> > -struct odp_timeout_hdr_t;
> > -
> >  /**
> > - * Timeout notification header
> > + * Internal Timeout header
> >   */
> > -typedef struct odp_timeout_hdr_t {
> > +typedef struct {
> > +     /* common buffer header */
> >       odp_buffer_hdr_t buf_hdr;
> >
> > -     timeout_t meta;
> > -
> > -     uint8_t buf_data[];
> > +     /* Requested expiration time */
> > +     uint64_t expiration;
> > +     /* User ptr inherited from parent timer */
> > +     void *user_ptr;
> > +     /* Parent timer */
> > +     odp_timer_t timer;
> > +     /* Tag inherited from parent timer at time of expiration */
> > +     uint32_t tag;
> > +     /* Gen-cnt inherited from parent timer at time of creation */
> > +     uint16_t gencnt;
> > +     uint16_t pad;
> > +     uint8_t buf_data[0];
> >  } odp_timeout_hdr_t;
> >
> > -
> > -
> >  ODP_STATIC_ASSERT(sizeof(odp_timeout_hdr_t) ==
> > -        ODP_OFFSETOF(odp_timeout_hdr_t, buf_data),
> > -        "ODP_TIMEOUT_HDR_T__SIZE_ERR");
> > -
> > +               ODP_OFFSETOF(odp_timeout_hdr_t, buf_data),
> > +               "sizeof(odp_timeout_hdr_t) ==
> > ODP_OFFSETOF(odp_timeout_hdr_t, buf_data)");
> >  ODP_STATIC_ASSERT(sizeof(odp_timeout_hdr_t) % sizeof(uint64_t) == 0,
> > -        "ODP_TIMEOUT_HDR_T__SIZE_ERR2");
> > -
> > +               "sizeof(odp_timeout_hdr_t) % sizeof(uint64_t) == 0");
> >
> >  /**
> > - * Return timeout header
> > + * Return the timeout header
> >   */
> > -static inline odp_timeout_hdr_t *odp_timeout_hdr(odp_timeout_t tmo)
> > +static inline odp_timeout_hdr_t *odp_timeout_hdr(odp_buffer_t buf)
> >  {
> > -     odp_buffer_hdr_t *buf_hdr = odp_buf_to_hdr((odp_buffer_t)tmo);
> > -     return (odp_timeout_hdr_t *)(uintptr_t)buf_hdr;
> > -}
> > -
> > -
> > -
> > -#ifdef __cplusplus
> > +     return (odp_timeout_hdr_t *)odp_buf_to_hdr(buf);
> >  }
> > -#endif
> >
> >  #endif
> > diff --git a/platform/linux-generic/odp_priority_queue.c
> > b/platform/linux-generic/odp_priority_queue.c
> > new file mode 100644
> > index 0000000..b72c26f
> > --- /dev/null
> > +++ b/platform/linux-generic/odp_priority_queue.c
> > @@ -0,0 +1,283 @@
> > +#define NDEBUG /* Enabled by default by ODP build system */
> > +#include <assert.h>
> > +#include <unistd.h>
> > +#include <stdlib.h>
> > +#include <string.h>
> > +#include <strings.h>
> > +#include <odp_hints.h>
> > +#include <odp_align.h>
> > +#include <odp_debug.h>
> > +
> > +#include "odp_priority_queue_internal.h"
> > +
> > +
> > +#define NUM_CHILDREN 4
> > +#define CHILD(n) (NUM_CHILDREN * (n) + 1)
> > +#define PARENT(n) (((n) - 1) / NUM_CHILDREN)
> > +
> > +/* Internal nodes in the array */
> > +typedef struct heap_node {
> > +     pq_element *elem;
> > +     /* Copy of elem->prio so we avoid unnecessary dereferencing */
> > +     pq_priority_t prio;
> > +} heap_node;
> > +
> > +static void pq_assert_heap(priority_queue *this);
> > +
> > +#define ALIGNMENT(p) (1U << ((unsigned)ffs((int)p) - 1U))
> > +
> > +void priority_queue_con(priority_queue *this, uint32_t _max_elems)
> > +{
> > +     this->max_elems = _max_elems;
> > +     this->reg_elems = 0;
> > +     this->num_elems = 0;
> > +     this->org_ptr = malloc((_max_elems + 64 / sizeof(heap_node)) *
> > +                            sizeof(heap_node));
> > +     if (odp_unlikely(this->org_ptr == NULL)) {
> > +             ODP_ERR("malloc failed\n");
> > +             abort();
> > +     }
> > +     this->heap = this->org_ptr;
> > +     assert((size_t)&this->heap[1] % 8 == 0);
> > +     /* Increment base address until first child (index 1) is cache line
> > */
> > +     /* aligned and thus all children (e.g. index 1-4) stored in the */
> > +     /* same cache line. We are not interested in the alignment of */
> > +     /* heap[0] as this is a lone node */
> > +     while ((size_t)&this->heap[1] % ODP_CACHE_LINE_SIZE != 0) {
> > +             /* Cast to ptr to struct member with the greatest alignment
> > */
> > +             /* requirement */
> > +             this->heap = (heap_node *)((pq_priority_t *)this->heap +
> 1);
> > +     }
> > +     pq_assert_heap(this);
> > +}
> > +
> > +void priority_queue_des(priority_queue *this)
> > +{
> > +     pq_assert_heap(this);
> > +     free(this->org_ptr);
> > +}
> > +
> > +#ifndef NDEBUG
> > +static uint32_t
> > +pq_assert_elem(priority_queue *this, uint32_t index, bool recurse)
> > +{
> > +     uint32_t num = 1;
> > +     const pq_element *elem = this->heap[index].elem;
> > +     assert(elem->index == index);
> > +     assert(elem->prio == this->heap[index].prio);
> > +     uint32_t child = CHILD(index);
> > +     uint32_t i;
> > +     for (i = 0; i < NUM_CHILDREN; i++, child++) {
> > +             if (valid_index(this, child)) {
> > +                     assert(this->heap[child].elem != NULL);
> > +                     assert(this->heap[child].prio >= elem->prio);
> > +                     if (recurse)
> > +                             num += pq_assert_elem(this, child,
> recurse);
> > +             }
> > +     }
> > +     return num;
> > +}
> > +#endif
> > +
> > +static void
> > +pq_assert_heap(priority_queue *this)
> > +{
> > +     (void)this;
> > +#ifndef NDEBUG
> > +     uint32_t num = 0;
> > +     if (odp_likely(this->num_elems != 0)) {
> > +             assert(this->heap[0].elem != NULL);
> > +             num += pq_assert_elem(this, 0, true);
> > +     }
> > +     assert(num == this->num_elems);
> > +     unsigned i;
> > +     for (i = 0; i < this->num_elems; i++) {
> > +             assert(this->heap[i].elem != NULL);
> > +             assert(this->heap[i].prio != INVALID_PRIORITY);
> > +     }
> > +#endif
> > +}
> > +
> > +/* Bubble up to proper position */
> > +void
> > +pq_bubble_up(priority_queue *this, pq_element *elem)
> > +{
> > +     assert(this->heap[elem->index].elem == elem);
> > +     assert(this->heap[elem->index].prio == elem->prio);
> > +     uint32_t current = elem->index;
> > +     pq_priority_t prio = elem->prio;
> > +     assert(current == 0 || this->heap[PARENT(current)].elem != NULL);
> > +     /* Move up into proper position */
> > +     while (current != 0 && this->heap[PARENT(current)].prio > prio) {
> > +             uint32_t parent = PARENT(current);
> > +             assert(this->heap[parent].elem != NULL);
> > +             /* Swap current with parent */
> > +             /* 1) Move parent down */
> > +             this->heap[current].elem = this->heap[parent].elem;
> > +             this->heap[current].prio = this->heap[parent].prio;
> > +             this->heap[current].elem->index = current;
> > +             /* 2) Move current up to parent */
> > +             this->heap[parent].elem = elem;
> > +             this->heap[parent].prio = prio;
> > +             this->heap[parent].elem->index = parent;
> > +             /* Continue moving elem until it is in the right place */
> > +             current = parent;
> > +     }
> > +     pq_assert_heap(this);
> > +}
> > +
> > +/* Find the smallest child that is smaller than the specified priority
> > */
> > +/* Very hot function, can we decrease the number of cache misses? */
> > +uint32_t pq_smallest_child(priority_queue *this,
> > +                        uint32_t index,
> > +                        pq_priority_t val)
> > +{
> > +     uint32_t smallest = index;
> > +     uint32_t child = CHILD(index);
> > +#if NUM_CHILDREN == 4
> > +     /* Unroll loop when all children exist */
> > +     if (odp_likely(valid_index(this, child + 3))) {
> > +             if (this->heap[child + 0].prio < val)
> > +                     val = this->heap[smallest = child + 0].prio;
> > +             if (this->heap[child + 1].prio < val)
> > +                     val = this->heap[smallest = child + 1].prio;
> > +             if (this->heap[child + 2].prio < val)
> > +                     val = this->heap[smallest = child + 2].prio;
> > +             if (this->heap[child + 3].prio < val)
> > +                     (void)this->heap[smallest = child + 3].prio;
> > +             return smallest;
> > +     }
> > +#endif
> > +     uint32_t i;
> > +     for (i = 0; i < NUM_CHILDREN; i++) {
> > +             if (odp_unlikely(!valid_index(this, child + i)))
> > +                     break;
> > +             if (this->heap[child + i].prio < val) {
> > +                     smallest = child + i;
> > +                     val = this->heap[smallest].prio;
> > +             }
> > +     }
> > +     return smallest;
> > +}
> > +
> > +/* Very hot function, can it be optimised? */
> > +void
> > +pq_bubble_down(priority_queue *this, pq_element *elem)
> > +{
> > +     assert(this->heap[elem->index].elem == elem);
> > +     assert(this->heap[elem->index].prio == elem->prio);
> > +     uint32_t current = elem->index;
> > +     pq_priority_t prio = elem->prio;
> > +     for (;;) {
> > +             uint32_t child = pq_smallest_child(this, current, prio);
> > +             if (current == child) {
> > +                     /* No smaller child, we are done */
> > +                     pq_assert_heap(this);
> > +                     return;
> > +             }
> > +             /* Element larger than smaller child, must move down */
> > +             assert(this->heap[child].elem != NULL);
> > +             /* 1) Move child up to current */
> > +             this->heap[current].elem = this->heap[child].elem;
> > +             this->heap[current].prio = this->heap[child].prio;
> > +             /* 2) Move current down to child */
> > +             this->heap[child].elem = elem;
> > +             this->heap[child].prio = prio;
> > +             this->heap[child].elem->index = child;
> > +
> > +             this->heap[current].elem->index = current; /* cache misses!
> > */
> > +             /* Continue moving element until it is in the right place
> */
> > +             current = child;
> > +     }
> > +}
> > +
> > +bool
> > +pq_register_element(priority_queue *this, pq_element *elem)
> > +{
> > +     if (odp_likely(this->reg_elems < this->max_elems)) {
> > +             elem->pq = this;
> > +             this->reg_elems++;
> > +             return true;
> > +     }
> > +     return false;
> > +}
> > +
> > +void
> > +pq_unregister_element(priority_queue *this, pq_element *elem)
> > +{
> > +     assert(elem->pq == this);
> > +     if (is_active(elem))
> > +             pq_deactivate_element(this, elem);
> > +     this->reg_elems--;
> > +}
> > +
> > +void
> > +pq_activate_element(priority_queue *this, pq_element *elem,
> > pq_priority_t prio)
> > +{
> > +     assert(elem->index == INVALID_INDEX);
> > +     /* Insert element at end */
> > +     uint32_t index = this->num_elems++;
> > +     this->heap[index].elem = elem;
> > +     this->heap[index].prio = prio;
> > +     elem->index = index;
> > +     elem->prio = prio;
> > +     pq_bubble_up(this, elem);
> > +}
> > +
> > +void
> > +pq_deactivate_element(priority_queue *this, pq_element *elem)
> > +{
> > +     assert(elem->pq == this);
> > +     if (odp_likely(is_active(elem))) {
> > +             /* Swap element with last element */
> > +             uint32_t current = elem->index;
> > +             uint32_t last = --this->num_elems;
> > +             if (odp_likely(last != current)) {
> > +                     /* Move last element to current */
> > +                     this->heap[current].elem = this->heap[last].elem;
> > +                     this->heap[current].prio = this->heap[last].prio;
> > +                     this->heap[current].elem->index = current;
> > +                     /* Bubble down old 'last' element to its proper
> > place*/
> > +                     if (this->heap[current].prio < elem->prio)
> > +                             pq_bubble_up(this,
> this->heap[current].elem);
> > +                     else
> > +                             pq_bubble_down(this,
> this->heap[current].elem);
> > +             }
> > +             elem->index = INVALID_INDEX;
> > +             pq_assert_heap(this);
> > +     }
> > +}
> > +
> > +void
> > +pq_reset_element(priority_queue *this, pq_element *elem, pq_priority_t
> > prio)
> > +{
> > +     assert(prio != INVALID_PRIORITY);
> > +     if (odp_likely(is_active(elem))) {
> > +             assert(prio >= elem->prio);
> > +             elem->prio = prio;
> > +             this->heap[elem->index].prio = prio;/* cache misses here!
> */
> > +             pq_bubble_down(this, elem);
> > +             pq_assert_heap(this);
> > +     } else {
> > +             pq_activate_element(this, elem, prio);
> > +     }
> > +}
> > +
> > +pq_priority_t pq_first_priority(const priority_queue *this)
> > +{
> > +     return this->num_elems != 0 ? this->heap[0].prio :
> > INVALID_PRIORITY;
> > +}
> > +
> > +pq_element *
> > +pq_release_element(priority_queue *this, pq_priority_t threshold)
> > +{
> > +     if (odp_likely(this->num_elems != 0 &&
> > +                    this->heap[0].prio <= threshold)) {
> > +             pq_element *elem = this->heap[0].elem;
> > +             /* Remove element from heap */
> > +             pq_deactivate_element(this, elem);
> > +             assert(elem->prio <= threshold);
> > +             return elem;
> > +     }
> > +     return NULL;
> > +}
> > diff --git a/platform/linux-generic/odp_timer.c b/platform/linux-
> > generic/odp_timer.c
> > index 313c713..0e5071c 100644
> > --- a/platform/linux-generic/odp_timer.c
> > +++ b/platform/linux-generic/odp_timer.c
> > @@ -4,428 +4,713 @@
> >   * SPDX-License-Identifier:     BSD-3-Clause
> >   */
> >
> > -#include <odp_timer.h>
> > -#include <odp_timer_internal.h>
> > -#include <odp_time.h>
> > -#include <odp_buffer_pool_internal.h>
> > -#include <odp_internal.h>
> > -#include <odp_atomic.h>
> > -#include <odp_spinlock.h>
> > -#include <odp_sync.h>
> > -#include <odp_debug.h>
> > -
> > -#include <signal.h>
> > -#include <time.h>
> > +/**
> > + * @file
> > + *
> > + * ODP timer service
> > + *
> > + */
> >
> > +#include <assert.h>
> > +#include <errno.h>
> >  #include <string.h>
> > -
> > -#define NUM_TIMERS    1
> > -#define MAX_TICKS     1024
> > -#define MAX_RES       ODP_TIME_SEC
> > -#define MIN_RES       (100*ODP_TIME_USEC)
> > -
> > -
> > -typedef struct {
> > -     odp_spinlock_t lock;
> > -     timeout_t      *list;
> > -} tick_t;
> > -
> > -typedef struct {
> > -     int               allocated;
> > -     volatile int      active;
> > -     volatile uint64_t cur_tick;
> > -     timer_t           timerid;
> > -     odp_timer_t       timer_hdl;
> > -     odp_buffer_pool_t pool;
> > -     uint64_t          resolution_ns;
> > -     uint64_t          max_ticks;
> > -     tick_t            tick[MAX_TICKS];
> > -
> > -} timer_ring_t;
> > -
> > -typedef struct {
> > -     odp_spinlock_t lock;
> > -     int            num_timers;
> > -     timer_ring_t   timer[NUM_TIMERS];
> > -
> > -} timer_global_t;
> > -
> > -/* Global */
> > -static timer_global_t odp_timer;
> > -
> > -static void add_tmo(tick_t *tick, timeout_t *tmo)
> > +#include <stdlib.h>
> > +#include <time.h>
> > +#include <signal.h>
> > +#include "odp_std_types.h"
> > +#include "odp_buffer.h"
> > +#include "odp_buffer_pool.h"
> > +#include "odp_queue.h"
> > +#include "odp_hints.h"
> > +#include "odp_sync.h"
> > +#include "odp_ticketlock.h"
> > +#include "odp_debug.h"
> > +#include "odp_align.h"
> > +#include "odp_shared_memory.h"
> > +#include "odp_hints.h"
> > +#include "odp_internal.h"
> > +#include "odp_time.h"
> > +#include "odp_timer.h"
> > +#include "odp_timer_internal.h"
> > +#include "odp_priority_queue_internal.h"
> > +
> > +/***********************************************************************
> > *******
> > + * Translation between timeout and timeout header
> > +
> > *************************************************************************
> > ****/
> > +
> > +static inline odp_timeout_hdr_t *odp_tmo_to_hdr(odp_timer_tmo_t tmo)
> >  {
> > -     odp_spinlock_lock(&tick->lock);
> > -
> > -     tmo->next  = tick->list;
> > -     tick->list = tmo;
> > +     odp_buffer_t buf = odp_buffer_from_timeout(tmo);
> > +     odp_timeout_hdr_t *tmo_hdr = (odp_timeout_hdr_t
> > *)odp_buf_to_hdr(buf);
> > +     return tmo_hdr;
> > +}
> >
> > -     odp_spinlock_unlock(&tick->lock);
> > +/***********************************************************************
> > *******
> > + * odp_timer abstract datatype
> > +
> > *************************************************************************
> > ****/
> > +
> > +typedef struct odp_timer_s {
> > +     pq_element pqelem;/* Base class */
> > +     uint64_t req_tmo;/* Requested timeout tick */
> > +     odp_buffer_t tmo_buf;/* ODP_BUFFER_INVALID if timeout enqueued */
> > +     odp_queue_t queue;/* ODP_QUEUE_INVALID if timer is free */
> > +     uint32_t tag;/* Reusing tag as next pointer/index when timer is
> > free */
> > +     uint16_t gencnt;/* Smaller to make place for user_buf flag */
> > +     unsigned int user_buf:1; /* User-defined buffer? */
> > +} odp_timer;
> > +
> > +/* Constructor */
> > +static inline void odp_timer_con(odp_timer *this)
> > +{
> > +     pq_element_con(&this->pqelem);
> > +     this->tmo_buf = ODP_BUFFER_INVALID;
> > +     this->queue = ODP_QUEUE_INVALID;
> > +     this->gencnt = 0;
> >  }
> >
> > -static timeout_t *rem_tmo(tick_t *tick)
> > +/* Destructor */
> > +static inline void odp_timer_des(odp_timer *this)
> >  {
> > -     timeout_t *tmo;
> > +     assert(this->tmo_buf == ODP_BUFFER_INVALID);
> > +     assert(this->queue == ODP_QUEUE_INVALID);
> > +     pq_element_des(&this->pqelem);
> > +}
> >
> > -     odp_spinlock_lock(&tick->lock);
> > +/* Setup when timer is allocated */
> > +static void setup(odp_timer *this,
> > +               odp_queue_t _q,
> > +               void *_up,
> > +               odp_buffer_t _tmo)
> > +{
> > +     this->req_tmo = INVALID_PRIORITY;
> > +     this->tmo_buf = _tmo;
> > +     this->queue = _q;
> > +     this->tag = 0;
> > +     this->user_buf = false;
> > +     /* Initialise constant fields of timeout event */
> > +     odp_timeout_hdr_t *tmo_hdr =
> > +             odp_tmo_to_hdr(odp_timeout_from_buffer(this->tmo_buf));
> > +     tmo_hdr->gencnt = this->gencnt;
> > +     tmo_hdr->timer = this;
> > +     tmo_hdr->user_ptr = _up;
> > +     /* tmo_hdr->tag set at expiration time */
> > +     /* tmo_hdr->expiration set at expiration time */
> > +     assert(this->queue != ODP_QUEUE_INVALID);
> > +}
> >
> > -     tmo = tick->list;
> > +/* Teardown when timer is freed */
> > +static odp_buffer_t teardown(odp_timer *this)
> > +{
> > +     /* Increase generation count to make any pending timeout(s)
> > orphaned */
> > +     ++this->gencnt;
> > +     odp_buffer_t buf = this->tmo_buf;
> > +     this->tmo_buf = ODP_BUFFER_INVALID;
> > +     this->queue = ODP_QUEUE_INVALID;
> > +     return buf;
> > +}
> >
> > -     if (tmo)
> > -             tick->list = tmo->next;
> > +static inline uint32_t get_next_free(odp_timer *this)
> > +{
> > +     assert(this->queue == ODP_QUEUE_INVALID);
> > +     return this->tag;
> > +}
> >
> > -     odp_spinlock_unlock(&tick->lock);
> > +static inline void set_next_free(odp_timer *this, uint32_t nf)
> > +{
> > +     assert(this->queue == ODP_QUEUE_INVALID);
> > +     this->tag = nf;
> > +}
> >
> > -     if (tmo)
> > -             tmo->next = NULL;
> > +/***********************************************************************
> > *******
> > + * odp_timer_pool abstract datatype
> > + * Includes alloc and free timer
> > +
> > *************************************************************************
> > ****/
> > +
> > +typedef struct odp_timer_pool_s {
> > +     priority_queue pq;
> > +     uint64_t cur_tick;/* Current tick value */
> > +     uint64_t min_tick;/* Current expiration lower bound */
> > +     uint64_t max_tick;/* Current expiration higher bound */
> > +     bool shared;
> > +     odp_ticketlock_t lock;
> > +     const char *name;
> > +     odp_buffer_pool_t buf_pool;
> > +     uint64_t resolution_ns;
> > +     uint64_t min_tmo_tck;
> > +     uint64_t max_tmo_tck;
> > +     odp_timer *timers;
> > +     uint32_t num_alloc;/* Current number of allocated timers */
> > +     uint32_t max_timers;/* Max number of timers */
> > +     uint32_t first_free;/* 0..max_timers-1 => free timer */
> > +     timer_t timerid;
> > +     odp_timer_clk_src_t clk_src;
> > +} odp_timer_pool;
> > +
> > +/* Forward declarations */
> > +static void timer_init(odp_timer_pool *tp);
> > +static void timer_exit(odp_timer_pool *tp);
> > +
> > +static void odp_timer_pool_con(odp_timer_pool *this,
> > +                            const char *_n,
> > +                            odp_buffer_pool_t _bp,
> > +                            uint64_t _r,
> > +                            uint64_t _mint,
> > +                            uint64_t _maxt,
> > +                            uint32_t _mt,
> > +                            bool _s,
> > +                            odp_timer_clk_src_t _cs)
> > +{
> > +     priority_queue_con(&this->pq, _mt);
> > +     this->cur_tick = 0;
> > +     this->shared = _s;
> > +     this->name = strdup(_n);
> > +     this->buf_pool = _bp;
> > +     this->resolution_ns = _r;
> > +     this->min_tmo_tck = odp_timer_ns_to_tick(this, _mint);
> > +     this->max_tmo_tck = odp_timer_ns_to_tick(this, _maxt);
> > +     this->min_tick = this->cur_tick + this->min_tmo_tck;
> > +     this->max_tick = this->cur_tick + this->max_tmo_tck;
> > +     this->num_alloc = 0;
> > +     this->max_timers = _mt;
> > +     this->first_free = 0;
> > +     this->clk_src = _cs;
> > +     this->timers = malloc(sizeof(odp_timer) * this->max_timers);
> > +     if (this->timers == NULL)
> > +             ODP_ABORT("%s: malloc failed\n", _n);
> > +     uint32_t i;
> > +     for (i = 0; i < this->max_timers; i++)
> > +             odp_timer_con(&this->timers[i]);
> > +     for (i = 0; i < this->max_timers; i++)
> > +             set_next_free(&this->timers[i], i + 1);
> > +     odp_ticketlock_init(&this->lock);
> > +     if (this->clk_src == ODP_CLOCK_CPU)
> > +             timer_init(this);
> > +     /* Make sure timer pool initialisation is globally observable */
> > +     /* before we return a pointer to it */
> > +     odp_sync_stores();
> > +}
> >
> > -     return tmo;
> > +static odp_timer_pool *odp_timer_pool_new(
> > +     const char *_n,
> > +     odp_buffer_pool_t _bp,
> > +     uint64_t _r,
> > +     uint64_t _mint,
> > +     uint64_t _maxt,
> > +     uint32_t _mt,
> > +     bool _s,
> > +     odp_timer_clk_src_t _cs)
> > +{
> > +     odp_timer_pool *this = malloc(sizeof(odp_timer_pool));
> > +     if (odp_unlikely(this == NULL))
> > +             ODP_ABORT("%s: timer pool malloc failed\n", _n);
> > +     odp_timer_pool_con(this, _n, _bp, _r, _mint, _maxt, _mt, _s, _cs);
> > +     return this;
> >  }
> >
> > -/**
> > - * Search and delete tmo entry from timeout list
> > - * return -1 : on error.. handle not in list
> > - *           0 : success
> > - */
> > -static int find_and_del_tmo(timeout_t **tmo, odp_timer_tmo_t handle)
> > +static void odp_timer_pool_des(odp_timer_pool *this)
> >  {
> > -     timeout_t *cur, *prev;
> > -     prev = NULL;
> > +     if (this->shared)
> > +             odp_ticketlock_lock(&this->lock);
> > +     if (this->num_alloc != 0) {
> > +             /* It's a programming error to attempt to destroy a */
> > +             /* timer pool which is still in use */
> > +             ODP_ABORT("%s: timers in use\n", this->name);
> > +     }
> > +     if (this->clk_src == ODP_CLOCK_CPU)
> > +             timer_exit(this);
> > +     uint32_t i;
> > +     for (i = 0; i < this->max_timers; i++)
> > +             odp_timer_des(&this->timers[i]);
> > +     free(this->timers);
> > +     priority_queue_des(&this->pq);
> > +     odp_sync_stores();
> > +}
> >
> > -     for (cur = *tmo; cur != NULL; prev = cur, cur = cur->next) {
> > -             if (cur->tmo_buf == handle) {
> > -                     if (prev == NULL)
> > -                             *tmo = cur->next;
> > -                     else
> > -                             prev->next = cur->next;
> > +static void odp_timer_pool_del(odp_timer_pool *this)
> > +{
> > +     odp_timer_pool_des(this);
> > +     free(this);
> > +}
> >
> > -                     break;
> > +static inline odp_timer *timer_alloc(odp_timer_pool *this,
> > +                                  odp_queue_t queue,
> > +                                  void *user_ptr,
> > +                                  odp_buffer_t tmo_buf)
> > +{
> > +     odp_timer *tim = ODP_TIMER_INVALID;
> > +     if (odp_likely(this->shared))
> > +             odp_ticketlock_lock(&this->lock);
> > +     if (odp_likely(this->num_alloc < this->max_timers)) {
> > +             this->num_alloc++;
> > +             /* Remove first unused timer from free list */
> > +             assert(this->first_free != this->max_timers);
> > +             tim = &this->timers[this->first_free];
> > +             this->first_free = get_next_free(tim);
> > +             /* Insert timer into priority queue */
> > +             if (odp_unlikely(!pq_register_element(&this->pq,
> > +                                                   &tim->pqelem))) {
> > +                     /* Unexpected internal error */
> > +                     abort();
> >               }
> > +             /* Create timer */
> > +             setup(tim, queue, user_ptr, tmo_buf);
> > +     } else {
> > +             errno = ENFILE; /* Reusing file table overflow */
> >       }
> > -
> > -     if (!cur)
> > -             /* couldn't find tmo in list */
> > -             return -1;
> > -
> > -     /* application to free tmo_buf provided by absolute_tmo call */
> > -     return 0;
> > +     if (odp_likely(this->shared))
> > +             odp_ticketlock_unlock(&this->lock);
> > +     return tim;
> >  }
> >
> > -int odp_timer_cancel_tmo(odp_timer_t timer_hdl, odp_timer_tmo_t tmo)
> > +static inline void timer_free(odp_timer_pool *this, odp_timer *tim)
> >  {
> > -     int id;
> > -     int tick_idx;
> > -     timeout_t *cancel_tmo;
> > -     odp_timeout_hdr_t *tmo_hdr;
> > -     tick_t *tick;
> > -
> > -     /* get id */
> > -     id = (int)timer_hdl - 1;
> > -
> > -     tmo_hdr = odp_timeout_hdr((odp_timeout_t) tmo);
> > -     /* get tmo_buf to cancel */
> > -     cancel_tmo = &tmo_hdr->meta;
> > +     if (odp_likely(this->shared))
> > +             odp_ticketlock_lock(&this->lock);
> > +     if (odp_unlikely(tim->queue == ODP_QUEUE_INVALID))
> > +             ODP_ABORT("Invalid timer %p\n", tim);
> > +     /* Destroy timer */
> > +     odp_buffer_t buf = teardown(tim);
> > +     /* Remove timer from priority queue */
> > +     pq_unregister_element(&this->pq, &tim->pqelem);
> > +     /* Insert timer into free list */
> > +     set_next_free(tim, this->first_free);
> > +     this->first_free = tim - &this->timers[0];
> > +     assert(this->num_alloc != 0);
> > +     this->num_alloc--;
> > +     if (odp_likely(this->shared))
> > +             odp_ticketlock_unlock(&this->lock);
> > +     if (buf != ODP_BUFFER_INVALID)
> > +             odp_buffer_free(buf);
> > +}
> >
> > -     tick_idx = cancel_tmo->tick;
> > -     tick = &odp_timer.timer[id].tick[tick_idx];
> > +/***********************************************************************
> > *******
> > + * Operations on timers
> > + * reset/reset_w_buf/cancel timer, return timeout
> > +
> > *************************************************************************
> > ****/
> >
> > -     odp_spinlock_lock(&tick->lock);
> > -     /* search and delete tmo from tick list */
> > -     if (find_and_del_tmo(&tick->list, tmo) != 0) {
> > -             odp_spinlock_unlock(&tick->lock);
> > -             ODP_DBG("Couldn't find the tmo (%d) in tick list\n",
> > (int)tmo);
> > -             return -1;
> > +static inline void timer_expire(odp_timer *tim)
> > +{
> > +     assert(tim->req_tmo != INVALID_PRIORITY);
> > +     /* Timer expired, is there actually any timeout event */
> > +     /* we can enqueue? */
> > +     if (odp_likely(tim->tmo_buf != ODP_BUFFER_INVALID)) {
> > +             /* Swap out timeout buffer */
> > +             odp_buffer_t buf = tim->tmo_buf;
> > +             tim->tmo_buf = ODP_BUFFER_INVALID;
> > +             if (odp_likely(!tim->user_buf)) {
> > +                     odp_timeout_hdr_t *tmo_hdr =
> > +
>  odp_tmo_to_hdr(odp_timeout_from_buffer(buf));
> > +                     /* Copy tag and requested expiration tick from
> timer
> > */
> > +                     tmo_hdr->tag = tim->tag;
> > +                     tmo_hdr->expiration = tim->req_tmo;
> > +             }
> > +             /* Else don't touch user-defined buffer */
> > +             int rc = odp_queue_enq(tim->queue, buf);
> > +             if (odp_unlikely(rc != 0))
> > +                     ODP_ABORT("Failed to enqueue timeout buffer
> (%d)\n",
> > +                               rc);
> > +             /* Mark timer as inactive */
> > +             tim->req_tmo = INVALID_PRIORITY;
> >       }
> > -     odp_spinlock_unlock(&tick->lock);
> > -
> > -     return 0;
> > +     /* No, timeout event already enqueued or unavailable */
> > +     /* Keep timer active, odp_timer_return_tmo() will patch up */
> >  }
> >
> > -static void notify_function(union sigval sigval)
> > +static odp_timer_set_t timer_reset(odp_timer_pool *tp,
> > +                                odp_timer *tim,
> > +                                uint64_t abs_tck)
> >  {
> > -     uint64_t cur_tick;
> > -     timeout_t *tmo;
> > -     tick_t *tick;
> > -     timer_ring_t *timer;
> > +     assert(tim->user_buf == false);
> > +     if (odp_unlikely(abs_tck < tp->min_tick))
> > +             return ODP_TIMER_SET_TOOEARLY;
> > +     if (odp_unlikely(abs_tck > tp->max_tick))
> > +             return ODP_TIMER_SET_TOOLATE;
> > +
> > +     if (odp_likely(tp->shared))
> > +             odp_ticketlock_lock(&tp->lock);
> > +
> > +     if (odp_unlikely(tim->queue == ODP_QUEUE_INVALID))
> > +             ODP_ABORT("Invalid timer %p\n", tim);
> > +     if (odp_unlikely(tim->user_buf))
> > +             ODP_ABORT("Timer %p has user buffer\n", tim);
> > +     /* Increase timer tag to make any pending timeout stale */
> > +     tim->tag++;
> > +     /* Save requested timeout */
> > +     tim->req_tmo = abs_tck;
> > +     /* Update timer position in priority queue */
> > +     pq_reset_element(&tp->pq, &tim->pqelem, abs_tck);
> > +
> > +     if (odp_likely(tp->shared))
> > +             odp_ticketlock_unlock(&tp->lock);
> > +     return ODP_TIMER_SET_SUCCESS;
> > +}
> >
> > -     timer = sigval.sival_ptr;
> > +static odp_timer_set_t timer_reset_w_buf(odp_timer_pool *tp,
> > +             odp_timer *tim,
> > +             uint64_t abs_tck,
> > +             odp_buffer_t user_buf)
> > +{
> > +     if (odp_unlikely(abs_tck < tp->min_tick))
> > +             return ODP_TIMER_SET_TOOEARLY;
> > +     if (odp_unlikely(abs_tck > tp->max_tick))
> > +             return ODP_TIMER_SET_TOOLATE;
> > +
> > +     if (odp_likely(tp->shared))
> > +             odp_ticketlock_lock(&tp->lock);
> > +
> > +     if (odp_unlikely(tim->queue == ODP_QUEUE_INVALID))
> > +             ODP_ABORT("Invalid timer %p\n", tim);
> > +     /* Increase timer tag to make any pending timeout stale */
> > +     tim->tag++;
> > +     /* Save requested timeout */
> > +     tim->req_tmo = abs_tck;
> > +     /* Set flag indicating presence of user defined buffer */
> > +     tim->user_buf = true;
> > +     /* Swap in new buffer, save any old buffer pointer */
> > +     odp_buffer_t old_buf = tim->tmo_buf;
> > +     tim->tmo_buf = user_buf;
> > +     /* Update timer position in priority queue */
> > +     pq_reset_element(&tp->pq, &tim->pqelem, abs_tck);
> > +
> > +     if (odp_likely(tp->shared))
> > +             odp_ticketlock_unlock(&tp->lock);
> > +
> > +     /* Free old buffer if present */
> > +     if (odp_unlikely(old_buf != ODP_BUFFER_INVALID))
> > +             odp_buffer_free(old_buf);
> > +     return ODP_TIMER_SET_SUCCESS;
> > +}
> >
> > -     if (timer->active == 0) {
> > -             ODP_DBG("Timer (%u) not active\n", timer->timer_hdl);
> > -             return;
> > +static inline void timer_cancel(odp_timer_pool *tp,
> > +                             odp_timer *tim)
> > +{
> > +     odp_buffer_t old_buf = ODP_BUFFER_INVALID;
> > +     if (odp_likely(tp->shared))
> > +             odp_ticketlock_lock(&tp->lock);
> > +
> > +     if (odp_unlikely(tim->queue == ODP_QUEUE_INVALID))
> > +             ODP_ABORT("Invalid timer %p\n", tim);
> > +     if (odp_unlikely(tim->user_buf)) {
> > +             /* Swap out old user buffer */
> > +             old_buf = tim->tmo_buf;
> > +             tim->tmo_buf = ODP_BUFFER_INVALID;
> > +             /* tim->user_buf stays true */
> >       }
> > +     /* Else a normal timer (no user-defined buffer) */
> > +     /* Increase timer tag to make any pending timeout stale */
> > +     tim->tag++;
> > +     /* Clear requested timeout, mark timer inactive */
> > +     tim->req_tmo = INVALID_PRIORITY;
> > +     /* Remove timer from the priority queue */
> > +     pq_deactivate_element(&tp->pq, &tim->pqelem);
> > +
> > +     if (odp_likely(tp->shared))
> > +             odp_ticketlock_unlock(&tp->lock);
> > +     /* Free user-defined buffer if present */
> > +     if (odp_unlikely(old_buf != ODP_BUFFER_INVALID))
> > +             odp_buffer_free(old_buf);
> > +}
> >
> > -     /* ODP_DBG("Tick\n"); */
> > -
> > -     cur_tick = timer->cur_tick++;
> > -
> > -     odp_sync_stores();
> > +static inline void timer_return(odp_timer_pool *tp,
> > +                             odp_timer *tim,
> > +                             odp_timer_tmo_t tmo,
> > +                             const odp_timeout_hdr_t *tmo_hdr)
> > +{
> > +     odp_buffer_t tmo_buf = odp_buffer_from_timeout(tmo);
> > +     if (odp_likely(tp->shared))
> > +             odp_ticketlock_lock(&tp->lock);
> > +     if (odp_unlikely(tim->user_buf))
> > +             ODP_ABORT("Timer %p has user-defined buffer\n", tim);
> > +     if (odp_likely(tmo_hdr->gencnt == tim->gencnt)) {
> > +             assert(tim->tmo_buf == ODP_BUFFER_INVALID);
> > +             /* Save returned buffer for use when timer expires next
> time
> > */
> > +             tim->tmo_buf = tmo_buf;
> > +             tmo_buf = ODP_BUFFER_INVALID;
> > +             /* Check if timer is active and should have expired */
> > +             if (odp_unlikely(tim->req_tmo != INVALID_PRIORITY &&
> > +                              tim->req_tmo <= tp->cur_tick)) {
> > +                     /* Expire timer now since we have restored the
> timeout
> > +                        buffer */
> > +                     timer_expire(tim);
> > +             }
> > +             /* Else timer inactive or expires in the future */
> > +     }
> > +     /* Else timeout orphaned, free buffer later */
> > +     if (odp_likely(tp->shared))
> > +             odp_ticketlock_unlock(&tp->lock);
> > +     if (odp_unlikely(tmo_buf != ODP_BUFFER_INVALID))
> > +             odp_buffer_free(tmo_buf);
> > +}
> >
> > -     tick = &timer->tick[cur_tick % MAX_TICKS];
> > +/* Non-public so not in odp_timer.h but externally visible, must declare
> > + * somewhere */
> > +unsigned odp_timer_pool_expire(odp_timer_pool_t tpid, uint64_t tick);
> >
> > -     while ((tmo = rem_tmo(tick)) != NULL) {
> > -             odp_queue_t  queue;
> > -             odp_buffer_t buf;
> > +unsigned odp_timer_pool_expire(odp_timer_pool_t tpid, uint64_t tick)
> > +{
> > +     if (odp_likely(tpid->shared))
> > +             odp_ticketlock_lock(&tpid->lock);
> > +
> > +     unsigned nexp = 0;
> > +     odp_timer_t tim;
> > +     tpid->cur_tick = tick;
> > +     tpid->min_tick = tick + tpid->min_tmo_tck;
> > +     tpid->max_tick = tick + tpid->max_tmo_tck;
> > +     while ((tim = (odp_timer_t)pq_release_element(&tpid->pq, tick)) !=
> > +            ODP_TIMER_INVALID) {
> > +             assert(get_prio(&tim->pqelem) <= tick);
> > +             timer_expire(tim);
> > +             nexp++;
> > +     }
> >
> > -             queue = tmo->queue;
> > -             buf   = tmo->buf;
> > +     if (odp_likely(tpid->shared))
> > +             odp_ticketlock_unlock(&tpid->lock);
> > +     return nexp;
> > +}
> >
> > -             if (buf != tmo->tmo_buf)
> > -                     odp_buffer_free(tmo->tmo_buf);
> > +/***********************************************************************
> > *******
> > + * POSIX timer support
> > + * Functions that use Linux/POSIX per-process timers and related
> > facilities
> > +
> > *************************************************************************
> > ****/
> >
> > -             odp_queue_enq(queue, buf);
> > -     }
> > +static void timer_notify(sigval_t sigval)
> > +{
> > +     odp_timer_pool *tp = (odp_timer_pool *)sigval.sival_ptr;
> > +     uint64_t new_tick = tp->cur_tick + 1;
> > +     (void)odp_timer_pool_expire(tp, new_tick);
> >  }
> >
> > -static void timer_start(timer_ring_t *timer)
> > +static void timer_init(odp_timer_pool *tp)
> >  {
> >       struct sigevent   sigev;
> >       struct itimerspec ispec;
> >       uint64_t res, sec, nsec;
> >
> > -     ODP_DBG("\nTimer (%u) starts\n", timer->timer_hdl);
> > +     ODP_DBG("Creating POSIX timer for timer pool %s, period %"
> > +             PRIu64" ns\n", tp->name, tp->resolution_ns);
> >
> >       memset(&sigev, 0, sizeof(sigev));
> >       memset(&ispec, 0, sizeof(ispec));
> >
> >       sigev.sigev_notify          = SIGEV_THREAD;
> > -     sigev.sigev_notify_function = notify_function;
> > -     sigev.sigev_value.sival_ptr = timer;
> > +     sigev.sigev_notify_function = timer_notify;
> > +     sigev.sigev_value.sival_ptr = tp;
> >
> > -     if (timer_create(CLOCK_MONOTONIC, &sigev, &timer->timerid)) {
> > -             ODP_DBG("Timer create failed\n");
> > -             return;
> > -     }
> > +     if (timer_create(CLOCK_MONOTONIC, &sigev, &tp->timerid))
> > +             ODP_ABORT("timer_create() returned error %s\n",
> > +                       strerror(errno));
> >
> > -     res  = timer->resolution_ns;
> > +     res  = tp->resolution_ns;
> >       sec  = res / ODP_TIME_SEC;
> > -     nsec = res - sec*ODP_TIME_SEC;
> > +     nsec = res - sec * ODP_TIME_SEC;
> >
> >       ispec.it_interval.tv_sec  = (time_t)sec;
> >       ispec.it_interval.tv_nsec = (long)nsec;
> >       ispec.it_value.tv_sec     = (time_t)sec;
> >       ispec.it_value.tv_nsec    = (long)nsec;
> >
> > -     if (timer_settime(timer->timerid, 0, &ispec, NULL)) {
> > -             ODP_DBG("Timer set failed\n");
> > -             return;
> > -     }
> > -
> > -     return;
> > +     if (timer_settime(tp->timerid, 0, &ispec, NULL))
> > +             ODP_ABORT("timer_settime() returned error %s\n",
> > +                       strerror(errno));
> >  }
> >
> > -int odp_timer_init_global(void)
> > +static void timer_exit(odp_timer_pool *tp)
> >  {
> > -     ODP_DBG("Timer init ...");
> > -
> > -     memset(&odp_timer, 0, sizeof(timer_global_t));
> > -
> > -     odp_spinlock_init(&odp_timer.lock);
> > -
> > -     ODP_DBG("done\n");
> > -
> > -     return 0;
> > +     if (timer_delete(tp->timerid) != 0)
> > +             ODP_ABORT("timer_delete() returned error %s\n",
> > +                       strerror(errno));
> >  }
> >
> > -int odp_timer_disarm_all(void)
> > +/***********************************************************************
> > *******
> > + * Public API functions
> > + * Some parameter checks and error messages
> > + * No modifications of internal state
> > +
> > *************************************************************************
> > ****/
> > +odp_timer_pool_t
> > +odp_timer_pool_create(const char *name,
> > +                   odp_buffer_pool_t buf_pool,
> > +                   uint64_t resolution_ns,
> > +                   uint64_t min_timeout,
> > +                   uint64_t max_timeout,
> > +                   uint32_t num_timers,
> > +                   bool shared,
> > +                   odp_timer_clk_src_t clk_src)
> >  {
> > -     int timers;
> > -     struct itimerspec ispec;
> > -
> > -     odp_spinlock_lock(&odp_timer.lock);
> > -
> > -     timers = odp_timer.num_timers;
> > -
> > -     ispec.it_interval.tv_sec  = 0;
> > -     ispec.it_interval.tv_nsec = 0;
> > -     ispec.it_value.tv_sec     = 0;
> > -     ispec.it_value.tv_nsec    = 0;
> > -
> > -     for (; timers >= 0; timers--) {
> > -             if (timer_settime(odp_timer.timer[timers].timerid,
> > -                               0, &ispec, NULL)) {
> > -                     ODP_DBG("Timer reset failed\n");
> > -                     odp_spinlock_unlock(&odp_timer.lock);
> > -                     return -1;
> > -             }
> > -             odp_timer.num_timers--;
> > -     }
> > -
> > -     odp_spinlock_unlock(&odp_timer.lock);
> > -
> > -     return 0;
> > +     /* Verify that buffer pool can be used for timeouts */
> > +     odp_buffer_t buf = odp_buffer_alloc(buf_pool);
> > +     if (buf == ODP_BUFFER_INVALID)
> > +             ODP_ABORT("%s: Failed to allocate buffer\n", name);
> > +     if (odp_buffer_type(buf) != ODP_BUFFER_TYPE_TIMEOUT)
> > +             ODP_ABORT("%s: Buffer pool wrong type\n", name);
> > +     odp_buffer_free(buf);
> > +     odp_timer_pool_t tp = odp_timer_pool_new(name, buf_pool,
> > resolution_ns,
> > +                           min_timeout, max_timeout, num_timers,
> > +                           shared, clk_src);
> > +     return tp;
> >  }
> >
> > -odp_timer_t odp_timer_create(const char *name, odp_buffer_pool_t pool,
> > -                          uint64_t resolution_ns, uint64_t min_ns,
> > -                          uint64_t max_ns)
> > +void odp_timer_pool_start(void)
> >  {
> > -     uint32_t id;
> > -     timer_ring_t *timer;
> > -     odp_timer_t timer_hdl;
> > -     int i;
> > -     uint64_t max_ticks;
> > -     (void) name;
> > -
> > -     if (resolution_ns < MIN_RES)
> > -             resolution_ns = MIN_RES;
> > -
> > -     if (resolution_ns > MAX_RES)
> > -             resolution_ns = MAX_RES;
> > -
> > -     max_ticks = max_ns / resolution_ns;
> > -
> > -     if (max_ticks > MAX_TICKS) {
> > -             ODP_DBG("Maximum timeout too long: %"PRIu64" ticks\n",
> > -                     max_ticks);
> > -             return ODP_TIMER_INVALID;
> > -     }
> > -
> > -     if (min_ns < resolution_ns) {
> > -             ODP_DBG("Min timeout %"PRIu64" ns < resolution %"PRIu64"
> > ns\n",
> > -                     min_ns, resolution_ns);
> > -             return ODP_TIMER_INVALID;
> > -     }
> > -
> > -     odp_spinlock_lock(&odp_timer.lock);
> > -
> > -     if (odp_timer.num_timers >= NUM_TIMERS) {
> > -             odp_spinlock_unlock(&odp_timer.lock);
> > -             ODP_DBG("All timers allocated\n");
> > -             return ODP_TIMER_INVALID;
> > -     }
> > -
> > -     for (id = 0; id < NUM_TIMERS; id++) {
> > -             if (odp_timer.timer[id].allocated == 0)
> > -                     break;
> > -     }
> > -
> > -     timer = &odp_timer.timer[id];
> > -     timer->allocated = 1;
> > -     odp_timer.num_timers++;
> > -
> > -     odp_spinlock_unlock(&odp_timer.lock);
> > -
> > -     timer_hdl = id + 1;
> > -
> > -     timer->timer_hdl     = timer_hdl;
> > -     timer->pool          = pool;
> > -     timer->resolution_ns = resolution_ns;
> > -     timer->max_ticks     = MAX_TICKS;
> > -
> > -     for (i = 0; i < MAX_TICKS; i++) {
> > -             odp_spinlock_init(&timer->tick[i].lock);
> > -             timer->tick[i].list = NULL;
> > -     }
> > -
> > -     timer->active = 1;
> > -     odp_sync_stores();
> > -
> > -     timer_start(timer);
> > +     /* Nothing to do here, timer pools are started by the create call
> > */
> > +}
> >
> > -     return timer_hdl;
> > +void odp_timer_pool_destroy(odp_timer_pool_t tpid)
> > +{
> > +     odp_timer_pool_del(tpid);
> >  }
> >
> > -odp_timer_tmo_t odp_timer_absolute_tmo(odp_timer_t timer_hdl, uint64_t
> > tmo_tick,
> > -                                    odp_queue_t queue, odp_buffer_t buf)
> > +uint64_t odp_timer_tick_to_ns(odp_timer_pool_t tpid, uint64_t ticks)
> >  {
> > -     int id;
> > -     uint64_t tick;
> > -     uint64_t cur_tick;
> > -     timeout_t *new_tmo;
> > -     odp_buffer_t tmo_buf;
> > -     odp_timeout_hdr_t *tmo_hdr;
> > -     timer_ring_t *timer;
> > +     return ticks * tpid->resolution_ns;
> > +}
> >
> > -     id = (int)timer_hdl - 1;
> > -     timer = &odp_timer.timer[id];
> > +uint64_t odp_timer_ns_to_tick(odp_timer_pool_t tpid, uint64_t ns)
> > +{
> > +     return (uint64_t)(ns / tpid->resolution_ns);
> > +}
> >
> > -     cur_tick = timer->cur_tick;
> > -     if (tmo_tick <= cur_tick) {
> > -             ODP_DBG("timeout too close\n");
> > -             return ODP_TIMER_TMO_INVALID;
> > -     }
> > +uint64_t odp_timer_current_tick(odp_timer_pool_t tpid)
> > +{
> > +     return tpid->cur_tick;
> > +}
> >
> > -     if ((tmo_tick - cur_tick) > MAX_TICKS) {
> > -             ODP_DBG("timeout too far: cur %"PRIu64" tmo %"PRIu64"\n",
> > -                     cur_tick, tmo_tick);
> > -             return ODP_TIMER_TMO_INVALID;
> > +uintptr_t odp_timer_pool_query_conf(odp_timer_pool_t tpid,
> > +                                 odp_timer_pool_conf_t item)
> > +{
> > +     switch (item) {
> > +     case ODP_TIMER_NAME:
> > +             return (uintptr_t)(tpid->name);
> > +     case ODP_TIMER_RESOLUTION:
> > +             return tpid->resolution_ns;
> > +     case ODP_TIMER_MIN_TICKS:
> > +             return tpid->min_tmo_tck;
> > +     case ODP_TIMER_MAX_TICKS:
> > +             return tpid->max_tmo_tck;
> > +     case ODP_TIMER_NUM_TIMERS:
> > +             return tpid->max_timers;
> > +     case ODP_TIMER_SHARED:
> > +             return tpid->shared;
> > +     default:
> > +             return 0;
> >       }
> > +}
> >
> > -     tick = tmo_tick % MAX_TICKS;
> > -
> > -     tmo_buf = odp_buffer_alloc(timer->pool);
> > -     if (tmo_buf == ODP_BUFFER_INVALID) {
> > -             ODP_DBG("tmo buffer alloc failed\n");
> > -             return ODP_TIMER_TMO_INVALID;
> > +odp_timer_t odp_timer_alloc(odp_timer_pool_t tpid,
> > +                         odp_queue_t queue,
> > +                         void *user_ptr)
> > +{
> > +     /* We check this because ODP_QUEUE_INVALID is used */
> > +     /* to indicate a free timer */
> > +     if (odp_unlikely(queue == ODP_QUEUE_INVALID))
> > +             ODP_ABORT("%s: Invalid queue handle\n", tpid->name);
> > +     odp_buffer_t tmo_buf = odp_buffer_alloc(tpid->buf_pool);
> > +     if (odp_likely(tmo_buf != ODP_BUFFER_INVALID)) {
> > +             odp_timer *tim = timer_alloc(tpid, queue, user_ptr,
> > tmo_buf);
> > +             if (odp_likely(tim != ODP_TIMER_INVALID)) {
> > +                     /* Success */
> > +                     assert(tim->queue != ODP_QUEUE_INVALID);
> > +                     return tim;
> > +             }
> > +             odp_buffer_free(tmo_buf);
> >       }
> > +     /* Else failed to allocate timeout event */
> > +     /* errno set by odp_buffer_alloc() or timer_alloc () */
> > +     return ODP_TIMER_INVALID;
> > +}
> >
> > -     tmo_hdr = odp_timeout_hdr((odp_timeout_t) tmo_buf);
> > -     new_tmo = &tmo_hdr->meta;
> > -
> > -     new_tmo->timer_id = id;
> > -     new_tmo->tick     = (int)tick;
> > -     new_tmo->tmo_tick = tmo_tick;
> > -     new_tmo->queue    = queue;
> > -     new_tmo->tmo_buf  = tmo_buf;
> > -
> > -     if (buf != ODP_BUFFER_INVALID)
> > -             new_tmo->buf = buf;
> > -     else
> > -             new_tmo->buf = tmo_buf;
> > -
> > -     add_tmo(&timer->tick[tick], new_tmo);
> > -
> > -     return tmo_buf;
> > +void odp_timer_free(odp_timer_t tim)
> > +{
> > +     odp_timer_pool *tp = (odp_timer_pool *)get_pq(&tim->pqelem);
> > +     timer_free(tp, tim);
> >  }
> >
> > -uint64_t odp_timer_tick_to_ns(odp_timer_t timer_hdl, uint64_t ticks)
> > +odp_timer_set_t odp_timer_set_abs_w_buf(odp_timer_t tim,
> > +                                     uint64_t abs_tck,
> > +                                     odp_buffer_t user_buf)
> >  {
> > -     uint32_t id;
> > +     odp_timer_pool *tp = (odp_timer_pool *)get_pq(&tim->pqelem);
> > +     odp_timer_set_t rc = timer_reset_w_buf(tp, tim, abs_tck, user_buf);
> > +     return rc;
> > +}
> >
> > -     id = timer_hdl - 1;
> > -     return ticks * odp_timer.timer[id].resolution_ns;
> > +odp_timer_set_t odp_timer_set_abs(odp_timer_t tim, uint64_t abs_tck)
> > +{
> > +     odp_timer_pool *tp = (odp_timer_pool *)get_pq(&tim->pqelem);
> > +     odp_timer_set_t rc = timer_reset(tp, tim, abs_tck);
> > +     return rc;
> >  }
> >
> > -uint64_t odp_timer_ns_to_tick(odp_timer_t timer_hdl, uint64_t ns)
> > +odp_timer_set_t odp_timer_set_rel_w_buf(odp_timer_t tim,
> > +                                     uint64_t rel_tck,
> > +                                     odp_buffer_t user_buf)
> >  {
> > -     uint32_t id;
> > +     odp_timer_pool *tp = (odp_timer_pool *)get_pq(&tim->pqelem);
> > +     odp_timer_set_t rc = timer_reset_w_buf(tp, tim, tp->cur_tick +
> > rel_tck,
> > +                                            user_buf);
> > +     return rc;
> > +}
> >
> > -     id = timer_hdl - 1;
> > -     return ns / odp_timer.timer[id].resolution_ns;
> > +odp_timer_set_t odp_timer_set_rel(odp_timer_t tim, uint64_t rel_tck)
> > +{
> > +     odp_timer_pool *tp = (odp_timer_pool *)get_pq(&tim->pqelem);
> > +     odp_timer_set_t rc = timer_reset(tp, tim, tp->cur_tick + rel_tck);
> > +     return rc;
> >  }
> >
> > -uint64_t odp_timer_resolution(odp_timer_t timer_hdl)
> > +void odp_timer_cancel(odp_timer_t tim)
> >  {
> > -     uint32_t id;
> > +     odp_timer_pool *tp = (odp_timer_pool *)get_pq(&tim->pqelem);
> > +     timer_cancel(tp, tim);
> > +}
> >
> > -     id = timer_hdl - 1;
> > -     return odp_timer.timer[id].resolution_ns;
> > +void odp_timer_return_tmo(odp_timer_tmo_t tmo)
> > +{
> > +     const odp_timeout_hdr_t *tmo_hdr = odp_tmo_to_hdr(tmo);
> > +     odp_timer *parent_tim = tmo_hdr->timer;
> > +     odp_timer_pool *tp = (odp_timer_pool *)get_pq(&parent_tim->pqelem);
> > +     timer_return(tp, parent_tim, tmo, tmo_hdr);
> >  }
> >
> > -uint64_t odp_timer_maximum_tmo(odp_timer_t timer_hdl)
> > +odp_timer_tmo_status_t odp_timer_tmo_status(odp_timer_tmo_t tmo)
> >  {
> > -     uint32_t id;
> > +     const odp_timeout_hdr_t *tmo_hdr = odp_tmo_to_hdr(tmo);
> > +     odp_timer *parent_tim = tmo_hdr->timer;
> >
> > -     id = timer_hdl - 1;
> > -     return odp_timer.timer[id].max_ticks;
> > +     if (odp_unlikely(tmo_hdr->gencnt != parent_tim->gencnt)) {
> > +             /* Generation counters differ => timer has been freed */
> > +             return ODP_TMO_ORPHAN;
> > +     }
> > +     /* Else generation counters match => parent timer exists */
> > +
> > +     if (odp_likely(parent_tim->tag == tmo_hdr->tag))
> > +             return ODP_TMO_FRESH;
> > +     else
> > +             return ODP_TMO_STALE;
> >  }
> >
> > -uint64_t odp_timer_current_tick(odp_timer_t timer_hdl)
> > +odp_timer_t odp_timer_handle(odp_timer_tmo_t tmo)
> >  {
> > -     uint32_t id;
> > +     odp_timeout_hdr_t *tmo_hdr = odp_tmo_to_hdr(tmo);
> > +     odp_timer_t parent_tim = tmo_hdr->timer;
> > +     if (odp_likely(tmo_hdr->gencnt == parent_tim->gencnt))
> > +             return parent_tim;
> > +     else
> > +             return ODP_TIMER_INVALID;
> > +}
> >
> > -     id = timer_hdl - 1;
> > -     return odp_timer.timer[id].cur_tick;
> > +uint64_t odp_timer_expiration(odp_timer_tmo_t tmo)
> > +{
> > +     odp_timeout_hdr_t *tmo_hdr = odp_tmo_to_hdr(tmo);
> > +     return tmo_hdr->expiration;
> >  }
> >
> > -odp_timeout_t odp_timeout_from_buffer(odp_buffer_t buf)
> > +void *odp_timer_userptr(odp_timer_tmo_t tmo)
> >  {
> > -     return (odp_timeout_t) buf;
> > +     odp_timeout_hdr_t *tmo_hdr = odp_tmo_to_hdr(tmo);
> > +     return tmo_hdr->user_ptr;
> >  }
> >
> > -uint64_t odp_timeout_tick(odp_timeout_t tmo)
> > +int odp_timer_init_global(void)
> >  {
> > -     odp_timeout_hdr_t *tmo_hdr = odp_timeout_hdr(tmo);
> > -     return tmo_hdr->meta.tmo_tick;
> > +     return 0;
> >  }
> > diff --git a/test/api_test/odp_timer_ping.c
> > b/test/api_test/odp_timer_ping.c
> > index 7406a45..2617b5c 100644
> > --- a/test/api_test/odp_timer_ping.c
> > +++ b/test/api_test/odp_timer_ping.c
> > @@ -20,6 +20,8 @@
> >   *    Otherwise timeout may happen bcz of slow nw speed
> >   */
> >
> > +#include <assert.h>
> > +#include <stdlib.h>
> >  #include <unistd.h>
> >  #include <fcntl.h>
> >  #include <errno.h>
> > @@ -41,14 +43,15 @@
> >  #define MSG_POOL_SIZE         (4*1024*1024)
> >  #define BUF_SIZE             8
> >  #define PING_CNT     10
> > -#define PING_THRD    2       /* Send and Rx Ping thread */
> > +#define PING_THRD    2       /* send_ping and rx_ping threads */
> >
> >  /* Nanoseconds */
> >  #define RESUS        10000
> >  #define MINUS        10000
> >  #define MAXUS        10000000
> >
> > -static odp_timer_t test_timer_ping;
> > +static odp_timer_pool_t tp;
> > +static odp_timer_t test_timer_ping = ODP_TIMER_INVALID;
> >  static odp_timer_tmo_t test_ping_tmo;
> >
> >  #define PKTSIZE      64
> > @@ -128,15 +131,7 @@ static int listen_to_pingack(void)
> >                                        (socklen_t *)&len);
> >                       if (bytes > 0) {
> >                               /* pkt rxvd therefore cancel the timeout */
> > -                             if (odp_timer_cancel_tmo(test_timer_ping,
> > -                                                      test_ping_tmo) !=
> 0) {
> > -                                     ODP_ERR("cancel_tmo failed
> ..exiting
> > listner thread\n");
> > -                                     /* avoid exiting from here even if
> tmo
> > -                                      * failed for current ping,
> > -                                      * allow subsequent ping_rx
> request */
> > -                                     err = -1;
> > -
> > -                             }
> > +                             odp_timer_cancel(test_timer_ping);
> >                               /* cruel bad hack used for sender, listner
> ipc..
> >                                * euwww.. FIXME ..
> >                                */
> > @@ -160,7 +155,6 @@ static int send_ping_request(struct sockaddr_in
> > *addr)
> >
> >       uint64_t tick;
> >       odp_queue_t queue;
> > -     odp_buffer_t buf;
> >
> >       int err = 0;
> >
> > @@ -184,8 +178,16 @@ static int send_ping_request(struct sockaddr_in
> > *addr)
> >
> >       /* get the ping queue */
> >       queue = odp_queue_lookup("ping_timer_queue");
> > +     test_timer_ping = odp_timer_alloc(tp, queue, NULL);
> > +     if (test_timer_ping == ODP_TIMER_INVALID) {
> > +             ODP_ERR("Failed to allocate timer.\n");
> > +             err = -1;
> > +             goto err;
> > +     }
> >
> >       for (i = 0; i < PING_CNT; i++) {
> > +             odp_buffer_t buf;
> > +             odp_timer_tmo_t tmo;
> >               /* prepare icmp pkt */
> >               bzero(&pckt, sizeof(pckt));
> >               pckt.hdr.type = ICMP_ECHO;
> > @@ -209,12 +211,10 @@ static int send_ping_request(struct sockaddr_in
> > *addr)
> >               printf(" icmp_sent msg_cnt %d\n", i);
> >
> >               /* arm the timer */
> > -             tick = odp_timer_current_tick(test_timer_ping);
> > +             tick = odp_timer_current_tick(tp);
> >
> >               tick += 1000;
> > -             test_ping_tmo = odp_timer_absolute_tmo(test_timer_ping,
> > tick,
> > -                                                    queue,
> > -                                                    ODP_BUFFER_INVALID);
> > +             odp_timer_set_abs(test_timer_ping, tick);
> >               /* wait for timeout event */
> >               while ((buf = odp_queue_deq(queue)) == ODP_BUFFER_INVALID)
> {
> >                       /* flag true means ack rxvd.. a cruel hack as I
> > @@ -229,17 +229,28 @@ static int send_ping_request(struct sockaddr_in
> > *addr)
> >                               break;
> >                       }
> >               }
> > +             assert(odp_buffer_type(buf) == ODP_BUFFER_TYPE_TIMEOUT);
> > +             tmo = odp_timeout_from_buffer(buf);
> >
> > -             /* free tmo_buf for timeout case */
> > -             if (buf != ODP_BUFFER_INVALID) {
> > -                     ODP_DBG(" timeout msg_cnt [%i] \n", i);
> > +             switch (odp_timer_tmo_status(tmo)) {
> > +             case ODP_TMO_FRESH:
> > +                     ODP_DBG(" timeout msg_cnt [%i]\n", i);
> >                       /* so to avoid seg fault commented */
> > -                     odp_buffer_free(buf);
> >                       err = -1;
> > +                     break;
> > +             case ODP_TMO_STALE:
> > +                     /* Ignore stale timeouts */
> > +                     break;
> > +             case ODP_TMO_ORPHAN:
> > +                     ODP_ERR("Received orphaned timeout!\n");
> > +                     abort();
> >               }
> > +             odp_timer_return_tmo(tmo);
> >       }
> >
> >  err:
> > +     if (test_timer_ping != ODP_TIMER_INVALID)
> > +             odp_timer_free(test_timer_ping);
> >       return err;
> >  }
> >
> > @@ -340,9 +351,9 @@ int main(int argc ODP_UNUSED, char *argv[]
> > ODP_UNUSED)
> >       pool = odp_buffer_pool_create("msg_pool", pool_base, MSG_POOL_SIZE,
> >                                     BUF_SIZE,
> >                                     ODP_CACHE_LINE_SIZE,
> > -                                   ODP_BUFFER_TYPE_RAW);
> > +                                   ODP_BUFFER_TYPE_TIMEOUT);
> >       if (pool == ODP_BUFFER_POOL_INVALID) {
> > -             ODP_ERR("Pool create failed.\n");
> > +             ODP_ERR("Buffer pool create failed.\n");
> >               return -1;
> >       }
> >
> > @@ -357,15 +368,19 @@ int main(int argc ODP_UNUSED, char *argv[]
> > ODP_UNUSED)
> >               return -1;
> >       }
> >
> > -     test_timer_ping = odp_timer_create("ping_timer", pool,
> > -                                        RESUS*ODP_TIME_USEC,
> > -                                        MINUS*ODP_TIME_USEC,
> > -                                        MAXUS*ODP_TIME_USEC);
> > -
> > -     if (test_timer_ping == ODP_TIMER_INVALID) {
> > -             ODP_ERR("Timer create failed.\n");
> > +     /*
> > +      * Create timer pool
> > +      */
> > +     tp = odp_timer_pool_create("timer_pool", pool,
> > +                                RESUS*ODP_TIME_USEC,
> > +                                MINUS*ODP_TIME_USEC,
> > +                                MAXUS*ODP_TIME_USEC,
> > +                                1, false, ODP_CLOCK_CPU);
> > +     if (tp == ODP_TIMER_POOL_INVALID) {
> > +             ODP_ERR("Timer pool create failed.\n");
> >               return -1;
> >       }
> > +     odp_timer_pool_start();
> >
> >       odp_shm_print_all();
> >
> > --
> > 1.9.1
> >
> >
> > _______________________________________________
> > lng-odp mailing list
> > lng-odp@lists.linaro.org
> > http://lists.linaro.org/mailman/listinfo/lng-odp
>
Bill Fischofer Oct. 6, 2014, 10:42 a.m. UTC | #6
We're in the process of publishing drafts of formal design docs for each of
the remaining pieces of the ODP architecture. The scheduler and queues are
expected this week, so watch this space.

A lot of the existing code is early prototype stuff which needs to be
revised a bit to get to what we want for ODP v1.0. That's the design focus
over the next several weeks.  Not major stuff, but mostly refinement to
handle these good questions and use cases.

On Mon, Oct 6, 2014 at 4:36 AM, Ola Liljedahl <ola.liljedahl@linaro.org>
wrote:

> Gilad,
>
> Your suggestion makes sense. This is how a purely event-driven application
> would be designed.
>
> Possibly the wait/timeout parameter to the odp_schedule calls is a legacy
> from the time before there was a timer API in ODP. Maybe Petri can s(c)hed
> some light on this.
>
> I suspect there could be some performance benefits from specifying the
> timeout as an explicit parameter. If the scheduling timeout is implemented
> using a timer event facility (e.g. the ODP timer API), the application (or
> the ODP implementation, if it uses the same design) would have to reset
> that timer for every odp_schedule call; for a SW timer implementation this
> could add serious overhead. With an explicit timeout parameter, the
> scheduler implementation could instead read e.g. a cycle counter while
> (busy-)waiting for events to become available. That overhead should be
> lower and is incurred only when the thread is idle and waiting for work,
> as in the sketch below.
>
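> To make that concrete, here is a minimal sketch of such a busy-wait loop.
> It is illustrative only: read_cycle_counter() and poll_queues_once() are
> hypothetical stand-ins for platform internals, not real ODP calls, and
> wait_cycles is the explicit timeout already converted to CPU cycles.
>
> static odp_buffer_t schedule_wait(odp_queue_t *from, uint64_t wait_cycles)
> {
>         uint64_t start = read_cycle_counter(); /* e.g. TSC or cycle CSR */
>
>         for (;;) {
>                 /* Non-blocking poll of the scheduled queues */
>                 odp_buffer_t buf = poll_queues_once(from);
>                 if (buf != ODP_BUFFER_INVALID)
>                         return buf; /* got work */
>                 if (read_cycle_counter() - start >= wait_cycles)
>                         return ODP_BUFFER_INVALID; /* timed out, no timer armed */
>         }
> }
>
> A timer event based implementation would instead have to arm a timer
> before this loop and cancel it afterwards on every call.
>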
> The current API does not prevent an implementation from using timer events
> internally and does not limit an application from using the timer API for
> timeouts. It does add a little bit of implementation complexity. What is
> the best trade-off?
>
> -- Ola
>
> On 6 October 2014 08:22, Gilad Ben Yossef <giladb@ezchip.com> wrote:
>
>>
>> Another one of my stupid questions, I'm afraid.  :-)
>> If we have a timer implemented as an event pushed to queue which can be
>> scheduled as any other queue (which is good thing I think), why does our
>> schedule APIs need a timeout?
>> I mean, if you want a timeout, just add a scheduled timer queue and send
>> yourself timeout events. That's how I would implement the schedule timeouts
>> internally anyway (running a native timer on a core that does packet
>> processing stops it from enjoying Linux full NOHZ cpu isolation so we
>> really don't want timers there...)
>> Anything I've missed?
>> Thanks,
>> Gilad
>> Gilad Ben-Yossef
>> Software Architect
>> EZchip Technologies Ltd.
>> 37 Israel Pollak Ave, Kiryat Gat 82025 ,Israel
>> Tel: +972-4-959-6666 ext. 576, Fax: +972-8-681-1483
>> Mobile: +972-52-826-0388, US Mobile: +1-973-826-0388
>> Email: giladb@ezchip.com, Web: http://www.ezchip.com
>>
>> "Ethernet always wins."
>>         — Andy Bechtolsheim
>>
>>
>> > -----Original Message-----
>> > From: lng-odp-bounces@lists.linaro.org [mailto:lng-odp-
>> > bounces@lists.linaro.org] On Behalf Of Ola Liljedahl
>> > Sent: Thursday, October 02, 2014 6:23 PM
>> > To: lng-odp@lists.linaro.org
>> > Subject: [lng-odp] [PATCHv4] Timer API and and priority queue-based
>> > implementation
>> >
>> > Signed-off-by: Ola Liljedahl <ola.liljedahl@linaro.org>
>> > ---
>> > Fixed review comments for v3 from Anders R.
>> > * Example code snippets use @code/@endcode.
>> > * Added some missing doxygen comments.
>> > * Updated some comments.
>> > * Reverted year in copyright notices.
>> > * Added odp_likely() hint.
>> > * Made some variables self-descriptive and removed redundant comments.
>> > Changed to use ticket locks instead of spin locks (ticket locks are more
>> > fair).
>> > Changed to use ODP_ABORT() which has become available since the last
>> > patch.
>> >
>> >  example/timer/odp_timer_test.c                     | 125 +--
>> >  platform/linux-generic/Makefile.am                 |   1 +
>> >  platform/linux-generic/include/api/odp_timer.h     | 570 +++++++++++--
>> >  .../include/odp_priority_queue_internal.h          | 108 +++
>> >  .../linux-generic/include/odp_timer_internal.h     |  71 +-
>> >  platform/linux-generic/odp_priority_queue.c        | 283 +++++++
>> >  platform/linux-generic/odp_timer.c                 | 923
>> ++++++++++++++-
>> > ------
>> >  test/api_test/odp_timer_ping.c                     |  73 +-
>> >  8 files changed, 1648 insertions(+), 506 deletions(-)
>> >  create mode 100644 platform/linux-
>> > generic/include/odp_priority_queue_internal.h
>> >  create mode 100644 platform/linux-generic/odp_priority_queue.c
>> >
>> > diff --git a/example/timer/odp_timer_test.c
>> > b/example/timer/odp_timer_test.c
>> > index 6e1715d..750d785 100644
>> > --- a/example/timer/odp_timer_test.c
>> > +++ b/example/timer/odp_timer_test.c
>> > @@ -41,67 +41,89 @@ typedef struct {
>> >  /** @private Barrier for test synchronisation */
>> >  static odp_barrier_t test_barrier;
>> >
>> > -/** @private Timer handle*/
>> > -static odp_timer_t test_timer;
>> > +/** @private Timer pool handle */
>> > +static odp_timer_pool_t tp;
>> >
>> >
>> > +/** @private Timeout status ASCII strings */
>> > +static const char *const status2str[] = {
>> > +     "fresh", "stale", "orphaned"
>> > +};
>> > +
>> >  /** @private test timeout */
>> >  static void test_abs_timeouts(int thr, test_args_t *args)
>> >  {
>> > -     uint64_t tick;
>> >       uint64_t period;
>> >       uint64_t period_ns;
>> >       odp_queue_t queue;
>> > -     odp_buffer_t buf;
>> > -     int num;
>> > +     int remain = args->tmo_count;
>> > +     odp_timer_t hdl;
>> > +     uint64_t tick;
>> >
>> >       ODP_DBG("  [%i] test_timeouts\n", thr);
>> >
>> >       queue = odp_queue_lookup("timer_queue");
>> >
>> >       period_ns = args->period_us*ODP_TIME_USEC;
>> > -     period    = odp_timer_ns_to_tick(test_timer, period_ns);
>> > +     period    = odp_timer_ns_to_tick(tp, period_ns);
>> >
>> >       ODP_DBG("  [%i] period %"PRIu64" ticks,  %"PRIu64" ns\n", thr,
>> >               period, period_ns);
>> >
>> > -     tick = odp_timer_current_tick(test_timer);
>> > -
>> > -     ODP_DBG("  [%i] current tick %"PRIu64"\n", thr, tick);
>> > -
>> > -     tick += period;
>> > +     ODP_DBG("  [%i] current tick %"PRIu64"\n", thr,
>> > +             odp_timer_current_tick(tp));
>> >
>> > -     if (odp_timer_absolute_tmo(test_timer, tick, queue,
>> > ODP_BUFFER_INVALID)
>> > -         == ODP_TIMER_TMO_INVALID){
>> > -             ODP_DBG("Timeout request failed\n");
>> > +     odp_timer_t test_timer;
>> > +     test_timer = odp_timer_alloc(tp, queue, NULL);
>> > +     if (test_timer == ODP_TIMER_INVALID) {
>> > +             ODP_ERR("Failed to allocate timer\n");
>> >               return;
>> >       }
>> > +     tick = odp_timer_current_tick(tp);
>> > +     hdl = test_timer;
>> >
>> > -     num = args->tmo_count;
>> > -
>> > -     while (1) {
>> > -             odp_timeout_t tmo;
>> > -
>> > -             buf = odp_schedule_one(&queue, ODP_SCHED_WAIT);
>> > -
>> > -             tmo  = odp_timeout_from_buffer(buf);
>> > -             tick = odp_timeout_tick(tmo);
>> > -
>> > -             ODP_DBG("  [%i] timeout, tick %"PRIu64"\n", thr, tick);
>> > -
>> > -             odp_buffer_free(buf);
>> > -
>> > -             num--;
>> > -
>> > -             if (num == 0)
>> > -                     break;
>> > +     while (remain != 0) {
>> > +             odp_buffer_t buf;
>> > +             odp_timer_tmo_t tmo;
>> > +             odp_timer_tmo_status_t stat;
>> > +             odp_timer_set_t rc;
>> >
>> >               tick += period;
>> > +             rc = odp_timer_set_abs(hdl, tick);
>> > +             if (odp_unlikely(rc != ODP_TIMER_SET_SUCCESS)) {
>> > +                     ODP_ERR("odp_timer_set_abs() failed (%u)\n", rc);
>> > +                     abort();
>> > +             }
>> >
>> > -             odp_timer_absolute_tmo(test_timer, tick,
>> > -                                    queue, ODP_BUFFER_INVALID);
>> > +             /* Get the next ready buffer/timeout */
>> > +             buf = odp_schedule_one(&queue, ODP_SCHED_WAIT);
>> > +             if (odp_unlikely(odp_buffer_type(buf) !=
>> > +                              ODP_BUFFER_TYPE_TIMEOUT)) {
>> > +                     ODP_ERR("Unexpected buffer type received\n");
>> > +                     abort();
>> > +             }
>> > +             tmo = odp_timeout_from_buffer(buf);
>> > +             stat = odp_timer_tmo_status(tmo);
>> > +             tick = odp_timer_expiration(tmo);
>> > +             hdl = odp_timer_handle(tmo);
>> > +             ODP_DBG("  [%i] timeout, tick %"PRIu64", status %s\n",
>> > +                     thr, tick, status2str[stat]);
>> > +             /* if (stat == ODP_TMO_FRESH)  - do your thing! */
>> > +             if (odp_likely(stat == ODP_TMO_ORPHAN)) {
>> > +                     /* Some other thread freed the corresponding
>> > +                        timer after the timeout was already
>> > +                        enqueued */
>> > +                     /* Timeout handle is invalid, use our own timer */
>> > +                     hdl = test_timer;
>> > +             }
>> > +             /* Return timeout to timer manager, regardless of status
>> */
>> > +             odp_timer_return_tmo(tmo);
>> > +             remain--;
>> >       }
>> >
>> > +     odp_timer_cancel(test_timer);
>> > +     odp_timer_free(test_timer);
>> > +
>> >       if (odp_queue_sched_type(queue) == ODP_SCHED_SYNC_ATOMIC)
>> >               odp_schedule_release_atomic();
>> >  }
>> > @@ -155,7 +177,6 @@ static void print_usage(void)
>> >       printf("Options:\n");
>> >       printf("  -c, --count <number>    core count, core IDs start from
>> > 1\n");
>> >       printf("  -r, --resolution <us>   timeout resolution in usec\n");
>> > -     printf("  -m, --min <us>          minimum timeout in usec\n");
>> >       printf("  -x, --max <us>          maximum timeout in usec\n");
>> >       printf("  -p, --period <us>       timeout period in usec\n");
>> >       printf("  -t, --timeouts <count>  timeout repeat count\n");
>> > @@ -190,14 +211,14 @@ static void parse_args(int argc, char *argv[],
>> > test_args_t *args)
>> >       /* defaults */
>> >       args->core_count    = 0; /* all cores */
>> >       args->resolution_us = 10000;
>> > -     args->min_us        = args->resolution_us;
>> > +     args->min_us        = 0;
>> >       args->max_us        = 10000000;
>> >       args->period_us     = 1000000;
>> >       args->tmo_count     = 30;
>> >
>> >       while (1) {
>> >               opt = getopt_long(argc, argv, "+c:r:m:x:p:t:h",
>> > -                              longopts, &long_index);
>> > +                               longopts, &long_index);
>> >
>> >               if (opt == -1)
>> >                       break;  /* No more options */
>> > @@ -321,10 +342,25 @@ int main(int argc, char *argv[])
>> >                                     ODP_BUFFER_TYPE_TIMEOUT);
>> >
>> >       if (pool == ODP_BUFFER_POOL_INVALID) {
>> > -             ODP_ERR("Pool create failed.\n");
>> > +             ODP_ERR("Buffer pool create failed.\n");
>> >               return -1;
>> >       }
>> >
>> > +     tp = odp_timer_pool_create("timer_pool", pool,
>> > +                                args.resolution_us*ODP_TIME_USEC,
>> > +                                args.min_us*ODP_TIME_USEC,
>> > +                                args.max_us*ODP_TIME_USEC,
>> > +                                num_workers, /* One timer per worker */
>> > +                                true,
>> > +                                ODP_CLOCK_CPU);
>> > +     if (tp == ODP_TIMER_POOL_INVALID) {
>> > +             ODP_ERR("Timer pool create failed.\n");
>> > +             return -1;
>> > +     }
>> > +     odp_timer_pool_start();
>> > +
>> > +     odp_shm_print_all();
>> > +
>> >       /*
>> >        * Create a queue for timer test
>> >        */
>> > @@ -340,19 +376,6 @@ int main(int argc, char *argv[])
>> >               return -1;
>> >       }
>> >
>> > -     test_timer = odp_timer_create("test_timer", pool,
>> > -                                   args.resolution_us*ODP_TIME_USEC,
>> > -                                   args.min_us*ODP_TIME_USEC,
>> > -                                   args.max_us*ODP_TIME_USEC);
>> > -
>> > -     if (test_timer == ODP_TIMER_INVALID) {
>> > -             ODP_ERR("Timer create failed.\n");
>> > -             return -1;
>> > -     }
>> > -
>> > -
>> > -     odp_shm_print_all();
>> > -
>> >       printf("CPU freq %"PRIu64" hz\n", odp_sys_cpu_hz());
>> >       printf("Cycles vs nanoseconds:\n");
>> >       ns = 0;
>> > diff --git a/platform/linux-generic/Makefile.am b/platform/linux-generic/Makefile.am
>> > index d076d50..71f923c 100644
>> > --- a/platform/linux-generic/Makefile.am
>> > +++ b/platform/linux-generic/Makefile.am
>> > @@ -59,6 +59,7 @@ __LIB__libodp_la_SOURCES = \
>> >                          odp_packet_flags.c \
>> >                          odp_packet_io.c \
>> >                          odp_packet_socket.c \
>> > +                        odp_priority_queue.c \
>> >                          odp_queue.c \
>> >                          odp_ring.c \
>> >                          odp_rwlock.c \
>> > diff --git a/platform/linux-generic/include/api/odp_timer.h b/platform/linux-generic/include/api/odp_timer.h
>> > index 01db839..82a1e05 100644
>> > --- a/platform/linux-generic/include/api/odp_timer.h
>> > +++ b/platform/linux-generic/include/api/odp_timer.h
>> > @@ -8,9 +8,193 @@
>> >  /**
>> >   * @file
>> >   *
>> > - * ODP timer
>> > + * ODP timer service
>> >   */
>> >
>> > +/** Example #1 Retransmission timer (e.g. for reliable connections)
>> > + @code
>> > +
>> > +//Create timer pool for reliable connections
>> > +#define SEC 1000000000ULL //1s expressed in nanoseconds
>> > +odp_timer_pool_t tcp_tpid =
>> > +    odp_timer_pool_create("TCP",
>> > +                       buffer_pool,
>> > +                       1000000,//resolution 1ms
>> > +                       0,//min tmo
>> > +                       7200 * SEC,//max tmo length 2hours
>> > +                       40000,//num_timers
>> > +                       true,//shared
>> > +                       ODP_CLOCK_CPU
>> > +                      );
>> > +if (tcp_tpid == ODP_TIMER_POOL_INVALID)
>> > +{
>> > +     //Failed to create timer pool => fatal error
>> > +}
>> > +
>> > +
>> > +//Setting up a new connection
>> > +//Allocate retransmission timeout (identical for supervision timeout)
>> > +//The user pointer points back to the connection context
>> > +conn->ret_tim = odp_timer_alloc(tcp_tpid, queue, conn);
>> > +//Check if all resources were successfully allocated
>> > +if (conn->ret_tim == ODP_TIMER_INVALID)
>> > +{
>> > +     //Failed to allocate all resources for connection => tear down
>> > +     //Destroy timeout
>> > +     odp_timer_free(conn->ret_tim);
>> > +     //Tear down connection
>> > +     ...
>> > +     return false;
>> > +}
>> > +//All necessary resources successfully allocated
>> > +//Compute initial retransmission length in timer ticks
>> > +conn->ret_len = odp_timer_ns_to_tick(tcp_tpid, 3 * SEC);//Per RFC1122
>> > +//Arm the timer
>> > +odp_timer_set_rel(conn->ret_tim, conn->ret_len);
>> > +return true;
>> > +
>> > +
>> > +//A packet for the connection has just been transmitted
>> > +//Reset the retransmission timer
>> > +odp_timer_set_rel(conn->ret_tim, conn->ret_len);
>> > +
>> > +
>> > +//A retransmission timeout buffer for the connection has been received
>> > +odp_timer_tmo_t tmo = odp_timeout_from_buffer(buf);
>> > +odp_timer_tmo_status_t stat = odp_timer_tmo_status(tmo);
>> > +//Check if timeout is fresh or stale, for stale timeouts we need to reset the
>> > +//timer
>> > +if (stat == ODP_TMO_FRESH) {
>> > +     //Fresh timeout, last transmitted packet not acked in time => retransmit
>> > +     //Get connection from timeout event
>> > +     conn = odp_timer_userptr(tmo);
>> > +     //Retransmit last packet (e.g. TCP segment)
>> > +     ...
>> > +     //Re-arm timer using original delta value
>> > +     odp_timer_set_rel(conn->ret_tim, conn->ret_len);
>> > +} else if (stat == ODP_TMO_ORPHAN) {
>> > +     odp_free_buffer(buf);
>> > +     return;//Get out of here
>> > +} // else stat == ODP_TMO_STALE, do nothing
>> > +//Finished processing, return timeout
>> > +odp_timer_return_tmo(tmo);
>> > +
>> > + @endcode
>> > +*/
>> > +
>> > +/** Example #2 Periodic tick
>> > + @code
>> > +
>> > +//Create timer pool for periodic ticks
>> > +odp_timer_pool_t per_tpid =
>> > +    odp_timer_pool_create("periodic-tick",
>> > +                       buffer_pool,
>> > +                       1,//resolution 1ns
>> > +                       1,//minimum timeout length 1ns
>> > +                       1000000000,//maximum timeout length 1s
>> > +                       10,//num_timers
>> > +                       false,//not shared
>> > +                       ODP_CLOCK_CPU
>> > +                      );
>> > +if (per_tpid == ODP_TIMER_POOL_INVALID)
>> > +{
>> > +    //Failed to create timer pool => fatal error
>> > +}
>> > +
>> > +
>> > +//Allocate periodic timer
>> > +tim_1733 = odp_timer_alloc(per_tpid, queue, NULL);
>> > +//Check if all resources were successfully allocated
>> > +if (tim_1733 == ODP_TIMER_INVALID)
>> > +{
>> > +     //Failed to allocate all resources => tear down
>> > +     //Destroy timeout
>> > +     odp_timer_free(tim_1733);
>> > +     //Tear down other state
>> > +     ...
>> > +     return false;
>> > +}
>> > +//All necessary resources successfully allocated
>> > +//Compute tick period in timer ticks
>> > +period_1733 = odp_timer_ns_to_tick(per_tpid, 1000000000U / 1733U);//1733Hz
>> > +//Compute when next tick should expire
>> > +next_1733 = odp_timer_current_tick(per_tpid) + period_1733;
>> > +//Arm the periodic timer
>> > +odp_timer_set_abs(tim_1733, next_1733);
>> > +return true;
>> > +
>> > +
>> > +
>> > +//A periodic timer timeout has been received
>> > +odp_timer_tmo_t tmo = odp_timeout_from_buffer(buf);
>> > +//Get status of timeout
>> > +odp_timer_tmo_status_t stat = odp_timer_tmo_status(tmo);
>> > +//We expect the timeout is always fresh since we are not calling set or cancel
>> > +//on active or expired timers in this example
>> > +assert(stat == ODP_TMO_FRESH);
>> > +//Do processing driven by timeout *before*
>> > +...
>> > +do {
>> > +     //Compute when the timer should expire next
>> > +     next_1733 += period_1733;
>> > +     //Check that this is in the future
>> > +     if (odp_likely(next_1733 > odp_timer_current_tick(per_tpid)))
>> > +             break;//Yes, done
>> > +     //Else we missed a timeout
>> > +     //Optionally attempt some recovery and/or logging of the problem
>> > +     ...
>> > +} while (0);
>> > +//Re-arm periodic timer
>> > +odp_timer_set_abs(tim_1733, next_1733);
>> > +//Or do processing driven by timeout *after*
>> > +...
>> > +odp_timer_return_tmo(tmo);
>> > +return;
>> > +
>> > + @endcode
>> > +*/
>> > +
>> > +/** Example #3 Tear down of flow
>> > + @code
>> > +//ctx points to flow context data structure owned by application
>> > +//Free the timer, cancelling any timeout
>> > +odp_timer_free(ctx->timer);//Any enqueued timeout will become orphaned
>> > +//Continue tearing down and eventually freeing context
>> > +...
>> > +return;
>> > +
>> > +//A timeout has been received, check status
>> > +odp_timer_tmo_t tmo = odp_timeout_from_buffer(buf);
>> > +switch (odp_timer_tmo_status(tmo))
>> > +{
>> > +    case ODP_TMO_FRESH :
>> > +     //A flow has timed out, tear it down
>> > +     //Find flow context from timeout
>> > +     ctx = (context *)odp_timer_userptr(tmo);
>> > +     //Free the supervision timer, any enqueued timeout will remain
>> > +     odp_timer_free(ctx->timer);
>> > +     //Free other flow related resources
>> > +     ...
>> > +     //Free the timeout buffer
>> > +     odp_buffer_free(buf);
>> > +     //Flow torn down
>> > +     break;
>> > +    case ODP_TMO_STALE :
>> > +     //A stale timeout was received, return timeout and update timer
>> > +     odp_timer_return_tmo(tmo);
>> > +     break;
>> > +    case ODP_TMO_ORPHAN :
>> > +     //Orphaned timeout (from previously torn down flow)
>> > +     //No corresponding timer or flow context
>> > +     //Free the timeout buffer
>> > +     odp_buffer_free(buf);
>> > +     break;
>> > +}
>> > +
>> > + @endcode
>> > +*/
>> > +
>> >  #ifndef ODP_TIMER_H_
>> >  #define ODP_TIMER_H_
>> >
>> > @@ -18,144 +202,408 @@
>> >  extern "C" {
>> >  #endif
>> >
>> > +#include <stdlib.h>
>> >  #include <odp_std_types.h>
>> >  #include <odp_buffer.h>
>> >  #include <odp_buffer_pool.h>
>> >  #include <odp_queue.h>
>> >
>> > +struct odp_timer_pool_s; /**< Forward declaration */
>> > +
>> > +/**
>> > +* ODP timer pool handle (platform dependent)
>> > +*/
>> > +typedef struct odp_timer_pool_s *odp_timer_pool_t;
>> > +
>> > +/**
>> > + * Invalid timer pool handle (platform dependent).
>> > + */
>> > +#define ODP_TIMER_POOL_INVALID NULL
>> >
>> >  /**
>> > - * ODP timer handle
>> > + * Clock sources for timers in timer pool.
>> >   */
>> > -typedef uint32_t odp_timer_t;
>> > +typedef enum odp_timer_clk_src_e {
>> > +     /** Use CPU clock as clock source for timers */
>> > +     ODP_CLOCK_CPU,
>> > +     /** Use external clock as clock source for timers */
>> > +     ODP_CLOCK_EXT
>> > +     /* Platform dependent which other clock sources exist */
>> > +} odp_timer_clk_src_t;
>> >
>> > -/** Invalid timer */
>> > -#define ODP_TIMER_INVALID 0
>> > +struct odp_timer_s; /**< Forward declaration */
>> >
>> > +/**
>> > +* ODP timer handle (platform dependent).
>> > +*/
>> > +typedef struct odp_timer_s *odp_timer_t;
>> >
>> >  /**
>> > - * ODP timeout handle
>> > + * Invalid timer handle (platform dependent).
>> >   */
>> > -typedef odp_buffer_t odp_timer_tmo_t;
>> > -
>> > -/** Invalid timeout */
>> > -#define ODP_TIMER_TMO_INVALID 0
>> > +#define ODP_TIMER_INVALID NULL
>> >
>> > +/**
>> > + * Return values of timer set calls.
>> > + */
>> > +typedef enum odp_timer_set_e {
>> > +     /** Timer set operation successful */
>> > +     ODP_TIMER_SET_SUCCESS,
>> > +     /** Timer set operation failed, expiration too early */
>> > +     ODP_TIMER_SET_TOOEARLY,
>> > +     /** Timer set operation failed, expiration too late */
>> > +     ODP_TIMER_SET_TOOLATE
>> > +} odp_timer_set_t;
>> >
>> >  /**
>> > - * Timeout notification
>> > + * Timeout event handle.
>> >   */
>> > -typedef odp_buffer_t odp_timeout_t;
>> > +typedef odp_buffer_t odp_timer_tmo_t;
>> >
>> > +/**
>> > + * Status of a timeout event.
>> > + */
>> > +typedef enum odp_timer_tmo_status_e {
>> > +     /** Timeout is fresh, process it and return timeout */
>> > +     ODP_TMO_FRESH,
>> > +     /** Timer reset or cancelled, just return timeout  */
>> > +     ODP_TMO_STALE,
>> > +     /** Timer deleted, return or free timeout */
>> > +     ODP_TMO_ORPHAN
>> > +} odp_timer_tmo_status_t;
>> >
>> >  /**
>> > - * Create a timer
>> > + * Create a timer pool
>> >   *
>> > - * Creates a new timer with requested properties.
>> > + * Create a new timer pool.
>> >   *
>> >   * @param name       Name
>> > - * @param pool       Buffer pool for allocating timeout notifications
>> > + * @param buf_pool   Buffer pool for allocating timeouts (and only timeouts)
>> >   * @param resolution Timeout resolution in nanoseconds
>> > - * @param min_tmo    Minimum timeout duration in nanoseconds
>> > - * @param max_tmo    Maximum timeout duration in nanoseconds
>> > + * @param min_tmo    Minimum relative timeout in nanoseconds
>> > + * @param max_tmo    Maximum relative timeout in nanoseconds
>> > + * @param num_timers Number of supported timers (minimum)
>> > + * @param shared     Shared or private timer pool.
>> > + *              Operations on shared timers will include the necessary
>> > + *              mutual exclusion, operations on private timers may not
>> > + *              (mutual exclusion is the responsibility of the caller).
>> > + * @param clk_src    Clock source to use
>> >   *
>> > - * @return Timer handle if successful, otherwise ODP_TIMER_INVALID
>> > + * @return Timer pool handle if successful, otherwise ODP_TIMER_POOL_INVALID
>> > + * and errno set
>> >   */
>> > -odp_timer_t odp_timer_create(const char *name, odp_buffer_pool_t pool,
>> > -                          uint64_t resolution, uint64_t min_tmo,
>> > -                          uint64_t max_tmo);
>> > +odp_timer_pool_t
>> > +odp_timer_pool_create(const char *name,
>> > +                   odp_buffer_pool_t buf_pool,
>> > +                   uint64_t resolution,
>> > +                   uint64_t min_tmo,
>> > +                   uint64_t max_tmo,
>> > +                   uint32_t num_timers,
>> > +                   bool shared,
>> > +                   odp_timer_clk_src_t clk_src);
>> > +
>> > +/**
>> > + * Start a timer pool
>> > + *
>> > + * Start all created timer pools, enabling the allocation of timers.
>> > + * The purpose of this call is to coordinate the creation of multiple timer
>> > + * pools that may use the same underlying HW resources.
>> > + * This function may be called multiple times.
>> > + */
>> > +void odp_timer_pool_start(void);
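
For readers following the thread: a minimal, non-normative sketch of the intended
call order for odp_timer_pool_start(), namely create every pool first, then start
them once. The pool names and parameters below are made up for illustration;
buffer_pool and SEC are as in Example #1 above.

    odp_timer_pool_t tp_fast =
        odp_timer_pool_create("fast", buffer_pool,
                              1000,          /* resolution 1us */
                              0, SEC,        /* min/max relative timeout */
                              1000,          /* num_timers */
                              false,         /* private */
                              ODP_CLOCK_CPU);
    odp_timer_pool_t tp_slow =
        odp_timer_pool_create("slow", buffer_pool,
                              1000000,       /* resolution 1ms */
                              0, 7200 * SEC, /* min/max relative timeout */
                              40000,         /* num_timers */
                              true,          /* shared */
                              ODP_CLOCK_CPU);
    /* One start call enables allocation from all created pools */
    odp_timer_pool_start();
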
>> > +
>> > +/**
>> > + * Destroy a timer pool
>> > + *
>> > + * Destroy a timer pool, freeing all resources.
>> > + * All timers must have been freed.
>> > + *
>> > + * @param tpid  Timer pool identifier
>> > + */
>> > +void odp_timer_pool_destroy(odp_timer_pool_t tpid);
>> >
>> >  /**
>> >   * Convert timer ticks to nanoseconds
>> >   *
>> > - * @param timer Timer
>> > + * @param tpid  Timer pool identifier
>> >   * @param ticks Timer ticks
>> >   *
>> >   * @return Nanoseconds
>> >   */
>> > -uint64_t odp_timer_tick_to_ns(odp_timer_t timer, uint64_t ticks);
>> > +uint64_t odp_timer_tick_to_ns(odp_timer_pool_t tpid, uint64_t ticks);
>> >
>> >  /**
>> >   * Convert nanoseconds to timer ticks
>> >   *
>> > - * @param timer Timer
>> > + * @param tpid  Timer pool identifier
>> >   * @param ns    Nanoseconds
>> >   *
>> >   * @return Timer ticks
>> >   */
>> > -uint64_t odp_timer_ns_to_tick(odp_timer_t timer, uint64_t ns);
>> > +uint64_t odp_timer_ns_to_tick(odp_timer_pool_t tpid, uint64_t ns);
>> >
>> >  /**
>> > - * Timer resolution in nanoseconds
>> > + * Current tick value
>> >   *
>> > - * @param timer Timer
>> > + * @param tpid Timer pool identifier
>> >   *
>> > - * @return Resolution in nanoseconds
>> > + * @return Current time in timer ticks
>> > + */
>> > +uint64_t odp_timer_current_tick(odp_timer_pool_t tpid);
>> > +
>> > +/**
>> > + * ODP timer configurations
>> >   */
>> > -uint64_t odp_timer_resolution(odp_timer_t timer);
>> > +
>> > +typedef enum odp_timer_pool_conf_e {
>> > +     ODP_TIMER_NAME,      /**< Return name of timer pool */
>> > +     ODP_TIMER_RESOLUTION,/**< Return the timer resolution (in ns) */
>> > +     ODP_TIMER_MIN_TICKS, /**< Return the min supported rel timeout (ticks)*/
>> > +     ODP_TIMER_MAX_TICKS, /**< Return the max supported rel timeout (ticks)*/
>> > +     ODP_TIMER_NUM_TIMERS,/**< Return number of supported timers */
>> > +     ODP_TIMER_SHARED     /**< Return shared flag */
>> > +} odp_timer_pool_conf_t;
>> >
>> >  /**
>> > - * Maximum timeout in timer ticks
>> > + * Query different timer pool configurations, e.g.
>> > + *  Timer resolution in nanoseconds
>> > + *  Maximum timeout in timer ticks
>> > + *  Number of supported timers
>> > + *  Shared or private timer pool
>> >   *
>> > - * @param timer Timer
>> > + * @param tpid Timer pool identifier
>> > + * @param item Configuration item being queried
>> >   *
>> > - * @return Maximum timeout in timer ticks
>> > + * @return the requested piece of information or 0 for unknown item.
>> >   */
>> > -uint64_t odp_timer_maximum_tmo(odp_timer_t timer);
>> > +uintptr_t odp_timer_pool_query_conf(odp_timer_pool_t tpid,
>> > +                                 odp_timer_pool_conf_t item);
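
A small, non-normative sketch of how the query interface above might be used; it
assumes a valid pool handle tp and that each uintptr_t result is cast back to the
type documented for the item (the name item returning a pointer is my assumption):

    uint64_t res_ns  = (uint64_t)odp_timer_pool_query_conf(tp, ODP_TIMER_RESOLUTION);
    uint64_t max_tck = (uint64_t)odp_timer_pool_query_conf(tp, ODP_TIMER_MAX_TICKS);
    bool shared      = (bool)odp_timer_pool_query_conf(tp, ODP_TIMER_SHARED);
    const char *name = (const char *)odp_timer_pool_query_conf(tp, ODP_TIMER_NAME);
    printf("%s: resolution %"PRIu64" ns, max %"PRIu64" ticks, %s\n",
           name, res_ns, max_tck, shared ? "shared" : "private");
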
>> >
>> >  /**
>> > - * Current timer tick
>> > + * Allocate a timer
>> >   *
>> > - * @param timer Timer
>> > + * Create a timer (allocating all necessary resources e.g. timeout event) from
>> > + * the timer pool.
>> >   *
>> > - * @return Current time in timer ticks
>> > + * @param tpid     Timer pool identifier
>> > + * @param queue    Destination queue for timeout notifications
>> > + * @param user_ptr User defined pointer or NULL (copied to timeouts)
>> > + *
>> > + * @return Timer handle if successful, otherwise ODP_TIMER_INVALID and
>> > + *      errno set.
>> >   */
>> > -uint64_t odp_timer_current_tick(odp_timer_t timer);
>> > +odp_timer_t odp_timer_alloc(odp_timer_pool_t tpid,
>> > +                         odp_queue_t queue,
>> > +                         void *user_ptr);
>> >
>> >  /**
>> > - * Request timeout with an absolute timer tick
>> > + * Free a timer
>> > + *
>> > + * Free (destroy) a timer, freeing all associated resources (e.g.
>> > default
>> > + * timeout event). An expired and enqueued timeout event will not be
>> > freed.
>> > + * It is the responsibility of the application to free this timeout
>> when
>> > it
>> > + * is received.
>> >   *
>> > - * When tick reaches tmo_tick, the timer enqueues the timeout
>> > notification into
>> > - * the destination queue.
>> > + * @param tim      Timer handle
>> > + */
>> > +void odp_timer_free(odp_timer_t tim);
>> > +
>> > +/**
>> > + * Set a timer (absolute time) with a user-defined timeout buffer
>> >   *
>> > - * @param timer    Timer
>> > - * @param tmo_tick Absolute timer tick value which triggers the timeout
>> > - * @param queue    Destination queue for the timeout notification
>> > - * @param buf      User defined timeout notification buffer. When
>> > - *                 ODP_BUFFER_INVALID, default timeout notification is used.
>> > + * Set (arm) the timer to expire at specific time. The user-defined
>> > + * buffer will be enqueued when the timer expires.
>> > + * Arming may fail (if the timer is in state EXPIRED), an earlier timeout
>> > + * will then be received. odp_timer_tmo_status() must be used to check if
>> > + * the received timeout is valid.
>> >   *
>> > - * @return Timeout handle if successful, otherwise ODP_TIMER_TMO_INVALID
>> > + * Note: any invalid parameters will be treated as programming errors and will
>> > + * cause the application to abort.
>> > + *
>> > + * @param tim      Timer
>> > + * @param abs_tck  Expiration time in absolute timer ticks
>> > + * @param user_buf The buffer to use as timeout event
>> > + *
>> > + * @return Success or failure code
>> >   */
>> > -odp_timer_tmo_t odp_timer_absolute_tmo(odp_timer_t timer, uint64_t tmo_tick,
>> > -                                    odp_queue_t queue, odp_buffer_t buf);
>> > +odp_timer_set_t odp_timer_set_abs_w_buf(odp_timer_t tim,
>> > +                                     uint64_t abs_tck,
>> > +                                     odp_buffer_t user_buf);
>> >
>> >  /**
>> > - * Cancel a timeout
>> > + * Set a timer with an absolute expiration time
>> > + *
>> > + * Set (arm) the timer to expire at a specific time.
>> > + * Arming may fail (if the timer is in state EXPIRED), an earlier timeout
>> > + * will then be received. odp_timer_tmo_status() must be used to check if
>> > + * the received timeout is valid.
>> > + *
>> > + * Note: any invalid parameters will be treated as programming errors and will
>> > + * cause the application to abort.
>> >   *
>> > - * @param timer Timer
>> > - * @param tmo   Timeout to cancel
>> > + * @param tim     Timer
>> > + * @param abs_tck Expiration time in absolute timer ticks
>> >   *
>> > - * @return 0 if successful
>> > + * @return Success or failure code
>> >   */
>> > -int odp_timer_cancel_tmo(odp_timer_t timer, odp_timer_tmo_t tmo);
>> > +odp_timer_set_t odp_timer_set_abs(odp_timer_t tim, uint64_t abs_tck);
>> >
>> >  /**
>> > - * Convert buffer handle to timeout handle
>> > + * Set a timer with a relative expiration time and user-defined buffer.
>> >   *
>> > - * @param buf  Buffer handle
>> > + * Set (arm) the timer to expire at a relative future time.
>> > + * Arming may fail (if the timer is in state EXPIRED),
>> > + * an earlier timeout will then be received. odp_timer_tmo_status() must
>> > + * be used to check if the received timeout is valid.
>> >   *
>> > - * @return Timeout buffer handle
>> > + * Note: any invalid parameters will be treated as programming errors and will
>> > + * cause the application to abort.
>> > + *
>> > + * @param tim      Timer
>> > + * @param rel_tck  Expiration time in timer ticks relative to current time of
>> > + *              the timer pool the timer belongs to
>> > + * @param user_buf The buffer to use as timeout event
>> > + *
>> > + * @return Success or failure code
>> >   */
>> > -odp_timeout_t odp_timeout_from_buffer(odp_buffer_t buf);
>> > +odp_timer_set_t odp_timer_set_rel_w_buf(odp_timer_t tim,
>> > +                                     uint64_t rel_tck,
>> > +                                     odp_buffer_t user_buf);
>> > +/**
>> > + * Set a timer with a relative expiration time
>> > + *
>> > + * Set (arm) the timer to expire at a relative future time.
>> > + * Arming may fail (if the timer is in state EXPIRED),
>> > + * an earlier timeout will then be received. odp_timer_tmo_status() must
>> > + * be used to check if the received timeout is valid.
>> > + *
>> > + * Note: any invalid parameters will be treated as programming errors and will
>> > + * cause the application to abort.
>> > + *
>> > + * @param tim     Timer
>> > + * @param rel_tck Expiration time in timer ticks relative to current time of
>> > + *             the timer pool the timer belongs to
>> > + *
>> > + * @return Success or failure code
>> > + */
>> > +odp_timer_set_t odp_timer_set_rel(odp_timer_t tim, uint64_t rel_tck);
>> >
>> >  /**
>> > - * Return absolute timeout tick
>> > + * Cancel a timer
>> > + *
>> > + * Cancel a timer, preventing future expiration and delivery.
>> > + *
>> > + * A timer that has already expired and been enqueued for delivery may be
>> > + * impossible to cancel and will instead be delivered to the destination queue.
>> > + * Use odp_timer_tmo_status() to check whether a received timeout is fresh or
>> > + * stale (cancelled). Stale timeouts will automatically be recycled.
>> > + *
>> > + * Note: any invalid parameters will be treated as programming errors and will
>> > + * cause the application to abort.
>> > + *
>> > + * @param tim    Timer handle
>> > + */
>> > +void odp_timer_cancel(odp_timer_t tim);
>> > +
>> > +/**
>> > + * Translate from buffer to timeout
>> > + *
>> > + * Return the timeout handle that corresponds to the specified buffer handle.
>> > + * The buffer must be of type ODP_BUFFER_TYPE_TIMEOUT.
>> > + *
>> > + * @param buf   Buffer handle to translate.
>> > + *
>> > + * @return      The corresponding timeout handle.
>> > + */
>> > +static inline odp_timer_tmo_t odp_timeout_from_buffer(odp_buffer_t buf)
>> > +{
>> > +     if (odp_unlikely(odp_buffer_type(buf) != ODP_BUFFER_TYPE_TIMEOUT)) {
>> > +             ODP_ERR("Buffer type %u not timeout\n", buf);
>> > +             abort();
>> > +     }
>> > +     /* In this implementation, timeout == buffer */
>> > +     return (odp_timer_tmo_t)buf;
>> > +}
>> > +
>> > +/**
>> > + * Translate from timeout to buffer
>> > + *
>> > + * Return the buffer handle that corresponds to the specified timeout handle.
>> > + *
>> > + * @param tmo   Timeout handle to translate.
>> > + *
>> > + * @return      The corresponding buffer handle.
>> > + */
>> > +static inline odp_buffer_t odp_buffer_from_timeout(odp_timer_tmo_t tmo)
>> > +{
>> > +     /* In this implementation, buffer == timeout */
>> > +     return (odp_buffer_t)tmo;
>> > +}
>> > +
>> > +/**
>> > + * Return timeout to timer
>> > + *
>> > + * Return a received timeout for reuse with the parent timer.
>> > + * Note: odp_timer_return_tmo() must be called on all received timeouts!
>> > + * (Excluding user defined timeout buffers).
>> > + * The timeout must not be accessed after this call, the semantics is
>> > + * equivalent to a free call.
>> > + *
>> > + * @param tmo    Timeout
>> > + */
>> > +void odp_timer_return_tmo(odp_timer_tmo_t tmo);
>> > +
>> > +/**
>> > + * Return fresh/stale/orphan status of timeout.
>> > + *
>> > + * Check a received timeout for orphanhood (i.e. parent timer freed) and
>> > + * staleness (i.e. parent timer has been reset or cancelled after the timeout
>> > + * expired and was enqueued).
>> > + * If the timeout is fresh, it should be processed.
>> > + * If the timeout is stale or orphaned, it should be ignored.
>> > + * All timeouts must be returned using the odp_timer_return_tmo() call.
>> > + *
>> > + * @param tmo    Timeout
>> > + *
>> > + * @return One of ODP_TMO_FRESH, ODP_TMO_STALE or ODP_TMO_ORPHAN.
>> > + */
>> > +odp_timer_tmo_status_t odp_timer_tmo_status(odp_timer_tmo_t tmo);
>> > +
>> > +/**
>> > + * Get timer handle
>> > + *
>> > + * Return the handle of the parent timer.
>> > + *
>> > + * @param tmo   Timeout
>> > + *
>> > + * @return Timer handle or ODP_TIMER_INVALID for orphaned timeouts.
>> > + *         Note that the parent timer could be freed by some other
>> > thread
>> > + *         at any time and thus the timeout becomes orphaned.
>> > + */
>> > +odp_timer_t odp_timer_handle(odp_timer_tmo_t tmo);
>> > +
>> > +/**
>> > + * Get expiration time
>> > + *
>> > + * Return (requested) expiration time of timeout.
>> > + *
>> > + * @param tmo   Timeout
>> > + *
>> > + * @return Expiration time
>> > + */
>> > +uint64_t odp_timer_expiration(odp_timer_tmo_t tmo);
>> > +
>> > +/**
>> > + * Get user pointer
>> > + *
>> > + * Return the user pointer of the timer associated with the timeout.
>> > + * The user pointer is often used to point to some associated context.
>> >   *
>> > - * @param tmo Timeout buffer handle
>> > + * @param tmo   Timeout
>> >   *
>> > - * @return Absolute timeout tick
>> > + * @return User pointer
>> >   */
>> > -uint64_t odp_timeout_tick(odp_timeout_t tmo);
>> > +void *odp_timer_userptr(odp_timer_tmo_t tmo);
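
To tie the timeout accessors above together, a non-normative receive side sketch;
the scheduling call mirrors the test application, and conn_expired() is a made-up
application handler taking the user pointer and the expiration tick:

    odp_queue_t from;
    odp_buffer_t buf = odp_schedule_one(&from, ODP_SCHED_WAIT);
    if (odp_buffer_type(buf) == ODP_BUFFER_TYPE_TIMEOUT) {
        odp_timer_tmo_t tmo = odp_timeout_from_buffer(buf);
        if (odp_timer_tmo_status(tmo) == ODP_TMO_FRESH)
            conn_expired(odp_timer_userptr(tmo), odp_timer_expiration(tmo));
        /* Stale and orphaned timeouts are simply handed back */
        odp_timer_return_tmo(tmo);
    }
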
>> >
>> >  #ifdef __cplusplus
>> >  }
>> > diff --git a/platform/linux-generic/include/odp_priority_queue_internal.h b/platform/linux-generic/include/odp_priority_queue_internal.h
>> > new file mode 100644
>> > index 0000000..7d7f3a2
>> > --- /dev/null
>> > +++ b/platform/linux-generic/include/odp_priority_queue_internal.h
>> > @@ -0,0 +1,108 @@
>> > +#ifndef _PRIORITY_QUEUE_H
>> > +#define _PRIORITY_QUEUE_H
>> > +
>> > +#include <assert.h>
>> > +#include <stddef.h>
>> > +#include <stdint.h>
>> > +#include <stdbool.h>
>> > +#include <odp_align.h>
>> > +
>> > +#define INVALID_INDEX ~0U
>> > +#define INVALID_PRIORITY ((pq_priority_t)~0ULL)
>> > +
>> > +typedef uint64_t pq_priority_t;
>> > +
>> > +struct heap_node;
>> > +
>> > +typedef struct priority_queue {
>> > +     uint32_t max_elems;/* Maximum number of elements in heap */
>> > +     /* Number of registered elements (active + inactive) */
>> > +     uint32_t reg_elems;
>> > +     uint32_t num_elems;/* Number of active elements */
>> > +     struct heap_node *heap;
>> > +     struct heap_node *org_ptr;
>> > +} priority_queue ODP_ALIGNED(sizeof(uint64_t));
>> > +
>> > +/* The user gets a pointer to this structure */
>> > +typedef struct {
>> > +     /* Set when pq_element registered with priority queue */
>> > +     priority_queue *pq;
>> > +     uint32_t index;/* Index into heap array */
>> > +     pq_priority_t prio;
>> > +} pq_element;
>> > +
>> > +/*** Operations on pq_element ***/
>> > +
>> > +static inline void pq_element_con(pq_element *this)
>> > +{
>> > +     this->pq = NULL;
>> > +     this->index = INVALID_INDEX;
>> > +     this->prio = 0U;
>> > +}
>> > +
>> > +static inline void pq_element_des(pq_element *this)
>> > +{
>> > +     (void)this;
>> > +     assert(this->index == INVALID_INDEX);
>> > +}
>> > +
>> > +static inline priority_queue *get_pq(const pq_element *this)
>> > +{
>> > +     return this->pq;
>> > +}
>> > +
>> > +static inline pq_priority_t get_prio(const pq_element *this)
>> > +{
>> > +     return this->prio;
>> > +}
>> > +
>> > +static inline uint32_t get_index(const pq_element *this)
>> > +{
>> > +     return this->index;
>> > +}
>> > +
>> > +static inline bool is_active(const pq_element *this)
>> > +{
>> > +     return this->index != INVALID_INDEX;
>> > +}
>> > +
>> > +/*** Operations on priority_queue ***/
>> > +
>> > +extern uint32_t pq_smallest_child(priority_queue *, uint32_t, pq_priority_t);
>> > +extern void pq_bubble_down(priority_queue *, pq_element *);
>> > +extern void pq_bubble_up(priority_queue *, pq_element *);
>> > +
>> > +static inline bool valid_index(priority_queue *this, uint32_t idx)
>> > +{
>> > +     return idx < this->num_elems;
>> > +}
>> > +
>> > +extern void priority_queue_con(priority_queue *, uint32_t _max_elems);
>> > +extern void priority_queue_des(priority_queue *);
>> > +
>> > +/* Register pq_element with priority queue */
>> > +/* Return false if priority queue full */
>> > +extern bool pq_register_element(priority_queue *, pq_element *);
>> > +
>> > +/* Activate and add pq_element to priority queue */
>> > +/* Element must be disarmed */
>> > +extern void pq_activate_element(priority_queue *, pq_element *, pq_priority_t);
>> > +
>> > +/* Reset (increase) priority for pq_element */
>> > +/* Element may be active or inactive (released) */
>> > +extern void pq_reset_element(priority_queue *, pq_element *, pq_priority_t);
>> > +
>> > +/* Deactivate and remove element from priority queue */
>> > +/* Element may be active or inactive (released) */
>> > +extern void pq_deactivate_element(priority_queue *, pq_element *);
>> > +
>> > +/* Unregister pq_element */
>> > +extern void pq_unregister_element(priority_queue *, pq_element *);
>> > +
>> > +/* Return priority of first element (lowest numerical value) */
>> > +extern pq_priority_t pq_first_priority(const priority_queue *);
>> > +
>> > +/* Deactivate and return first element if its prio is <= threshold */
>> > +extern pq_element *pq_release_element(priority_queue *, pq_priority_t thresh);
>> > +
>> > +#endif /* _PRIORITY_QUEUE_H */
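
Since the priority queue is a new internal component, here is my reading of the
intended element lifecycle as a sketch (not code from the patch); my_item, its
payload and the handler are hypothetical:

    struct my_item {
        pq_element elem;   /* base class, must be the first member */
        int payload;
    };

    priority_queue pq;
    priority_queue_con(&pq, 128);            /* room for 128 elements */

    struct my_item it;
    pq_element_con(&it.elem);
    if (!pq_register_element(&pq, &it.elem))
        ODP_ERR("priority queue full\n");
    pq_activate_element(&pq, &it.elem, 42);  /* arm with priority 42 */

    /* Release every element whose priority is <= 100 */
    pq_element *e;
    while ((e = pq_release_element(&pq, 100)) != NULL)
        handle_item((struct my_item *)e);    /* hypothetical handler */

    pq_unregister_element(&pq, &it.elem);
    pq_element_des(&it.elem);
    priority_queue_des(&pq);
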
>> > diff --git a/platform/linux-generic/include/odp_timer_internal.h b/platform/linux-generic/include/odp_timer_internal.h
>> > index ad28f53..461f28c 100644
>> > --- a/platform/linux-generic/include/odp_timer_internal.h
>> > +++ b/platform/linux-generic/include/odp_timer_internal.h
>> > @@ -1,4 +1,4 @@
>> > -/* Copyright (c) 2013, Linaro Limited
>> > +/* Copyright (c) 2014, Linaro Limited
>> >   * All rights reserved.
>> >   *
>> >   * SPDX-License-Identifier:     BSD-3-Clause
>> > @@ -8,72 +8,51 @@
>> >  /**
>> >   * @file
>> >   *
>> > - * ODP timer timeout descriptor - implementation internal
>> > + * ODP timeout descriptor - implementation internal
>> >   */
>> >
>> >  #ifndef ODP_TIMER_INTERNAL_H_
>> >  #define ODP_TIMER_INTERNAL_H_
>> >
>> > -#ifdef __cplusplus
>> > -extern "C" {
>> > -#endif
>> > -
>> > -#include <odp_std_types.h>
>> > -#include <odp_queue.h>
>> > -#include <odp_buffer.h>
>> > +#include <odp_align.h>
>> > +#include <odp_debug.h>
>> >  #include <odp_buffer_internal.h>
>> >  #include <odp_buffer_pool_internal.h>
>> >  #include <odp_timer.h>
>> >
>> > -struct timeout_t;
>> > -
>> > -typedef struct timeout_t {
>> > -     struct timeout_t *next;
>> > -     int               timer_id;
>> > -     int               tick;
>> > -     uint64_t          tmo_tick;
>> > -     odp_queue_t       queue;
>> > -     odp_buffer_t      buf;
>> > -     odp_buffer_t      tmo_buf;
>> > -} timeout_t;
>> > -
>> > -
>> > -struct odp_timeout_hdr_t;
>> > -
>> >  /**
>> > - * Timeout notification header
>> > + * Internal Timeout header
>> >   */
>> > -typedef struct odp_timeout_hdr_t {
>> > +typedef struct {
>> > +     /* common buffer header */
>> >       odp_buffer_hdr_t buf_hdr;
>> >
>> > -     timeout_t meta;
>> > -
>> > -     uint8_t buf_data[];
>> > +     /* Requested expiration time */
>> > +     uint64_t expiration;
>> > +     /* User ptr inherited from parent timer */
>> > +     void *user_ptr;
>> > +     /* Parent timer */
>> > +     odp_timer_t timer;
>> > +     /* Tag inherited from parent timer at time of expiration */
>> > +     uint32_t tag;
>> > +     /* Gen-cnt inherited from parent timer at time of creation */
>> > +     uint16_t gencnt;
>> > +     uint16_t pad;
>> > +     uint8_t buf_data[0];
>> >  } odp_timeout_hdr_t;
>> >
>> > -
>> > -
>> >  ODP_STATIC_ASSERT(sizeof(odp_timeout_hdr_t) ==
>> > -        ODP_OFFSETOF(odp_timeout_hdr_t, buf_data),
>> > -        "ODP_TIMEOUT_HDR_T__SIZE_ERR");
>> > -
>> > +               ODP_OFFSETOF(odp_timeout_hdr_t, buf_data),
>> > +               "sizeof(odp_timeout_hdr_t) == ODP_OFFSETOF(odp_timeout_hdr_t, buf_data)");
>> >  ODP_STATIC_ASSERT(sizeof(odp_timeout_hdr_t) % sizeof(uint64_t) == 0,
>> > -        "ODP_TIMEOUT_HDR_T__SIZE_ERR2");
>> > -
>> > +               "sizeof(odp_timeout_hdr_t) % sizeof(uint64_t) == 0");
>> >
>> >  /**
>> > - * Return timeout header
>> > + * Return the timeout header
>> >   */
>> > -static inline odp_timeout_hdr_t *odp_timeout_hdr(odp_timeout_t tmo)
>> > +static inline odp_timeout_hdr_t *odp_timeout_hdr(odp_buffer_t buf)
>> >  {
>> > -     odp_buffer_hdr_t *buf_hdr = odp_buf_to_hdr((odp_buffer_t)tmo);
>> > -     return (odp_timeout_hdr_t *)(uintptr_t)buf_hdr;
>> > -}
>> > -
>> > -
>> > -
>> > -#ifdef __cplusplus
>> > +     return (odp_timeout_hdr_t *)odp_buf_to_hdr(buf);
>> >  }
>> > -#endif
>> >
>> >  #endif
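
As a reading aid (not part of the patch): with the layout above, implementation
code can go straight from a timeout buffer to its metadata, roughly:

    odp_timeout_hdr_t *hdr = odp_timeout_hdr(buf); /* buf of type ODP_BUFFER_TYPE_TIMEOUT */
    uint64_t when = hdr->expiration;  /* requested expiration tick */
    void *ctx     = hdr->user_ptr;    /* user pointer inherited from the timer */
    odp_timer_t t = hdr->timer;       /* parent timer handle */
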
>> > diff --git a/platform/linux-generic/odp_priority_queue.c b/platform/linux-generic/odp_priority_queue.c
>> > new file mode 100644
>> > index 0000000..b72c26f
>> > --- /dev/null
>> > +++ b/platform/linux-generic/odp_priority_queue.c
>> > @@ -0,0 +1,283 @@
>> > +#define NDEBUG /* Enabled by default by ODP build system */
>> > +#include <assert.h>
>> > +#include <unistd.h>
>> > +#include <stdlib.h>
>> > +#include <string.h>
>> > +#include <strings.h>
>> > +#include <odp_hints.h>
>> > +#include <odp_align.h>
>> > +#include <odp_debug.h>
>> > +
>> > +#include "odp_priority_queue_internal.h"
>> > +
>> > +
>> > +#define NUM_CHILDREN 4
>> > +#define CHILD(n) (NUM_CHILDREN * (n) + 1)
>> > +#define PARENT(n) (((n) - 1) / NUM_CHILDREN)
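
A quick worked example (mine, not from the patch) of the 4-ary heap index
arithmetic defined by these macros: node 0 has children 1..4, node 1 has children
5..8, and PARENT() inverts CHILD(). Written as throwaway assertions:

    assert(CHILD(0) == 1 && CHILD(1) == 5);
    assert(PARENT(1) == 0 && PARENT(4) == 0);
    assert(PARENT(5) == 1 && PARENT(8) == 1);
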
>> > +
>> > +/* Internal nodes in the array */
>> > +typedef struct heap_node {
>> > +     pq_element *elem;
>> > +     /* Copy of elem->prio so we avoid unnecessary dereferencing */
>> > +     pq_priority_t prio;
>> > +} heap_node;
>> > +
>> > +static void pq_assert_heap(priority_queue *this);
>> > +
>> > +#define ALIGNMENT(p) (1U << ((unsigned)ffs((int)p) - 1U))
>> > +
>> > +void priority_queue_con(priority_queue *this, uint32_t _max_elems)
>> > +{
>> > +     this->max_elems = _max_elems;
>> > +     this->reg_elems = 0;
>> > +     this->num_elems = 0;
>> > +     this->org_ptr = malloc((_max_elems + 64 / sizeof(heap_node)) *
>> > +                            sizeof(heap_node));
>> > +     if (odp_unlikely(this->org_ptr == NULL)) {
>> > +             ODP_ERR("malloc failed\n");
>> > +             abort();
>> > +     }
>> > +     this->heap = this->org_ptr;
>> > +     assert((size_t)&this->heap[1] % 8 == 0);
>> > +     /* Increment base address until first child (index 1) is cache line */
>> > +     /* aligned and thus all children (e.g. index 1-4) stored in the */
>> > +     /* same cache line. We are not interested in the alignment of */
>> > +     /* heap[0] as this is a lone node */
>> > +     while ((size_t)&this->heap[1] % ODP_CACHE_LINE_SIZE != 0) {
>> > +             /* Cast to ptr to struct member with the greatest alignment */
>> > +             /* requirement */
>> > +             this->heap = (heap_node *)((pq_priority_t *)this->heap + 1);
>> > +     }
>> > +     pq_assert_heap(this);
>> > +}
>> > +
>> > +void priority_queue_des(priority_queue *this)
>> > +{
>> > +     pq_assert_heap(this);
>> > +     free(this->org_ptr);
>> > +}
>> > +
>> > +#ifndef NDEBUG
>> > +static uint32_t
>> > +pq_assert_elem(priority_queue *this, uint32_t index, bool recurse)
>> > +{
>> > +     uint32_t num = 1;
>> > +     const pq_element *elem = this->heap[index].elem;
>> > +     assert(elem->index == index);
>> > +     assert(elem->prio == this->heap[index].prio);
>> > +     uint32_t child = CHILD(index);
>> > +     uint32_t i;
>> > +     for (i = 0; i < NUM_CHILDREN; i++, child++) {
>> > +             if (valid_index(this, child)) {
>> > +                     assert(this->heap[child].elem != NULL);
>> > +                     assert(this->heap[child].prio >= elem->prio);
>> > +                     if (recurse)
>> > +                             num += pq_assert_elem(this, child, recurse);
>> > +             }
>> > +     }
>> > +     return num;
>> > +}
>> > +#endif
>> > +
>> > +static void
>> > +pq_assert_heap(priority_queue *this)
>> > +{
>> > +     (void)this;
>> > +#ifndef NDEBUG
>> > +     uint32_t num = 0;
>> > +     if (odp_likely(this->num_elems != 0)) {
>> > +             assert(this->heap[0].elem != NULL);
>> > +             num += pq_assert_elem(this, 0, true);
>> > +     }
>> > +     assert(num == this->num_elems);
>> > +     unsigned i;
>> > +     for (i = 0; i < this->num_elems; i++) {
>> > +             assert(this->heap[i].elem != NULL);
>> > +             assert(this->heap[i].prio != INVALID_PRIORITY);
>> > +     }
>> > +#endif
>> > +}
>> > +
>> > +/* Bubble up to proper position */
>> > +void
>> > +pq_bubble_up(priority_queue *this, pq_element *elem)
>> > +{
>> > +     assert(this->heap[elem->index].elem == elem);
>> > +     assert(this->heap[elem->index].prio == elem->prio);
>> > +     uint32_t current = elem->index;
>> > +     pq_priority_t prio = elem->prio;
>> > +     assert(current == 0 || this->heap[PARENT(current)].elem != NULL);
>> > +     /* Move up into proper position */
>> > +     while (current != 0 && this->heap[PARENT(current)].prio > prio) {
>> > +             uint32_t parent = PARENT(current);
>> > +             assert(this->heap[parent].elem != NULL);
>> > +             /* Swap current with parent */
>> > +             /* 1) Move parent down */
>> > +             this->heap[current].elem = this->heap[parent].elem;
>> > +             this->heap[current].prio = this->heap[parent].prio;
>> > +             this->heap[current].elem->index = current;
>> > +             /* 2) Move current up to parent */
>> > +             this->heap[parent].elem = elem;
>> > +             this->heap[parent].prio = prio;
>> > +             this->heap[parent].elem->index = parent;
>> > +             /* Continue moving elem until it is in the right place */
>> > +             current = parent;
>> > +     }
>> > +     pq_assert_heap(this);
>> > +}
>> > +
>> > +/* Find the smallest child that is smaller than the specified priority */
>> > +/* Very hot function, can we decrease the number of cache misses? */
>> > +uint32_t pq_smallest_child(priority_queue *this,
>> > +                        uint32_t index,
>> > +                        pq_priority_t val)
>> > +{
>> > +     uint32_t smallest = index;
>> > +     uint32_t child = CHILD(index);
>> > +#if NUM_CHILDREN == 4
>> > +     /* Unroll loop when all children exist */
>> > +     if (odp_likely(valid_index(this, child + 3))) {
>> > +             if (this->heap[child + 0].prio < val)
>> > +                     val = this->heap[smallest = child + 0].prio;
>> > +             if (this->heap[child + 1].prio < val)
>> > +                     val = this->heap[smallest = child + 1].prio;
>> > +             if (this->heap[child + 2].prio < val)
>> > +                     val = this->heap[smallest = child + 2].prio;
>> > +             if (this->heap[child + 3].prio < val)
>> > +                     (void)this->heap[smallest = child + 3].prio;
>> > +             return smallest;
>> > +     }
>> > +#endif
>> > +     uint32_t i;
>> > +     for (i = 0; i < NUM_CHILDREN; i++) {
>> > +             if (odp_unlikely(!valid_index(this, child + i)))
>> > +                     break;
>> > +             if (this->heap[child + i].prio < val) {
>> > +                     smallest = child + i;
>> > +                     val = this->heap[smallest].prio;
>> > +             }
>> > +     }
>> > +     return smallest;
>> > +}
>> > +
>> > +/* Very hot function, can it be optimised? */
>> > +void
>> > +pq_bubble_down(priority_queue *this, pq_element *elem)
>> > +{
>> > +     assert(this->heap[elem->index].elem == elem);
>> > +     assert(this->heap[elem->index].prio == elem->prio);
>> > +     uint32_t current = elem->index;
>> > +     pq_priority_t prio = elem->prio;
>> > +     for (;;) {
>> > +             uint32_t child = pq_smallest_child(this, current, prio);
>> > +             if (current == child) {
>> > +                     /* No smaller child, we are done */
>> > +                     pq_assert_heap(this);
>> > +                     return;
>> > +             }
>> > +             /* Element larger than smaller child, must move down */
>> > +             assert(this->heap[child].elem != NULL);
>> > +             /* 1) Move child up to current */
>> > +             this->heap[current].elem = this->heap[child].elem;
>> > +             this->heap[current].prio = this->heap[child].prio;
>> > +             /* 2) Move current down to child */
>> > +             this->heap[child].elem = elem;
>> > +             this->heap[child].prio = prio;
>> > +             this->heap[child].elem->index = child;
>> > +
>> > +             this->heap[current].elem->index = current; /* cache misses! */
>> > +             /* Continue moving element until it is in the right place */
>> > +             current = child;
>> > +     }
>> > +}
>> > +
>> > +bool
>> > +pq_register_element(priority_queue *this, pq_element *elem)
>> > +{
>> > +     if (odp_likely(this->reg_elems < this->max_elems)) {
>> > +             elem->pq = this;
>> > +             this->reg_elems++;
>> > +             return true;
>> > +     }
>> > +     return false;
>> > +}
>> > +
>> > +void
>> > +pq_unregister_element(priority_queue *this, pq_element *elem)
>> > +{
>> > +     assert(elem->pq == this);
>> > +     if (is_active(elem))
>> > +             pq_deactivate_element(this, elem);
>> > +     this->reg_elems--;
>> > +}
>> > +
>> > +void
>> > +pq_activate_element(priority_queue *this, pq_element *elem, pq_priority_t prio)
>> > +{
>> > +     assert(elem->index == INVALID_INDEX);
>> > +     /* Insert element at end */
>> > +     uint32_t index = this->num_elems++;
>> > +     this->heap[index].elem = elem;
>> > +     this->heap[index].prio = prio;
>> > +     elem->index = index;
>> > +     elem->prio = prio;
>> > +     pq_bubble_up(this, elem);
>> > +}
>> > +
>> > +void
>> > +pq_deactivate_element(priority_queue *this, pq_element *elem)
>> > +{
>> > +     assert(elem->pq == this);
>> > +     if (odp_likely(is_active(elem))) {
>> > +             /* Swap element with last element */
>> > +             uint32_t current = elem->index;
>> > +             uint32_t last = --this->num_elems;
>> > +             if (odp_likely(last != current)) {
>> > +                     /* Move last element to current */
>> > +                     this->heap[current].elem = this->heap[last].elem;
>> > +                     this->heap[current].prio = this->heap[last].prio;
>> > +                     this->heap[current].elem->index = current;
>> > +                     /* Bubble down old 'last' element to its proper place */
>> > +                     if (this->heap[current].prio < elem->prio)
>> > +                             pq_bubble_up(this, this->heap[current].elem);
>> > +                     else
>> > +                             pq_bubble_down(this, this->heap[current].elem);
>> > +             }
>> > +             elem->index = INVALID_INDEX;
>> > +             pq_assert_heap(this);
>> > +     }
>> > +}
>> > +
>> > +void
>> > +pq_reset_element(priority_queue *this, pq_element *elem, pq_priority_t prio)
>> > +{
>> > +     assert(prio != INVALID_PRIORITY);
>> > +     if (odp_likely(is_active(elem))) {
>> > +             assert(prio >= elem->prio);
>> > +             elem->prio = prio;
>> > +             this->heap[elem->index].prio = prio;/* cache misses here! */
>> > +             pq_bubble_down(this, elem);
>> > +             pq_assert_heap(this);
>> > +     } else {
>> > +             pq_activate_element(this, elem, prio);
>> > +     }
>> > +}
>> > +
>> > +pq_priority_t pq_first_priority(const priority_queue *this)
>> > +{
>> > +     return this->num_elems != 0 ? this->heap[0].prio : INVALID_PRIORITY;
>> > +}
>> > +
>> > +pq_element *
>> > +pq_release_element(priority_queue *this, pq_priority_t threshold)
>> > +{
>> > +     if (odp_likely(this->num_elems != 0 &&
>> > +                    this->heap[0].prio <= threshold)) {
>> > +             pq_element *elem = this->heap[0].elem;
>> > +             /* Remove element from heap */
>> > +             pq_deactivate_element(this, elem);
>> > +             assert(elem->prio <= threshold);
>> > +             return elem;
>> > +     }
>> > +     return NULL;
>> > +}
>> > diff --git a/platform/linux-generic/odp_timer.c b/platform/linux-generic/odp_timer.c
>> > index 313c713..0e5071c 100644
>> > --- a/platform/linux-generic/odp_timer.c
>> > +++ b/platform/linux-generic/odp_timer.c
>> > @@ -4,428 +4,713 @@
>> >   * SPDX-License-Identifier:     BSD-3-Clause
>> >   */
>> >
>> > -#include <odp_timer.h>
>> > -#include <odp_timer_internal.h>
>> > -#include <odp_time.h>
>> > -#include <odp_buffer_pool_internal.h>
>> > -#include <odp_internal.h>
>> > -#include <odp_atomic.h>
>> > -#include <odp_spinlock.h>
>> > -#include <odp_sync.h>
>> > -#include <odp_debug.h>
>> > -
>> > -#include <signal.h>
>> > -#include <time.h>
>> > +/**
>> > + * @file
>> > + *
>> > + * ODP timer service
>> > + *
>> > + */
>> >
>> > +#include <assert.h>
>> > +#include <errno.h>
>> >  #include <string.h>
>> > -
>> > -#define NUM_TIMERS    1
>> > -#define MAX_TICKS     1024
>> > -#define MAX_RES       ODP_TIME_SEC
>> > -#define MIN_RES       (100*ODP_TIME_USEC)
>> > -
>> > -
>> > -typedef struct {
>> > -     odp_spinlock_t lock;
>> > -     timeout_t      *list;
>> > -} tick_t;
>> > -
>> > -typedef struct {
>> > -     int               allocated;
>> > -     volatile int      active;
>> > -     volatile uint64_t cur_tick;
>> > -     timer_t           timerid;
>> > -     odp_timer_t       timer_hdl;
>> > -     odp_buffer_pool_t pool;
>> > -     uint64_t          resolution_ns;
>> > -     uint64_t          max_ticks;
>> > -     tick_t            tick[MAX_TICKS];
>> > -
>> > -} timer_ring_t;
>> > -
>> > -typedef struct {
>> > -     odp_spinlock_t lock;
>> > -     int            num_timers;
>> > -     timer_ring_t   timer[NUM_TIMERS];
>> > -
>> > -} timer_global_t;
>> > -
>> > -/* Global */
>> > -static timer_global_t odp_timer;
>> > -
>> > -static void add_tmo(tick_t *tick, timeout_t *tmo)
>> > +#include <stdlib.h>
>> > +#include <time.h>
>> > +#include <signal.h>
>> > +#include "odp_std_types.h"
>> > +#include "odp_buffer.h"
>> > +#include "odp_buffer_pool.h"
>> > +#include "odp_queue.h"
>> > +#include "odp_hints.h"
>> > +#include "odp_sync.h"
>> > +#include "odp_ticketlock.h"
>> > +#include "odp_debug.h"
>> > +#include "odp_align.h"
>> > +#include "odp_shared_memory.h"
>> > +#include "odp_hints.h"
>> > +#include "odp_internal.h"
>> > +#include "odp_time.h"
>> > +#include "odp_timer.h"
>> > +#include "odp_timer_internal.h"
>> > +#include "odp_priority_queue_internal.h"
>> > +
>> >
>> > +/******************************************************************************
>> > + * Translation between timeout and timeout header
>> > + *****************************************************************************/
>> > +
>> > +static inline odp_timeout_hdr_t *odp_tmo_to_hdr(odp_timer_tmo_t tmo)
>> >  {
>> > -     odp_spinlock_lock(&tick->lock);
>> > -
>> > -     tmo->next  = tick->list;
>> > -     tick->list = tmo;
>> > +     odp_buffer_t buf = odp_buffer_from_timeout(tmo);
>> > +     odp_timeout_hdr_t *tmo_hdr = (odp_timeout_hdr_t *)odp_buf_to_hdr(buf);
>> > +     return tmo_hdr;
>> > +}
>> >
>> > -     odp_spinlock_unlock(&tick->lock);
>> >
>> > +/******************************************************************************
>> > + * odp_timer abstract datatype
>> > + *****************************************************************************/
>> > +
>> > +typedef struct odp_timer_s {
>> > +     pq_element pqelem;/* Base class */
>> > +     uint64_t req_tmo;/* Requested timeout tick */
>> > +     odp_buffer_t tmo_buf;/* ODP_BUFFER_INVALID if timeout enqueued */
>> > +     odp_queue_t queue;/* ODP_QUEUE_INVALID if timer is free */
>> > +     uint32_t tag;/* Reusing tag as next pointer/index when timer is free */
>> > +     uint16_t gencnt;/* Smaller to make place for user_buf flag */
>> > +     unsigned int user_buf:1; /* User-defined buffer? */
>> > +} odp_timer;
>> > +
>> > +/* Constructor */
>> > +static inline void odp_timer_con(odp_timer *this)
>> > +{
>> > +     pq_element_con(&this->pqelem);
>> > +     this->tmo_buf = ODP_BUFFER_INVALID;
>> > +     this->queue = ODP_QUEUE_INVALID;
>> > +     this->gencnt = 0;
>> >  }
>> >
>> > -static timeout_t *rem_tmo(tick_t *tick)
>> > +/* Destructor */
>> > +static inline void odp_timer_des(odp_timer *this)
>> >  {
>> > -     timeout_t *tmo;
>> > +     assert(this->tmo_buf == ODP_BUFFER_INVALID);
>> > +     assert(this->queue == ODP_QUEUE_INVALID);
>> > +     pq_element_des(&this->pqelem);
>> > +}
>> >
>> > -     odp_spinlock_lock(&tick->lock);
>> > +/* Setup when timer is allocated */
>> > +static void setup(odp_timer *this,
>> > +               odp_queue_t _q,
>> > +               void *_up,
>> > +               odp_buffer_t _tmo)
>> > +{
>> > +     this->req_tmo = INVALID_PRIORITY;
>> > +     this->tmo_buf = _tmo;
>> > +     this->queue = _q;
>> > +     this->tag = 0;
>> > +     this->user_buf = false;
>> > +     /* Initialise constant fields of timeout event */
>> > +     odp_timeout_hdr_t *tmo_hdr =
>> > +             odp_tmo_to_hdr(odp_timeout_from_buffer(this->tmo_buf));
>> > +     tmo_hdr->gencnt = this->gencnt;
>> > +     tmo_hdr->timer = this;
>> > +     tmo_hdr->user_ptr = _up;
>> > +     /* tmo_hdr->tag set at expiration time */
>> > +     /* tmo_hdr->expiration set at expiration time */
>> > +     assert(this->queue != ODP_QUEUE_INVALID);
>> > +}
>> >
>> > -     tmo = tick->list;
>> > +/* Teardown when timer is freed */
>> > +static odp_buffer_t teardown(odp_timer *this)
>> > +{
>> > +     /* Increase generation count to make any pending timeout(s) orphaned */
>> > +     ++this->gencnt;
>> > +     odp_buffer_t buf = this->tmo_buf;
>> > +     this->tmo_buf = ODP_BUFFER_INVALID;
>> > +     this->queue = ODP_QUEUE_INVALID;
>> > +     return buf;
>> > +}
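
To make the gencnt/tag scheme easier to follow: teardown() above bumps gencnt so
that any timeout still in flight can later be recognised as orphaned. A sketch of
the kind of classification this enables (my interpretation; the actual
odp_timer_tmo_status() implementation is outside this hunk):

    /* hdr is the odp_timeout_hdr_t of a received timeout buffer */
    odp_timer_tmo_status_t status;
    if (hdr->gencnt != hdr->timer->gencnt)
        status = ODP_TMO_ORPHAN; /* parent timer was freed after enqueue */
    else if (hdr->tag != hdr->timer->tag)
        status = ODP_TMO_STALE;  /* timer reset/cancelled after expiration */
    else
        status = ODP_TMO_FRESH;
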
>> >
>> > -     if (tmo)
>> > -             tick->list = tmo->next;
>> > +static inline uint32_t get_next_free(odp_timer *this)
>> > +{
>> > +     assert(this->queue == ODP_QUEUE_INVALID);
>> > +     return this->tag;
>> > +}
>> >
>> > -     odp_spinlock_unlock(&tick->lock);
>> > +static inline void set_next_free(odp_timer *this, uint32_t nf)
>> > +{
>> > +     assert(this->queue == ODP_QUEUE_INVALID);
>> > +     this->tag = nf;
>> > +}
>> >
>> > -     if (tmo)
>> > -             tmo->next = NULL;
>> >
>> +/***********************************************************************
>> > *******
>> > + * odp_timer_pool abstract datatype
>> > + * Inludes alloc and free timer
>> > +
>> >
>> *************************************************************************
>> > ****/
>> > +
>> > +typedef struct odp_timer_pool_s {
>> > +     priority_queue pq;
>> > +     uint64_t cur_tick;/* Current tick value */
>> > +     uint64_t min_tick;/* Current expiration lower bound */
>> > +     uint64_t max_tick;/* Current expiration higher bound */
>> > +     bool shared;
>> > +     odp_ticketlock_t lock;
>> > +     const char *name;
>> > +     odp_buffer_pool_t buf_pool;
>> > +     uint64_t resolution_ns;
>> > +     uint64_t min_tmo_tck;
>> > +     uint64_t max_tmo_tck;
>> > +     odp_timer *timers;
>> > +     uint32_t num_alloc;/* Current number of allocated timers */
>> > +     uint32_t max_timers;/* Max number of timers */
>> > +     uint32_t first_free;/* 0..max_timers-1 => free timer */
>> > +     timer_t timerid;
>> > +     odp_timer_clk_src_t clk_src;
>> > +} odp_timer_pool;
>> > +
>> > +/* Forward declarations */
>> > +static void timer_init(odp_timer_pool *tp);
>> > +static void timer_exit(odp_timer_pool *tp);
>> > +
>> > +static void odp_timer_pool_con(odp_timer_pool *this,
>> > +                            const char *_n,
>> > +                            odp_buffer_pool_t _bp,
>> > +                            uint64_t _r,
>> > +                            uint64_t _mint,
>> > +                            uint64_t _maxt,
>> > +                            uint32_t _mt,
>> > +                            bool _s,
>> > +                            odp_timer_clk_src_t _cs)
>> > +{
>> > +     priority_queue_con(&this->pq, _mt);
>> > +     this->cur_tick = 0;
>> > +     this->shared = _s;
>> > +     this->name = strdup(_n);
>> > +     this->buf_pool = _bp;
>> > +     this->resolution_ns = _r;
>> > +     this->min_tmo_tck = odp_timer_ns_to_tick(this, _mint);
>> > +     this->max_tmo_tck = odp_timer_ns_to_tick(this, _maxt);
>> > +     this->min_tick = this->cur_tick + this->min_tmo_tck;
>> > +     this->max_tick = this->cur_tick + this->max_tmo_tck;
>> > +     this->num_alloc = 0;
>> > +     this->max_timers = _mt;
>> > +     this->first_free = 0;
>> > +     this->clk_src = _cs;
>> > +     this->timers = malloc(sizeof(odp_timer) * this->max_timers);
>> > +     if (this->timers == NULL)
>> > +             ODP_ABORT("%s: malloc failed\n", _n);
>> > +     uint32_t i;
>> > +     for (i = 0; i < this->max_timers; i++)
>> > +             odp_timer_con(&this->timers[i]);
>> > +     for (i = 0; i < this->max_timers; i++)
>> > +             set_next_free(&this->timers[i], i + 1);
>> > +     odp_ticketlock_init(&this->lock);
>> > +     if (this->clk_src == ODP_CLOCK_CPU)
>> > +             timer_init(this);
>> > +     /* Make sure timer pool initialisation is globally observable */
>> > +     /* before we return a pointer to it */
>> > +     odp_sync_stores();
>> > +}
>> >
>> > -     return tmo;
>> > +static odp_timer_pool *odp_timer_pool_new(
>> > +     const char *_n,
>> > +     odp_buffer_pool_t _bp,
>> > +     uint64_t _r,
>> > +     uint64_t _mint,
>> > +     uint64_t _maxt,
>> > +     uint32_t _mt,
>> > +     bool _s,
>> > +     odp_timer_clk_src_t _cs)
>> > +{
>> > +     odp_timer_pool *this = malloc(sizeof(odp_timer_pool));
>> > +     if (odp_unlikely(this == NULL))
>> > +             ODP_ABORT("%s: timer pool malloc failed\n", _n);
>> > +     odp_timer_pool_con(this, _n, _bp, _r, _mint, _maxt, _mt, _s, _cs);
>> > +     return this;
>> >  }
>> >
>> > -/**
>> > - * Search and delete tmo entry from timeout list
>> > - * return -1 : on error.. handle not in list
>> > - *           0 : success
>> > - */
>> > -static int find_and_del_tmo(timeout_t **tmo, odp_timer_tmo_t handle)
>> > +static void odp_timer_pool_des(odp_timer_pool *this)
>> >  {
>> > -     timeout_t *cur, *prev;
>> > -     prev = NULL;
>> > +     if (this->shared)
>> > +             odp_ticketlock_lock(&this->lock);
>> > +     if (this->num_alloc != 0) {
>> > +             /* It's a programming error to attempt to destroy a */
>> > +             /* timer pool which is still in use */
>> > +             ODP_ABORT("%s: timers in use\n", this->name);
>> > +     }
>> > +     if (this->clk_src == ODP_CLOCK_CPU)
>> > +             timer_exit(this);
>> > +     uint32_t i;
>> > +     for (i = 0; i < this->max_timers; i++)
>> > +             odp_timer_des(&this->timers[i]);
>> > +     free(this->timers);
>> > +     priority_queue_des(&this->pq);
>> > +     odp_sync_stores();
>> > +}
>> >
>> > -     for (cur = *tmo; cur != NULL; prev = cur, cur = cur->next) {
>> > -             if (cur->tmo_buf == handle) {
>> > -                     if (prev == NULL)
>> > -                             *tmo = cur->next;
>> > -                     else
>> > -                             prev->next = cur->next;
>> > +static void odp_timer_pool_del(odp_timer_pool *this)
>> > +{
>> > +     odp_timer_pool_des(this);
>> > +     free(this);
>> > +}
>> >
>> > -                     break;
>> > +static inline odp_timer *timer_alloc(odp_timer_pool *this,
>> > +                                  odp_queue_t queue,
>> > +                                  void *user_ptr,
>> > +                                  odp_buffer_t tmo_buf)
>> > +{
>> > +     odp_timer *tim = ODP_TIMER_INVALID;
>> > +     if (odp_likely(this->shared))
>> > +             odp_ticketlock_lock(&this->lock);
>> > +     if (odp_likely(this->num_alloc < this->max_timers)) {
>> > +             this->num_alloc++;
>> > +             /* Remove first unused timer from free list */
>> > +             assert(this->first_free != this->max_timers);
>> > +             tim = &this->timers[this->first_free];
>> > +             this->first_free = get_next_free(tim);
>> > +             /* Insert timer into priority queue */
>> > +             if (odp_unlikely(!pq_register_element(&this->pq,
>> > +                                                   &tim->pqelem))) {
>> > +                     /* Unexpected internal error */
>> > +                     abort();
>> >               }
>> > +             /* Create timer */
>> > +             setup(tim, queue, user_ptr, tmo_buf);
>> > +     } else {
>> > +             errno = ENFILE; /* Reusing file table overflow */
>> >       }
>> > -
>> > -     if (!cur)
>> > -             /* couldn't find tmo in list */
>> > -             return -1;
>> > -
>> > -     /* application to free tmo_buf provided by absolute_tmo call */
>> > -     return 0;
>> > +     if (odp_likely(this->shared))
>> > +             odp_ticketlock_unlock(&this->lock);
>> > +     return tim;
>> >  }
>> >
>> > -int odp_timer_cancel_tmo(odp_timer_t timer_hdl, odp_timer_tmo_t tmo)
>> > +static inline void timer_free(odp_timer_pool *this, odp_timer *tim)
>> >  {
>> > -     int id;
>> > -     int tick_idx;
>> > -     timeout_t *cancel_tmo;
>> > -     odp_timeout_hdr_t *tmo_hdr;
>> > -     tick_t *tick;
>> > -
>> > -     /* get id */
>> > -     id = (int)timer_hdl - 1;
>> > -
>> > -     tmo_hdr = odp_timeout_hdr((odp_timeout_t) tmo);
>> > -     /* get tmo_buf to cancel */
>> > -     cancel_tmo = &tmo_hdr->meta;
>> > +     if (odp_likely(this->shared))
>> > +             odp_ticketlock_lock(&this->lock);
>> > +     if (odp_unlikely(tim->queue == ODP_QUEUE_INVALID))
>> > +             ODP_ABORT("Invalid timer %p\n", tim);
>> > +     /* Destroy timer */
>> > +     odp_buffer_t buf = teardown(tim);
>> > +     /* Remove timer from priority queue */
>> > +     pq_unregister_element(&this->pq, &tim->pqelem);
>> > +     /* Insert timer into free list */
>> > +     set_next_free(tim, this->first_free);
>> > +     this->first_free = tim - &this->timers[0];
>> > +     assert(this->num_alloc != 0);
>> > +     this->num_alloc--;
>> > +     if (odp_likely(this->shared))
>> > +             odp_ticketlock_unlock(&this->lock);
>> > +     if (buf != ODP_BUFFER_INVALID)
>> > +             odp_buffer_free(buf);
>> > +}
>> >
>> > -     tick_idx = cancel_tmo->tick;
>> > -     tick = &odp_timer.timer[id].tick[tick_idx];
>> >
>> > +/******************************************************************************
>> > + * Operations on timers
>> > + * reset/reset_w_buf/cancel timer, return timeout
>> > + *****************************************************************************/
>> >
>> > -     odp_spinlock_lock(&tick->lock);
>> > -     /* search and delete tmo from tick list */
>> > -     if (find_and_del_tmo(&tick->list, tmo) != 0) {
>> > -             odp_spinlock_unlock(&tick->lock);
>> > -             ODP_DBG("Couldn't find the tmo (%d) in tick list\n",
>> > (int)tmo);
>> > -             return -1;
>> > +static inline void timer_expire(odp_timer *tim)
>> > +{
>> > +     assert(tim->req_tmo != INVALID_PRIORITY);
>> > +     /* Timer expired, is there actually any timeout event */
>> > +     /* we can enqueue? */
>> > +     if (odp_likely(tim->tmo_buf != ODP_BUFFER_INVALID)) {
>> > +             /* Swap out timeout buffer */
>> > +             odp_buffer_t buf = tim->tmo_buf;
>> > +             tim->tmo_buf = ODP_BUFFER_INVALID;
>> > +             if (odp_likely(!tim->user_buf)) {
>> > +                     odp_timeout_hdr_t *tmo_hdr =
>> > +                             odp_tmo_to_hdr(odp_timeout_from_buffer(buf));
>> > +                     /* Copy tag and requested expiration tick from timer */
>> > +                     tmo_hdr->tag = tim->tag;
>> > +                     tmo_hdr->expiration = tim->req_tmo;
>> > +             }
>> > +             /* Else don't touch user-defined buffer */
>> > +             int rc = odp_queue_enq(tim->queue, buf);
>> > +             if (odp_unlikely(rc != 0))
>> > +                     ODP_ABORT("Failed to enqueue timeout buffer (%d)\n",
>> > +                               rc);
>> > +             /* Mark timer as inactive */
>> > +             tim->req_tmo = INVALID_PRIORITY;
>> >       }
>> > -     odp_spinlock_unlock(&tick->lock);
>> > -
>> > -     return 0;
>> > +     /* No, timeout event already enqueued or unavailable */
>> > +     /* Keep timer active, odp_timer_return_tmo() will patch up */
>> >  }
>> >
>> > -static void notify_function(union sigval sigval)
>> > +static odp_timer_set_t timer_reset(odp_timer_pool *tp,
>> > +                                odp_timer *tim,
>> > +                                uint64_t abs_tck)
>> >  {
>> > -     uint64_t cur_tick;
>> > -     timeout_t *tmo;
>> > -     tick_t *tick;
>> > -     timer_ring_t *timer;
>> > +     assert(tim->user_buf == false);
>> > +     if (odp_unlikely(abs_tck < tp->min_tick))
>> > +             return ODP_TIMER_SET_TOOEARLY;
>> > +     if (odp_unlikely(abs_tck > tp->max_tick))
>> > +             return ODP_TIMER_SET_TOOLATE;
>> > +
>> > +     if (odp_likely(tp->shared))
>> > +             odp_ticketlock_lock(&tp->lock);
>> > +
>> > +     if (odp_unlikely(tim->queue == ODP_QUEUE_INVALID))
>> > +             ODP_ABORT("Invalid timer %p\n", tim);
>> > +     if (odp_unlikely(tim->user_buf))
>> > +             ODP_ABORT("Timer %p has user buffer\n", tim);
>> > +     /* Increase timer tag to make any pending timeout stale */
>> > +     tim->tag++;
>> > +     /* Save requested timeout */
>> > +     tim->req_tmo = abs_tck;
>> > +     /* Update timer position in priority queue */
>> > +     pq_reset_element(&tp->pq, &tim->pqelem, abs_tck);
>> > +
>> > +     if (odp_likely(tp->shared))
>> > +             odp_ticketlock_unlock(&tp->lock);
>> > +     return ODP_TIMER_SET_SUCCESS;
>> > +}
>> >
>> > -     timer = sigval.sival_ptr;
>> > +static odp_timer_set_t timer_reset_w_buf(odp_timer_pool *tp,
>> > +             odp_timer *tim,
>> > +             uint64_t abs_tck,
>> > +             odp_buffer_t user_buf)
>> > +{
>> > +     if (odp_unlikely(abs_tck < tp->min_tick))
>> > +             return ODP_TIMER_SET_TOOEARLY;
>> > +     if (odp_unlikely(abs_tck > tp->max_tick))
>> > +             return ODP_TIMER_SET_TOOLATE;
>> > +
>> > +     if (odp_likely(tp->shared))
>> > +             odp_ticketlock_lock(&tp->lock);
>> > +
>> > +     if (odp_unlikely(tim->queue == ODP_QUEUE_INVALID))
>> > +             ODP_ABORT("Invalid timer %p\n", tim);
>> > +     /* Increase timer tag to make any pending timeout stale */
>> > +     tim->tag++;
>> > +     /* Save requested timeout */
>> > +     tim->req_tmo = abs_tck;
>> > +     /* Set flag indicating presence of user defined buffer */
>> > +     tim->user_buf = true;
>> > +     /* Swap in new buffer, save any old buffer pointer */
>> > +     odp_buffer_t old_buf = tim->tmo_buf;
>> > +     tim->tmo_buf = user_buf;
>> > +     /* Update timer position in priority queue */
>> > +     pq_reset_element(&tp->pq, &tim->pqelem, abs_tck);
>> > +
>> > +     if (odp_likely(tp->shared))
>> > +             odp_ticketlock_unlock(&tp->lock);
>> > +
>> > +     /* Free old buffer if present */
>> > +     if (odp_unlikely(old_buf != ODP_BUFFER_INVALID))
>> > +             odp_buffer_free(old_buf);
>> > +     return ODP_TIMER_SET_SUCCESS;
>> > +}
>> >
>> > -     if (timer->active == 0) {
>> > -             ODP_DBG("Timer (%u) not active\n", timer->timer_hdl);
>> > -             return;
>> > +static inline void timer_cancel(odp_timer_pool *tp,
>> > +                             odp_timer *tim)
>> > +{
>> > +     odp_buffer_t old_buf = ODP_BUFFER_INVALID;
>> > +     if (odp_likely(tp->shared))
>> > +             odp_ticketlock_lock(&tp->lock);
>> > +
>> > +     if (odp_unlikely(tim->queue == ODP_QUEUE_INVALID))
>> > +             ODP_ABORT("Invalid timer %p\n", tim);
>> > +     if (odp_unlikely(tim->user_buf)) {
>> > +             /* Swap out old user buffer */
>> > +             old_buf = tim->tmo_buf;
>> > +             tim->tmo_buf = ODP_BUFFER_INVALID;
>> > +             /* tim->user_buf stays true */
>> >       }
>> > +     /* Else a normal timer (no user-defined buffer) */
>> > +     /* Increase timer tag to make any pending timeout stale */
>> > +     tim->tag++;
>> > +     /* Clear requested timeout, mark timer inactive */
>> > +     tim->req_tmo = INVALID_PRIORITY;
>> > +     /* Remove timer from the priority queue */
>> > +     pq_deactivate_element(&tp->pq, &tim->pqelem);
>> > +
>> > +     if (odp_likely(tp->shared))
>> > +             odp_ticketlock_unlock(&tp->lock);
>> > +     /* Free user-defined buffer if present */
>> > +     if (odp_unlikely(old_buf != ODP_BUFFER_INVALID))
>> > +             odp_buffer_free(old_buf);
>> > +}
>> >
>> > -     /* ODP_DBG("Tick\n"); */
>> > -
>> > -     cur_tick = timer->cur_tick++;
>> > -
>> > -     odp_sync_stores();
>> > +static inline void timer_return(odp_timer_pool *tp,
>> > +                             odp_timer *tim,
>> > +                             odp_timer_tmo_t tmo,
>> > +                             const odp_timeout_hdr_t *tmo_hdr)
>> > +{
>> > +     odp_buffer_t tmo_buf = odp_buffer_from_timeout(tmo);
>> > +     if (odp_likely(tp->shared))
>> > +             odp_ticketlock_lock(&tp->lock);
>> > +     if (odp_unlikely(tim->user_buf))
>> > +             ODP_ABORT("Timer %p has user-defined buffer\n", tim);
>> > +     if (odp_likely(tmo_hdr->gencnt == tim->gencnt)) {
>> > +             assert(tim->tmo_buf == ODP_BUFFER_INVALID);
>> > +             /* Save returned buffer for use when timer expires next time */
>> > +             tim->tmo_buf = tmo_buf;
>> > +             tmo_buf = ODP_BUFFER_INVALID;
>> > +             /* Check if timer is active and should have expired */
>> > +             if (odp_unlikely(tim->req_tmo != INVALID_PRIORITY &&
>> > +                              tim->req_tmo <= tp->cur_tick)) {
>> > +                     /* Expire timer now since we have restored the
>> > +                        timeout buffer */
>> > +                     timer_expire(tim);
>> > +             }
>> > +             /* Else timer inactive or expires in the future */
>> > +     }
>> > +     /* Else timeout orphaned, free buffer later */
>> > +     if (odp_likely(tp->shared))
>> > +             odp_ticketlock_unlock(&tp->lock);
>> > +     if (odp_unlikely(tmo_buf != ODP_BUFFER_INVALID))
>> > +             odp_buffer_free(tmo_buf);
>> > +}
>> >
>> > -     tick = &timer->tick[cur_tick % MAX_TICKS];
>> > +/* Non-public so not in odp_timer.h but externally visible, must declare
>> > + * somewhere */
>> > +unsigned odp_timer_pool_expire(odp_timer_pool_t tpid, uint64_t tick);
>> >
>> > -     while ((tmo = rem_tmo(tick)) != NULL) {
>> > -             odp_queue_t  queue;
>> > -             odp_buffer_t buf;
>> > +unsigned odp_timer_pool_expire(odp_timer_pool_t tpid, uint64_t tick)
>> > +{
>> > +     if (odp_likely(tpid->shared))
>> > +             odp_ticketlock_lock(&tpid->lock);
>> > +
>> > +     unsigned nexp = 0;
>> > +     odp_timer_t tim;
>> > +     tpid->cur_tick = tick;
>> > +     tpid->min_tick = tick + tpid->min_tmo_tck;
>> > +     tpid->max_tick = tick + tpid->max_tmo_tck;
>> > +     while ((tim = (odp_timer_t)pq_release_element(&tpid->pq, tick)) !=
>> > +            ODP_TIMER_INVALID) {
>> > +             assert(get_prio(&tim->pqelem) <= tick);
>> > +             timer_expire(tim);
>> > +             nexp++;
>> > +     }
>> >
>> > -             queue = tmo->queue;
>> > -             buf   = tmo->buf;
>> > +     if (odp_likely(tpid->shared))
>> > +             odp_ticketlock_unlock(&tpid->lock);
>> > +     return nexp;
>> > +}
>> >
>> > -             if (buf != tmo->tmo_buf)
>> > -                     odp_buffer_free(tmo->tmo_buf);
>> >
>> > +/******************************************************************************
>> > + * POSIX timer support
>> > + * Functions that use Linux/POSIX per-process timers and related facilities
>> > + *****************************************************************************/
>> >
>> > -             odp_queue_enq(queue, buf);
>> > -     }
>> > +static void timer_notify(sigval_t sigval)
>> > +{
>> > +     odp_timer_pool *tp = (odp_timer_pool *)sigval.sival_ptr;
>> > +     uint64_t new_tick = tp->cur_tick + 1;
>> > +     (void)odp_timer_pool_expire(tp, new_tick);
>> >  }
>> >
>> > -static void timer_start(timer_ring_t *timer)
>> > +static void timer_init(odp_timer_pool *tp)
>> >  {
>> >       struct sigevent   sigev;
>> >       struct itimerspec ispec;
>> >       uint64_t res, sec, nsec;
>> >
>> > -     ODP_DBG("\nTimer (%u) starts\n", timer->timer_hdl);
>> > +     ODP_DBG("Creating POSIX timer for timer pool %s, period %"
>> > +             PRIu64" ns\n", tp->name, tp->resolution_ns);
>> >
>> >       memset(&sigev, 0, sizeof(sigev));
>> >       memset(&ispec, 0, sizeof(ispec));
>> >
>> >       sigev.sigev_notify          = SIGEV_THREAD;
>> > -     sigev.sigev_notify_function = notify_function;
>> > -     sigev.sigev_value.sival_ptr = timer;
>> > +     sigev.sigev_notify_function = timer_notify;
>> > +     sigev.sigev_value.sival_ptr = tp;
>> >
>> > -     if (timer_create(CLOCK_MONOTONIC, &sigev, &timer->timerid)) {
>> > -             ODP_DBG("Timer create failed\n");
>> > -             return;
>> > -     }
>> > +     if (timer_create(CLOCK_MONOTONIC, &sigev, &tp->timerid))
>> > +             ODP_ABORT("timer_create() returned error %s\n",
>> > +                       strerror(errno));
>> >
>> > -     res  = timer->resolution_ns;
>> > +     res  = tp->resolution_ns;
>> >       sec  = res / ODP_TIME_SEC;
>> > -     nsec = res - sec*ODP_TIME_SEC;
>> > +     nsec = res - sec * ODP_TIME_SEC;
>> >
>> >       ispec.it_interval.tv_sec  = (time_t)sec;
>> >       ispec.it_interval.tv_nsec = (long)nsec;
>> >       ispec.it_value.tv_sec     = (time_t)sec;
>> >       ispec.it_value.tv_nsec    = (long)nsec;
>> >
>> > -     if (timer_settime(timer->timerid, 0, &ispec, NULL)) {
>> > -             ODP_DBG("Timer set failed\n");
>> > -             return;
>> > -     }
>> > -
>> > -     return;
>> > +     if (timer_settime(tp->timerid, 0, &ispec, NULL))
>> > +             ODP_ABORT("timer_settime() returned error %s\n",
>> > +                       strerror(errno));
>> >  }
>> >
>> > -int odp_timer_init_global(void)
>> > +static void timer_exit(odp_timer_pool *tp)
>> >  {
>> > -     ODP_DBG("Timer init ...");
>> > -
>> > -     memset(&odp_timer, 0, sizeof(timer_global_t));
>> > -
>> > -     odp_spinlock_init(&odp_timer.lock);
>> > -
>> > -     ODP_DBG("done\n");
>> > -
>> > -     return 0;
>> > +     if (timer_delete(tp->timerid) != 0)
>> > +             ODP_ABORT("timer_delete() returned error %s\n",
>> > +                       strerror(errno));
>> >  }
>> >
>> > -int odp_timer_disarm_all(void)
>> >
>> > +/******************************************************************************
>> > + * Public API functions
>> > + * Some parameter checks and error messages
>> > + * No modifications of internal state
>> > + *****************************************************************************/
>> > +odp_timer_pool_t
>> > +odp_timer_pool_create(const char *name,
>> > +                   odp_buffer_pool_t buf_pool,
>> > +                   uint64_t resolution_ns,
>> > +                   uint64_t min_timeout,
>> > +                   uint64_t max_timeout,
>> > +                   uint32_t num_timers,
>> > +                   bool shared,
>> > +                   odp_timer_clk_src_t clk_src)
>> >  {
>> > -     int timers;
>> > -     struct itimerspec ispec;
>> > -
>> > -     odp_spinlock_lock(&odp_timer.lock);
>> > -
>> > -     timers = odp_timer.num_timers;
>> > -
>> > -     ispec.it_interval.tv_sec  = 0;
>> > -     ispec.it_interval.tv_nsec = 0;
>> > -     ispec.it_value.tv_sec     = 0;
>> > -     ispec.it_value.tv_nsec    = 0;
>> > -
>> > -     for (; timers >= 0; timers--) {
>> > -             if (timer_settime(odp_timer.timer[timers].timerid,
>> > -                               0, &ispec, NULL)) {
>> > -                     ODP_DBG("Timer reset failed\n");
>> > -                     odp_spinlock_unlock(&odp_timer.lock);
>> > -                     return -1;
>> > -             }
>> > -             odp_timer.num_timers--;
>> > -     }
>> > -
>> > -     odp_spinlock_unlock(&odp_timer.lock);
>> > -
>> > -     return 0;
>> > +     /* Verify that buffer pool can be used for timeouts */
>> > +     odp_buffer_t buf = odp_buffer_alloc(buf_pool);
>> > +     if (buf == ODP_BUFFER_INVALID)
>> > +             ODP_ABORT("%s: Failed to allocate buffer\n", name);
>> > +     if (odp_buffer_type(buf) != ODP_BUFFER_TYPE_TIMEOUT)
>> > +             ODP_ABORT("%s: Buffer pool wrong type\n", name);
>> > +     odp_buffer_free(buf);
>> > +     odp_timer_pool_t tp = odp_timer_pool_new(name, buf_pool, resolution_ns,
>> > +                           min_timeout, max_timeout, num_timers,
>> > +                           shared, clk_src);
>> > +     return tp;
>> >  }
>> >
>> > -odp_timer_t odp_timer_create(const char *name, odp_buffer_pool_t pool,
>> > -                          uint64_t resolution_ns, uint64_t min_ns,
>> > -                          uint64_t max_ns)
>> > +void odp_timer_pool_start(void)
>> >  {
>> > -     uint32_t id;
>> > -     timer_ring_t *timer;
>> > -     odp_timer_t timer_hdl;
>> > -     int i;
>> > -     uint64_t max_ticks;
>> > -     (void) name;
>> > -
>> > -     if (resolution_ns < MIN_RES)
>> > -             resolution_ns = MIN_RES;
>> > -
>> > -     if (resolution_ns > MAX_RES)
>> > -             resolution_ns = MAX_RES;
>> > -
>> > -     max_ticks = max_ns / resolution_ns;
>> > -
>> > -     if (max_ticks > MAX_TICKS) {
>> > -             ODP_DBG("Maximum timeout too long: %"PRIu64" ticks\n",
>> > -                     max_ticks);
>> > -             return ODP_TIMER_INVALID;
>> > -     }
>> > -
>> > -     if (min_ns < resolution_ns) {
>> > -             ODP_DBG("Min timeout %"PRIu64" ns < resolution %"PRIu64"
>> > ns\n",
>> > -                     min_ns, resolution_ns);
>> > -             return ODP_TIMER_INVALID;
>> > -     }
>> > -
>> > -     odp_spinlock_lock(&odp_timer.lock);
>> > -
>> > -     if (odp_timer.num_timers >= NUM_TIMERS) {
>> > -             odp_spinlock_unlock(&odp_timer.lock);
>> > -             ODP_DBG("All timers allocated\n");
>> > -             return ODP_TIMER_INVALID;
>> > -     }
>> > -
>> > -     for (id = 0; id < NUM_TIMERS; id++) {
>> > -             if (odp_timer.timer[id].allocated == 0)
>> > -                     break;
>> > -     }
>> > -
>> > -     timer = &odp_timer.timer[id];
>> > -     timer->allocated = 1;
>> > -     odp_timer.num_timers++;
>> > -
>> > -     odp_spinlock_unlock(&odp_timer.lock);
>> > -
>> > -     timer_hdl = id + 1;
>> > -
>> > -     timer->timer_hdl     = timer_hdl;
>> > -     timer->pool          = pool;
>> > -     timer->resolution_ns = resolution_ns;
>> > -     timer->max_ticks     = MAX_TICKS;
>> > -
>> > -     for (i = 0; i < MAX_TICKS; i++) {
>> > -             odp_spinlock_init(&timer->tick[i].lock);
>> > -             timer->tick[i].list = NULL;
>> > -     }
>> > -
>> > -     timer->active = 1;
>> > -     odp_sync_stores();
>> > -
>> > -     timer_start(timer);
>> > +     /* Nothing to do here, timer pools are started by the create call
>> > */
>> > +}
>> >
>> > -     return timer_hdl;
>> > +void odp_timer_pool_destroy(odp_timer_pool_t tpid)
>> > +{
>> > +     odp_timer_pool_del(tpid);
>> >  }
>> >
>> > -odp_timer_tmo_t odp_timer_absolute_tmo(odp_timer_t timer_hdl, uint64_t tmo_tick,
>> > -                                    odp_queue_t queue, odp_buffer_t buf)
>> > +uint64_t odp_timer_tick_to_ns(odp_timer_pool_t tpid, uint64_t ticks)
>> >  {
>> > -     int id;
>> > -     uint64_t tick;
>> > -     uint64_t cur_tick;
>> > -     timeout_t *new_tmo;
>> > -     odp_buffer_t tmo_buf;
>> > -     odp_timeout_hdr_t *tmo_hdr;
>> > -     timer_ring_t *timer;
>> > +     return ticks * tpid->resolution_ns;
>> > +}
>> >
>> > -     id = (int)timer_hdl - 1;
>> > -     timer = &odp_timer.timer[id];
>> > +uint64_t odp_timer_ns_to_tick(odp_timer_pool_t tpid, uint64_t ns)
>> > +{
>> > +     return (uint64_t)(ns / tpid->resolution_ns);
>> > +}
>> >
>> > -     cur_tick = timer->cur_tick;
>> > -     if (tmo_tick <= cur_tick) {
>> > -             ODP_DBG("timeout too close\n");
>> > -             return ODP_TIMER_TMO_INVALID;
>> > -     }
>> > +uint64_t odp_timer_current_tick(odp_timer_pool_t tpid)
>> > +{
>> > +     return tpid->cur_tick;
>> > +}
>> >
>> > -     if ((tmo_tick - cur_tick) > MAX_TICKS) {
>> > -             ODP_DBG("timeout too far: cur %"PRIu64" tmo %"PRIu64"\n",
>> > -                     cur_tick, tmo_tick);
>> > -             return ODP_TIMER_TMO_INVALID;
>> > +uintptr_t odp_timer_pool_query_conf(odp_timer_pool_t tpid,
>> > +                                 odp_timer_pool_conf_t item)
>> > +{
>> > +     switch (item) {
>> > +     case ODP_TIMER_NAME:
>> > +             return (uintptr_t)(tpid->name);
>> > +     case ODP_TIMER_RESOLUTION:
>> > +             return tpid->resolution_ns;
>> > +     case ODP_TIMER_MIN_TICKS:
>> > +             return tpid->min_tmo_tck;
>> > +     case ODP_TIMER_MAX_TICKS:
>> > +             return tpid->max_tmo_tck;
>> > +     case ODP_TIMER_NUM_TIMERS:
>> > +             return tpid->max_timers;
>> > +     case ODP_TIMER_SHARED:
>> > +             return tpid->shared;
>> > +     default:
>> > +             return 0;
>> >       }
>> > +}
>> >
>> > -     tick = tmo_tick % MAX_TICKS;
>> > -
>> > -     tmo_buf = odp_buffer_alloc(timer->pool);
>> > -     if (tmo_buf == ODP_BUFFER_INVALID) {
>> > -             ODP_DBG("tmo buffer alloc failed\n");
>> > -             return ODP_TIMER_TMO_INVALID;
>> > +odp_timer_t odp_timer_alloc(odp_timer_pool_t tpid,
>> > +                         odp_queue_t queue,
>> > +                         void *user_ptr)
>> > +{
>> > +     /* We check this because ODP_QUEUE_INVALID is used */
>> > +     /* to indicate a free timer */
>> > +     if (odp_unlikely(queue == ODP_QUEUE_INVALID))
>> > +             ODP_ABORT("%s: Invalid queue handle\n", tpid->name);
>> > +     odp_buffer_t tmo_buf = odp_buffer_alloc(tpid->buf_pool);
>> > +     if (odp_likely(tmo_buf != ODP_BUFFER_INVALID)) {
>> > +             odp_timer *tim = timer_alloc(tpid, queue, user_ptr, tmo_buf);
>> > +             if (odp_likely(tim != ODP_TIMER_INVALID)) {
>> > +                     /* Success */
>> > +                     assert(tim->queue != ODP_QUEUE_INVALID);
>> > +                     return tim;
>> > +             }
>> > +             odp_buffer_free(tmo_buf);
>> >       }
>> > +     /* Else failed to allocate timeout event */
>> > +     /* errno set by odp_buffer_alloc() or timer_alloc () */
>> > +     return ODP_TIMER_INVALID;
>> > +}
>> >
>> > -     tmo_hdr = odp_timeout_hdr((odp_timeout_t) tmo_buf);
>> > -     new_tmo = &tmo_hdr->meta;
>> > -
>> > -     new_tmo->timer_id = id;
>> > -     new_tmo->tick     = (int)tick;
>> > -     new_tmo->tmo_tick = tmo_tick;
>> > -     new_tmo->queue    = queue;
>> > -     new_tmo->tmo_buf  = tmo_buf;
>> > -
>> > -     if (buf != ODP_BUFFER_INVALID)
>> > -             new_tmo->buf = buf;
>> > -     else
>> > -             new_tmo->buf = tmo_buf;
>> > -
>> > -     add_tmo(&timer->tick[tick], new_tmo);
>> > -
>> > -     return tmo_buf;
>> > +void odp_timer_free(odp_timer_t tim)
>> > +{
>> > +     odp_timer_pool *tp = (odp_timer_pool *)get_pq(&tim->pqelem);
>> > +     timer_free(tp, tim);
>> >  }
>> >
>> > -uint64_t odp_timer_tick_to_ns(odp_timer_t timer_hdl, uint64_t ticks)
>> > +odp_timer_set_t odp_timer_set_abs_w_buf(odp_timer_t tim,
>> > +                                     uint64_t abs_tck,
>> > +                                     odp_buffer_t user_buf)
>> >  {
>> > -     uint32_t id;
>> > +     odp_timer_pool *tp = (odp_timer_pool *)get_pq(&tim->pqelem);
>> > +     odp_timer_set_t rc = timer_reset_w_buf(tp, tim, abs_tck, user_buf);
>> > +     return rc;
>> > +}
>> >
>> > -     id = timer_hdl - 1;
>> > -     return ticks * odp_timer.timer[id].resolution_ns;
>> > +odp_timer_set_t odp_timer_set_abs(odp_timer_t tim, uint64_t abs_tck)
>> > +{
>> > +     odp_timer_pool *tp = (odp_timer_pool *)get_pq(&tim->pqelem);
>> > +     odp_timer_set_t rc = timer_reset(tp, tim, abs_tck);
>> > +     return rc;
>> >  }
>> >
>> > -uint64_t odp_timer_ns_to_tick(odp_timer_t timer_hdl, uint64_t ns)
>> > +odp_timer_set_t odp_timer_set_rel_w_buf(odp_timer_t tim,
>> > +                                     uint64_t rel_tck,
>> > +                                     odp_buffer_t user_buf)
>> >  {
>> > -     uint32_t id;
>> > +     odp_timer_pool *tp = (odp_timer_pool *)get_pq(&tim->pqelem);
>> > +     odp_timer_set_t rc = timer_reset_w_buf(tp, tim, tp->cur_tick + rel_tck,
>> > +                                            user_buf);
>> > +     return rc;
>> > +}
>> >
>> > -     id = timer_hdl - 1;
>> > -     return ns / odp_timer.timer[id].resolution_ns;
>> > +odp_timer_set_t odp_timer_set_rel(odp_timer_t tim, uint64_t rel_tck)
>> > +{
>> > +     odp_timer_pool *tp = (odp_timer_pool *)get_pq(&tim->pqelem);
>> > +     odp_timer_set_t rc = timer_reset(tp, tim, tp->cur_tick + rel_tck);
>> > +     return rc;
>> >  }
>> >
>> > -uint64_t odp_timer_resolution(odp_timer_t timer_hdl)
>> > +void odp_timer_cancel(odp_timer_t tim)
>> >  {
>> > -     uint32_t id;
>> > +     odp_timer_pool *tp = (odp_timer_pool *)get_pq(&tim->pqelem);
>> > +     timer_cancel(tp, tim);
>> > +}
>> >
>> > -     id = timer_hdl - 1;
>> > -     return odp_timer.timer[id].resolution_ns;
>> > +void odp_timer_return_tmo(odp_timer_tmo_t tmo)
>> > +{
>> > +     const odp_timeout_hdr_t *tmo_hdr = odp_tmo_to_hdr(tmo);
>> > +     odp_timer *parent_tim = tmo_hdr->timer;
>> > +     odp_timer_pool *tp = (odp_timer_pool *)get_pq(&parent_tim->pqelem);
>> > +     timer_return(tp, parent_tim, tmo, tmo_hdr);
>> >  }
>> >
>> > -uint64_t odp_timer_maximum_tmo(odp_timer_t timer_hdl)
>> > +odp_timer_tmo_status_t odp_timer_tmo_status(odp_timer_tmo_t tmo)
>> >  {
>> > -     uint32_t id;
>> > +     const odp_timeout_hdr_t *tmo_hdr = odp_tmo_to_hdr(tmo);
>> > +     odp_timer *parent_tim = tmo_hdr->timer;
>> >
>> > -     id = timer_hdl - 1;
>> > -     return odp_timer.timer[id].max_ticks;
>> > +     if (odp_unlikely(tmo_hdr->gencnt != parent_tim->gencnt)) {
>> > +             /* Generation counters differ => timer has been freed */
>> > +             return ODP_TMO_ORPHAN;
>> > +     }
>> > +     /* Else generation counters match => parent timer exists */
>> > +
>> > +     if (odp_likely(parent_tim->tag == tmo_hdr->tag))
>> > +             return ODP_TMO_FRESH;
>> > +     else
>> > +             return ODP_TMO_STALE;
>> >  }
>> >
>> > -uint64_t odp_timer_current_tick(odp_timer_t timer_hdl)
>> > +odp_timer_t odp_timer_handle(odp_timer_tmo_t tmo)
>> >  {
>> > -     uint32_t id;
>> > +     odp_timeout_hdr_t *tmo_hdr = odp_tmo_to_hdr(tmo);
>> > +     odp_timer_t parent_tim = tmo_hdr->timer;
>> > +     if (odp_likely(tmo_hdr->gencnt == parent_tim->gencnt))
>> > +             return parent_tim;
>> > +     else
>> > +             return ODP_TIMER_INVALID;
>> > +}
>> >
>> > -     id = timer_hdl - 1;
>> > -     return odp_timer.timer[id].cur_tick;
>> > +uint64_t odp_timer_expiration(odp_timer_tmo_t tmo)
>> > +{
>> > +     odp_timeout_hdr_t *tmo_hdr = odp_tmo_to_hdr(tmo);
>> > +     return tmo_hdr->expiration;
>> >  }
>> >
>> > -odp_timeout_t odp_timeout_from_buffer(odp_buffer_t buf)
>> > +void *odp_timer_userptr(odp_timer_tmo_t tmo)
>> >  {
>> > -     return (odp_timeout_t) buf;
>> > +     odp_timeout_hdr_t *tmo_hdr = odp_tmo_to_hdr(tmo);
>> > +     return tmo_hdr->user_ptr;
>> >  }
>> >
>> > -uint64_t odp_timeout_tick(odp_timeout_t tmo)
>> > +int odp_timer_init_global(void)
>> >  {
>> > -     odp_timeout_hdr_t *tmo_hdr = odp_timeout_hdr(tmo);
>> > -     return tmo_hdr->meta.tmo_tick;
>> > +     return 0;
>> >  }
>> > diff --git a/test/api_test/odp_timer_ping.c b/test/api_test/odp_timer_ping.c
>> > index 7406a45..2617b5c 100644
>> > --- a/test/api_test/odp_timer_ping.c
>> > +++ b/test/api_test/odp_timer_ping.c
>> > @@ -20,6 +20,8 @@
>> >   *    Otherwise timeout may happen bcz of slow nw speed
>> >   */
>> >
>> > +#include <assert.h>
>> > +#include <stdlib.h>
>> >  #include <unistd.h>
>> >  #include <fcntl.h>
>> >  #include <errno.h>
>> > @@ -41,14 +43,15 @@
>> >  #define MSG_POOL_SIZE         (4*1024*1024)
>> >  #define BUF_SIZE             8
>> >  #define PING_CNT     10
>> > -#define PING_THRD    2       /* Send and Rx Ping thread */
>> > +#define PING_THRD    2       /* send_ping and rx_ping threads */
>> >
>> >  /* Nanoseconds */
>> >  #define RESUS        10000
>> >  #define MINUS        10000
>> >  #define MAXUS        10000000
>> >
>> > -static odp_timer_t test_timer_ping;
>> > +static odp_timer_pool_t tp;
>> > +static odp_timer_t test_timer_ping = ODP_TIMER_INVALID;
>> >  static odp_timer_tmo_t test_ping_tmo;
>> >
>> >  #define PKTSIZE      64
>> > @@ -128,15 +131,7 @@ static int listen_to_pingack(void)
>> >                                        (socklen_t *)&len);
>> >                       if (bytes > 0) {
>> >                               /* pkt rxvd therefore cancel the timeout
>> */
>> > -                             if (odp_timer_cancel_tmo(test_timer_ping,
>> > -                                                      test_ping_tmo)
>> != 0) {
>> > -                                     ODP_ERR("cancel_tmo failed
>> ..exiting
>> > listner thread\n");
>> > -                                     /* avoid exiting from here even
>> if tmo
>> > -                                      * failed for current ping,
>> > -                                      * allow subsequent ping_rx
>> request */
>> > -                                     err = -1;
>> > -
>> > -                             }
>> > +                             odp_timer_cancel(test_timer_ping);
>> >                               /* cruel bad hack used for sender,
>> listner ipc..
>> >                                * euwww.. FIXME ..
>> >                                */
>> > @@ -160,7 +155,6 @@ static int send_ping_request(struct sockaddr_in *addr)
>> >
>> >       uint64_t tick;
>> >       odp_queue_t queue;
>> > -     odp_buffer_t buf;
>> >
>> >       int err = 0;
>> >
>> > @@ -184,8 +178,16 @@ static int send_ping_request(struct sockaddr_in *addr)
>> >
>> >       /* get the ping queue */
>> >       queue = odp_queue_lookup("ping_timer_queue");
>> > +     test_timer_ping = odp_timer_alloc(tp, queue, NULL);
>> > +     if (test_timer_ping == ODP_TIMER_INVALID) {
>> > +             ODP_ERR("Failed to allocate timer.\n");
>> > +             err = -1;
>> > +             goto err;
>> > +     }
>> >
>> >       for (i = 0; i < PING_CNT; i++) {
>> > +             odp_buffer_t buf;
>> > +             odp_timer_tmo_t tmo;
>> >               /* prepare icmp pkt */
>> >               bzero(&pckt, sizeof(pckt));
>> >               pckt.hdr.type = ICMP_ECHO;
>> > @@ -209,12 +211,10 @@ static int send_ping_request(struct sockaddr_in *addr)
>> >               printf(" icmp_sent msg_cnt %d\n", i);
>> >
>> >               /* arm the timer */
>> > -             tick = odp_timer_current_tick(test_timer_ping);
>> > +             tick = odp_timer_current_tick(tp);
>> >
>> >               tick += 1000;
>> > -             test_ping_tmo = odp_timer_absolute_tmo(test_timer_ping,
>> > tick,
>> > -                                                    queue,
>> > -
>> ODP_BUFFER_INVALID);
>> > +             odp_timer_set_abs(test_timer_ping, tick);
>> >               /* wait for timeout event */
>> >               while ((buf = odp_queue_deq(queue)) ==
>> ODP_BUFFER_INVALID) {
>> >                       /* flag true means ack rxvd.. a cruel hack as I
>> > @@ -229,17 +229,28 @@ static int send_ping_request(struct sockaddr_in *addr)
>> >                               break;
>> >                       }
>> >               }
>> > +             assert(odp_buffer_type(buf) == ODP_BUFFER_TYPE_TIMEOUT);
>> > +             tmo = odp_timeout_from_buffer(buf);
>> >
>> > -             /* free tmo_buf for timeout case */
>> > -             if (buf != ODP_BUFFER_INVALID) {
>> > -                     ODP_DBG(" timeout msg_cnt [%i] \n", i);
>> > +             switch (odp_timer_tmo_status(tmo)) {
>> > +             case ODP_TMO_FRESH:
>> > +                     ODP_DBG(" timeout msg_cnt [%i]\n", i);
>> >                       /* so to avoid seg fault commented */
>> > -                     odp_buffer_free(buf);
>> >                       err = -1;
>> > +                     break;
>> > +             case ODP_TMO_STALE:
>> > +                     /* Ignore stale timeouts */
>> > +                     break;
>> > +             case ODP_TMO_ORPHAN:
>> > +                     ODP_ERR("Received orphaned timeout!\n");
>> > +                     abort();
>> >               }
>> > +             odp_timer_return_tmo(tmo);
>> >       }
>> >
>> >  err:
>> > +     if (test_timer_ping != ODP_TIMER_INVALID)
>> > +             odp_timer_free(test_timer_ping);
>> >       return err;
>> >  }
>> >
>> > @@ -340,9 +351,9 @@ int main(int argc ODP_UNUSED, char *argv[] ODP_UNUSED)
>> >       pool = odp_buffer_pool_create("msg_pool", pool_base,
>> MSG_POOL_SIZE,
>> >                                     BUF_SIZE,
>> >                                     ODP_CACHE_LINE_SIZE,
>> > -                                   ODP_BUFFER_TYPE_RAW);
>> > +                                   ODP_BUFFER_TYPE_TIMEOUT);
>> >       if (pool == ODP_BUFFER_POOL_INVALID) {
>> > -             ODP_ERR("Pool create failed.\n");
>> > +             ODP_ERR("Buffer pool create failed.\n");
>> >               return -1;
>> >       }
>> >
>> > @@ -357,15 +368,19 @@ int main(int argc ODP_UNUSED, char *argv[] ODP_UNUSED)
>> >               return -1;
>> >       }
>> >
>> > -     test_timer_ping = odp_timer_create("ping_timer", pool,
>> > -                                        RESUS*ODP_TIME_USEC,
>> > -                                        MINUS*ODP_TIME_USEC,
>> > -                                        MAXUS*ODP_TIME_USEC);
>> > -
>> > -     if (test_timer_ping == ODP_TIMER_INVALID) {
>> > -             ODP_ERR("Timer create failed.\n");
>> > +     /*
>> > +      * Create timer pool
>> > +      */
>> > +     tp = odp_timer_pool_create("timer_pool", pool,
>> > +                                RESUS*ODP_TIME_USEC,
>> > +                                MINUS*ODP_TIME_USEC,
>> > +                                MAXUS*ODP_TIME_USEC,
>> > +                                1, false, ODP_CLOCK_CPU);
>> > +     if (tp == ODP_TIMER_POOL_INVALID) {
>> > +             ODP_ERR("Timer pool create failed.\n");
>> >               return -1;
>> >       }
>> > +     odp_timer_pool_start();
>> >
>> >       odp_shm_print_all();
>> >
>> > --
>> > 1.9.1
>> >
>> >
>> > _______________________________________________
>> > lng-odp mailing list
>> > lng-odp@lists.linaro.org
>> > http://lists.linaro.org/mailman/listinfo/lng-odp
>>
>
>
> _______________________________________________
> lng-odp mailing list
> lng-odp@lists.linaro.org
> http://lists.linaro.org/mailman/listinfo/lng-odp
>
>
Savolainen, Petri (NSN - FI/Espoo) Oct. 6, 2014, 10:45 a.m. UTC | #7
Hi,

The main point is to be able to use an ODP API “à la carte”. In this case, a user can use the scheduler API without being forced to use the timer API. The implementation underneath may run the schedule timeout in whichever way is optimal for the HW/implementation.

Also, the user has the option to call the scheduler with ODP_SCHED_WAIT and set up the timeout through the timer API.
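
For example (just a sketch, reusing the odp_schedule_one()/ODP_SCHED_WAIT and timer calls from this patch; "wait" and "tmo_ns" are placeholder variables):

  /* Option A: scheduler-only, the wait/timeout is handled by the scheduler */
  buf = odp_schedule_one(&queue, wait);

  /* Option B: wait forever, drive the timeout through the timer API */
  odp_timer_t tim = odp_timer_alloc(tp, queue, NULL);
  odp_timer_set_rel(tim, odp_timer_ns_to_tick(tp, tmo_ns));
  buf = odp_schedule_one(&queue, ODP_SCHED_WAIT);
  if (odp_buffer_type(buf) == ODP_BUFFER_TYPE_TIMEOUT) {
          /* the "schedule timeout" arrives as an ordinary timeout event */
  }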

-Petri


From: lng-odp-bounces@lists.linaro.org [mailto:lng-odp-bounces@lists.linaro.org] On Behalf Of ext Ola Liljedahl

Sent: Monday, October 06, 2014 12:37 PM
To: Gilad Ben Yossef
Cc: lng-odp@lists.linaro.org
Subject: Re: [lng-odp] [PATCHv4] Timer API and and priority queue-based implementation

Gilad,

Your suggestion makes sense. This is how a purely event-driven application would be designed.

Possibly the wait/timeout parameter to the odp_schedule calls is a legacy from the time before there was a timer API in ODP. Maybe Petri can s(c)hed some light on this.

I suspect there could be some performance benefits from specifying the timeout as an explicit parameter. If the scheduling timeout is implemented using a timer event facility (e.g. the ODP timer API), the application (or the ODP implementation, if it uses the same design) would have to reset that timer for every odp_schedule call; for a SW timer implementation this could add serious overhead. With an explicit timeout parameter, the scheduler implementation could instead read e.g. some cycle counter while (busy-)waiting for events to become available. This overhead should be smaller and is incurred only when the thread is idle and waiting for work.
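
To illustrate (a rough sketch only, not code from the patch; "sched_timer" and "wait" are made-up names, and I assume the wait argument can carry a time value as in the current API):

  /* Timeout via timer events: every schedule call must re-arm a timer */
  odp_timer_set_rel(sched_timer, wait);            /* extra work per call */
  buf = odp_schedule_one(&queue, ODP_SCHED_WAIT);  /* timeout comes back as an event */

  /* Timeout as an explicit parameter: the implementation can poll e.g. a
   * cycle counter while the thread is idle, no timer reset on every call */
  buf = odp_schedule_one(&queue, wait);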

The current API does not prevent an implementation from using timer events internally and does not limit an application from using the timer API for timeouts. It does add a little bit of implementation complexity. What is the best trade-off?

-- Ola

On 6 October 2014 08:22, Gilad Ben Yossef <giladb@ezchip.com> wrote:

Another one of my stupid questions, I'm afraid.  :-)
If we have a timer implemented as an event pushed to a queue which can be scheduled like any other queue (which is a good thing, I think), why do our schedule APIs need a timeout?
I mean, if you want a timeout, just add a scheduled timer queue and send yourself timeout events. That's how I would implement the schedule timeouts internally anyway (running a native timer on a core that does packet processing stops it from enjoying full Linux NOHZ CPU isolation, so we really don't want timers there...)
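
Roughly what I have in mind (sketch only, reusing the timer calls from this patch; the queue name and period are arbitrary):

  /* Once: a timer whose timeouts land on a scheduled queue */
  odp_queue_t tick_q = odp_queue_lookup("tick_queue");
  odp_timer_t tick_tim = odp_timer_alloc(tp, tick_q, NULL);
  odp_timer_set_rel(tick_tim, odp_timer_ns_to_tick(tp, period_ns));

  /* Worker loop: no schedule timeout parameter needed */
  buf = odp_schedule_one(&queue, ODP_SCHED_WAIT);
  if (odp_buffer_type(buf) == ODP_BUFFER_TYPE_TIMEOUT) {
          odp_timer_tmo_t tmo = odp_timeout_from_buffer(buf);
          /* this is the "schedule timeout" case; handle it, then re-arm */
          odp_timer_return_tmo(tmo);
          odp_timer_set_rel(tick_tim, odp_timer_ns_to_tick(tp, period_ns));
  }
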
Anything I've missed?
Thanks,
Gilad
Gilad Ben-Yossef
Software Architect
EZchip Technologies Ltd.
37 Israel Pollak Ave, Kiryat Gat 82025 ,Israel
Tel: +972-4-959-6666 ext. 576, Fax: +972-8-681-1483
Mobile: +972-52-826-0388, US Mobile: +1-973-826-0388
Email: giladb@ezchip.com, Web: http://www.ezchip.com

"Ethernet always wins."
        — Andy Bechtolsheim


> -----Original Message-----

> From: lng-odp-bounces@lists.linaro.org [mailto:lng-odp-bounces@lists.linaro.org] On Behalf Of Ola Liljedahl

> Sent: Thursday, October 02, 2014 6:23 PM

> To: lng-odp@lists.linaro.org

> Subject: [lng-odp] [PATCHv4] Timer API and and priority queue-based

> implementation

>

> Signed-off-by: Ola Liljedahl <ola.liljedahl@linaro.org>

> ---

> Fixed review comments for v3 from Anders R.

> * Example code snippets use @code/@endcode.

> * Added some missing doxygen comments.

> * Updated some comments.

> * Reverted year in copyright notices.

> * Added odp_likely() hint.

> * Made some variables self-descriptive and removed redundant comments.

> Changed to use ticket locks instead of spin locks (ticket locks are more

> fair).

> Changed to use ODP_ABORT() which has become available since the last

> patch.

>

>  example/timer/odp_timer_test.c                     | 125 +--

>  platform/linux-generic/Makefile.am                 |   1 +

>  platform/linux-generic/include/api/odp_timer.h     | 570 +++++++++++--

>  .../include/odp_priority_queue_internal.h          | 108 +++

>  .../linux-generic/include/odp_timer_internal.h     |  71 +-

>  platform/linux-generic/odp_priority_queue.c        | 283 +++++++

>  platform/linux-generic/odp_timer.c                 | 923 ++++++++++++++-

> ------

>  test/api_test/odp_timer_ping.c                     |  73 +-

>  8 files changed, 1648 insertions(+), 506 deletions(-)

>  create mode 100644 platform/linux-

> generic/include/odp_priority_queue_internal.h

>  create mode 100644 platform/linux-generic/odp_priority_queue.c

>

> diff --git a/example/timer/odp_timer_test.c

> b/example/timer/odp_timer_test.c

> index 6e1715d..750d785 100644

> --- a/example/timer/odp_timer_test.c

> +++ b/example/timer/odp_timer_test.c

> @@ -41,67 +41,89 @@ typedef struct {

>  /** @private Barrier for test synchronisation */

>  static odp_barrier_t test_barrier;

>

> -/** @private Timer handle*/

> -static odp_timer_t test_timer;

> +/** @private Timer pool handle */

> +static odp_timer_pool_t tp;

>

>

> +/** @private Timeout status ASCII strings */

> +static const char *const status2str[] = {

> +     "fresh", "stale", "orphaned"

> +};

> +

>  /** @private test timeout */

>  static void test_abs_timeouts(int thr, test_args_t *args)

>  {

> -     uint64_t tick;

>       uint64_t period;

>       uint64_t period_ns;

>       odp_queue_t queue;

> -     odp_buffer_t buf;

> -     int num;

> +     int remain = args->tmo_count;

> +     odp_timer_t hdl;

> +     uint64_t tick;

>

>       ODP_DBG("  [%i] test_timeouts\n", thr);

>

>       queue = odp_queue_lookup("timer_queue");

>

>       period_ns = args->period_us*ODP_TIME_USEC;

> -     period    = odp_timer_ns_to_tick(test_timer, period_ns);

> +     period    = odp_timer_ns_to_tick(tp, period_ns);

>

>       ODP_DBG("  [%i] period %"PRIu64" ticks,  %"PRIu64" ns\n", thr,

>               period, period_ns);

>

> -     tick = odp_timer_current_tick(test_timer);

> -

> -     ODP_DBG("  [%i] current tick %"PRIu64"\n", thr, tick);

> -

> -     tick += period;

> +     ODP_DBG("  [%i] current tick %"PRIu64"\n", thr,

> +             odp_timer_current_tick(tp));

>

> -     if (odp_timer_absolute_tmo(test_timer, tick, queue,

> ODP_BUFFER_INVALID)

> -         == ODP_TIMER_TMO_INVALID){

> -             ODP_DBG("Timeout request failed\n");

> +     odp_timer_t test_timer;

> +     test_timer = odp_timer_alloc(tp, queue, NULL);

> +     if (test_timer == ODP_TIMER_INVALID) {

> +             ODP_ERR("Failed to allocate timer\n");

>               return;

>       }

> +     tick = odp_timer_current_tick(tp);

> +     hdl = test_timer;

>

> -     num = args->tmo_count;

> -

> -     while (1) {

> -             odp_timeout_t tmo;

> -

> -             buf = odp_schedule_one(&queue, ODP_SCHED_WAIT);

> -

> -             tmo  = odp_timeout_from_buffer(buf);

> -             tick = odp_timeout_tick(tmo);

> -

> -             ODP_DBG("  [%i] timeout, tick %"PRIu64"\n", thr, tick);

> -

> -             odp_buffer_free(buf);

> -

> -             num--;

> -

> -             if (num == 0)

> -                     break;

> +     while (remain != 0) {

> +             odp_buffer_t buf;

> +             odp_timer_tmo_t tmo;

> +             odp_timer_tmo_status_t stat;

> +             odp_timer_set_t rc;

>

>               tick += period;

> +             rc = odp_timer_set_abs(hdl, tick);

> +             if (odp_unlikely(rc != ODP_TIMER_SET_SUCCESS)) {

> +                     ODP_ERR("odp_timer_set_abs() failed (%u)\n", rc);

> +                     abort();

> +             }

>

> -             odp_timer_absolute_tmo(test_timer, tick,

> -                                    queue, ODP_BUFFER_INVALID);

> +             /* Get the next ready buffer/timeout */

> +             buf = odp_schedule_one(&queue, ODP_SCHED_WAIT);

> +             if (odp_unlikely(odp_buffer_type(buf) !=

> +                              ODP_BUFFER_TYPE_TIMEOUT)) {

> +                     ODP_ERR("Unexpected buffer type received\n");

> +                     abort();

> +             }

> +             tmo = odp_timeout_from_buffer(buf);

> +             stat = odp_timer_tmo_status(tmo);

> +             tick = odp_timer_expiration(tmo);

> +             hdl = odp_timer_handle(tmo);

> +             ODP_DBG("  [%i] timeout, tick %"PRIu64", status %s\n",

> +                     thr, tick, status2str[stat]);

> +             /* if (stat == ODP_TMO_FRESH)  - do your thing! */

> +             if (odp_likely(stat == ODP_TMO_ORPHAN)) {

> +                     /* Some other thread freed the corresponding

> +                        timer after the timeout was already

> +                        enqueued */

> +                     /* Timeout handle is invalid, use our own timer */

> +                     hdl = test_timer;

> +             }

> +             /* Return timeout to timer manager, regardless of status */

> +             odp_timer_return_tmo(tmo);

> +             remain--;

>       }

>

> +     odp_timer_cancel(test_timer);

> +     odp_timer_free(test_timer);

> +

>       if (odp_queue_sched_type(queue) == ODP_SCHED_SYNC_ATOMIC)

>               odp_schedule_release_atomic();

>  }

> @@ -155,7 +177,6 @@ static void print_usage(void)

>       printf("Options:\n");

>       printf("  -c, --count <number>    core count, core IDs start from

> 1\n");

>       printf("  -r, --resolution <us>   timeout resolution in usec\n");

> -     printf("  -m, --min <us>          minimum timeout in usec\n");

>       printf("  -x, --max <us>          maximum timeout in usec\n");

>       printf("  -p, --period <us>       timeout period in usec\n");

>       printf("  -t, --timeouts <count>  timeout repeat count\n");

> @@ -190,14 +211,14 @@ static void parse_args(int argc, char *argv[],

> test_args_t *args)

>       /* defaults */

>       args->core_count    = 0; /* all cores */

>       args->resolution_us = 10000;

> -     args->min_us        = args->resolution_us;

> +     args->min_us        = 0;

>       args->max_us        = 10000000;

>       args->period_us     = 1000000;

>       args->tmo_count     = 30;

>

>       while (1) {

>               opt = getopt_long(argc, argv, "+c:r:m:x:p:t:h",

> -                              longopts, &long_index);

> +                               longopts, &long_index);

>

>               if (opt == -1)

>                       break;  /* No more options */

> @@ -321,10 +342,25 @@ int main(int argc, char *argv[])

>                                     ODP_BUFFER_TYPE_TIMEOUT);

>

>       if (pool == ODP_BUFFER_POOL_INVALID) {

> -             ODP_ERR("Pool create failed.\n");

> +             ODP_ERR("Buffer pool create failed.\n");

>               return -1;

>       }

>

> +     tp = odp_timer_pool_create("timer_pool", pool,

> +                                args.resolution_us*ODP_TIME_USEC,

> +                                args.min_us*ODP_TIME_USEC,

> +                                args.max_us*ODP_TIME_USEC,

> +                                num_workers, /* One timer per worker */

> +                                true,

> +                                ODP_CLOCK_CPU);

> +     if (tp == ODP_TIMER_POOL_INVALID) {

> +             ODP_ERR("Timer pool create failed.\n");

> +             return -1;

> +     }

> +     odp_timer_pool_start();

> +

> +     odp_shm_print_all();

> +

>       /*

>        * Create a queue for timer test

>        */

> @@ -340,19 +376,6 @@ int main(int argc, char *argv[])

>               return -1;

>       }

>

> -     test_timer = odp_timer_create("test_timer", pool,

> -                                   args.resolution_us*ODP_TIME_USEC,

> -                                   args.min_us*ODP_TIME_USEC,

> -                                   args.max_us*ODP_TIME_USEC);

> -

> -     if (test_timer == ODP_TIMER_INVALID) {

> -             ODP_ERR("Timer create failed.\n");

> -             return -1;

> -     }

> -

> -

> -     odp_shm_print_all();

> -

>       printf("CPU freq %"PRIu64" hz\n", odp_sys_cpu_hz());

>       printf("Cycles vs nanoseconds:\n");

>       ns = 0;

> diff --git a/platform/linux-generic/Makefile.am b/platform/linux-

> generic/Makefile.am

> index d076d50..71f923c 100644

> --- a/platform/linux-generic/Makefile.am

> +++ b/platform/linux-generic/Makefile.am

> @@ -59,6 +59,7 @@ __LIB__libodp_la_SOURCES = \

>                          odp_packet_flags.c \

>                          odp_packet_io.c \

>                          odp_packet_socket.c \

> +                        odp_priority_queue.c \

>                          odp_queue.c \

>                          odp_ring.c \

>                          odp_rwlock.c \

> diff --git a/platform/linux-generic/include/api/odp_timer.h

> b/platform/linux-generic/include/api/odp_timer.h

> index 01db839..82a1e05 100644

> --- a/platform/linux-generic/include/api/odp_timer.h

> +++ b/platform/linux-generic/include/api/odp_timer.h

> @@ -8,9 +8,193 @@

>  /**

>   * @file

>   *

> - * ODP timer

> + * ODP timer service

>   */

>

> +/** Example #1 Retransmission timer (e.g. for reliable connections)

> + @code

> +

> +//Create timer pool for reliable connections

> +#define SEC 1000000000ULL //1s expressed in nanoseconds

> +odp_timer_pool_t tcp_tpid =

> +    odp_timer_pool_create("TCP",

> +                       buffer_pool,

> +                       1000000,//resolution 1ms

> +                       0,//min tmo

> +                       7200 * SEC,//max tmo length 2hours

> +                       40000,//num_timers

> +                       true,//shared

> +                       ODP_CLOCK_CPU

> +                      );

> +if (tcp_tpid == ODP_TIMER_POOL_INVALID)

> +{

> +     //Failed to create timer pool => fatal error

> +}

> +

> +

> +//Setting up a new connection

> +//Allocate retransmission timeout (identical for supervision timeout)

> +//The user pointer points back to the connection context

> +conn->ret_tim = odp_timer_alloc(tcp_tpid, queue, conn);

> +//Check if all resources were successfully allocated

> +if (conn->ret_tim == ODP_TIMER_INVALID)

> +{

> +     //Failed to allocate all resources for connection => tear down

> +     //Destroy timeout

> +     odp_timer_free(conn->ret_tim);

> +     //Tear down connection

> +     ...

> +     return false;

> +}

> +//All necessary resources successfully allocated

> +//Compute initial retransmission length in timer ticks

> +conn->ret_len = odp_timer_ns_to_tick(tcp_tpid, 3 * SEC);//Per RFC1122

> +//Arm the timer

> +odp_timer_set_rel(conn->ret_tim, conn->ret_len);

> +return true;

> +

> +

> +//A packet for the connection has just been transmitted

> +//Reset the retransmission timer

> +odp_timer_set_rel(conn->ret_tim, conn->ret_len);

> +

> +

> +//A retransmission timeout buffer for the connection has been received

> +odp_timer_tmo_t tmo = odp_timeout_from_buffer(buf);

> +odp_timer_tmo_status_t stat = odp_timer_tmo_status(tmo);

> +//Check if timeout is fresh or stale, for stale timeouts we need to

> reset the

> +//timer

> +if (stat == ODP_TMO_FRESH) {

> +     //Fresh timeout, last transmitted packet not acked in time =>

> +       retransmit

> +     //Get connection from timeout event

> +     conn = odp_timer_get_userptr(tmo);

> +     //Retransmit last packet (e.g. TCP segment)

> +     ...

> +     //Re-arm timer using original delta value

> +     odp_timer_set_rel(conn->ret_tim, conn->ret_len);

> +} else if (stat == ODP_TMO_ORPHAN) {

> +     odp_free_buffer(buf);

> +     return;//Get out of here

> +} // else stat == ODP_TMO_STALE, do nothing

> +//Finished processing, return timeout

> +odp_timer_return_tmo(tmo);

> +

> + @endcode

> +*/

> +

> +/** Example #2 Periodic tick

> + @code

> +

> +//Create timer pool for periodic ticks

> +odp_timer_pool_t per_tpid =

> +    odp_timer_pool_create("periodic-tick",

> +                       buffer_pool,

> +                       1,//resolution 1ns

> +                       1,//minimum timeout length 1ns

> +                       1000000000,//maximum timeout length 1s

> +                       10,//num_timers

> +                       false,//not shared

> +                       ODP_CLOCK_CPU

> +                      );

> +if (per_tpid == ODP_TIMER_POOL_INVALID)

> +{

> +    //Failed to create timer pool => fatal error

> +}

> +

> +

> +//Allocate periodic timer

> +tim_1733 = odp_timer_alloc(per_tpid, queue, NULL);

> +//Check if all resources were successfully allocated

> +if (tim_1733 == ODP_TIMER_INVALID)

> +{

> +     //Failed to allocate all resources => tear down

> +     //Destroy timeout

> +     odp_timer_free(tim_1733);

> +     //Tear down other state

> +     ...

> +     return false;

> +}

> +//All necessary resources successfully allocated

> +//Compute tick period in timer ticks

> +period_1733 = odp_timer_ns_to_tick(per_tpid, 1000000000U /

> 1733U);//1733Hz

> +//Compute when next tick should expire

> +next_1733 = odp_timer_current_tick(per_tpid) + period_1733;

> +//Arm the periodic timer

> +odp_timer_set_abs(tim_1733, next_1733);

> +return true;

> +

> +

> +

> +//A periodic timer timeout has been received

> +odp_timer_tmo_t tmo = odp_timeout_from_buffer(buf);

> +//Get status of timeout

> +odp_timer_tmo_status_t stat = odp_timer_tmo_status(tmo);

> +//We expect the timeout is always fresh since we are not calling set or

> cancel

> +on active or expired timers in this example

> +assert(stat == ODP_TMO_FRESH);

> +//Do processing driven by timeout *before*

> +...

> +do {

> +     //Compute when the timer should expire next

> +     next_1733 += period_1733;

> +     //Check that this is in the future

> +     if (likely(next_1733 > odp_timer_current_tick(per_tpid))

> +     break;//Yes, done

> +     //Else we missed a timeout

> +     //Optionally attempt some recovery and/or logging of the problem

> +     ...

> +} while (0);

> +//Re-arm periodic timer

> +odp_timer_set_abs(tim_1733, next_1733);

> +//Or do processing driven by timeout *after*

> +...

> +odp_timer_return_tmo(tmo);

> +return;

> +

> + @endcode

> +*/

> +

> +/** Example #3 Tear down of flow

> + @code

> +//ctx points to flow context data structure owned by application

> +//Free the timer, cancelling any timeout

> +odp_timer_free(ctx->timer);//Any enqueued timeout will be made invalid

> +//Continue tearing down and eventually freeing context

> +...

> +return;

> +

> +//A timeout has been received, check status

> +odp_timer_tmo_t tmo = odp_timeout_from_buffer(buf);

> +switch (odp_timer_tmo_status(tmo))

> +{

> +    case ODP_TMO_FRESH :

> +     //A flow has timed out, tear it down

> +     //Find flow context from timeout

> +     ctx = (context *)odp_timer_get_userptr(tmo);

> +     //Free the supervision timer, any enqueued timeout will remain

> +     odp_timer_free(ctx->tim);

> +     //Free other flow related resources

> +     ...

> +     //Free the timeout buffer

> +     odp_buffer_free(buf);

> +     //Flow torn down

> +     break;

> +    case ODP_TMO_STALE :

> +     //A stale timeout was received, return timeout and update timer

> +     odp_timer_return_tmo(tmo);

> +     break;

> +    case ODP_TMO_ORPHAN :

> +     //Orphaned timeout (from previously torn down flow)

> +     //No corresponding timer or flow context

> +     //Free the timeout buffer

> +     odp_buffer_free(buf);

> +     break;

> +}

> +

> + @endcode

> +*/

> +

>  #ifndef ODP_TIMER_H_

>  #define ODP_TIMER_H_

>

> @@ -18,144 +202,408 @@

>  extern "C" {

>  #endif

>

> +#include <stdlib.h>

>  #include <odp_std_types.h>

>  #include <odp_buffer.h>

>  #include <odp_buffer_pool.h>

>  #include <odp_queue.h>

>

> +struct odp_timer_pool_s; /**< Forward declaration */

> +

> +/**

> +* ODP timer pool handle (platform dependent)

> +*/

> +typedef struct odp_timer_pool_s *odp_timer_pool_t;

> +

> +/**

> + * Invalid timer pool handle (platform dependent).

> + */

> +#define ODP_TIMER_POOL_INVALID NULL

>

>  /**

> - * ODP timer handle

> + * Clock sources for timers in timer pool.

>   */

> -typedef uint32_t odp_timer_t;

> +typedef enum odp_timer_clk_src_e {

> +     /** Use CPU clock as clock source for timers */

> +     ODP_CLOCK_CPU,

> +     /** Use external clock as clock source for timers */

> +     ODP_CLOCK_EXT

> +     /* Platform dependent which other clock sources exist */

> +} odp_timer_clk_src_t;

>

> -/** Invalid timer */

> -#define ODP_TIMER_INVALID 0

> +struct odp_timer_s; /**< Forward declaration */

>

> +/**

> +* ODP timer handle (platform dependent).

> +*/

> +typedef struct odp_timer_s *odp_timer_t;

>

>  /**

> - * ODP timeout handle

> + * Invalid timer handle (platform dependent).

>   */

> -typedef odp_buffer_t odp_timer_tmo_t;

> -

> -/** Invalid timeout */

> -#define ODP_TIMER_TMO_INVALID 0

> +#define ODP_TIMER_INVALID NULL

>

> +/**

> + * Return values of timer set calls.

> + */

> +typedef enum odp_timer_set_e {

> +     /** Timer set operation successful */

> +     ODP_TIMER_SET_SUCCESS,

> +     /** Timer set operation failed, expiration too early */

> +     ODP_TIMER_SET_TOOEARLY,

> +     /** Timer set operation failed, expiration too late */

> +     ODP_TIMER_SET_TOOLATE

> +} odp_timer_set_t;

>

>  /**

> - * Timeout notification

> + * Timeout event handle.

>   */

> -typedef odp_buffer_t odp_timeout_t;

> +typedef odp_buffer_t odp_timer_tmo_t;

>

> +/**

> + * Status of a timeout event.

> + */

> +typedef enum odp_timer_tmo_status_e {

> +     /** Timeout is fresh, process it and return timeout */

> +     ODP_TMO_FRESH,

> +     /** Timer reset or cancelled, just return timeout  */

> +     ODP_TMO_STALE,

> +     /** Timer deleted, return or free timeout */

> +     ODP_TMO_ORPHAN

> +} odp_timer_tmo_status_t;

>

>  /**

> - * Create a timer

> + * Create a timer pool

>   *

> - * Creates a new timer with requested properties.

> + * Create a new timer pool.

>   *

>   * @param name       Name

> - * @param pool       Buffer pool for allocating timeout notifications

> + * @param buf_pool   Buffer pool for allocating timeouts (and only timeouts)

>   * @param resolution Timeout resolution in nanoseconds

> - * @param min_tmo    Minimum timeout duration in nanoseconds

> - * @param max_tmo    Maximum timeout duration in nanoseconds

> + * @param min_tmo    Minimum relative timeout in nanoseconds

> + * @param max_tmo    Maximum relative timeout in nanoseconds

> + * @param num_timers Number of supported timers (minimum)

> + * @param shared     Shared or private timer pool.

> + *              Operations on shared timers will include the necessary

> + *              mutual exclusion, operations on private timers may not

> + *              (mutual exclusion is the responsibility of the caller).

> + * @param clk_src    Clock source to use

>   *

> - * @return Timer handle if successful, otherwise ODP_TIMER_INVALID

> + * @return Timer pool handle if successful, otherwise ODP_TIMER_POOL_INVALID

> + * and errno set

>   */

> -odp_timer_t odp_timer_create(const char *name, odp_buffer_pool_t pool,

> -                          uint64_t resolution, uint64_t min_tmo,

> -                          uint64_t max_tmo);

> +odp_timer_pool_t

> +odp_timer_pool_create(const char *name,

> +                   odp_buffer_pool_t buf_pool,

> +                   uint64_t resolution,

> +                   uint64_t min_tmo,

> +                   uint64_t max_tmo,

> +                   uint32_t num_timers,

> +                   bool shared,

> +                   odp_timer_clk_src_t clk_src);

> +
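
To make the new create/start split concrete for other reviewers, here is a
minimal usage sketch. The buffer pool handle tmo_pool and all numeric values
are my assumptions, not taken from the patch:

    /* Sketch: create a shared 1 ms resolution pool, then enable allocation.
     * tmo_pool is assumed to be an existing odp_buffer_pool_t holding
     * ODP_BUFFER_TYPE_TIMEOUT buffers. */
    odp_timer_pool_t tp;
    tp = odp_timer_pool_create("flow_timers", tmo_pool,
                               1000000ULL,     /* 1 ms resolution   */
                               10000000ULL,    /* 10 ms min timeout */
                               10000000000ULL, /* 10 s max timeout  */
                               100000,         /* num_timers        */
                               true,           /* shared            */
                               ODP_CLOCK_CPU);
    if (tp == ODP_TIMER_POOL_INVALID)
        ODP_ABORT("timer pool create failed\n");
    odp_timer_pool_start();
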

> +/**

> + * Start a timer pool

> + *

> + * Start all created timer pools, enabling the allocation of timers.

> + * The purpose of this call is to coordinate the creation of multiple timer

> + * pools that may use the same underlying HW resources.

> + * This function may be called multiple times.

> + */

> +void odp_timer_pool_start(void);

> +

> +/**

> + * Destroy a timer pool

> + *

> + * Destroy a timer pool, freeing all resources.

> + * All timers must have been freed.

> + *

> + * @param tpid  Timer pool identifier

> + */

> +void odp_timer_pool_destroy(odp_timer_pool_t tpid);

>

>  /**

>   * Convert timer ticks to nanoseconds

>   *

> - * @param timer Timer

> + * @param tpid  Timer pool identifier

>   * @param ticks Timer ticks

>   *

>   * @return Nanoseconds

>   */

> -uint64_t odp_timer_tick_to_ns(odp_timer_t timer, uint64_t ticks);

> +uint64_t odp_timer_tick_to_ns(odp_timer_pool_t tpid, uint64_t ticks);

>

>  /**

>   * Convert nanoseconds to timer ticks

>   *

> - * @param timer Timer

> + * @param tpid  Timer pool identifier

>   * @param ns    Nanoseconds

>   *

>   * @return Timer ticks

>   */

> -uint64_t odp_timer_ns_to_tick(odp_timer_t timer, uint64_t ns);

> +uint64_t odp_timer_ns_to_tick(odp_timer_pool_t tpid, uint64_t ns);

>

>  /**

> - * Timer resolution in nanoseconds

> + * Current tick value

>   *

> - * @param timer Timer

> + * @param tpid Timer pool identifier

>   *

> - * @return Resolution in nanoseconds

> + * @return Current time in timer ticks

> + */

> +uint64_t odp_timer_current_tick(odp_timer_pool_t tpid);

> +

> +/**

> + * ODP timer configurations

>   */

> -uint64_t odp_timer_resolution(odp_timer_t timer);

> +

> +typedef enum odp_timer_pool_conf_e {

> +     ODP_TIMER_NAME,      /**< Return name of timer pool */

> +     ODP_TIMER_RESOLUTION,/**< Return the timer resolution (in ns) */

> +     ODP_TIMER_MIN_TICKS, /**< Return the min supported rel timeout (ticks)*/

> +     ODP_TIMER_MAX_TICKS, /**< Return the max supported rel timeout (ticks)*/

> +     ODP_TIMER_NUM_TIMERS,/**< Return number of supported timers */

> +     ODP_TIMER_SHARED     /**< Return shared flag */

> +} odp_timer_pool_conf_t;

>

>  /**

> - * Maximum timeout in timer ticks

> + * Query different timer pool configurations, e.g.

> + *  Timer resolution in nanoseconds

> + *  Maximum timeout in timer ticks

> + *  Number of supported timers

> + *  Shared or private timer pool

>   *

> - * @param timer Timer

> + * @param tpid Timer pool identifier

> + * @param item Configuration item being queried

>   *

> - * @return Maximum timeout in timer ticks

> + * @return the requested piece of information or 0 for unknown item.

>   */

> -uint64_t odp_timer_maximum_tmo(odp_timer_t timer);

> +uintptr_t odp_timer_pool_query_conf(odp_timer_pool_t tpid,

> +                                 odp_timer_pool_conf_t item);

>
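
Since the old per-item getters are folded into this one query call, a short
sketch of the intended use (tp is an assumed pool handle):

    /* Sketch: read back pool configuration through the generic query */
    uint64_t res_ns  = odp_timer_pool_query_conf(tp, ODP_TIMER_RESOLUTION);
    uint64_t max_tck = odp_timer_pool_query_conf(tp, ODP_TIMER_MAX_TICKS);
    const char *name =
        (const char *)odp_timer_pool_query_conf(tp, ODP_TIMER_NAME);
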

>  /**

> - * Current timer tick

> + * Allocate a timer

>   *

> - * @param timer Timer

> + * Create a timer (allocating all necessary resources e.g. timeout event) from

> + * the timer pool.

>   *

> - * @return Current time in timer ticks

> + * @param tpid     Timer pool identifier

> + * @param queue    Destination queue for timeout notifications

> + * @param user_ptr User defined pointer or NULL (copied to timeouts)

> + *

> + * @return Timer handle if successful, otherwise ODP_TIMER_INVALID and

> + *      errno set.

>   */

> -uint64_t odp_timer_current_tick(odp_timer_t timer);

> +odp_timer_t odp_timer_alloc(odp_timer_pool_t tpid,

> +                         odp_queue_t queue,

> +                         void *user_ptr);

>
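
A per-flow sketch tying odp_timer_alloc(), the user pointer and a relative
arm together; flow and notif_queue are assumed application objects, not part
of the patch:

    /* Sketch: one timer per flow, timeouts delivered to notif_queue,
     * user_ptr lets the receiver find the flow context again. */
    flow->tim = odp_timer_alloc(tp, notif_queue, flow);
    if (flow->tim == ODP_TIMER_INVALID)
        ODP_ABORT("out of timers\n");
    if (odp_timer_set_rel(flow->tim,
                          odp_timer_ns_to_tick(tp, 5 * ODP_TIME_SEC)) !=
        ODP_TIMER_SET_SUCCESS)
        ODP_ERR("could not arm supervision timer\n");
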

>  /**

> - * Request timeout with an absolute timer tick

> + * Free a timer

> + *

> + * Free (destroy) a timer, freeing all associated resources (e.g. default

> + * timeout event). An expired and enqueued timeout event will not be freed.

> + * It is the responsibility of the application to free this timeout when it

> + * is received.

>   *

> - * When tick reaches tmo_tick, the timer enqueues the timeout

> notification into

> - * the destination queue.

> + * @param tim      Timer handle

> + */

> +void odp_timer_free(odp_timer_t tim);

> +

> +/**

> + * Set a timer (absolute time) with a user-defined timeout buffer

>   *

> - * @param timer    Timer

> - * @param tmo_tick Absolute timer tick value which triggers the timeout

> - * @param queue    Destination queue for the timeout notification

> - * @param buf      User defined timeout notification buffer. When

> - *                 ODP_BUFFER_INVALID, default timeout notification is

> used.

> + * Set (arm) the timer to expire at specific time. The user-defined

> + * buffer will be enqueued when the timer expires.

> + * Arming may fail (if the timer is in state EXPIRED), an earlier timeout

> + * will then be received. odp_timer_tmo_status() must be used to check if

> + * the received timeout is valid.

>   *

> - * @return Timeout handle if successful, otherwise ODP_TIMER_TMO_INVALID

> + * Note: any invalid parameters will be treated as programming errors and will

> + * cause the application to abort.

> + *

> + * @param tim      Timer

> + * @param abs_tck  Expiration time in absolute timer ticks

> + * @param user_buf The buffer to use as timeout event

> + *

> + * @return Success or failure code

>   */

> -odp_timer_tmo_t odp_timer_absolute_tmo(odp_timer_t timer, uint64_t tmo_tick,

> -                                    odp_queue_t queue, odp_buffer_t buf);

> +odp_timer_set_t odp_timer_set_abs_w_buf(odp_timer_t tim,

> +                                     uint64_t abs_tck,

> +                                     odp_buffer_t user_buf);

>
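
For the user-buffer variant, a hedged sketch (my_buf is an assumed,
application-owned odp_buffer_t; the timeout-return rules further down do not
apply to user-defined buffers):

    /* Sketch: deliver an application-owned buffer one second from now */
    uint64_t when = odp_timer_current_tick(tp) +
                    odp_timer_ns_to_tick(tp, ODP_TIME_SEC);
    if (odp_timer_set_abs_w_buf(tim, when, my_buf) != ODP_TIMER_SET_SUCCESS)
        ODP_ERR("failed to arm timer with user buffer\n");
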

>  /**

> - * Cancel a timeout

> + * Set a timer with an absolute expiration time

> + *

> + * Set (arm) the timer to expire at a specific time.

> + * Arming may fail (if the timer is in state EXPIRED), an earlier timeout

> + * will then be received. odp_timer_tmo_status() must be used to check if

> + * the received timeout is valid.

> + *

> + * Note: any invalid parameters will be treated as programming errors and will

> + * cause the application to abort.

>   *

> - * @param timer Timer

> - * @param tmo   Timeout to cancel

> + * @param tim     Timer

> + * @param abs_tck Expiration time in absolute timer ticks

>   *

> - * @return 0 if successful

> + * @return Success or failure code

>   */

> -int odp_timer_cancel_tmo(odp_timer_t timer, odp_timer_tmo_t tmo);

> +odp_timer_set_t odp_timer_set_abs(odp_timer_t tim, uint64_t abs_tck);

>

>  /**

> - * Convert buffer handle to timeout handle

> + * Set a timer with a relative expiration time and user-defined buffer.

>   *

> - * @param buf  Buffer handle

> + * Set (arm) the timer to expire at a relative future time.

> + * Arming may fail (if the timer is in state EXPIRED),

> + * an earlier timeout will then be received. odp_timer_tmo_status() must

> + * be used to check if the received timeout is valid.

>   *

> - * @return Timeout buffer handle

> + * Note: any invalid parameters will be treated as programming errors and will

> + * cause the application to abort.

> + *

> + * @param tim      Timer

> + * @param rel_tck  Expiration time in timer ticks relative to current time of

> + *              the timer pool the timer belongs to

> + * @param user_buf The buffer to use as timeout event

> + *

> + * @return Success or failure code

>   */

> -odp_timeout_t odp_timeout_from_buffer(odp_buffer_t buf);

> +odp_timer_set_t odp_timer_set_rel_w_buf(odp_timer_t tim,

> +                                     uint64_t rel_tck,

> +                                     odp_buffer_t user_buf);

> +/**

> + * Set a timer with a relative expiration time

> + *

> + * Set (arm) the timer to expire at a relative future time.

> + * Arming may fail (if the timer is in state EXPIRED),

> + * an earlier timeout will then be received. odp_timer_tmo_status() must

> + * be used to check if the received timeout is valid.

> + *

> + * Note: any invalid parameters will be treated as programming errors and will

> + * cause the application to abort.

> + *

> + * @param tim     Timer

> + * @param rel_tck Expiration time in timer ticks relative to current time of

> + *             the timer pool the timer belongs to

> + *

> + * @return Success or failure code

> + */

> +odp_timer_set_t odp_timer_set_rel(odp_timer_t tim, uint64_t rel_tck);

>

>  /**

> - * Return absolute timeout tick

> + * Cancel a timer

> + *

> + * Cancel a timer, preventing future expiration and delivery.

> + *

> + * A timer that has already expired and been enqueued for delivery may be

> + * impossible to cancel and will instead be delivered to the destination queue.

> + * Use odp_timer_tmo_status() to check whether a received timeout is fresh or

> + * stale (cancelled). Stale timeouts will automatically be recycled.

> + *

> + * Note: any invalid parameters will be treated as programming errors and will

> + * cause the application to abort.

> + *

> + * @param tim    Timer handle

> + */

> +void odp_timer_cancel(odp_timer_t tim);

> +

> +/**

> + * Translate from buffer to timeout

> + *

> + * Return the timeout handle that corresponds to the specified buffer handle.

> + * The buffer must be of type ODP_BUFFER_TYPE_TIMEOUT.

> + *

> + * @param buf   Buffer handle to translate.

> + *

> + * @return      The corresponding timeout handle.

> + */

> +static inline odp_timer_tmo_t odp_timeout_from_buffer(odp_buffer_t buf)

> +{

> +     if (odp_unlikely(odp_buffer_type(buf) != ODP_BUFFER_TYPE_TIMEOUT)) {

> +             ODP_ERR("Buffer type %u not timeout\n", buf);

> +             abort();

> +     }

> +     /* In this implementation, timeout == buffer */

> +     return (odp_timer_tmo_t)buf;

> +}

> +

> +/**

> + * Translate from timeout to buffer

> + *

> + * Return the buffer handle that corresponds to the specified timeout handle.

> + *

> + * @param tmo   Timeout handle to translate.

> + *

> + * @return      The corresponding buffer handle.

> + */

> +static inline odp_buffer_t odp_buffer_from_timeout(odp_timer_tmo_t tmo)

> +{

> +     /* In this implementation, buffer == timeout */

> +     return (odp_buffer_t)tmo;

> +}

> +

> +/**

> + * Return timeout to timer

> + *

> + * Return a received timeout for reuse with the parent timer.

> + * Note: odp_timer_return_tmo() must be called on all received timeouts!

> + * (Excluding user defined timeout buffers).

> + * The timeout must not be accessed after this call; the semantics are

> + * equivalent to those of a free call.

> + *

> + * @param tmo    Timeout

> + */

> +void odp_timer_return_tmo(odp_timer_tmo_t tmo);

> +

> +/**

> + * Return fresh/stale/orphan status of timeout.

> + *

> + * Check a received timeout for orphan status (i.e. parent timer freed) and

> + * staleness (i.e. parent timer has been reset or cancelled after the timeout

> + * expired and was enqueued).

> + * If the timeout is fresh, it should be processed.

> + * If the timeout is stale or orphaned, it should be ignored.

> + * All timeouts must be returned using the odp_timer_return_tmo() call.

> + *

> + * @param tmo    Timeout

> + *

> + * @return One of ODP_TMO_FRESH, ODP_TMO_STALE or ODP_TMO_ORPHAN.

> + */

> +odp_timer_tmo_status_t odp_timer_tmo_status(odp_timer_tmo_t tmo);

> +
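
Putting odp_timer_tmo_status() and odp_timer_return_tmo() together, the
receive path would look roughly like the sketch below; buf is assumed to
come from odp_schedule()/odp_queue_deq() and handle_expiration() is a
hypothetical application function:

    /* Sketch: classify and recycle a received timeout */
    if (odp_buffer_type(buf) == ODP_BUFFER_TYPE_TIMEOUT) {
        odp_timer_tmo_t tmo = odp_timeout_from_buffer(buf);
        if (odp_timer_tmo_status(tmo) == ODP_TMO_FRESH)
            handle_expiration(odp_timer_userptr(tmo));
        /* Fresh, stale and orphaned timeouts are all handed back */
        odp_timer_return_tmo(tmo);
    }
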

> +/**

> + * Get timer handle

> + *

> + * Return Handle of parent timer.

> + *

> + * @param tmo   Timeout

> + *

> + * @return Timer handle or ODP_TIMER_INVALID for orphaned timeouts.

> + *         Note that the parent timer could be freed by some other thread

> + *         at any time and thus the timeout becomes orphaned.

> + */

> +odp_timer_t odp_timer_handle(odp_timer_tmo_t tmo);

> +

> +/**

> + * Get expiration time

> + *

> + * Return (requested) expiration time of timeout.

> + *

> + * @param tmo   Timeout

> + *

> + * @return Expiration time

> + */

> +uint64_t odp_timer_expiration(odp_timer_tmo_t tmo);

> +

> +/**

> + * Get user pointer

> + *

> + * Return User pointer of timer associated with timeout.

> + * The user pointer is often used to point to some associated context.

>   *

> - * @param tmo Timeout buffer handle

> + * @param tmo   Timeout

>   *

> - * @return Absolute timeout tick

> + * @return User pointer

>   */

> -uint64_t odp_timeout_tick(odp_timeout_t tmo);

> +void *odp_timer_userptr(odp_timer_tmo_t tmo);

>

>  #ifdef __cplusplus

>  }

> diff --git a/platform/linux-generic/include/odp_priority_queue_internal.h b/platform/linux-generic/include/odp_priority_queue_internal.h

> new file mode 100644

> index 0000000..7d7f3a2

> --- /dev/null

> +++ b/platform/linux-generic/include/odp_priority_queue_internal.h

> @@ -0,0 +1,108 @@

> +#ifndef _PRIORITY_QUEUE_H

> +#define _PRIORITY_QUEUE_H

> +

> +#include <assert.h>

> +#include <stddef.h>

> +#include <stdint.h>

> +#include <stdbool.h>

> +#include <odp_align.h>

> +

> +#define INVALID_INDEX ~0U

> +#define INVALID_PRIORITY ((pq_priority_t)~0ULL)

> +

> +typedef uint64_t pq_priority_t;

> +

> +struct heap_node;

> +

> +typedef struct priority_queue {

> +     uint32_t max_elems;/* Number of elements in heap */

> +     /* Number of registered elements (active + inactive) */

> +     uint32_t reg_elems;

> +     uint32_t num_elems;/* Number of active elements */

> +     struct heap_node *heap;

> +     struct heap_node *org_ptr;

> +} priority_queue ODP_ALIGNED(sizeof(uint64_t));

> +

> +/* The user gets a pointer to this structure */

> +typedef struct {

> +     /* Set when pq_element registered with priority queue */

> +     priority_queue *pq;

> +     uint32_t index;/* Index into heap array */

> +     pq_priority_t prio;

> +} pq_element;

> +

> +/*** Operations on pq_element ***/

> +

> +static inline void pq_element_con(pq_element *this)

> +{

> +     this->pq = NULL;

> +     this->index = INVALID_INDEX;

> +     this->prio = 0U;

> +}

> +

> +static inline void pq_element_des(pq_element *this)

> +{

> +     (void)this;

> +     assert(this->index == INVALID_INDEX);

> +}

> +

> +static inline priority_queue *get_pq(const pq_element *this)

> +{

> +     return this->pq;

> +}

> +

> +static inline pq_priority_t get_prio(const pq_element *this)

> +{

> +     return this->prio;

> +}

> +

> +static inline uint32_t get_index(const pq_element *this)

> +{

> +     return this->index;

> +}

> +

> +static inline bool is_active(const pq_element *this)

> +{

> +     return this->index != INVALID_INDEX;

> +}

> +

> +/*** Operations on priority_queue ***/

> +

> +extern uint32_t pq_smallest_child(priority_queue *, uint32_t, pq_priority_t);

> +extern void pq_bubble_down(priority_queue *, pq_element *);

> +extern void pq_bubble_up(priority_queue *, pq_element *);

> +

> +static inline bool valid_index(priority_queue *this, uint32_t idx)

> +{

> +     return idx < this->num_elems;

> +}

> +

> +extern void priority_queue_con(priority_queue *, uint32_t _max_elems);

> +extern void priority_queue_des(priority_queue *);

> +

> +/* Register pq_element with priority queue */

> +/* Return false if priority queue full */

> +extern bool pq_register_element(priority_queue *, pq_element *);

> +

> +/* Activate and add pq_element to priority queue */

> +/* Element must be disarmed */

> +extern void pq_activate_element(priority_queue *, pq_element *, pq_priority_t);

> +

> +/* Reset (increase) priority for pq_element */

> +/* Element may be active or inactive (released) */

> +extern void pq_reset_element(priority_queue *, pq_element *, pq_priority_t);

> +

> +/* Deactivate and remove element from priority queue */

> +/* Element may be active or inactive (released) */

> +extern void pq_deactivate_element(priority_queue *, pq_element *);

> +

> +/* Unregister pq_element */

> +extern void pq_unregister_element(priority_queue *, pq_element *);

> +

> +/* Return priority of first element (lowest numerical value) */

> +extern pq_priority_t pq_first_priority(const priority_queue *);

> +

> +/* Deactivate and return first element if its prio is <= threshold */

> +extern pq_element *pq_release_element(priority_queue *, pq_priority_t thresh);

> +

> +#endif /* _PRIORITY_QUEUE_H */

> diff --git a/platform/linux-generic/include/odp_timer_internal.h b/platform/linux-generic/include/odp_timer_internal.h

> index ad28f53..461f28c 100644

> --- a/platform/linux-generic/include/odp_timer_internal.h

> +++ b/platform/linux-generic/include/odp_timer_internal.h

> @@ -1,4 +1,4 @@

> -/* Copyright (c) 2013, Linaro Limited

> +/* Copyright (c) 2014, Linaro Limited

>   * All rights reserved.

>   *

>   * SPDX-License-Identifier:     BSD-3-Clause

> @@ -8,72 +8,51 @@

>  /**

>   * @file

>   *

> - * ODP timer timeout descriptor - implementation internal

> + * ODP timeout descriptor - implementation internal

>   */

>

>  #ifndef ODP_TIMER_INTERNAL_H_

>  #define ODP_TIMER_INTERNAL_H_

>

> -#ifdef __cplusplus

> -extern "C" {

> -#endif

> -

> -#include <odp_std_types.h>

> -#include <odp_queue.h>

> -#include <odp_buffer.h>

> +#include <odp_align.h>

> +#include <odp_debug.h>

>  #include <odp_buffer_internal.h>

>  #include <odp_buffer_pool_internal.h>

>  #include <odp_timer.h>

>

> -struct timeout_t;

> -

> -typedef struct timeout_t {

> -     struct timeout_t *next;

> -     int               timer_id;

> -     int               tick;

> -     uint64_t          tmo_tick;

> -     odp_queue_t       queue;

> -     odp_buffer_t      buf;

> -     odp_buffer_t      tmo_buf;

> -} timeout_t;

> -

> -

> -struct odp_timeout_hdr_t;

> -

>  /**

> - * Timeout notification header

> + * Internal Timeout header

>   */

> -typedef struct odp_timeout_hdr_t {

> +typedef struct {

> +     /* common buffer header */

>       odp_buffer_hdr_t buf_hdr;

>

> -     timeout_t meta;

> -

> -     uint8_t buf_data[];

> +     /* Requested expiration time */

> +     uint64_t expiration;

> +     /* User ptr inherited from parent timer */

> +     void *user_ptr;

> +     /* Parent timer */

> +     odp_timer_t timer;

> +     /* Tag inherited from parent timer at time of expiration */

> +     uint32_t tag;

> +     /* Gen-cnt inherited from parent timer at time of creation */

> +     uint16_t gencnt;

> +     uint16_t pad;

> +     uint8_t buf_data[0];

>  } odp_timeout_hdr_t;

>

> -

> -

>  ODP_STATIC_ASSERT(sizeof(odp_timeout_hdr_t) ==

> -        ODP_OFFSETOF(odp_timeout_hdr_t, buf_data),

> -        "ODP_TIMEOUT_HDR_T__SIZE_ERR");

> -

> +               ODP_OFFSETOF(odp_timeout_hdr_t, buf_data),

> +               "sizeof(odp_timeout_hdr_t) ==

> ODP_OFFSETOF(odp_timeout_hdr_t, buf_data)");

>  ODP_STATIC_ASSERT(sizeof(odp_timeout_hdr_t) % sizeof(uint64_t) == 0,

> -        "ODP_TIMEOUT_HDR_T__SIZE_ERR2");

> -

> +               "sizeof(odp_timeout_hdr_t) % sizeof(uint64_t) == 0");

>
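
The tag and gencnt fields above are what drive the fresh/stale/orphan
classification; a sketch of the intended logic (not the literal
implementation, which lives in odp_timer.c):

    /* Sketch: gencnt changes when the parent timer is freed (orphan),
     * tag changes when it is reset or cancelled (stale). */
    static odp_timer_tmo_status_t classify(const odp_timeout_hdr_t *hdr)
    {
        const struct odp_timer_s *tim = hdr->timer;
        if (hdr->gencnt != tim->gencnt)
            return ODP_TMO_ORPHAN;
        if (hdr->tag != tim->tag)
            return ODP_TMO_STALE;
        return ODP_TMO_FRESH;
    }
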

>  /**

> - * Return timeout header

> + * Return the timeout header

>   */

> -static inline odp_timeout_hdr_t *odp_timeout_hdr(odp_timeout_t tmo)

> +static inline odp_timeout_hdr_t *odp_timeout_hdr(odp_buffer_t buf)

>  {

> -     odp_buffer_hdr_t *buf_hdr = odp_buf_to_hdr((odp_buffer_t)tmo);

> -     return (odp_timeout_hdr_t *)(uintptr_t)buf_hdr;

> -}

> -

> -

> -

> -#ifdef __cplusplus

> +     return (odp_timeout_hdr_t *)odp_buf_to_hdr(buf);

>  }

> -#endif

>

>  #endif

> diff --git a/platform/linux-generic/odp_priority_queue.c b/platform/linux-generic/odp_priority_queue.c

> new file mode 100644

> index 0000000..b72c26f

> --- /dev/null

> +++ b/platform/linux-generic/odp_priority_queue.c

> @@ -0,0 +1,283 @@

> +#define NDEBUG /* Enabled by default by ODP build system */

> +#include <assert.h>

> +#include <unistd.h>

> +#include <stdlib.h>

> +#include <string.h>

> +#include <strings.h>

> +#include <odp_hints.h>

> +#include <odp_align.h>

> +#include <odp_debug.h>

> +

> +#include "odp_priority_queue_internal.h"

> +

> +

> +#define NUM_CHILDREN 4

> +#define CHILD(n) (NUM_CHILDREN * (n) + 1)

> +#define PARENT(n) (((n) - 1) / NUM_CHILDREN)

> +
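
A quick sanity check of the 4-ary heap index math (illustrative assertions
only):

    /* With NUM_CHILDREN == 4, node n's children are CHILD(n)..CHILD(n)+3 */
    assert(CHILD(0) == 1 && CHILD(1) == 5);
    assert(PARENT(1) == 0 && PARENT(4) == 0);
    assert(PARENT(5) == 1 && PARENT(8) == 1);
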

> +/* Internal nodes in the array */

> +typedef struct heap_node {

> +     pq_element *elem;

> +     /* Copy of elem->prio so we avoid unnecessary dereferencing */

> +     pq_priority_t prio;

> +} heap_node;

> +

> +static void pq_assert_heap(priority_queue *this);

> +

> +#define ALIGNMENT(p) (1U << ((unsigned)ffs((int)p) - 1U))

> +

> +void priority_queue_con(priority_queue *this, uint32_t _max_elems)

> +{

> +     this->max_elems = _max_elems;

> +     this->reg_elems = 0;

> +     this->num_elems = 0;

> +     this->org_ptr = malloc((_max_elems + 64 / sizeof(heap_node)) *

> +                            sizeof(heap_node));

> +     if (odp_unlikely(this->org_ptr == NULL)) {

> +             ODP_ERR("malloc failed\n");

> +             abort();

> +     }

> +     this->heap = this->org_ptr;

> +     assert((size_t)&this->heap[1] % 8 == 0);

> +     /* Increment base address until first child (index 1) is cache line */

> +     /* aligned and thus all children (e.g. index 1-4) stored in the */

> +     /* same cache line. We are not interested in the alignment of */

> +     /* heap[0] as this is a lone node */

> +     while ((size_t)&this->heap[1] % ODP_CACHE_LINE_SIZE != 0) {

> +             /* Cast to ptr to struct member with the greatest alignment */

> +             /* requirement */

> +             this->heap = (heap_node *)((pq_priority_t *)this->heap + 1);

> +     }

> +     pq_assert_heap(this);

> +}

> +

> +void priority_queue_des(priority_queue *this)

> +{

> +     pq_assert_heap(this);

> +     free(this->org_ptr);

> +}

> +

> +#ifndef NDEBUG

> +static uint32_t

> +pq_assert_elem(priority_queue *this, uint32_t index, bool recurse)

> +{

> +     uint32_t num = 1;

> +     const pq_element *elem = this->heap[index].elem;

> +     assert(elem->index == index);

> +     assert(elem->prio == this->heap[index].prio);

> +     uint32_t child = CHILD(index);

> +     uint32_t i;

> +     for (i = 0; i < NUM_CHILDREN; i++, child++) {

> +             if (valid_index(this, child)) {

> +                     assert(this->heap[child].elem != NULL);

> +                     assert(this->heap[child].prio >= elem->prio);

> +                     if (recurse)

> +                             num += pq_assert_elem(this, child, recurse);

> +             }

> +     }

> +     return num;

> +}

> +#endif

> +

> +static void

> +pq_assert_heap(priority_queue *this)

> +{

> +     (void)this;

> +#ifndef NDEBUG

> +     uint32_t num = 0;

> +     if (odp_likely(this->num_elems != 0)) {

> +             assert(this->heap[0].elem != NULL);

> +             num += pq_assert_elem(this, 0, true);

> +     }

> +     assert(num == this->num_elems);

> +     unsigned i;

> +     for (i = 0; i < this->num_elems; i++) {

> +             assert(this->heap[i].elem != NULL);

> +             assert(this->heap[i].prio != INVALID_PRIORITY);

> +     }

> +#endif

> +}

> +

> +/* Bubble up to proper position */

> +void

> +pq_bubble_up(priority_queue *this, pq_element *elem)

> +{

> +     assert(this->heap[elem->index].elem == elem);

> +     assert(this->heap[elem->index].prio == elem->prio);

> +     uint32_t current = elem->index;

> +     pq_priority_t prio = elem->prio;

> +     assert(current == 0 || this->heap[PARENT(current)].elem != NULL);

> +     /* Move up into proper position */

> +     while (current != 0 && this->heap[PARENT(current)].prio > prio) {

> +             uint32_t parent = PARENT(current);

> +             assert(this->heap[parent].elem != NULL);

> +             /* Swap current with parent */

> +             /* 1) Move parent down */

> +             this->heap[current].elem = this->heap[parent].elem;

> +             this->heap[current].prio = this->heap[parent].prio;

> +             this->heap[current].elem->index = current;

> +             /* 2) Move current up to parent */

> +             this->heap[parent].elem = elem;

> +             this->heap[parent].prio = prio;

> +             this->heap[parent].elem->index = parent;

> +             /* Continue moving elem until it is in the right place */

> +             current = parent;

> +     }

> +     pq_assert_heap(this);

> +}

> +

> +/* Find the smallest child that is smaller than the specified priority */

> +/* Very hot function, can we decrease the number of cache misses? */

> +uint32_t pq_smallest_child(priority_queue *this,

> +                        uint32_t index,

> +                        pq_priority_t val)

> +{

> +     uint32_t smallest = index;

> +     uint32_t child = CHILD(index);

> +#if NUM_CHILDREN == 4

> +     /* Unroll loop when all children exist */

> +     if (odp_likely(valid_index(this, child + 3))) {

> +             if (this->heap[child + 0].prio < val)

> +                     val = this->heap[smallest = child + 0].prio;

> +             if (this->heap[child + 1].prio < val)

> +                     val = this->heap[smallest = child + 1].prio;

> +             if (this->heap[child + 2].prio < val)

> +                     val = this->heap[smallest = child + 2].prio;

> +             if (this->heap[child + 3].prio < val)

> +                     (void)this->heap[smallest = child + 3].prio;

> +             return smallest;

> +     }

> +#endif

> +     uint32_t i;

> +     for (i = 0; i < NUM_CHILDREN; i++) {

> +             if (odp_unlikely(!valid_index(this, child + i)))

> +                     break;

> +             if (this->heap[child + i].prio < val) {

> +                     smallest = child + i;

> +                     val = this->heap[smallest].prio;

> +             }

> +     }

> +     return smallest;

> +}

> +

> +/* Very hot function, can it be optimised? */

> +void

> +pq_bubble_down(priority_queue *this, pq_element *elem)

> +{

> +     assert(this->heap[elem->index].elem == elem);

> +     assert(this->heap[elem->index].prio == elem->prio);

> +     uint32_t current = elem->index;

> +     pq_priority_t prio = elem->prio;

> +     for (;;) {

> +             uint32_t child = pq_smallest_child(this, current, prio);

> +             if (current == child) {

> +                     /* No smaller child, we are done */

> +                     pq_assert_heap(this);

> +                     return;

> +             }

> +             /* Element larger than smaller child, must move down */

> +             assert(this->heap[child].elem != NULL);

> +             /* 1) Move child up to current */

> +             this->heap[current].elem = this->heap[child].elem;

> +             this->heap[current].prio = this->heap[child].prio;

> +             /* 2) Move current down to child */

> +             this->heap[child].elem = elem;

> +             this->heap[child].prio = prio;

> +             this->heap[child].elem->index = child;

> +

> +             this->heap[current].elem->index = current; /* cache misses! */

> +             /* Continue moving element until it is in the right place */

> +             current = child;

> +     }

> +}

> +

> +bool

> +pq_register_element(priority_queue *this, pq_element *elem)

> +{

> +     if (odp_likely(this->reg_elems < this->max_elems)) {

> +             elem->pq = this;

> +             this->reg_elems++;

> +             return true;

> +     }

> +     return false;

> +}

> +

> +void

> +pq_unregister_element(priority_queue *this, pq_element *elem)

> +{

> +     assert(elem->pq == this);

> +     if (is_active(elem))

> +             pq_deactivate_element(this, elem);

> +     this->reg_elems--;

> +}

> +

> +void

> +pq_activate_element(priority_queue *this, pq_element *elem, pq_priority_t prio)

> +{

> +     assert(elem->index == INVALID_INDEX);

> +     /* Insert element at end */

> +     uint32_t index = this->num_elems++;

> +     this->heap[index].elem = elem;

> +     this->heap[index].prio = prio;

> +     elem->index = index;

> +     elem->prio = prio;

> +     pq_bubble_up(this, elem);

> +}

> +

> +void

> +pq_deactivate_element(priority_queue *this, pq_element *elem)

> +{

> +     assert(elem->pq == this);

> +     if (odp_likely(is_active(elem))) {

> +             /* Swap element with last element */

> +             uint32_t current = elem->index;

> +             uint32_t last = --this->num_elems;

> +             if (odp_likely(last != current)) {

> +                     /* Move last element to current */

> +                     this->heap[current].elem = this->heap[last].elem;

> +                     this->heap[current].prio = this->heap[last].prio;

> +                     this->heap[current].elem->index = current;

> +                     /* Bubble down old 'last' element to its proper place */

> +                     if (this->heap[current].prio < elem->prio)

> +                             pq_bubble_up(this, this->heap[current].elem);

> +                     else

> +                             pq_bubble_down(this, this->heap[current].elem);

> +             }

> +             elem->index = INVALID_INDEX;

> +             pq_assert_heap(this);

> +     }

> +}

> +

> +void

> +pq_reset_element(priority_queue *this, pq_element *elem, pq_priority_t prio)

> +{

> +     assert(prio != INVALID_PRIORITY);

> +     if (odp_likely(is_active(elem))) {

> +             assert(prio >= elem->prio);

> +             elem->prio = prio;

> +             this->heap[elem->index].prio = prio;/* cache misses here! */

> +             pq_bubble_down(this, elem);

> +             pq_assert_heap(this);

> +     } else {

> +             pq_activate_element(this, elem, prio);

> +     }

> +}

> +

> +pq_priority_t pq_first_priority(const priority_queue *this)

> +{

> +     return this->num_elems != 0 ? this->heap[0].prio : INVALID_PRIORITY;

> +}

> +

> +pq_element *

> +pq_release_element(priority_queue *this, pq_priority_t threshold)

> +{

> +     if (odp_likely(this->num_elems != 0 &&

> +                    this->heap[0].prio <= threshold)) {

> +             pq_element *elem = this->heap[0].elem;

> +             /* Remove element from heap */

> +             pq_deactivate_element(this, elem);

> +             assert(elem->prio <= threshold);

> +             return elem;

> +     }

> +     return NULL;

> +}
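
For reviewers, the intended call sequence for this internal queue is roughly
the sketch below; pq, elem, max_timers and the tick variables are assumed
caller-side storage, mirroring how odp_timer.c drives it:

    priority_queue pq;
    pq_element elem;
    priority_queue_con(&pq, max_timers);      /* construct once       */
    pq_element_con(&elem);
    pq_register_element(&pq, &elem);          /* reserve a slot       */
    pq_reset_element(&pq, &elem, expiry_tck); /* (re)arm at a tick    */
    pq_element *e;
    while ((e = pq_release_element(&pq, now_tck)) != NULL)
        ;                                     /* e expired by now_tck */
    pq_unregister_element(&pq, &elem);        /* before destruction   */
    priority_queue_des(&pq);
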

> diff --git a/platform/linux-generic/odp_timer.c b/platform/linux-generic/odp_timer.c

> index 313c713..0e5071c 100644

> --- a/platform/linux-generic/odp_timer.c

> +++ b/platform/linux-generic/odp_timer.c

> @@ -4,428 +4,713 @@

>   * SPDX-License-Identifier:     BSD-3-Clause

>   */

>

> -#include <odp_timer.h>

> -#include <odp_timer_internal.h>

> -#include <odp_time.h>

> -#include <odp_buffer_pool_internal.h>

> -#include <odp_internal.h>

> -#include <odp_atomic.h>

> -#include <odp_spinlock.h>

> -#include <odp_sync.h>

> -#include <odp_debug.h>

> -

> -#include <signal.h>

> -#include <time.h>

> +/**

> + * @file

> + *

> + * ODP timer service

> + *

> + */

>

> +#include <assert.h>

> +#include <errno.h>

>  #include <string.h>

> -

> -#define NUM_TIMERS    1

> -#define MAX_TICKS     1024

> -#define MAX_RES       ODP_TIME_SEC

> -#define MIN_RES       (100*ODP_TIME_USEC)

> -

> -

> -typedef struct {

> -     odp_spinlock_t lock;

> -     timeout_t      *list;

> -} tick_t;

> -

> -typedef struct {

> -     int               allocated;

> -     volatile int      active;

> -     volatile uint64_t cur_tick;

> -     timer_t           timerid;

> -     odp_timer_t       timer_hdl;

> -     odp_buffer_pool_t pool;

> -     uint64_t          resolution_ns;

> -     uint64_t          max_ticks;

> -     tick_t            tick[MAX_TICKS];

> -

> -} timer_ring_t;

> -

> -typedef struct {

> -     odp_spinlock_t lock;

> -     int            num_timers;

> -     timer_ring_t   timer[NUM_TIMERS];

> -

> -} timer_global_t;

> -

> -/* Global */

> -static timer_global_t odp_timer;

> -

> -static void add_tmo(tick_t *tick, timeout_t *tmo)

> +#include <stdlib.h>

> +#include <time.h>

> +#include <signal.h>

> +#include "odp_std_types.h"

> +#include "odp_buffer.h"

> +#include "odp_buffer_pool.h"

> +#include "odp_queue.h"

> +#include "odp_hints.h"

> +#include "odp_sync.h"

> +#include "odp_ticketlock.h"

> +#include "odp_debug.h"

> +#include "odp_align.h"

> +#include "odp_shared_memory.h"

> +#include "odp_hints.h"

> +#include "odp_internal.h"

> +#include "odp_time.h"

> +#include "odp_timer.h"

> +#include "odp_timer_internal.h"

> +#include "odp_priority_queue_internal.h"

> +

> +/******************************************************************************

> + * Translation between timeout and timeout header

> + *****************************************************************************/

> +

> +static inline odp_timeout_hdr_t *odp_tmo_to_hdr(odp_timer_tmo_t tmo)

>  {

> -     odp_spinlock_lock(&tick->lock);

> -

> -     tmo->next  = tick->list;

> -     tick->list = tmo;

> +     odp_buffer_t buf = odp_buffer_from_timeout(tmo);

> +     odp_timeout_hdr_t *tmo_hdr = (odp_timeout_hdr_t

> *)odp_buf_to_hdr(buf);

> +     return tmo_hdr;

> +}

>

> -     odp_spinlock_unlock(&tick->lock);

> +/******************************************************************************

> + * odp_timer abstract datatype

> + *****************************************************************************/

> +

> +typedef struct odp_timer_s {

> +     pq_element pqelem;/* Base class */

> +     uint64_t req_tmo;/* Requested timeout tick */

> +     odp_buffer_t tmo_buf;/* ODP_BUFFER_INVALID if timeout enqueued */

> +     odp_queue_t queue;/* ODP_QUEUE_INVALID if timer is free */

> +     uint32_t tag;/* Reusing tag as next pointer/index when timer is free */

> +     uint16_t gencnt;/* Smaller to make place for user_buf flag */

> +     unsigned int user_buf:1; /* User-defined buffer? */

> +} odp_timer;

> +

> +/* Constructor */

> +static inline void odp_timer_con(odp_timer *this)

> +{

> +     pq_element_con(&this->pqelem);

> +     this->tmo_buf = ODP_BUFFER_INVALID;

> +     this->queue = ODP_QUEUE_INVALID;

> +     this->gencnt = 0;

>  }

>

> -static timeout_t *rem_tmo(tick_t *tick)

> +/* Destructor */

> +static inline void odp_timer_des(odp_timer *this)

>  {

> -     timeout_t *tmo;

> +     assert(this->tmo_buf == ODP_BUFFER_INVALID);

> +     assert(this->queue == ODP_QUEUE_INVALID);

> +     pq_element_des(&this->pqelem);

> +}

>

> -     odp_spinlock_lock(&tick->lock);

> +/* Setup when timer is allocated */

> +static void setup(odp_timer *this,

> +               odp_queue_t _q,

> +               void *_up,

> +               odp_buffer_t _tmo)

> +{

> +     this->req_tmo = INVALID_PRIORITY;

> +     this->tmo_buf = _tmo;

> +     this->queue = _q;

> +     this->tag = 0;

> +     this->user_buf = false;

> +     /* Initialise constant fields of timeout event */

> +     odp_timeout_hdr_t *tmo_hdr =

> +             odp_tmo_to_hdr(odp_timeout_from_buffer(this->tmo_buf));

> +     tmo_hdr->gencnt = this->gencnt;

> +     tmo_hdr->timer = this;

> +     tmo_hdr->user_ptr = _up;

> +     /* tmo_hdr->tag set at expiration time */

> +     /* tmo_hdr->expiration set at expiration time */

> +     assert(this->queue != ODP_QUEUE_INVALID);

> +}

>

> -     tmo = tick->list;

> +/* Teardown when timer is freed */

> +static odp_buffer_t teardown(odp_timer *this)

> +{

> +     /* Increase generation count to make any pending timeout(s) orphaned */

> +     ++this->gencnt;

> +     odp_buffer_t buf = this->tmo_buf;

> +     this->tmo_buf = ODP_BUFFER_INVALID;

> +     this->queue = ODP_QUEUE_INVALID;

> +     return buf;

> +}

>

> -     if (tmo)

> -             tick->list = tmo->next;

> +static inline uint32_t get_next_free(odp_timer *this)

> +{

> +     assert(this->queue == ODP_QUEUE_INVALID);

> +     return this->tag;

> +}

>

> -     odp_spinlock_unlock(&tick->lock);

> +static inline void set_next_free(odp_timer *this, uint32_t nf)

> +{

> +     assert(this->queue == ODP_QUEUE_INVALID);

> +     this->tag = nf;

> +}

>

> -     if (tmo)

> -             tmo->next = NULL;

> +/******************************************************************************

> + * odp_timer_pool abstract datatype

> + * Includes alloc and free timer

> + *****************************************************************************/

> +

> +typedef struct odp_timer_pool_s {

> +     priority_queue pq;

> +     uint64_t cur_tick;/* Current tick value */

> +     uint64_t min_tick;/* Current expiration lower bound */

> +     uint64_t max_tick;/* Current expiration higher bound */

> +     bool shared;

> +     odp_ticketlock_t lock;

> +     const char *name;

> +     odp_buffer_pool_t buf_pool;

> +     uint64_t resolution_ns;

> +     uint64_t min_tmo_tck;

> +     uint64_t max_tmo_tck;

> +     odp_timer *timers;

> +     uint32_t num_alloc;/* Current number of allocated timers */

> +     uint32_t max_timers;/* Max number of timers */

> +     uint32_t first_free;/* 0..max_timers-1 => free timer */

> +     timer_t timerid;

> +     odp_timer_clk_src_t clk_src;

> +} odp_timer_pool;

> +

> +/* Forward declarations */

> +static void timer_init(odp_timer_pool *tp);

> +static void timer_exit(odp_timer_pool *tp);

> +

> +static void odp_timer_pool_con(odp_timer_pool *this,

> +                            const char *_n,

> +                            odp_buffer_pool_t _bp,

> +                            uint64_t _r,

> +                            uint64_t _mint,

> +                            uint64_t _maxt,

> +                            uint32_t _mt,

> +                            bool _s,

> +                            odp_timer_clk_src_t _cs)

> +{

> +     priority_queue_con(&this->pq, _mt);

> +     this->cur_tick = 0;

> +     this->shared = _s;

> +     this->name = strdup(_n);

> +     this->buf_pool = _bp;

> +     this->resolution_ns = _r;

> +     this->min_tmo_tck = odp_timer_ns_to_tick(this, _mint);

> +     this->max_tmo_tck = odp_timer_ns_to_tick(this, _maxt);

> +     this->min_tick = this->cur_tick + this->min_tmo_tck;

> +     this->max_tick = this->cur_tick + this->max_tmo_tck;

> +     this->num_alloc = 0;

> +     this->max_timers = _mt;

> +     this->first_free = 0;

> +     this->clk_src = _cs;

> +     this->timers = malloc(sizeof(odp_timer) * this->max_timers);

> +     if (this->timers == NULL)

> +             ODP_ABORT("%s: malloc failed\n", _n);

> +     uint32_t i;

> +     for (i = 0; i < this->max_timers; i++)

> +             odp_timer_con(&this->timers[i]);

> +     for (i = 0; i < this->max_timers; i++)

> +             set_next_free(&this->timers[i], i + 1);

> +     odp_ticketlock_init(&this->lock);

> +     if (this->clk_src == ODP_CLOCK_CPU)

> +             timer_init(this);

> +     /* Make sure timer pool initialisation is globally observable */

> +     /* before we return a pointer to it */

> +     odp_sync_stores();

> +}

>

> -     return tmo;

> +static odp_timer_pool *odp_timer_pool_new(

> +     const char *_n,

> +     odp_buffer_pool_t _bp,

> +     uint64_t _r,

> +     uint64_t _mint,

> +     uint64_t _maxt,

> +     uint32_t _mt,

> +     bool _s,

> +     odp_timer_clk_src_t _cs)

> +{

> +     odp_timer_pool *this = malloc(sizeof(odp_timer_pool));

> +     if (odp_unlikely(this == NULL))

> +             ODP_ABORT("%s: timer pool malloc failed\n", _n);

> +     odp_timer_pool_con(this, _n, _bp, _r, _mint, _maxt, _mt, _s, _cs);

> +     return this;

>  }

>

> -/**

> - * Search and delete tmo entry from timeout list

> - * return -1 : on error.. handle not in list

> - *           0 : success

> - */

> -static int find_and_del_tmo(timeout_t **tmo, odp_timer_tmo_t handle)

> +static void odp_timer_pool_des(odp_timer_pool *this)

>  {

> -     timeout_t *cur, *prev;

> -     prev = NULL;

> +     if (this->shared)

> +             odp_ticketlock_lock(&this->lock);

> +     if (this->num_alloc != 0) {

> +             /* It's a programming error to attempt to destroy a */

> +             /* timer pool which is still in use */

> +             ODP_ABORT("%s: timers in use\n", this->name);

> +     }

> +     if (this->clk_src == ODP_CLOCK_CPU)

> +             timer_exit(this);

> +     uint32_t i;

> +     for (i = 0; i < this->max_timers; i++)

> +             odp_timer_des(&this->timers[i]);

> +     free(this->timers);

> +     priority_queue_des(&this->pq);

> +     odp_sync_stores();

> +}

>

> -     for (cur = *tmo; cur != NULL; prev = cur, cur = cur->next) {

> -             if (cur->tmo_buf == handle) {

> -                     if (prev == NULL)

> -                             *tmo = cur->next;

> -                     else

> -                             prev->next = cur->next;

> +static void odp_timer_pool_del(odp_timer_pool *this)

> +{

> +     odp_timer_pool_des(this);

> +     free(this);

> +}

>

> -                     break;

> +static inline odp_timer *timer_alloc(odp_timer_pool *this,

> +                                  odp_queue_t queue,

> +                                  void *user_ptr,

> +                                  odp_buffer_t tmo_buf)

> +{

> +     odp_timer *tim = ODP_TIMER_INVALID;

> +     if (odp_likely(this->shared))

> +             odp_ticketlock_lock(&this->lock);

> +     if (odp_likely(this->num_alloc < this->max_timers)) {

> +             this->num_alloc++;

> +             /* Remove first unused timer from free list */

> +             assert(this->first_free != this->max_timers);

> +             tim = &this->timers[this->first_free];

> +             this->first_free = get_next_free(tim);

> +             /* Insert timer into priority queue */

> +             if (odp_unlikely(!pq_register_element(&this->pq,

> +                                                   &tim->pqelem))) {

> +                     /* Unexpected internal error */

> +                     abort();

>               }

> +             /* Create timer */

> +             setup(tim, queue, user_ptr, tmo_buf);

> +     } else {

> +             errno = ENFILE; /* Reusing file table overflow */

>       }

> -

> -     if (!cur)

> -             /* couldn't find tmo in list */

> -             return -1;

> -

> -     /* application to free tmo_buf provided by absolute_tmo call */

> -     return 0;

> +     if (odp_likely(this->shared))

> +             odp_ticketlock_unlock(&this->lock);

> +     return tim;

>  }

>

> -int odp_timer_cancel_tmo(odp_timer_t timer_hdl, odp_timer_tmo_t tmo)

> +static inline void timer_free(odp_timer_pool *this, odp_timer *tim)

>  {

> -     int id;

> -     int tick_idx;

> -     timeout_t *cancel_tmo;

> -     odp_timeout_hdr_t *tmo_hdr;

> -     tick_t *tick;

> -

> -     /* get id */

> -     id = (int)timer_hdl - 1;

> -

> -     tmo_hdr = odp_timeout_hdr((odp_timeout_t) tmo);

> -     /* get tmo_buf to cancel */

> -     cancel_tmo = &tmo_hdr->meta;

> +     if (odp_likely(this->shared))

> +             odp_ticketlock_lock(&this->lock);

> +     if (odp_unlikely(tim->queue == ODP_QUEUE_INVALID))

> +             ODP_ABORT("Invalid timer %p\n", tim);

> +     /* Destroy timer */

> +     odp_buffer_t buf = teardown(tim);

> +     /* Remove timer from priority queue */

> +     pq_unregister_element(&this->pq, &tim->pqelem);

> +     /* Insert timer into free list */

> +     set_next_free(tim, this->first_free);

> +     this->first_free = (tim - &this->timers[0]) / sizeof(this->timers[0]);

> +     assert(this->num_alloc != 0);

> +     this->num_alloc--;

> +     if (odp_likely(this->shared))

> +             odp_ticketlock_unlock(&this->lock);

> +     if (buf != ODP_BUFFER_INVALID)

> +             odp_buffer_free(buf);

> +}

>

> -     tick_idx = cancel_tmo->tick;

> -     tick = &odp_timer.timer[id].tick[tick_idx];

> +/******************************************************************************

> + * Operations on timers

> + * reset/reset_w_buf/cancel timer, return timeout

> + *****************************************************************************/

>

> -     odp_spinlock_lock(&tick->lock);

> -     /* search and delete tmo from tick list */

> -     if (find_and_del_tmo(&tick->list, tmo) != 0) {

> -             odp_spinlock_unlock(&tick->lock);

> -             ODP_DBG("Couldn't find the tmo (%d) in tick list\n",

> (int)tmo);

> -             return -1;

> +static inline void timer_expire(odp_timer *tim)

> +{

> +     assert(tim->req_tmo != INVALID_PRIORITY);

> +     /* Timer expired, is there actually any timeout event */

> +     /* we can enqueue? */

> +     if (odp_likely(tim->tmo_buf != ODP_BUFFER_INVALID)) {

> +             /* Swap out timeout buffer */

> +             odp_buffer_t buf = tim->tmo_buf;

> +             tim->tmo_buf = ODP_BUFFER_INVALID;

> +             if (odp_likely(!tim->user_buf)) {

> +                     odp_timeout_hdr_t *tmo_hdr =

> +                             odp_tmo_to_hdr(odp_timeout_from_buffer(buf));

> +                     /* Copy tag and requested expiration tick from timer */

> +                     tmo_hdr->tag = tim->tag;

> +                     tmo_hdr->expiration = tim->req_tmo;

> +             }

> +             /* Else don't touch user-defined buffer */

> +             int rc = odp_queue_enq(tim->queue, buf);

> +             if (odp_unlikely(rc != 0))

> +                     ODP_ABORT("Failed to enqueue timeout buffer (%d)\n",

> +                               rc);

> +             /* Mark timer as inactive */

> +             tim->req_tmo = INVALID_PRIORITY;

>       }

> -     odp_spinlock_unlock(&tick->lock);

> -

> -     return 0;

> +     /* No, timeout event already enqueued or unavailable */

> +     /* Keep timer active, odp_timer_return_tmo() will patch up */

>  }

>

> -static void notify_function(union sigval sigval)

> +static odp_timer_set_t timer_reset(odp_timer_pool *tp,

> +                                odp_timer *tim,

> +                                uint64_t abs_tck)

>  {

> -     uint64_t cur_tick;

> -     timeout_t *tmo;

> -     tick_t *tick;

> -     timer_ring_t *timer;

> +     assert(tim->user_buf == false);

> +     if (odp_unlikely(abs_tck < tp->min_tick))

> +             return ODP_TIMER_SET_TOOEARLY;

> +     if (odp_unlikely(abs_tck > tp->max_tick))

> +             return ODP_TIMER_SET_TOOLATE;

> +

> +     if (odp_likely(tp->shared))

> +             odp_ticketlock_lock(&tp->lock);

> +

> +     if (odp_unlikely(tim->queue == ODP_QUEUE_INVALID))

> +             ODP_ABORT("Invalid timer %p\n", tim);

> +     if (odp_unlikely(tim->user_buf))

> +             ODP_ABORT("Timer %p has user buffer\n", tim);

> +     /* Increase timer tag to make any pending timeout stale */

> +     tim->tag++;

> +     /* Save requested timeout */

> +     tim->req_tmo = abs_tck;

> +     /* Update timer position in priority queue */

> +     pq_reset_element(&tp->pq, &tim->pqelem, abs_tck);

> +

> +     if (odp_likely(tp->shared))

> +             odp_ticketlock_unlock(&tp->lock);

> +     return ODP_TIMER_SET_SUCCESS;

> +}

>

> -     timer = sigval.sival_ptr;

> +static odp_timer_set_t timer_reset_w_buf(odp_timer_pool *tp,

> +             odp_timer *tim,

> +             uint64_t abs_tck,

> +             odp_buffer_t user_buf)

> +{

> +     if (odp_unlikely(abs_tck < tp->min_tick))

> +             return ODP_TIMER_SET_TOOEARLY;

> +     if (odp_unlikely(abs_tck > tp->max_tick))

> +             return ODP_TIMER_SET_TOOLATE;

> +

> +     if (odp_likely(tp->shared))

> +             odp_ticketlock_lock(&tp->lock);

> +

> +     if (odp_unlikely(tim->queue == ODP_QUEUE_INVALID))

> +             ODP_ABORT("Invalid timer %p\n", tim);

> +     /* Increase timer tag to make any pending timeout stale */

> +     tim->tag++;

> +     /* Save requested timeout */

> +     tim->req_tmo = abs_tck;

> +     /* Set flag indicating presence of user defined buffer */

> +     tim->user_buf = true;

> +     /* Swap in new buffer, save any old buffer pointer */

> +     odp_buffer_t old_buf = tim->tmo_buf;

> +     tim->tmo_buf = user_buf;

> +     /* Update timer position in priority queue */

> +     pq_reset_element(&tp->pq, &tim->pqelem, abs_tck);

> +

> +     if (odp_likely(tp->shared))

> +             odp_ticketlock_unlock(&tp->lock);

> +

> +     /* Free old buffer if present */

> +     if (odp_unlikely(old_buf != ODP_BUFFER_INVALID))

> +             odp_buffer_free(old_buf);

> +     return ODP_TIMER_SET_SUCCESS;

> +}

>

> -     if (timer->active == 0) {

> -             ODP_DBG("Timer (%u) not active\n", timer->timer_hdl);

> -             return;

> +static inline void timer_cancel(odp_timer_pool *tp,

> +                             odp_timer *tim)

> +{

> +     odp_buffer_t old_buf = ODP_BUFFER_INVALID;

> +     if (odp_likely(tp->shared))

> +             odp_ticketlock_lock(&tp->lock);

> +

> +     if (odp_unlikely(tim->queue == ODP_QUEUE_INVALID))

> +             ODP_ABORT("Invalid timer %p\n", tim);

> +     if (odp_unlikely(tim->user_buf)) {

> +             /* Swap out old user buffer */

> +             old_buf = tim->tmo_buf;

> +             tim->tmo_buf = ODP_BUFFER_INVALID;

> +             /* tim->user_buf stays true */

>       }

> +     /* Else a normal timer (no user-defined buffer) */

> +     /* Increase timer tag to make any pending timeout stale */

> +     tim->tag++;

> +     /* Clear requested timeout, mark timer inactive */

> +     tim->req_tmo = INVALID_PRIORITY;

> +     /* Remove timer from the priority queue */

> +     pq_deactivate_element(&tp->pq, &tim->pqelem);

> +

> +     if (odp_likely(tp->shared))

> +             odp_ticketlock_unlock(&tp->lock);

> +     /* Free user-defined buffer if present */

> +     if (odp_unlikely(old_buf != ODP_BUFFER_INVALID))

> +             odp_buffer_free(old_buf);

> +}

>

> -     /* ODP_DBG("Tick\n"); */

> -

> -     cur_tick = timer->cur_tick++;

> -

> -     odp_sync_stores();

> +static inline void timer_return(odp_timer_pool *tp,

> +                             odp_timer *tim,

> +                             odp_timer_tmo_t tmo,

> +                             const odp_timeout_hdr_t *tmo_hdr)

> +{

> +     odp_buffer_t tmo_buf = odp_buffer_from_timeout(tmo);

> +     if (odp_likely(tp->shared))

> +             odp_ticketlock_lock(&tp->lock);

> +     if (odp_unlikely(tim->user_buf))

> +             ODP_ABORT("Timer %p has user-defined buffer\n", tim);

> +     if (odp_likely(tmo_hdr->gencnt == tim->gencnt)) {

> +             assert(tim->tmo_buf == ODP_BUFFER_INVALID);

> +             /* Save returned buffer for use when timer expires next time */

> +             tim->tmo_buf = tmo_buf;

> +             tmo_buf = ODP_BUFFER_INVALID;

> +             /* Check if timer is active and should have expired */

> +             if (odp_unlikely(tim->req_tmo != INVALID_PRIORITY &&

> +                              tim->req_tmo <= tp->cur_tick)) {

> +                     /* Expire timer now since we have restored the timeout

> +                        buffer */

> +                     timer_expire(tim);

> +             }

> +             /* Else timer inactive or expires in the future */

> +     }

> +     /* Else timeout orphaned, free buffer later */

> +     if (odp_likely(tp->shared))

> +             odp_ticketlock_unlock(&tp->lock);

> +     if (odp_unlikely(tmo_buf != ODP_BUFFER_INVALID))

> +             odp_buffer_free(tmo_buf);

> +}

>

> -     tick = &timer->tick[cur_tick % MAX_TICKS];

> +/* Non-public so not in odp_timer.h but externally visible, must declare

> + * somewhere */

> +unsigned odp_timer_pool_expire(odp_timer_pool_t tpid, uint64_t tick);

>

> -     while ((tmo = rem_tmo(tick)) != NULL) {

> -             odp_queue_t  queue;

> -             odp_buffer_t buf;

> +unsigned odp_timer_pool_expire(odp_timer_pool_t tpid, uint64_t tick)

> +{

> +     if (odp_likely(tpid->shared))

> +             odp_ticketlock_lock(&tpid->lock);

> +

> +     unsigned nexp = 0;

> +     odp_timer_t tim;

> +     tpid->cur_tick = tick;

> +     tpid->min_tick = tick + tpid->min_tmo_tck;

> +     tpid->max_tick = tick + tpid->max_tmo_tck;

> +     while ((tim = (odp_timer_t)pq_release_element(&tpid->pq, tick)) !=

> +            ODP_TIMER_INVALID) {

> +             assert(get_prio(&tim->pqelem) <= tick);

> +             timer_expire(tim);

> +             nexp++;

> +     }

>

> -             queue = tmo->queue;

> -             buf   = tmo->buf;

> +     if (odp_likely(tpid->shared))

> +             odp_ticketlock_unlock(&tpid->lock);

> +     return nexp;

> +}
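
odp_timer_pool_expire() is the hook a tick source calls into; a hypothetical
driver sketch (my_hw_read_tick() is made up for illustration, the CPU-clock
pools use the POSIX timer below instead; tp is the pool handle as above):

    /* Sketch: advance a pool from an external tick source */
    uint64_t now = my_hw_read_tick();  /* hypothetical HW tick read */
    unsigned nexp = odp_timer_pool_expire(tp, now);
    (void)nexp;                        /* e.g. for statistics */
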

>

> -             if (buf != tmo->tmo_buf)

> -                     odp_buffer_free(tmo->tmo_buf);

> +/******************************************************************************

> + * POSIX timer support

> + * Functions that use Linux/POSIX per-process timers and related facilities

> + *****************************************************************************/

>

> -             odp_queue_enq(queue, buf);

> -     }

> +static void timer_notify(sigval_t sigval)

> +{

> +     odp_timer_pool *tp = (odp_timer_pool *)sigval.sival_ptr;

> +     uint64_t new_tick = tp->cur_tick + 1;

> +     (void)odp_timer_pool_expire(tp, new_tick);

>  }

>

> -static void timer_start(timer_ring_t *timer)

> +static void timer_init(odp_timer_pool *tp)

>  {

>       struct sigevent   sigev;

>       struct itimerspec ispec;

>       uint64_t res, sec, nsec;

>

> -     ODP_DBG("\nTimer (%u) starts\n", timer->timer_hdl);

> +     ODP_DBG("Creating POSIX timer for timer pool %s, period %"

> +             PRIu64" ns\n", tp->name, tp->resolution_ns);

>

>       memset(&sigev, 0, sizeof(sigev));

>       memset(&ispec, 0, sizeof(ispec));

>

>       sigev.sigev_notify          = SIGEV_THREAD;

> -     sigev.sigev_notify_function = notify_function;

> -     sigev.sigev_value.sival_ptr = timer;

> +     sigev.sigev_notify_function = timer_notify;

> +     sigev.sigev_value.sival_ptr = tp;

>

> -     if (timer_create(CLOCK_MONOTONIC, &sigev, &timer->timerid)) {

> -             ODP_DBG("Timer create failed\n");

> -             return;

> -     }

> +     if (timer_create(CLOCK_MONOTONIC, &sigev, &tp->timerid))

> +             ODP_ABORT("timer_create() returned error %s\n",

> +                       strerror(errno));

>

> -     res  = timer->resolution_ns;

> +     res  = tp->resolution_ns;

>       sec  = res / ODP_TIME_SEC;

> -     nsec = res - sec*ODP_TIME_SEC;

> +     nsec = res - sec * ODP_TIME_SEC;

>

>       ispec.it_interval.tv_sec  = (time_t)sec;

>       ispec.it_interval.tv_nsec = (long)nsec;

>       ispec.it_value.tv_sec     = (time_t)sec;

>       ispec.it_value.tv_nsec    = (long)nsec;

>

> -     if (timer_settime(timer->timerid, 0, &ispec, NULL)) {

> -             ODP_DBG("Timer set failed\n");

> -             return;

> -     }

> -

> -     return;

> +     if (timer_settime(&tp->timerid, 0, &ispec, NULL))

> +             ODP_ABORT("timer_settime() returned error %s\n",

> +                       strerror(errno));

>  }

>

> -int odp_timer_init_global(void)

> +static void timer_exit(odp_timer_pool *tp)

>  {

> -     ODP_DBG("Timer init ...");

> -

> -     memset(&odp_timer, 0, sizeof(timer_global_t));

> -

> -     odp_spinlock_init(&odp_timer.lock);

> -

> -     ODP_DBG("done\n");

> -

> -     return 0;

> +     if (timer_delete(tp->timerid) != 0)

> +             ODP_ABORT("timer_delete() returned error %s\n",

> +                       strerror(errno));

>  }

>

> -int odp_timer_disarm_all(void)

> +/***********************************************************************

> *******

> + * Public API functions

> + * Some parameter checks and error messages

> + * No modifications of internal state

> +

> *************************************************************************

> ****/

> +odp_timer_pool_t

> +odp_timer_pool_create(const char *name,

> +                   odp_buffer_pool_t buf_pool,

> +                   uint64_t resolution_ns,

> +                   uint64_t min_timeout,

> +                   uint64_t max_timeout,

> +                   uint32_t num_timers,

> +                   bool shared,

> +                   odp_timer_clk_src_t clk_src)

>  {

> -     int timers;

> -     struct itimerspec ispec;

> -

> -     odp_spinlock_lock(&odp_timer.lock);

> -

> -     timers = odp_timer.num_timers;

> -

> -     ispec.it_interval.tv_sec  = 0;

> -     ispec.it_interval.tv_nsec = 0;

> -     ispec.it_value.tv_sec     = 0;

> -     ispec.it_value.tv_nsec    = 0;

> -

> -     for (; timers >= 0; timers--) {

> -             if (timer_settime(odp_timer.timer[timers].timerid,

> -                               0, &ispec, NULL)) {

> -                     ODP_DBG("Timer reset failed\n");

> -                     odp_spinlock_unlock(&odp_timer.lock);

> -                     return -1;

> -             }

> -             odp_timer.num_timers--;

> -     }

> -

> -     odp_spinlock_unlock(&odp_timer.lock);

> -

> -     return 0;

> +     /* Verify that buffer pool can be used for timeouts */

> +     odp_buffer_t buf = odp_buffer_alloc(buf_pool);

> +     if (buf == ODP_BUFFER_INVALID)

> +             ODP_ABORT("%s: Failed to allocate buffer\n", name);

> +     if (odp_buffer_type(buf) != ODP_BUFFER_TYPE_TIMEOUT)

> +             ODP_ABORT("%s: Buffer pool wrong type\n", name);

> +     odp_buffer_free(buf);

> +     odp_timer_pool_t tp = odp_timer_pool_new(name, buf_pool,

> resolution_ns,

> +                           min_timeout, max_timeout, num_timers,

> +                           shared, clk_src);

> +     return tp;

>  }

>

> -odp_timer_t odp_timer_create(const char *name, odp_buffer_pool_t pool,

> -                          uint64_t resolution_ns, uint64_t min_ns,

> -                          uint64_t max_ns)

> +void odp_timer_pool_start(void)

>  {

> -     uint32_t id;

> -     timer_ring_t *timer;

> -     odp_timer_t timer_hdl;

> -     int i;

> -     uint64_t max_ticks;

> -     (void) name;

> -

> -     if (resolution_ns < MIN_RES)

> -             resolution_ns = MIN_RES;

> -

> -     if (resolution_ns > MAX_RES)

> -             resolution_ns = MAX_RES;

> -

> -     max_ticks = max_ns / resolution_ns;

> -

> -     if (max_ticks > MAX_TICKS) {

> -             ODP_DBG("Maximum timeout too long: %"PRIu64" ticks\n",

> -                     max_ticks);

> -             return ODP_TIMER_INVALID;

> -     }

> -

> -     if (min_ns < resolution_ns) {

> -             ODP_DBG("Min timeout %"PRIu64" ns < resolution %"PRIu64"

> ns\n",

> -                     min_ns, resolution_ns);

> -             return ODP_TIMER_INVALID;

> -     }

> -

> -     odp_spinlock_lock(&odp_timer.lock);

> -

> -     if (odp_timer.num_timers >= NUM_TIMERS) {

> -             odp_spinlock_unlock(&odp_timer.lock);

> -             ODP_DBG("All timers allocated\n");

> -             return ODP_TIMER_INVALID;

> -     }

> -

> -     for (id = 0; id < NUM_TIMERS; id++) {

> -             if (odp_timer.timer[id].allocated == 0)

> -                     break;

> -     }

> -

> -     timer = &odp_timer.timer[id];

> -     timer->allocated = 1;

> -     odp_timer.num_timers++;

> -

> -     odp_spinlock_unlock(&odp_timer.lock);

> -

> -     timer_hdl = id + 1;

> -

> -     timer->timer_hdl     = timer_hdl;

> -     timer->pool          = pool;

> -     timer->resolution_ns = resolution_ns;

> -     timer->max_ticks     = MAX_TICKS;

> -

> -     for (i = 0; i < MAX_TICKS; i++) {

> -             odp_spinlock_init(&timer->tick[i].lock);

> -             timer->tick[i].list = NULL;

> -     }

> -

> -     timer->active = 1;

> -     odp_sync_stores();

> -

> -     timer_start(timer);

> +     /* Nothing to do here, timer pools are started by the create call

> */

> +}

>

> -     return timer_hdl;

> +void odp_timer_pool_destroy(odp_timer_pool_t tpid)

> +{

> +     odp_timer_pool_del(tpid);

>  }

>

> -odp_timer_tmo_t odp_timer_absolute_tmo(odp_timer_t timer_hdl, uint64_t

> tmo_tick,

> -                                    odp_queue_t queue, odp_buffer_t buf)

> +uint64_t odp_timer_tick_to_ns(odp_timer_pool_t tpid, uint64_t ticks)

>  {

> -     int id;

> -     uint64_t tick;

> -     uint64_t cur_tick;

> -     timeout_t *new_tmo;

> -     odp_buffer_t tmo_buf;

> -     odp_timeout_hdr_t *tmo_hdr;

> -     timer_ring_t *timer;

> +     return ticks * tpid->resolution_ns;

> +}

>

> -     id = (int)timer_hdl - 1;

> -     timer = &odp_timer.timer[id];

> +uint64_t odp_timer_ns_to_tick(odp_timer_pool_t tpid, uint64_t ns)

> +{

> +     return (uint64_t)(ns / tpid->resolution_ns);

> +}

>

> -     cur_tick = timer->cur_tick;

> -     if (tmo_tick <= cur_tick) {

> -             ODP_DBG("timeout too close\n");

> -             return ODP_TIMER_TMO_INVALID;

> -     }

> +uint64_t odp_timer_current_tick(odp_timer_pool_t tpid)

> +{

> +     return tpid->cur_tick;

> +}

>

> -     if ((tmo_tick - cur_tick) > MAX_TICKS) {

> -             ODP_DBG("timeout too far: cur %"PRIu64" tmo %"PRIu64"\n",

> -                     cur_tick, tmo_tick);

> -             return ODP_TIMER_TMO_INVALID;

> +uintptr_t odp_timer_pool_query_conf(odp_timer_pool_t tpid,

> +                                 odp_timer_pool_conf_t item)

> +{

> +     switch (item) {

> +     case ODP_TIMER_NAME:

> +             return (uintptr_t)(tpid->name);

> +     case ODP_TIMER_RESOLUTION:

> +             return tpid->resolution_ns;

> +     case ODP_TIMER_MIN_TICKS:

> +             return tpid->min_tmo_tck;

> +     case ODP_TIMER_MAX_TICKS:

> +             return tpid->max_tmo_tck;

> +     case ODP_TIMER_NUM_TIMERS:

> +             return tpid->max_timers;

> +     case ODP_TIMER_SHARED:

> +             return tpid->shared;

> +     default:

> +             return 0;

>       }

> +}

>

> -     tick = tmo_tick % MAX_TICKS;

> -

> -     tmo_buf = odp_buffer_alloc(timer->pool);

> -     if (tmo_buf == ODP_BUFFER_INVALID) {

> -             ODP_DBG("tmo buffer alloc failed\n");

> -             return ODP_TIMER_TMO_INVALID;

> +odp_timer_t odp_timer_alloc(odp_timer_pool_t tpid,

> +                         odp_queue_t queue,

> +                         void *user_ptr)

> +{

> +     /* We check this because ODP_QUEUE_INVALID is used */

> +     /* to indicate a free timer */

> +     if (odp_unlikely(queue == ODP_QUEUE_INVALID))

> +             ODP_ABORT("%s: Invalid queue handle\n", tpid->name);

> +     odp_buffer_t tmo_buf = odp_buffer_alloc(tpid->buf_pool);

> +     if (odp_likely(tmo_buf != ODP_BUFFER_INVALID)) {

> +             odp_timer *tim = timer_alloc(tpid, queue, user_ptr,

> tmo_buf);

> +             if (odp_likely(tim != ODP_TIMER_INVALID)) {

> +                     /* Success */

> +                     assert(tim->queue != ODP_QUEUE_INVALID);

> +                     return tim;

> +             }

> +             odp_buffer_free(tmo_buf);

>       }

> +     /* Else failed to allocate timeout event */

> +     /* errno set by odp_buffer_alloc() or timer_alloc () */

> +     return ODP_TIMER_INVALID;

> +}

>

> -     tmo_hdr = odp_timeout_hdr((odp_timeout_t) tmo_buf);

> -     new_tmo = &tmo_hdr->meta;

> -

> -     new_tmo->timer_id = id;

> -     new_tmo->tick     = (int)tick;

> -     new_tmo->tmo_tick = tmo_tick;

> -     new_tmo->queue    = queue;

> -     new_tmo->tmo_buf  = tmo_buf;

> -

> -     if (buf != ODP_BUFFER_INVALID)

> -             new_tmo->buf = buf;

> -     else

> -             new_tmo->buf = tmo_buf;

> -

> -     add_tmo(&timer->tick[tick], new_tmo);

> -

> -     return tmo_buf;

> +void odp_timer_free(odp_timer_t tim)

> +{

> +     odp_timer_pool *tp = (odp_timer_pool *)get_pq(&tim->pqelem);

> +     timer_free(tp, tim);

>  }

>

> -uint64_t odp_timer_tick_to_ns(odp_timer_t timer_hdl, uint64_t ticks)

> +odp_timer_set_t odp_timer_set_abs_w_buf(odp_timer_t tim,

> +                                     uint64_t abs_tck,

> +                                     odp_buffer_t user_buf)

>  {

> -     uint32_t id;

> +     odp_timer_pool *tp = (odp_timer_pool *)get_pq(&tim->pqelem);

> +     odp_timer_set_t rc = timer_reset_w_buf(tp, tim, abs_tck, user_buf);

> +     return rc;

> +}

>

> -     id = timer_hdl - 1;

> -     return ticks * odp_timer.timer[id].resolution_ns;

> +odp_timer_set_t odp_timer_set_abs(odp_timer_t tim, uint64_t abs_tck)

> +{

> +     odp_timer_pool *tp = (odp_timer_pool *)get_pq(&tim->pqelem);

> +     odp_timer_set_t rc = timer_reset(tp, tim, abs_tck);

> +     return rc;

>  }

>

> -uint64_t odp_timer_ns_to_tick(odp_timer_t timer_hdl, uint64_t ns)

> +odp_timer_set_t odp_timer_set_rel_w_buf(odp_timer_t tim,

> +                                     uint64_t rel_tck,

> +                                     odp_buffer_t user_buf)

>  {

> -     uint32_t id;

> +     odp_timer_pool *tp = (odp_timer_pool *)get_pq(&tim->pqelem);

> +     odp_timer_set_t rc = timer_reset_w_buf(tp, tim, tp->cur_tick +

> rel_tck,

> +                                            user_buf);

> +     return rc;

> +}

>

> -     id = timer_hdl - 1;

> -     return ns / odp_timer.timer[id].resolution_ns;

> +odp_timer_set_t odp_timer_set_rel(odp_timer_t tim, uint64_t rel_tck)

> +{

> +     odp_timer_pool *tp = (odp_timer_pool *)get_pq(&tim->pqelem);

> +     odp_timer_set_t rc = timer_reset(tp, tim, tp->cur_tick + rel_tck);

> +     return rc;

>  }

>

> -uint64_t odp_timer_resolution(odp_timer_t timer_hdl)

> +void odp_timer_cancel(odp_timer_t tim)

>  {

> -     uint32_t id;

> +     odp_timer_pool *tp = (odp_timer_pool *)get_pq(&tim->pqelem);

> +     timer_cancel(tp, tim);

> +}

>

> -     id = timer_hdl - 1;

> -     return odp_timer.timer[id].resolution_ns;

> +void odp_timer_return_tmo(odp_timer_tmo_t tmo)

> +{

> +     const odp_timeout_hdr_t *tmo_hdr = odp_tmo_to_hdr(tmo);

> +     odp_timer *parent_tim = tmo_hdr->timer;

> +     odp_timer_pool *tp = (odp_timer_pool *)get_pq(&parent_tim->pqelem);

> +     timer_return(tp, parent_tim, tmo, tmo_hdr);

>  }

>

> -uint64_t odp_timer_maximum_tmo(odp_timer_t timer_hdl)

> +odp_timer_tmo_status_t odp_timer_tmo_status(odp_timer_tmo_t tmo)

>  {

> -     uint32_t id;

> +     const odp_timeout_hdr_t *tmo_hdr = odp_tmo_to_hdr(tmo);

> +     odp_timer *parent_tim = tmo_hdr->timer;

>

> -     id = timer_hdl - 1;

> -     return odp_timer.timer[id].max_ticks;

> +     if (odp_unlikely(tmo_hdr->gencnt != parent_tim->gencnt)) {

> +             /* Generation counters differ => timer has been freed */

> +             return ODP_TMO_ORPHAN;

> +     }

> +     /* Else generation counters match => parent timer exists */

> +

> +     if (odp_likely(parent_tim->tag == tmo_hdr->tag))

> +             return ODP_TMO_FRESH;

> +     else

> +             return ODP_TMO_STALE;

>  }

>

> -uint64_t odp_timer_current_tick(odp_timer_t timer_hdl)

> +odp_timer_t odp_timer_handle(odp_timer_tmo_t tmo)

>  {

> -     uint32_t id;

> +     odp_timeout_hdr_t *tmo_hdr = odp_tmo_to_hdr(tmo);

> +     odp_timer_t parent_tim = tmo_hdr->timer;

> +     if (odp_likely(tmo_hdr->gencnt == parent_tim->gencnt))

> +             return parent_tim;

> +     else

> +             return ODP_TIMER_INVALID;

> +}

>

> -     id = timer_hdl - 1;

> -     return odp_timer.timer[id].cur_tick;

> +uint64_t odp_timer_expiration(odp_timer_tmo_t tmo)

> +{

> +     odp_timeout_hdr_t *tmo_hdr = odp_tmo_to_hdr(tmo);

> +     return tmo_hdr->expiration;

>  }

>

> -odp_timeout_t odp_timeout_from_buffer(odp_buffer_t buf)

> +void *odp_timer_userptr(odp_timer_tmo_t tmo)

>  {

> -     return (odp_timeout_t) buf;

> +     odp_timeout_hdr_t *tmo_hdr = odp_tmo_to_hdr(tmo);

> +     return tmo_hdr->user_ptr;

>  }

>

> -uint64_t odp_timeout_tick(odp_timeout_t tmo)

> +int odp_timer_init_global(void)

>  {

> -     odp_timeout_hdr_t *tmo_hdr = odp_timeout_hdr(tmo);

> -     return tmo_hdr->meta.tmo_tick;

> +     return 0;

>  }

> diff --git a/test/api_test/odp_timer_ping.c

> b/test/api_test/odp_timer_ping.c

> index 7406a45..2617b5c 100644

> --- a/test/api_test/odp_timer_ping.c

> +++ b/test/api_test/odp_timer_ping.c

> @@ -20,6 +20,8 @@

>   *    Otherwise timeout may happen bcz of slow nw speed

>   */

>

> +#include <assert.h>

> +#include <stdlib.h>

>  #include <unistd.h>

>  #include <fcntl.h>

>  #include <errno.h>

> @@ -41,14 +43,15 @@

>  #define MSG_POOL_SIZE         (4*1024*1024)

>  #define BUF_SIZE             8

>  #define PING_CNT     10

> -#define PING_THRD    2       /* Send and Rx Ping thread */

> +#define PING_THRD    2       /* send_ping and rx_ping threads */

>

>  /* Nanoseconds */

>  #define RESUS        10000

>  #define MINUS        10000

>  #define MAXUS        10000000

>

> -static odp_timer_t test_timer_ping;

> +static odp_timer_pool_t tp;

> +static odp_timer_t test_timer_ping = ODP_TIMER_INVALID;

>  static odp_timer_tmo_t test_ping_tmo;

>

>  #define PKTSIZE      64

> @@ -128,15 +131,7 @@ static int listen_to_pingack(void)

>                                        (socklen_t *)&len);

>                       if (bytes > 0) {

>                               /* pkt rxvd therefore cancel the timeout */

> -                             if (odp_timer_cancel_tmo(test_timer_ping,

> -                                                      test_ping_tmo) != 0) {

> -                                     ODP_ERR("cancel_tmo failed ..exiting

> listner thread\n");

> -                                     /* avoid exiting from here even if tmo

> -                                      * failed for current ping,

> -                                      * allow subsequent ping_rx request */

> -                                     err = -1;

> -

> -                             }

> +                             odp_timer_cancel(test_timer_ping);

>                               /* cruel bad hack used for sender, listner ipc..

>                                * euwww.. FIXME ..

>                                */

> @@ -160,7 +155,6 @@ static int send_ping_request(struct sockaddr_in

> *addr)

>

>       uint64_t tick;

>       odp_queue_t queue;

> -     odp_buffer_t buf;

>

>       int err = 0;

>

> @@ -184,8 +178,16 @@ static int send_ping_request(struct sockaddr_in

> *addr)

>

>       /* get the ping queue */

>       queue = odp_queue_lookup("ping_timer_queue");

> +     test_timer_ping = odp_timer_alloc(tp, queue, NULL);

> +     if (test_timer_ping == ODP_TIMER_INVALID) {

> +             ODP_ERR("Failed to allocate timer.\n");

> +             err = -1;

> +             goto err;

> +     }

>

>       for (i = 0; i < PING_CNT; i++) {

> +             odp_buffer_t buf;

> +             odp_timer_tmo_t tmo;

>               /* prepare icmp pkt */

>               bzero(&pckt, sizeof(pckt));

>               pckt.hdr.type = ICMP_ECHO;

> @@ -209,12 +211,10 @@ static int send_ping_request(struct sockaddr_in

> *addr)

>               printf(" icmp_sent msg_cnt %d\n", i);

>

>               /* arm the timer */

> -             tick = odp_timer_current_tick(test_timer_ping);

> +             tick = odp_timer_current_tick(tp);

>

>               tick += 1000;

> -             test_ping_tmo = odp_timer_absolute_tmo(test_timer_ping,

> tick,

> -                                                    queue,

> -                                                    ODP_BUFFER_INVALID);

> +             odp_timer_set_abs(test_timer_ping, tick);

>               /* wait for timeout event */

>               while ((buf = odp_queue_deq(queue)) == ODP_BUFFER_INVALID) {

>                       /* flag true means ack rxvd.. a cruel hack as I

> @@ -229,17 +229,28 @@ static int send_ping_request(struct sockaddr_in

> *addr)

>                               break;

>                       }

>               }

> +             assert(odp_buffer_type(buf) == ODP_BUFFER_TYPE_TIMEOUT);

> +             tmo = odp_timeout_from_buffer(buf);

>

> -             /* free tmo_buf for timeout case */

> -             if (buf != ODP_BUFFER_INVALID) {

> -                     ODP_DBG(" timeout msg_cnt [%i] \n", i);

> +             switch (odp_timer_tmo_status(tmo)) {

> +             case ODP_TMO_FRESH:

> +                     ODP_DBG(" timeout msg_cnt [%i]\n", i);

>                       /* so to avoid seg fault commented */

> -                     odp_buffer_free(buf);

>                       err = -1;

> +                     break;

> +             case ODP_TMO_STALE:

> +                     /* Ignore stale timeouts */

> +                     break;

> +             case ODP_TMO_ORPHAN:

> +                     ODP_ERR("Received orphaned timeout!\n");

> +                     abort();

>               }

> +             odp_timer_return_tmo(tmo);

>       }

>

>  err:

> +     if (test_timer_ping != ODP_TIMER_INVALID)

> +             odp_timer_free(test_timer_ping);

>       return err;

>  }

>

> @@ -340,9 +351,9 @@ int main(int argc ODP_UNUSED, char *argv[]

> ODP_UNUSED)

>       pool = odp_buffer_pool_create("msg_pool", pool_base, MSG_POOL_SIZE,

>                                     BUF_SIZE,

>                                     ODP_CACHE_LINE_SIZE,

> -                                   ODP_BUFFER_TYPE_RAW);

> +                                   ODP_BUFFER_TYPE_TIMEOUT);

>       if (pool == ODP_BUFFER_POOL_INVALID) {

> -             ODP_ERR("Pool create failed.\n");

> +             ODP_ERR("Buffer pool create failed.\n");

>               return -1;

>       }

>

> @@ -357,15 +368,19 @@ int main(int argc ODP_UNUSED, char *argv[]

> ODP_UNUSED)

>               return -1;

>       }

>

> -     test_timer_ping = odp_timer_create("ping_timer", pool,

> -                                        RESUS*ODP_TIME_USEC,

> -                                        MINUS*ODP_TIME_USEC,

> -                                        MAXUS*ODP_TIME_USEC);

> -

> -     if (test_timer_ping == ODP_TIMER_INVALID) {

> -             ODP_ERR("Timer create failed.\n");

> +     /*

> +      * Create timer pool

> +      */

> +     tp = odp_timer_pool_create("timer_pool", pool,

> +                                RESUS*ODP_TIME_USEC,

> +                                MINUS*ODP_TIME_USEC,

> +                                MAXUS*ODP_TIME_USEC,

> +                                1, false, ODP_CLOCK_CPU);

> +     if (tp == ODP_TIMER_POOL_INVALID) {

> +             ODP_ERR("Timer pool create failed.\n");

>               return -1;

>       }

> +     odp_timer_pool_start();

>

>       odp_shm_print_all();

>

> --

> 1.9.1

>

>

> _______________________________________________

> lng-odp mailing list

> lng-odp@lists.linaro.org

> http://lists.linaro.org/mailman/listinfo/lng-odp
Gilad Ben-Yossef Oct. 6, 2014, 12:44 p.m. UTC | #8
Hi Petri,

I understand. Thanks for the explanation.

Just to explain where all these questions are coming from - I'm implementing an ODP platform on top of the EZchip NPS SDK. Every time I see something that would be difficult to implement on my platform, or would cost a lot of performance/power etc., I try to understand the requirement better.

Specifically here, odp_schedule(..) with core-specific timeouts doesn't gel well with our HW-based queue schedule(). Which is fine – but understanding where the requirement comes from can help me do the best possible thing here.

So, any idea what application requires this, and for what purpose? A practical usage example?


Thanks,
Gilad

Gilad Ben-Yossef
Software Architect
EZchip Technologies Ltd.
37 Israel Pollak Ave, Kiryat Gat 82025, Israel
Tel: +972-4-959-6666 ext. 576, Fax: +972-8-681-1483
Mobile: +972-52-826-0388, US Mobile: +1-973-826-0388
Email: giladb@ezchip.com, Web: http://www.ezchip.com

"Ethernet always wins."
        — Andy Bechtolsheim

From: Savolainen, Petri (NSN - FI/Espoo) [mailto:petri.savolainen@nsn.com]

Sent: Monday, October 06, 2014 1:45 PM
To: ext Ola Liljedahl; Gilad Ben Yossef
Cc: lng-odp@lists.linaro.org
Subject: RE: [lng-odp] [PATCHv4] Timer API and and priority queue-based implementation

Hi,

The main point is to be able to use an ODP API “à la carte”. In this case, a user can use the scheduler API without being forced to use the timer API. The implementation underneath may run the schedule timeout in whichever way is optimal for the HW/implementation.

Also, user has the option to call scheduler with ODP_SCHED_WAIT and setup the timeout through the timer API.
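
A rough sketch of that combination, using only calls that appear in this patch (a fragment in the style of the @code examples in odp_timer.h; tp and queue are assumed to be set up as in example/timer/odp_timer_test.c, error handling omitted):

/* Arm a timer through the timer API, then block in the scheduler */
odp_timer_t tim = odp_timer_alloc(tp, queue, NULL);
odp_timer_set_rel(tim, odp_timer_ns_to_tick(tp, 100000 * ODP_TIME_USEC));

odp_buffer_t buf = odp_schedule_one(&queue, ODP_SCHED_WAIT);
if (odp_buffer_type(buf) == ODP_BUFFER_TYPE_TIMEOUT) {
        odp_timer_tmo_t tmo = odp_timeout_from_buffer(buf);
        if (odp_timer_tmo_status(tmo) == ODP_TMO_FRESH) {
                /* Nothing else arrived within 100 ms - handle the timeout */
        }
        odp_timer_return_tmo(tmo); /* stale/orphaned timeouts just go back */
} else {
        /* Normal packet/event processing */
}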

-Petri


From: lng-odp-bounces@lists.linaro.org [mailto:lng-odp-bounces@lists.linaro.org] On Behalf Of ext Ola Liljedahl

Sent: Monday, October 06, 2014 12:37 PM
To: Gilad Ben Yossef
Cc: lng-odp@lists.linaro.org
Subject: Re: [lng-odp] [PATCHv4] Timer API and and priority queue-based implementation

Gilad,

Your suggestion makes sense. This is how a purely event-driven application would be designed.

Possibly the wait/timeout parameter to the odp_schedule calls is a legacy from the time before there was a timer API in ODP. Maybe Petri can s(c)hed some light on this.

I suspect there could be some performance benefits from specifying the timeout as an explicit parameter. If the scheduling timeout is implemented using a timer event facility (e.g. the ODP timer API), the application (or the ODP implementation if it uses the same design) would have to reset that timer for every odp_schedule call; for a SW timer implementation this could add serious overhead. With an explicit timeout parameter, the scheduler implementation could instead read e.g. some cycle counter while (busy-)waiting for events to become available. This overhead should be less, and it is also incurred only when the thread is idle and waiting for work.
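
Roughly, the explicit-parameter variant could be implemented along these lines (a sketch only; read_cycles() and try_dequeue() are hypothetical placeholders for implementation-internal helpers, not ODP calls):

/* Sketch: poll for events and a cycle counter instead of arming a
 * timer for every schedule call; read_cycles() and try_dequeue()
 * are hypothetical internal helpers. */
static odp_buffer_t sched_with_wait(odp_queue_t *from, uint64_t wait_cycles)
{
        uint64_t deadline = read_cycles() + wait_cycles;

        for (;;) {
                odp_buffer_t buf = try_dequeue(from); /* poll for work */
                if (buf != ODP_BUFFER_INVALID)
                        return buf;                   /* got an event */
                if (read_cycles() >= deadline)
                        return ODP_BUFFER_INVALID;    /* idle wait expired */
        }
}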

The current API does not prevent an implementation from using timer events internally, nor does it prevent an application from using the timer API for its own timeouts. It does add a little bit of implementation complexity. What is the best trade-off?

-- Ola

On 6 October 2014 08:22, Gilad Ben Yossef <giladb@ezchip.com> wrote:

Another one of my stupid questions, I'm afraid.  :-)
If we have a timer implemented as an event pushed to a queue which can be scheduled like any other queue (which is a good thing, I think), why do our schedule APIs need a timeout?
I mean, if you want a timeout, just add a scheduled timer queue and send yourself timeout events. That's how I would implement the schedule timeouts internally anyway (running a native timer on a core that does packet processing stops it from enjoying Linux full NOHZ CPU isolation, so we really don't want timers there...)
Anything I've missed?
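
That "send yourself timeout events" pattern is essentially Example #2 (periodic tick) in odp_timer.h from this patch; a sketch of a worker doing it with the new API (tp, queue and period are assumed to be set up elsewhere, error handling omitted):

/* The worker wakes itself via timeout events on a scheduled queue
 * instead of relying on a timeout parameter in odp_schedule(). */
odp_timer_t tick_tim = odp_timer_alloc(tp, queue, NULL);
uint64_t next = odp_timer_current_tick(tp) + period;
odp_timer_set_abs(tick_tim, next);

for (;;) {
        odp_buffer_t buf = odp_schedule_one(&queue, ODP_SCHED_WAIT);
        if (odp_buffer_type(buf) == ODP_BUFFER_TYPE_TIMEOUT) {
                odp_timer_tmo_t tmo = odp_timeout_from_buffer(buf);
                if (odp_timer_tmo_status(tmo) == ODP_TMO_FRESH) {
                        /* Periodic housekeeping goes here */
                        next += period;
                        odp_timer_set_abs(tick_tim, next);
                }
                odp_timer_return_tmo(tmo);
        } else {
                /* Normal packet/event processing */
        }
}
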
Thanks,
Gilad
Gilad Ben-Yossef
Software Architect
EZchip Technologies Ltd.
37 Israel Pollak Ave, Kiryat Gat 82025, Israel
Tel: +972-4-959-6666 ext. 576, Fax: +972-8-681-1483
Mobile: +972-52-826-0388, US Mobile: +1-973-826-0388
Email: giladb@ezchip.com, Web: http://www.ezchip.com

"Ethernet always wins."
        — Andy Bechtolsheim


> -----Original Message-----

> From: lng-odp-bounces@lists.linaro.org [mailto:lng-odp-

> bounces@lists.linaro.org] On Behalf Of Ola Liljedahl

> Sent: Thursday, October 02, 2014 6:23 PM

> To: lng-odp@lists.linaro.org

> Subject: [lng-odp] [PATCHv4] Timer API and and priority queue-based

> implementation

>

> Signed-off-by: Ola Liljedahl <ola.liljedahl@linaro.org>

> ---

> Fixed review comments for v3 from Anders R.

> * Example code snippets use @code/@endcode.

> * Added some missing doxygen comments.

> * Updated some comments.

> * Reverted year in copyright notices.

> * Added odp_likely() hint.

> * Made some variables self-descriptive and removed redundant comments.

> Changed to use ticket locks instead of spin locks (ticket locks are more

> fair).

> Changed to use ODP_ABORT() which has become available since the last

> patch.

>

>  example/timer/odp_timer_test.c                     | 125 +--

>  platform/linux-generic/Makefile.am                 |   1 +

>  platform/linux-generic/include/api/odp_timer.h     | 570 +++++++++++--

>  .../include/odp_priority_queue_internal.h          | 108 +++

>  .../linux-generic/include/odp_timer_internal.h     |  71 +-

>  platform/linux-generic/odp_priority_queue.c        | 283 +++++++

>  platform/linux-generic/odp_timer.c                 | 923 ++++++++++++++-

> ------

>  test/api_test/odp_timer_ping.c                     |  73 +-

>  8 files changed, 1648 insertions(+), 506 deletions(-)

>  create mode 100644 platform/linux-

> generic/include/odp_priority_queue_internal.h

>  create mode 100644 platform/linux-generic/odp_priority_queue.c

>

> diff --git a/example/timer/odp_timer_test.c

> b/example/timer/odp_timer_test.c

> index 6e1715d..750d785 100644

> --- a/example/timer/odp_timer_test.c

> +++ b/example/timer/odp_timer_test.c

> @@ -41,67 +41,89 @@ typedef struct {

>  /** @private Barrier for test synchronisation */

>  static odp_barrier_t test_barrier;

>

> -/** @private Timer handle*/

> -static odp_timer_t test_timer;

> +/** @private Timer pool handle */

> +static odp_timer_pool_t tp;

>

>

> +/** @private Timeout status ASCII strings */

> +static const char *const status2str[] = {

> +     "fresh", "stale", "orphaned"

> +};

> +

>  /** @private test timeout */

>  static void test_abs_timeouts(int thr, test_args_t *args)

>  {

> -     uint64_t tick;

>       uint64_t period;

>       uint64_t period_ns;

>       odp_queue_t queue;

> -     odp_buffer_t buf;

> -     int num;

> +     int remain = args->tmo_count;

> +     odp_timer_t hdl;

> +     uint64_t tick;

>

>       ODP_DBG("  [%i] test_timeouts\n", thr);

>

>       queue = odp_queue_lookup("timer_queue");

>

>       period_ns = args->period_us*ODP_TIME_USEC;

> -     period    = odp_timer_ns_to_tick(test_timer, period_ns);

> +     period    = odp_timer_ns_to_tick(tp, period_ns);

>

>       ODP_DBG("  [%i] period %"PRIu64" ticks,  %"PRIu64" ns\n", thr,

>               period, period_ns);

>

> -     tick = odp_timer_current_tick(test_timer);

> -

> -     ODP_DBG("  [%i] current tick %"PRIu64"\n", thr, tick);

> -

> -     tick += period;

> +     ODP_DBG("  [%i] current tick %"PRIu64"\n", thr,

> +             odp_timer_current_tick(tp));

>

> -     if (odp_timer_absolute_tmo(test_timer, tick, queue,

> ODP_BUFFER_INVALID)

> -         == ODP_TIMER_TMO_INVALID){

> -             ODP_DBG("Timeout request failed\n");

> +     odp_timer_t test_timer;

> +     test_timer = odp_timer_alloc(tp, queue, NULL);

> +     if (test_timer == ODP_TIMER_INVALID) {

> +             ODP_ERR("Failed to allocate timer\n");

>               return;

>       }

> +     tick = odp_timer_current_tick(tp);

> +     hdl = test_timer;

>

> -     num = args->tmo_count;

> -

> -     while (1) {

> -             odp_timeout_t tmo;

> -

> -             buf = odp_schedule_one(&queue, ODP_SCHED_WAIT);

> -

> -             tmo  = odp_timeout_from_buffer(buf);

> -             tick = odp_timeout_tick(tmo);

> -

> -             ODP_DBG("  [%i] timeout, tick %"PRIu64"\n", thr, tick);

> -

> -             odp_buffer_free(buf);

> -

> -             num--;

> -

> -             if (num == 0)

> -                     break;

> +     while (remain != 0) {

> +             odp_buffer_t buf;

> +             odp_timer_tmo_t tmo;

> +             odp_timer_tmo_status_t stat;

> +             odp_timer_set_t rc;

>

>               tick += period;

> +             rc = odp_timer_set_abs(hdl, tick);

> +             if (odp_unlikely(rc != ODP_TIMER_SET_SUCCESS)) {

> +                     ODP_ERR("odp_timer_set_abs() failed (%u)\n", rc);

> +                     abort();

> +             }

>

> -             odp_timer_absolute_tmo(test_timer, tick,

> -                                    queue, ODP_BUFFER_INVALID);

> +             /* Get the next ready buffer/timeout */

> +             buf = odp_schedule_one(&queue, ODP_SCHED_WAIT);

> +             if (odp_unlikely(odp_buffer_type(buf) !=

> +                              ODP_BUFFER_TYPE_TIMEOUT)) {

> +                     ODP_ERR("Unexpected buffer type received\n");

> +                     abort();

> +             }

> +             tmo = odp_timeout_from_buffer(buf);

> +             stat = odp_timer_tmo_status(tmo);

> +             tick = odp_timer_expiration(tmo);

> +             hdl = odp_timer_handle(tmo);

> +             ODP_DBG("  [%i] timeout, tick %"PRIu64", status %s\n",

> +                     thr, tick, status2str[stat]);

> +             /* if (stat == ODP_TMO_FRESH)  - do your thing! */

> +             if (odp_likely(stat == ODP_TMO_ORPHAN)) {

> +                     /* Some other thread freed the corresponding

> +                        timer after the timeout was already

> +                        enqueued */

> +                     /* Timeout handle is invalid, use our own timer */

> +                     hdl = test_timer;

> +             }

> +             /* Return timeout to timer manager, regardless of status */

> +             odp_timer_return_tmo(tmo);

> +             remain--;

>       }

>

> +     odp_timer_cancel(test_timer);

> +     odp_timer_free(test_timer);

> +

>       if (odp_queue_sched_type(queue) == ODP_SCHED_SYNC_ATOMIC)

>               odp_schedule_release_atomic();

>  }

> @@ -155,7 +177,6 @@ static void print_usage(void)

>       printf("Options:\n");

>       printf("  -c, --count <number>    core count, core IDs start from

> 1\n");

>       printf("  -r, --resolution <us>   timeout resolution in usec\n");

> -     printf("  -m, --min <us>          minimum timeout in usec\n");

>       printf("  -x, --max <us>          maximum timeout in usec\n");

>       printf("  -p, --period <us>       timeout period in usec\n");

>       printf("  -t, --timeouts <count>  timeout repeat count\n");

> @@ -190,14 +211,14 @@ static void parse_args(int argc, char *argv[],

> test_args_t *args)

>       /* defaults */

>       args->core_count    = 0; /* all cores */

>       args->resolution_us = 10000;

> -     args->min_us        = args->resolution_us;

> +     args->min_us        = 0;

>       args->max_us        = 10000000;

>       args->period_us     = 1000000;

>       args->tmo_count     = 30;

>

>       while (1) {

>               opt = getopt_long(argc, argv, "+c:r:m:x:p:t:h",

> -                              longopts, &long_index);

> +                               longopts, &long_index);

>

>               if (opt == -1)

>                       break;  /* No more options */

> @@ -321,10 +342,25 @@ int main(int argc, char *argv[])

>                                     ODP_BUFFER_TYPE_TIMEOUT);

>

>       if (pool == ODP_BUFFER_POOL_INVALID) {

> -             ODP_ERR("Pool create failed.\n");

> +             ODP_ERR("Buffer pool create failed.\n");

>               return -1;

>       }

>

> +     tp = odp_timer_pool_create("timer_pool", pool,

> +                                args.resolution_us*ODP_TIME_USEC,

> +                                args.min_us*ODP_TIME_USEC,

> +                                args.max_us*ODP_TIME_USEC,

> +                                num_workers, /* One timer per worker */

> +                                true,

> +                                ODP_CLOCK_CPU);

> +     if (tp == ODP_TIMER_POOL_INVALID) {

> +             ODP_ERR("Timer pool create failed.\n");

> +             return -1;

> +     }

> +     odp_timer_pool_start();

> +

> +     odp_shm_print_all();

> +

>       /*

>        * Create a queue for timer test

>        */

> @@ -340,19 +376,6 @@ int main(int argc, char *argv[])

>               return -1;

>       }

>

> -     test_timer = odp_timer_create("test_timer", pool,

> -                                   args.resolution_us*ODP_TIME_USEC,

> -                                   args.min_us*ODP_TIME_USEC,

> -                                   args.max_us*ODP_TIME_USEC);

> -

> -     if (test_timer == ODP_TIMER_INVALID) {

> -             ODP_ERR("Timer create failed.\n");

> -             return -1;

> -     }

> -

> -

> -     odp_shm_print_all();

> -

>       printf("CPU freq %"PRIu64" hz\n", odp_sys_cpu_hz());

>       printf("Cycles vs nanoseconds:\n");

>       ns = 0;

> diff --git a/platform/linux-generic/Makefile.am b/platform/linux-

> generic/Makefile.am

> index d076d50..71f923c 100644

> --- a/platform/linux-generic/Makefile.am

> +++ b/platform/linux-generic/Makefile.am

> @@ -59,6 +59,7 @@ __LIB__libodp_la_SOURCES = \

>                          odp_packet_flags.c \

>                          odp_packet_io.c \

>                          odp_packet_socket.c \

> +                        odp_priority_queue.c \

>                          odp_queue.c \

>                          odp_ring.c \

>                          odp_rwlock.c \

> diff --git a/platform/linux-generic/include/api/odp_timer.h

> b/platform/linux-generic/include/api/odp_timer.h

> index 01db839..82a1e05 100644

> --- a/platform/linux-generic/include/api/odp_timer.h

> +++ b/platform/linux-generic/include/api/odp_timer.h

> @@ -8,9 +8,193 @@

>  /**

>   * @file

>   *

> - * ODP timer

> + * ODP timer service

>   */

>

> +/** Example #1 Retransmission timer (e.g. for reliable connections)

> + @code

> +

> +//Create timer pool for reliable connections

> +#define SEC 1000000000ULL //1s expressed in nanoseconds

> +odp_timer_pool_t tcp_tpid =

> +    odp_timer_pool_create("TCP",

> +                       buffer_pool,

> +                       1000000,//resolution 1ms

> +                       0,//min tmo

> +                       7200 * SEC,//max tmo length 2hours

> +                       40000,//num_timers

> +                       true,//shared

> +                       ODP_CLOCK_CPU

> +                      );

> +if (tcp_tpid == ODP_TIMER_POOL_INVALID)

> +{

> +     //Failed to create timer pool => fatal error

> +}

> +

> +

> +//Setting up a new connection

> +//Allocate retransmission timeout (identical for supervision timeout)

> +//The user pointer points back to the connection context

> +conn->ret_tim = odp_timer_alloc(tcp_tpid, queue, conn);

> +//Check if all resources were successfully allocated

> +if (conn->ret_tim == ODP_TIMER_INVALID)

> +{

> +     //Failed to allocate all resources for connection => tear down

> +     //Destroy timeout

> +     odp_timer_free(conn->ret_tim);

> +     //Tear down connection

> +     ...

> +     return false;

> +}

> +//All necessary resources successfully allocated

> +//Compute initial retransmission length in timer ticks

> +conn->ret_len = odp_timer_ns_to_tick(tcp_tpid, 3 * SEC);//Per RFC1122

> +//Arm the timer

> +odp_timer_set_rel(conn->ret_tim, conn->ret_len);

> +return true;

> +

> +

> +//A packet for the connection has just been transmitted

> +//Reset the retransmission timer

> +odp_timer_set_rel(conn->ret_tim, conn->ret_len);

> +

> +

> +//A retransmission timeout buffer for the connection has been received

> +odp_timer_tmo_t tmo = odp_timeout_from_buffer(buf);

> +odp_timer_tmo_status_t stat = odp_timer_tmo_status(tmo);

> +//Check if timeout is fresh or stale, for stale timeouts we need to

> reset the

> +//timer

> +if (stat == ODP_TMO_FRESH) {

> +     //Fresh timeout, last transmitted packet not acked in time =>

> +       retransmit

> +     //Get connection from timeout event

> +     conn = odp_timer_get_userptr(tmo);

> +     //Retransmit last packet (e.g. TCP segment)

> +     ...

> +     //Re-arm timer using original delta value

> +     odp_timer_set_rel(conn->ret_tim, conn->ret_len);

> +} else if (stat == ODP_TMO_ORPHAN) {

> +     odp_buffer_free(buf);

> +     return;//Get out of here

> +} // else stat == ODP_TMO_STALE, do nothing

> +//Finished processing, return timeout

> +odp_timer_return_tmo(tmo);

> +

> + @endcode

> +*/

> +

> +/** Example #2 Periodic tick

> + @code

> +

> +//Create timer pool for periodic ticks

> +odp_timer_pool_t per_tpid =

> +    odp_timer_pool_create("periodic-tick",

> +                       buffer_pool,

> +                       1,//resolution 1ns

> +                       1,//minimum timeout length 1ns

> +                       1000000000,//maximum timeout length 1s

> +                       10,//num_timers

> +                       false,//not shared

> +                       ODP_CLOCK_CPU

> +                      );

> +if (per_tpid == ODP_TIMER_POOL_INVALID)

> +{

> +    //Failed to create timer pool => fatal error

> +}

> +

> +

> +//Allocate periodic timer

> +tim_1733 = odp_timer_alloc(per_tpid, queue, NULL);

> +//Check if all resources were successfully allocated

> +if (tim_1733 == ODP_TIMER_INVALID)

> +{

> +     //Failed to allocate all resources => tear down

> +     //Destroy timeout

> +     odp_timer_free(tim_1733);

> +     //Tear down other state

> +     ...

> +     return false;

> +}

> +//All necessary resources successfully allocated

> +//Compute tick period in timer ticks

> +period_1733 = odp_timer_ns_to_tick(per_tpid, 1000000000U /

> 1733U);//1733Hz

> +//Compute when next tick should expire

> +next_1733 = odp_timer_current_tick(per_tpid) + period_1733;

> +//Arm the periodic timer

> +odp_timer_set_abs(tim_1733, next_1733);

> +return true;

> +

> +

> +

> +//A periodic timer timeout has been received

> +odp_timer_tmo_t tmo = odp_timeout_from_buffer(buf);

> +//Get status of timeout

> +odp_timer_tmo_status_t stat = odp_timer_tmo_status(tmo);

> +//We expect the timeout is always fresh since we are not calling set or

> cancel

> +on active or expired timers in this example

> +assert(stat == ODP_TMO_FRESH);

> +//Do processing driven by timeout *before*

> +...

> +do {

> +     //Compute when the timer should expire next

> +     next_1733 += period_1733;

> +     //Check that this is in the future

> +     if (likely(next_1733 > odp_timer_current_tick(per_tpid)))

> +     break;//Yes, done

> +     //Else we missed a timeout

> +     //Optionally attempt some recovery and/or logging of the problem

> +     ...

> +} while (0);

> +//Re-arm periodic timer

> +odp_timer_set_abs(tim_1733, next_1733);

> +//Or do processing driven by timeout *after*

> +...

> +odp_timer_return_tmo(tmo);

> +return;

> +

> + @endcode

> +*/

> +

> +/** Example #3 Tear down of flow

> + @code

> +//ctx points to flow context data structure owned by application

> +//Free the timer, cancelling any timeout

> +odp_timer_free(ctx->timer);//Any enqueued timeout will be made invalid

> +//Continue tearing down and eventually freeing context

> +...

> +return;

> +

> +//A timeout has been received, check status

> +odp_timer_tmo_t tmo = odp_timeout_from_buffer(buf);

> +switch (odp_timer_tmo_status(tmo))

> +{

> +    case ODP_TMO_FRESH :

> +     //A flow has timed out, tear it down

> +     //Find flow context from timeout

> +     ctx = (context *)odp_timer_get_userptr(tmo);

> +     //Free the supervision timer, any enqueued timeout will remain

> +     odp_timer_free(ctx->tim);

> +     //Free other flow related resources

> +     ...

> +     //Free the timeout buffer

> +     odp_buffer_free(buf);

> +     //Flow torn down

> +     break;

> +    case ODP_TMO_STALE :

> +     //A stale timeout was received, return timeout and update timer

> +     odp_timer_return_tmo(tmo);

> +     break;

> +    case ODP_TMO_ORPHAN :

> +     //Orphaned timeout (from previously torn down flow)

> +     //No corresponding timer or flow context

> +     //Free the timeout buffer

> +     odp_buffer_free(buf);

> +     break;

> +}

> +

> + @endcode

> +*/

> +

>  #ifndef ODP_TIMER_H_

>  #define ODP_TIMER_H_

>

> @@ -18,144 +202,408 @@

>  extern "C" {

>  #endif

>

> +#include <stdlib.h>

>  #include <odp_std_types.h>

>  #include <odp_buffer.h>

>  #include <odp_buffer_pool.h>

>  #include <odp_queue.h>

>

> +struct odp_timer_pool_s; /**< Forward declaration */

> +

> +/**

> +* ODP timer pool handle (platform dependent)

> +*/

> +typedef struct odp_timer_pool_s *odp_timer_pool_t;

> +

> +/**

> + * Invalid timer pool handle (platform dependent).

> + */

> +#define ODP_TIMER_POOL_INVALID NULL

>

>  /**

> - * ODP timer handle

> + * Clock sources for timers in timer pool.

>   */

> -typedef uint32_t odp_timer_t;

> +typedef enum odp_timer_clk_src_e {

> +     /** Use CPU clock as clock source for timers */

> +     ODP_CLOCK_CPU,

> +     /** Use external clock as clock source for timers */

> +     ODP_CLOCK_EXT

> +     /* Platform dependent which other clock sources exist */

> +} odp_timer_clk_src_t;

>

> -/** Invalid timer */

> -#define ODP_TIMER_INVALID 0

> +struct odp_timer_s; /**< Forward declaration */

>

> +/**

> +* ODP timer handle (platform dependent).

> +*/

> +typedef struct odp_timer_s *odp_timer_t;

>

>  /**

> - * ODP timeout handle

> + * Invalid timer handle (platform dependent).

>   */

> -typedef odp_buffer_t odp_timer_tmo_t;

> -

> -/** Invalid timeout */

> -#define ODP_TIMER_TMO_INVALID 0

> +#define ODP_TIMER_INVALID NULL

>

> +/**

> + * Return values of timer set calls.

> + */

> +typedef enum odp_timer_set_e {

> +     /** Timer set operation successful */

> +     ODP_TIMER_SET_SUCCESS,

> +     /** Timer set operation failed, expiration too early */

> +     ODP_TIMER_SET_TOOEARLY,

> +     /** Timer set operation failed, expiration too late */

> +     ODP_TIMER_SET_TOOLATE

> +} odp_timer_set_t;

>

>  /**

> - * Timeout notification

> + * Timeout event handle.

>   */

> -typedef odp_buffer_t odp_timeout_t;

> +typedef odp_buffer_t odp_timer_tmo_t;

>

> +/**

> + * Status of a timeout event.

> + */

> +typedef enum odp_timer_tmo_status_e {

> +     /** Timeout is fresh, process it and return timeout */

> +     ODP_TMO_FRESH,

> +     /** Timer reset or cancelled, just return timeout  */

> +     ODP_TMO_STALE,

> +     /** Timer deleted, return or free timeout */

> +     ODP_TMO_ORPHAN

> +} odp_timer_tmo_status_t;

>

>  /**

> - * Create a timer

> + * Create a timer pool

>   *

> - * Creates a new timer with requested properties.

> + * Create a new timer pool.

>   *

>   * @param name       Name

> - * @param pool       Buffer pool for allocating timeout notifications

> + * @param buf_pool   Buffer pool for allocating timeouts (and only

> timeouts)

>   * @param resolution Timeout resolution in nanoseconds

> - * @param min_tmo    Minimum timeout duration in nanoseconds

> - * @param max_tmo    Maximum timeout duration in nanoseconds

> + * @param min_tmo    Minimum relative timeout in nanoseconds

> + * @param max_tmo    Maximum relative timeout in nanoseconds

> + * @param num_timers Number of supported timers (minimum)

> + * @param shared     Shared or private timer pool.

> + *              Operations on shared timers will include the necessary

> + *              mutual exclusion, operations on private timers may not

> + *              (mutual exclusion is the responsibility of the caller).

> + * @param clk_src    Clock source to use

>   *

> - * @return Timer handle if successful, otherwise ODP_TIMER_INVALID

> + * @return Timer pool handle if successful, otherwise

> ODP_TIMER_POOL_INVALID

> + * and errno set

>   */

> -odp_timer_t odp_timer_create(const char *name, odp_buffer_pool_t pool,

> -                          uint64_t resolution, uint64_t min_tmo,

> -                          uint64_t max_tmo);

> +odp_timer_pool_t

> +odp_timer_pool_create(const char *name,

> +                   odp_buffer_pool_t buf_pool,

> +                   uint64_t resolution,

> +                   uint64_t min_tmo,

> +                   uint64_t max_tmo,

> +                   uint32_t num_timers,

> +                   bool shared,

> +                   odp_timer_clk_src_t clk_src);

> +

> +/**

> + * Start a timer pool

> + *

> + * Start all created timer pools, enabling the allocation of timers.

> + * The purpose of this call is to coordinate the creation of multiple

> timer

> + * pools that may use the same underlying HW resources.

> + * This function may be called multiple times.

> + */

> +void odp_timer_pool_start(void);

> +

> +/**

> + * Destroy a timer pool

> + *

> + * Destroy a timer pool, freeing all resources.

> + * All timers must have been freed.

> + *

> + * @param tpid  Timer pool identifier

> + */

> +void odp_timer_pool_destroy(odp_timer_pool_t tpid);

>

>  /**

>   * Convert timer ticks to nanoseconds

>   *

> - * @param timer Timer

> + * @param tpid  Timer pool identifier

>   * @param ticks Timer ticks

>   *

>   * @return Nanoseconds

>   */

> -uint64_t odp_timer_tick_to_ns(odp_timer_t timer, uint64_t ticks);

> +uint64_t odp_timer_tick_to_ns(odp_timer_pool_t tpid, uint64_t ticks);

>

>  /**

>   * Convert nanoseconds to timer ticks

>   *

> - * @param timer Timer

> + * @param tpid  Timer pool identifier

>   * @param ns    Nanoseconds

>   *

>   * @return Timer ticks

>   */

> -uint64_t odp_timer_ns_to_tick(odp_timer_t timer, uint64_t ns);

> +uint64_t odp_timer_ns_to_tick(odp_timer_pool_t tpid, uint64_t ns);

>

>  /**

> - * Timer resolution in nanoseconds

> + * Current tick value

>   *

> - * @param timer Timer

> + * @param tpid Timer pool identifier

>   *

> - * @return Resolution in nanoseconds

> + * @return Current time in timer ticks

> + */

> +uint64_t odp_timer_current_tick(odp_timer_pool_t tpid);

> +

> +/**

> + * ODP timer configurations

>   */

> -uint64_t odp_timer_resolution(odp_timer_t timer);

> +

> +typedef enum odp_timer_pool_conf_e {

> +     ODP_TIMER_NAME,      /**< Return name of timer pool */

> +     ODP_TIMER_RESOLUTION,/**< Return the timer resolution (in ns) */

> +     ODP_TIMER_MIN_TICKS, /**< Return the min supported rel timeout

> (ticks)*/

> +     ODP_TIMER_MAX_TICKS, /**< Return the max supported rel timeout

> (ticks)*/

> +     ODP_TIMER_NUM_TIMERS,/**< Return number of supported timers */

> +     ODP_TIMER_SHARED     /**< Return shared flag */

> +} odp_timer_pool_conf_t;

>

>  /**

> - * Maximum timeout in timer ticks

> + * Query different timer pool configurations, e.g.

> + *  Timer resolution in nanoseconds

> + *  Maximum timeout in timer ticks

> + *  Number of supported timers

> + *  Shared or private timer pool

>   *

> - * @param timer Timer

> + * @param tpid Timer pool identifier

> + * @param item Configuration item being queried

>   *

> - * @return Maximum timeout in timer ticks

> + * @return the requested piece of information or 0 for unknown item.

>   */

> -uint64_t odp_timer_maximum_tmo(odp_timer_t timer);

> +uintptr_t odp_timer_pool_query_conf(odp_timer_pool_t tpid,

> +                                 odp_timer_pool_conf_t item);

>

>  /**

> - * Current timer tick

> + * Allocate a timer

>   *

> - * @param timer Timer

> + * Create a timer (allocating all necessary resources e.g. timeout

> event) from

> + * the timer pool.

>   *

> - * @return Current time in timer ticks

> + * @param tpid     Timer pool identifier

> + * @param queue    Destination queue for timeout notifications

> + * @param user_ptr User defined pointer or NULL (copied to timeouts)

> + *

> + * @return Timer handle if successful, otherwise ODP_TIMER_INVALID and

> + *      errno set.

>   */

> -uint64_t odp_timer_current_tick(odp_timer_t timer);

> +odp_timer_t odp_timer_alloc(odp_timer_pool_t tpid,

> +                         odp_queue_t queue,

> +                         void *user_ptr);

>

>  /**

> - * Request timeout with an absolute timer tick

> + * Free a timer

> + *

> + * Free (destroy) a timer, freeing all associated resources (e.g.

> default

> + * timeout event). An expired and enqueued timeout event will not be

> freed.

> + * It is the responsibility of the application to free this timeout when

> it

> + * is received.

>   *

> - * When tick reaches tmo_tick, the timer enqueues the timeout

> notification into

> - * the destination queue.

> + * @param tim      Timer handle

> + */

> +void odp_timer_free(odp_timer_t tim);

> +

> +/**

> + * Set a timer (absolute time) with a user-defined timeout buffer

>   *

> - * @param timer    Timer

> - * @param tmo_tick Absolute timer tick value which triggers the timeout

> - * @param queue    Destination queue for the timeout notification

> - * @param buf      User defined timeout notification buffer. When

> - *                 ODP_BUFFER_INVALID, default timeout notification is

> used.

> + * Set (arm) the timer to expire at specific time. The user-defined

> + * buffer will be enqueued when the timer expires.

> + * Arming may fail (if the timer is in state EXPIRED), an earlier

> timeout

> + * will then be received. odp_timer_tmo_status() must be used to check

> if

> + * the received timeout is valid.

>   *

> - * @return Timeout handle if successful, otherwise ODP_TIMER_TMO_INVALID

> + * Note: any invalid parameters will be treated as programming errors

> and will

> + * cause the application to abort.

> + *

> + * @param tim      Timer

> + * @param abs_tck  Expiration time in absolute timer ticks

> + * @param user_buf The buffer to use as timeout event

> + *

> + * @return Success or failure code

>   */

> -odp_timer_tmo_t odp_timer_absolute_tmo(odp_timer_t timer, uint64_t

> tmo_tick,

> -                                    odp_queue_t queue, odp_buffer_t buf);

> +odp_timer_set_t odp_timer_set_abs_w_buf(odp_timer_t tim,

> +                                     uint64_t abs_tck,

> +                                     odp_buffer_t user_buf);

>

>  /**

> - * Cancel a timeout

> + * Set a timer with an absolute expiration time

> + *

> + * Set (arm) the timer to expire at a specific time.

> + * Arming may fail (if the timer is in state EXPIRED), an earlier

> timeout

> + * will then be received. odp_timer_tmo_status() must be used to check

> if

> + * the received timeout is valid.

> + *

> + * Note: any invalid parameters will be treated as programming errors

> and will

> + * cause the application to abort.

>   *

> - * @param timer Timer

> - * @param tmo   Timeout to cancel

> + * @param tim     Timer

> + * @param abs_tck Expiration time in absolute timer ticks

>   *

> - * @return 0 if successful

> + * @return Success or failure code

>   */

> -int odp_timer_cancel_tmo(odp_timer_t timer, odp_timer_tmo_t tmo);

> +odp_timer_set_t odp_timer_set_abs(odp_timer_t tim, uint64_t abs_tck);

>

>  /**

> - * Convert buffer handle to timeout handle

> + * Set a timer with a relative expiration time and user-defined buffer.

>   *

> - * @param buf  Buffer handle

> + * Set (arm) the timer to expire at a relative future time.

> + * Arming may fail (if the timer is in state EXPIRED),

> + * an earlier timeout will then be received. odp_timer_tmo_status() must

> + * be used to check if the received timeout is valid.

>   *

> - * @return Timeout buffer handle

> + * Note: any invalid parameters will be treated as programming errors

> and will

> + * cause the application to abort.

> + *

> + * @param tim      Timer

> + * @param rel_tck  Expiration time in timer ticks relative to current

> time of

> + *              the timer pool the timer belongs to

> + * @param user_buf The buffer to use as timeout event

> + *

> + * @return Success or failure code

>   */

> -odp_timeout_t odp_timeout_from_buffer(odp_buffer_t buf);

> +odp_timer_set_t odp_timer_set_rel_w_buf(odp_timer_t tim,

> +                                     uint64_t rel_tck,

> +                                     odp_buffer_t user_buf);

> +/**

> + * Set a timer with a relative expiration time

> + *

> + * Set (arm) the timer to expire at a relative future time.

> + * Arming may fail (if the timer is in state EXPIRED),

> + * an earlier timeout will then be received. odp_timer_tmo_status() must

> + * be used to check if the received timeout is valid.

> + *

> + * Note: any invalid parameters will be treated as programming errors

> and will

> + * cause the application to abort.

> + *

> + * @param tim     Timer

> + * @param rel_tck Expiration time in timer ticks relative to current

> time of

> + *             the timer pool the timer belongs to

> + *

> + * @return Success or failure code

> + */

> +odp_timer_set_t odp_timer_set_rel(odp_timer_t tim, uint64_t rel_tck);

>

>  /**

> - * Return absolute timeout tick

> + * Cancel a timer

> + *

> + * Cancel a timer, preventing future expiration and delivery.

> + *

> + * A timer that has already expired and been enqueued for delivery may

> be

> + * impossible to cancel and will instead be delivered to the destination

> queue.

> + * Use odp_timer_tmo_status() to check whether a received timeout is

> fresh or

> + * stale (cancelled). Stale timeouts will automatically be recycled.

> + *

> + * Note: any invalid parameters will be treated as programming errors

> and will

> + * cause the application to abort.

> + *

> + * @param tim    Timer handle

> + */

> +void odp_timer_cancel(odp_timer_t tim);

> +

> +/**

> + * Translate from buffer to timeout

> + *

> + * Return the timeout handle that corresponds to the specified buffer

> handle.

> + * The buffer must be of type ODP_BUFFER_TYPE_TIMEOUT.

> + *

> + * @param buf   Buffer handle to translate.

> + *

> + * @return      The corresponding timeout handle.

> + */

> +static inline odp_timer_tmo_t odp_timeout_from_buffer(odp_buffer_t buf)

> +{

> +     if (odp_unlikely(odp_buffer_type(buf) != ODP_BUFFER_TYPE_TIMEOUT))

> {

> +             ODP_ERR("Buffer type %u not timeout\n", buf);

> +             abort();

> +     }

> +     /* In this implementation, timeout == buffer */

> +     return (odp_timer_tmo_t)buf;

> +}

> +

> +/**

> + * Translate from timeout to buffer

> + *

> + * Return the buffer handle that corresponds to the specified timeout

> handle.

> + *

> + * @param tmo   Timeout handle to translate.

> + *

> + * @return      The corresponding buffer handle.

> + */

> +static inline odp_buffer_t odp_buffer_from_timeout(odp_timer_tmo_t tmo)

> +{

> +     /* In this implementation, buffer == timeout */

> +     return (odp_buffer_t)tmo;

> +}

> +

> +/**

> + * Return timeout to timer

> + *

> + * Return a received timeout for reuse with the parent timer.

> + * Note: odp_timer_return_tmo() must be called on all received timeouts!

> + * (Excluding user-defined timeout buffers).

> + * The timeout must not be accessed after this call; the semantics are

> + * equivalent to freeing the buffer.

> + *

> + * @param tmo    Timeout

> + */

> +void odp_timer_return_tmo(odp_timer_tmo_t tmo);

> +

> +/**

> + * Return fresh/stale/orphan status of timeout.

> + *

> + * Check a received timeout for orphan status (i.e. parent timer freed) and

> + * staleness (i.e. parent timer has been reset or cancelled after the

> timeout

> + * expired and was enqueued).

> + * If the timeout is fresh, it should be processed.

> + * If the timeout is stale or orphaned, it should be ignored.

> + * All timeouts must be returned using the odp_timer_return_tmo() call.

> + *

> + * @param tmo    Timeout

> + *

> + * @return One of ODP_TMO_FRESH, ODP_TMO_STALE or ODP_TMO_ORPHAN.

> + */

> +odp_timer_tmo_status_t odp_timer_tmo_status(odp_timer_tmo_t tmo);
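
To make the fresh/stale/orphan handling concrete, a sketch of a consumer
loop body (assumes 'queue' is the destination queue passed to
odp_timer_alloc() and that buffers are fetched with odp_queue_deq() from
the queue API):

    odp_buffer_t buf = odp_queue_deq(queue);
    if (buf != ODP_BUFFER_INVALID &&
        odp_buffer_type(buf) == ODP_BUFFER_TYPE_TIMEOUT) {
        odp_timer_tmo_t tmo = odp_timeout_from_buffer(buf);
        if (odp_timer_tmo_status(tmo) == ODP_TMO_FRESH) {
            void *ctx = odp_timer_userptr(tmo);
            /* ... process the expiration using ctx ... */
        }
        /* Stale/orphaned timeouts are ignored but still returned */
        odp_timer_return_tmo(tmo);
    }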

> +

> +/**

> + * Get timer handle

> + *

> + * Return the handle of the parent timer.

> + *

> + * @param tmo   Timeout

> + *

> + * @return Timer handle or ODP_TIMER_INVALID for orphaned timeouts.

> + *         Note that the parent timer could be freed by some other

> thread

> + *         at any time and thus the timeout becomes orphaned.

> + */

> +odp_timer_t odp_timer_handle(odp_timer_tmo_t tmo);

> +

> +/**

> + * Get expiration time

> + *

> + * Return the (requested) expiration time of the timeout.

> + *

> + * @param tmo   Timeout

> + *

> + * @return Expiration time

> + */

> +uint64_t odp_timer_expiration(odp_timer_tmo_t tmo);

> +

> +/**

> + * Get user pointer

> + *

> + * Return the user pointer of the timer associated with the timeout.

> + * The user pointer is often used to point to some associated context.

>   *

> - * @param tmo Timeout buffer handle

> + * @param tmo   Timeout

>   *

> - * @return Absolute timeout tick

> + * @return User pointer

>   */

> -uint64_t odp_timeout_tick(odp_timeout_t tmo);

> +void *odp_timer_userptr(odp_timer_tmo_t tmo);

>

>  #ifdef __cplusplus

>  }

> diff --git a/platform/linux-generic/include/odp_priority_queue_internal.h

> b/platform/linux-generic/include/odp_priority_queue_internal.h

> new file mode 100644

> index 0000000..7d7f3a2

> --- /dev/null

> +++ b/platform/linux-generic/include/odp_priority_queue_internal.h

> @@ -0,0 +1,108 @@

> +#ifndef _PRIORITY_QUEUE_H

> +#define _PRIORITY_QUEUE_H

> +

> +#include <assert.h>

> +#include <stddef.h>

> +#include <stdint.h>

> +#include <stdbool.h>

> +#include <odp_align.h>

> +

> +#define INVALID_INDEX ~0U

> +#define INVALID_PRIORITY ((pq_priority_t)~0ULL)

> +

> +typedef uint64_t pq_priority_t;

> +

> +struct heap_node;

> +

> +typedef struct priority_queue {

> +     uint32_t max_elems;/* Max number of elements in heap */

> +     /* Number of registered elements (active + inactive) */

> +     uint32_t reg_elems;

> +     uint32_t num_elems;/* Number of active elements */

> +     struct heap_node *heap;

> +     struct heap_node *org_ptr;

> +} priority_queue ODP_ALIGNED(sizeof(uint64_t));

> +

> +/* The user gets a pointer to this structure */

> +typedef struct {

> +     /* Set when pq_element registered with priority queue */

> +     priority_queue *pq;

> +     uint32_t index;/* Index into heap array */

> +     pq_priority_t prio;

> +} pq_element;

> +

> +/*** Operations on pq_element ***/

> +

> +static inline void pq_element_con(pq_element *this)

> +{

> +     this->pq = NULL;

> +     this->index = INVALID_INDEX;

> +     this->prio = 0U;

> +}

> +

> +static inline void pq_element_des(pq_element *this)

> +{

> +     (void)this;

> +     assert(this->index == INVALID_INDEX);

> +}

> +

> +static inline priority_queue *get_pq(const pq_element *this)

> +{

> +     return this->pq;

> +}

> +

> +static inline pq_priority_t get_prio(const pq_element *this)

> +{

> +     return this->prio;

> +}

> +

> +static inline uint32_t get_index(const pq_element *this)

> +{

> +     return this->index;

> +}

> +

> +static inline bool is_active(const pq_element *this)

> +{

> +     return this->index != INVALID_INDEX;

> +}

> +

> +/*** Operations on priority_queue ***/

> +

> +extern uint32_t pq_smallest_child(priority_queue *, uint32_t,

> pq_priority_t);

> +extern void pq_bubble_down(priority_queue *, pq_element *);

> +extern void pq_bubble_up(priority_queue *, pq_element *);

> +

> +static inline bool valid_index(priority_queue *this, uint32_t idx)

> +{

> +     return idx < this->num_elems;

> +}

> +

> +extern void priority_queue_con(priority_queue *, uint32_t _max_elems);

> +extern void priority_queue_des(priority_queue *);

> +

> +/* Register pq_element with priority queue */

> +/* Return false if priority queue full */

> +extern bool pq_register_element(priority_queue *, pq_element *);

> +

> +/* Activate and add pq_element to priority queue */

> +/* Element must be disarmed */

> +extern void pq_activate_element(priority_queue *, pq_element *,

> pq_priority_t);

> +

> +/* Reset (increase) priority for pq_element */

> +/* Element may be active or inactive (released) */

> +extern void pq_reset_element(priority_queue *, pq_element *,

> pq_priority_t);

> +

> +/* Deactivate and remove element from priority queue */

> +/* Element may be active or inactive (released) */

> +extern void pq_deactivate_element(priority_queue *, pq_element *);

> +

> +/* Unregister pq_element */

> +extern void pq_unregister_element(priority_queue *, pq_element *);

> +

> +/* Return priority of first element (lowest numerical value) */

> +extern pq_priority_t pq_first_priority(const priority_queue *);

> +

> +/* Deactivate and return first element if its prio is <= threshold */

> +extern pq_element *pq_release_element(priority_queue *, pq_priority_t

> thresh);

> +

> +#endif /* _PRIORITY_QUEUE_H */
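
For orientation, roughly how odp_timer.c drives this internal API (a
sketch only; 'handle_expired' is a placeholder for the caller's own code):

    priority_queue pq;
    pq_element e;

    priority_queue_con(&pq, 16);          /* room for 16 elements */
    pq_element_con(&e);
    if (!pq_register_element(&pq, &e))    /* reserve a slot */
        ODP_ABORT("priority queue full\n");
    pq_activate_element(&pq, &e, 42);     /* insert with priority 42 */

    /* Pop every element whose priority is <= the threshold (here 100) */
    pq_element *exp;
    while ((exp = pq_release_element(&pq, 100)) != NULL)
        handle_expired(exp);  /* odp_timer.c casts this back to the timer */

    pq_unregister_element(&pq, &e);
    pq_element_des(&e);
    priority_queue_des(&pq);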

> diff --git a/platform/linux-generic/include/odp_timer_internal.h

> b/platform/linux-generic/include/odp_timer_internal.h

> index ad28f53..461f28c 100644

> --- a/platform/linux-generic/include/odp_timer_internal.h

> +++ b/platform/linux-generic/include/odp_timer_internal.h

> @@ -1,4 +1,4 @@

> -/* Copyright (c) 2013, Linaro Limited

> +/* Copyright (c) 2014, Linaro Limited

>   * All rights reserved.

>   *

>   * SPDX-License-Identifier:     BSD-3-Clause

> @@ -8,72 +8,51 @@

>  /**

>   * @file

>   *

> - * ODP timer timeout descriptor - implementation internal

> + * ODP timeout descriptor - implementation internal

>   */

>

>  #ifndef ODP_TIMER_INTERNAL_H_

>  #define ODP_TIMER_INTERNAL_H_

>

> -#ifdef __cplusplus

> -extern "C" {

> -#endif

> -

> -#include <odp_std_types.h>

> -#include <odp_queue.h>

> -#include <odp_buffer.h>

> +#include <odp_align.h>

> +#include <odp_debug.h>

>  #include <odp_buffer_internal.h>

>  #include <odp_buffer_pool_internal.h>

>  #include <odp_timer.h>

>

> -struct timeout_t;

> -

> -typedef struct timeout_t {

> -     struct timeout_t *next;

> -     int               timer_id;

> -     int               tick;

> -     uint64_t          tmo_tick;

> -     odp_queue_t       queue;

> -     odp_buffer_t      buf;

> -     odp_buffer_t      tmo_buf;

> -} timeout_t;

> -

> -

> -struct odp_timeout_hdr_t;

> -

>  /**

> - * Timeout notification header

> + * Internal Timeout header

>   */

> -typedef struct odp_timeout_hdr_t {

> +typedef struct {

> +     /* common buffer header */

>       odp_buffer_hdr_t buf_hdr;

>

> -     timeout_t meta;

> -

> -     uint8_t buf_data[];

> +     /* Requested expiration time */

> +     uint64_t expiration;

> +     /* User ptr inherited from parent timer */

> +     void *user_ptr;

> +     /* Parent timer */

> +     odp_timer_t timer;

> +     /* Tag inherited from parent timer at time of expiration */

> +     uint32_t tag;

> +     /* Gen-cnt inherited from parent timer at time of creation */

> +     uint16_t gencnt;

> +     uint16_t pad;

> +     uint8_t buf_data[0];

>  } odp_timeout_hdr_t;

>

> -

> -

>  ODP_STATIC_ASSERT(sizeof(odp_timeout_hdr_t) ==

> -        ODP_OFFSETOF(odp_timeout_hdr_t, buf_data),

> -        "ODP_TIMEOUT_HDR_T__SIZE_ERR");

> -

> +               ODP_OFFSETOF(odp_timeout_hdr_t, buf_data),

> +               "sizeof(odp_timeout_hdr_t) ==

> ODP_OFFSETOF(odp_timeout_hdr_t, buf_data)");

>  ODP_STATIC_ASSERT(sizeof(odp_timeout_hdr_t) % sizeof(uint64_t) == 0,

> -        "ODP_TIMEOUT_HDR_T__SIZE_ERR2");

> -

> +               "sizeof(odp_timeout_hdr_t) % sizeof(uint64_t) == 0");

>

>  /**

> - * Return timeout header

> + * Return the timeout header

>   */

> -static inline odp_timeout_hdr_t *odp_timeout_hdr(odp_timeout_t tmo)

> +static inline odp_timeout_hdr_t *odp_timeout_hdr(odp_buffer_t buf)

>  {

> -     odp_buffer_hdr_t *buf_hdr = odp_buf_to_hdr((odp_buffer_t)tmo);

> -     return (odp_timeout_hdr_t *)(uintptr_t)buf_hdr;

> -}

> -

> -

> -

> -#ifdef __cplusplus

> +     return (odp_timeout_hdr_t *)odp_buf_to_hdr(buf);

>  }

> -#endif

>

>  #endif

> diff --git a/platform/linux-generic/odp_priority_queue.c

> b/platform/linux-generic/odp_priority_queue.c

> new file mode 100644

> index 0000000..b72c26f

> --- /dev/null

> +++ b/platform/linux-generic/odp_priority_queue.c

> @@ -0,0 +1,283 @@

> +#define NDEBUG /* Enabled by default by ODP build system */

> +#include <assert.h>

> +#include <unistd.h>

> +#include <stdlib.h>

> +#include <string.h>

> +#include <strings.h>

> +#include <odp_hints.h>

> +#include <odp_align.h>

> +#include <odp_debug.h>

> +

> +#include "odp_priority_queue_internal.h"

> +

> +

> +#define NUM_CHILDREN 4

> +#define CHILD(n) (NUM_CHILDREN * (n) + 1)

> +#define PARENT(n) (((n) - 1) / NUM_CHILDREN)
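
A quick sanity check of the 4-ary index math (children of node n occupy
slots 4n+1 .. 4n+4; with <assert.h>):

    assert(CHILD(0) == 1);                      /* root's children: 1..4     */
    assert(CHILD(3) == 13);                     /* node 3's children: 13..16 */
    assert(PARENT(13) == 3 && PARENT(16) == 3);
    assert(PARENT(CHILD(7)) == 7);              /* round trip */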

> +

> +/* Internal nodes in the array */

> +typedef struct heap_node {

> +     pq_element *elem;

> +     /* Copy of elem->prio so we avoid unnecessary dereferencing */

> +     pq_priority_t prio;

> +} heap_node;

> +

> +static void pq_assert_heap(priority_queue *this);

> +

> +#define ALIGNMENT(p) (1U << ((unsigned)ffs((int)p) - 1U))

> +

> +void priority_queue_con(priority_queue *this, uint32_t _max_elems)

> +{

> +     this->max_elems = _max_elems;

> +     this->reg_elems = 0;

> +     this->num_elems = 0;

> +     this->org_ptr = malloc((_max_elems + 64 / sizeof(heap_node)) *

> +                            sizeof(heap_node));

> +     if (odp_unlikely(this->org_ptr == NULL)) {

> +             ODP_ERR("malloc failed\n");

> +             abort();

> +     }

> +     this->heap = this->org_ptr;

> +     assert((size_t)&this->heap[1] % 8 == 0);

> +     /* Increment base address until first child (index 1) is cache line

> */

> +     /* aligned and thus all children (e.g. index 1-4) stored in the */

> +     /* same cache line. We are not interested in the alignment of */

> +     /* heap[0] as this is a lone node */

> +     while ((size_t)&this->heap[1] % ODP_CACHE_LINE_SIZE != 0) {

> +             /* Cast to ptr to struct member with the greatest alignment

> */

> +             /* requirement */

> +             this->heap = (heap_node *)((pq_priority_t *)this->heap + 1);

> +     }

> +     pq_assert_heap(this);

> +}

> +

> +void priority_queue_des(priority_queue *this)

> +{

> +     pq_assert_heap(this);

> +     free(this->org_ptr);

> +}

> +

> +#ifndef NDEBUG

> +static uint32_t

> +pq_assert_elem(priority_queue *this, uint32_t index, bool recurse)

> +{

> +     uint32_t num = 1;

> +     const pq_element *elem = this->heap[index].elem;

> +     assert(elem->index == index);

> +     assert(elem->prio == this->heap[index].prio);

> +     uint32_t child = CHILD(index);

> +     uint32_t i;

> +     for (i = 0; i < NUM_CHILDREN; i++, child++) {

> +             if (valid_index(this, child)) {

> +                     assert(this->heap[child].elem != NULL);

> +                     assert(this->heap[child].prio >= elem->prio);

> +                     if (recurse)

> +                             num += pq_assert_elem(this, child, recurse);

> +             }

> +     }

> +     return num;

> +}

> +#endif

> +

> +static void

> +pq_assert_heap(priority_queue *this)

> +{

> +     (void)this;

> +#ifndef NDEBUG

> +     uint32_t num = 0;

> +     if (odp_likely(this->num_elems != 0)) {

> +             assert(this->heap[0].elem != NULL);

> +             num += pq_assert_elem(this, 0, true);

> +     }

> +     assert(num == this->num_elems);

> +     unsigned i;

> +     for (i = 0; i < this->num_elems; i++) {

> +             assert(this->heap[i].elem != NULL);

> +             assert(this->heap[i].prio != INVALID_PRIORITY);

> +     }

> +#endif

> +}

> +

> +/* Bubble up to proper position */

> +void

> +pq_bubble_up(priority_queue *this, pq_element *elem)

> +{

> +     assert(this->heap[elem->index].elem == elem);

> +     assert(this->heap[elem->index].prio == elem->prio);

> +     uint32_t current = elem->index;

> +     pq_priority_t prio = elem->prio;

> +     assert(current == 0 || this->heap[PARENT(current)].elem != NULL);

> +     /* Move up into proper position */

> +     while (current != 0 && this->heap[PARENT(current)].prio > prio) {

> +             uint32_t parent = PARENT(current);

> +             assert(this->heap[parent].elem != NULL);

> +             /* Swap current with parent */

> +             /* 1) Move parent down */

> +             this->heap[current].elem = this->heap[parent].elem;

> +             this->heap[current].prio = this->heap[parent].prio;

> +             this->heap[current].elem->index = current;

> +             /* 2) Move current up to parent */

> +             this->heap[parent].elem = elem;

> +             this->heap[parent].prio = prio;

> +             this->heap[parent].elem->index = parent;

> +             /* Continue moving elem until it is in the right place */

> +             current = parent;

> +     }

> +     pq_assert_heap(this);

> +}

> +

> +/* Find the smallest child that is smaller than the specified priority

> */

> +/* Very hot function, can we decrease the number of cache misses? */

> +uint32_t pq_smallest_child(priority_queue *this,

> +                        uint32_t index,

> +                        pq_priority_t val)

> +{

> +     uint32_t smallest = index;

> +     uint32_t child = CHILD(index);

> +#if NUM_CHILDREN == 4

> +     /* Unroll loop when all children exist */

> +     if (odp_likely(valid_index(this, child + 3))) {

> +             if (this->heap[child + 0].prio < val)

> +                     val = this->heap[smallest = child + 0].prio;

> +             if (this->heap[child + 1].prio < val)

> +                     val = this->heap[smallest = child + 1].prio;

> +             if (this->heap[child + 2].prio < val)

> +                     val = this->heap[smallest = child + 2].prio;

> +             if (this->heap[child + 3].prio < val)

> +                     (void)this->heap[smallest = child + 3].prio;

> +             return smallest;

> +     }

> +#endif

> +     uint32_t i;

> +     for (i = 0; i < NUM_CHILDREN; i++) {

> +             if (odp_unlikely(!valid_index(this, child + i)))

> +                     break;

> +             if (this->heap[child + i].prio < val) {

> +                     smallest = child + i;

> +                     val = this->heap[smallest].prio;

> +             }

> +     }

> +     return smallest;

> +}

> +

> +/* Very hot function, can it be optimised? */

> +void

> +pq_bubble_down(priority_queue *this, pq_element *elem)

> +{

> +     assert(this->heap[elem->index].elem == elem);

> +     assert(this->heap[elem->index].prio == elem->prio);

> +     uint32_t current = elem->index;

> +     pq_priority_t prio = elem->prio;

> +     for (;;) {

> +             uint32_t child = pq_smallest_child(this, current, prio);

> +             if (current == child) {

> +                     /* No smaller child, we are done */

> +                     pq_assert_heap(this);

> +                     return;

> +             }

> +             /* Element larger than smaller child, must move down */

> +             assert(this->heap[child].elem != NULL);

> +             /* 1) Move child up to current */

> +             this->heap[current].elem = this->heap[child].elem;

> +             this->heap[current].prio = this->heap[child].prio;

> +             /* 2) Move current down to child */

> +             this->heap[child].elem = elem;

> +             this->heap[child].prio = prio;

> +             this->heap[child].elem->index = child;

> +

> +             this->heap[current].elem->index = current; /* cache misses!

> */

> +             /* Continue moving element until it is in the right place */

> +             current = child;

> +     }

> +}

> +

> +bool

> +pq_register_element(priority_queue *this, pq_element *elem)

> +{

> +     if (odp_likely(this->reg_elems < this->max_elems)) {

> +             elem->pq = this;

> +             this->reg_elems++;

> +             return true;

> +     }

> +     return false;

> +}

> +

> +void

> +pq_unregister_element(priority_queue *this, pq_element *elem)

> +{

> +     assert(elem->pq == this);

> +     if (is_active(elem))

> +             pq_deactivate_element(this, elem);

> +     this->reg_elems--;

> +}

> +

> +void

> +pq_activate_element(priority_queue *this, pq_element *elem,

> pq_priority_t prio)

> +{

> +     assert(elem->index == INVALID_INDEX);

> +     /* Insert element at end */

> +     uint32_t index = this->num_elems++;

> +     this->heap[index].elem = elem;

> +     this->heap[index].prio = prio;

> +     elem->index = index;

> +     elem->prio = prio;

> +     pq_bubble_up(this, elem);

> +}

> +

> +void

> +pq_deactivate_element(priority_queue *this, pq_element *elem)

> +{

> +     assert(elem->pq == this);

> +     if (odp_likely(is_active(elem))) {

> +             /* Swap element with last element */

> +             uint32_t current = elem->index;

> +             uint32_t last = --this->num_elems;

> +             if (odp_likely(last != current)) {

> +                     /* Move last element to current */

> +                     this->heap[current].elem = this->heap[last].elem;

> +                     this->heap[current].prio = this->heap[last].prio;

> +                     this->heap[current].elem->index = current;

> +                     /* Move old 'last' element to its proper

> place*/

> +                     if (this->heap[current].prio < elem->prio)

> +                             pq_bubble_up(this, this->heap[current].elem);

> +                     else

> +                             pq_bubble_down(this, this->heap[current].elem);

> +             }

> +             elem->index = INVALID_INDEX;

> +             pq_assert_heap(this);

> +     }

> +}

> +

> +void

> +pq_reset_element(priority_queue *this, pq_element *elem, pq_priority_t

> prio)

> +{

> +     assert(prio != INVALID_PRIORITY);

> +     if (odp_likely(is_active(elem))) {

> +             assert(prio >= elem->prio);

> +             elem->prio = prio;

> +             this->heap[elem->index].prio = prio;/* cache misses here! */

> +             pq_bubble_down(this, elem);

> +             pq_assert_heap(this);

> +     } else {

> +             pq_activate_element(this, elem, prio);

> +     }

> +}

> +

> +pq_priority_t pq_first_priority(const priority_queue *this)

> +{

> +     return this->num_elems != 0 ? this->heap[0].prio :

> INVALID_PRIORITY;

> +}

> +

> +pq_element *

> +pq_release_element(priority_queue *this, pq_priority_t threshold)

> +{

> +     if (odp_likely(this->num_elems != 0 &&

> +                    this->heap[0].prio <= threshold)) {

> +             pq_element *elem = this->heap[0].elem;

> +             /* Remove element from heap */

> +             pq_deactivate_element(this, elem);

> +             assert(elem->prio <= threshold);

> +             return elem;

> +     }

> +     return NULL;

> +}

> diff --git a/platform/linux-generic/odp_timer.c b/platform/linux-

> generic/odp_timer.c

> index 313c713..0e5071c 100644

> --- a/platform/linux-generic/odp_timer.c

> +++ b/platform/linux-generic/odp_timer.c

> @@ -4,428 +4,713 @@

>   * SPDX-License-Identifier:     BSD-3-Clause

>   */

>

> -#include <odp_timer.h>

> -#include <odp_timer_internal.h>

> -#include <odp_time.h>

> -#include <odp_buffer_pool_internal.h>

> -#include <odp_internal.h>

> -#include <odp_atomic.h>

> -#include <odp_spinlock.h>

> -#include <odp_sync.h>

> -#include <odp_debug.h>

> -

> -#include <signal.h>

> -#include <time.h>

> +/**

> + * @file

> + *

> + * ODP timer service

> + *

> + */

>

> +#include <assert.h>

> +#include <errno.h>

>  #include <string.h>

> -

> -#define NUM_TIMERS    1

> -#define MAX_TICKS     1024

> -#define MAX_RES       ODP_TIME_SEC

> -#define MIN_RES       (100*ODP_TIME_USEC)

> -

> -

> -typedef struct {

> -     odp_spinlock_t lock;

> -     timeout_t      *list;

> -} tick_t;

> -

> -typedef struct {

> -     int               allocated;

> -     volatile int      active;

> -     volatile uint64_t cur_tick;

> -     timer_t           timerid;

> -     odp_timer_t       timer_hdl;

> -     odp_buffer_pool_t pool;

> -     uint64_t          resolution_ns;

> -     uint64_t          max_ticks;

> -     tick_t            tick[MAX_TICKS];

> -

> -} timer_ring_t;

> -

> -typedef struct {

> -     odp_spinlock_t lock;

> -     int            num_timers;

> -     timer_ring_t   timer[NUM_TIMERS];

> -

> -} timer_global_t;

> -

> -/* Global */

> -static timer_global_t odp_timer;

> -

> -static void add_tmo(tick_t *tick, timeout_t *tmo)

> +#include <stdlib.h>

> +#include <time.h>

> +#include <signal.h>

> +#include "odp_std_types.h"

> +#include "odp_buffer.h"

> +#include "odp_buffer_pool.h"

> +#include "odp_queue.h"

> +#include "odp_hints.h"

> +#include "odp_sync.h"

> +#include "odp_ticketlock.h"

> +#include "odp_debug.h"

> +#include "odp_align.h"

> +#include "odp_shared_memory.h"

> +#include "odp_hints.h"

> +#include "odp_internal.h"

> +#include "odp_time.h"

> +#include "odp_timer.h"

> +#include "odp_timer_internal.h"

> +#include "odp_priority_queue_internal.h"

> +

> +/***********************************************************************

> *******

> + * Translation between timeout and timeout header

> +

> *************************************************************************

> ****/

> +

> +static inline odp_timeout_hdr_t *odp_tmo_to_hdr(odp_timer_tmo_t tmo)

>  {

> -     odp_spinlock_lock(&tick->lock);

> -

> -     tmo->next  = tick->list;

> -     tick->list = tmo;

> +     odp_buffer_t buf = odp_buffer_from_timeout(tmo);

> +     odp_timeout_hdr_t *tmo_hdr = (odp_timeout_hdr_t

> *)odp_buf_to_hdr(buf);

> +     return tmo_hdr;

> +}

>

> -     odp_spinlock_unlock(&tick->lock);

> +/***********************************************************************

> *******

> + * odp_timer abstract datatype

> +

> *************************************************************************

> ****/

> +

> +typedef struct odp_timer_s {

> +     pq_element pqelem;/* Base class */

> +     uint64_t req_tmo;/* Requested timeout tick */

> +     odp_buffer_t tmo_buf;/* ODP_BUFFER_INVALID if timeout enqueued */

> +     odp_queue_t queue;/* ODP_QUEUE_INVALID if timer is free */

> +     uint32_t tag;/* Reusing tag as next pointer/index when timer is

> free */

> +     uint16_t gencnt;/* Smaller to make room for user_buf flag */

> +     unsigned int user_buf:1; /* User-defined buffer? */

> +} odp_timer;

> +

> +/* Constructor */

> +static inline void odp_timer_con(odp_timer *this)

> +{

> +     pq_element_con(&this->pqelem);

> +     this->tmo_buf = ODP_BUFFER_INVALID;

> +     this->queue = ODP_QUEUE_INVALID;

> +     this->gencnt = 0;

>  }

>

> -static timeout_t *rem_tmo(tick_t *tick)

> +/* Destructor */

> +static inline void odp_timer_des(odp_timer *this)

>  {

> -     timeout_t *tmo;

> +     assert(this->tmo_buf == ODP_BUFFER_INVALID);

> +     assert(this->queue == ODP_QUEUE_INVALID);

> +     pq_element_des(&this->pqelem);

> +}

>

> -     odp_spinlock_lock(&tick->lock);

> +/* Setup when timer is allocated */

> +static void setup(odp_timer *this,

> +               odp_queue_t _q,

> +               void *_up,

> +               odp_buffer_t _tmo)

> +{

> +     this->req_tmo = INVALID_PRIORITY;

> +     this->tmo_buf = _tmo;

> +     this->queue = _q;

> +     this->tag = 0;

> +     this->user_buf = false;

> +     /* Initialise constant fields of timeout event */

> +     odp_timeout_hdr_t *tmo_hdr =

> +             odp_tmo_to_hdr(odp_timeout_from_buffer(this->tmo_buf));

> +     tmo_hdr->gencnt = this->gencnt;

> +     tmo_hdr->timer = this;

> +     tmo_hdr->user_ptr = _up;

> +     /* tmo_hdr->tag set at expiration time */

> +     /* tmo_hdr->expiration set at expiration time */

> +     assert(this->queue != ODP_QUEUE_INVALID);

> +}

>

> -     tmo = tick->list;

> +/* Teardown when timer is freed */

> +static odp_buffer_t teardown(odp_timer *this)

> +{

> +     /* Increase generation count to make any pending timeout(s)

> orphaned */

> +     ++this->gencnt;

> +     odp_buffer_t buf = this->tmo_buf;

> +     this->tmo_buf = ODP_BUFFER_INVALID;

> +     this->queue = ODP_QUEUE_INVALID;

> +     return buf;

> +}

>

> -     if (tmo)

> -             tick->list = tmo->next;

> +static inline uint32_t get_next_free(odp_timer *this)

> +{

> +     assert(this->queue == ODP_QUEUE_INVALID);

> +     return this->tag;

> +}

>

> -     odp_spinlock_unlock(&tick->lock);

> +static inline void set_next_free(odp_timer *this, uint32_t nf)

> +{

> +     assert(this->queue == ODP_QUEUE_INVALID);

> +     this->tag = nf;

> +}

>

> -     if (tmo)

> -             tmo->next = NULL;

> +/***********************************************************************

> *******

> + * odp_timer_pool abstract datatype

> + * Includes alloc and free timer

> +

> *************************************************************************

> ****/

> +

> +typedef struct odp_timer_pool_s {

> +     priority_queue pq;

> +     uint64_t cur_tick;/* Current tick value */

> +     uint64_t min_tick;/* Current expiration lower bound */

> +     uint64_t max_tick;/* Current expiration higher bound */

> +     bool shared;

> +     odp_ticketlock_t lock;

> +     const char *name;

> +     odp_buffer_pool_t buf_pool;

> +     uint64_t resolution_ns;

> +     uint64_t min_tmo_tck;

> +     uint64_t max_tmo_tck;

> +     odp_timer *timers;

> +     uint32_t num_alloc;/* Current number of allocated timers */

> +     uint32_t max_timers;/* Max number of timers */

> +     uint32_t first_free;/* 0..max_timers-1 => free timer */

> +     timer_t timerid;

> +     odp_timer_clk_src_t clk_src;

> +} odp_timer_pool;

> +

> +/* Forward declarations */

> +static void timer_init(odp_timer_pool *tp);

> +static void timer_exit(odp_timer_pool *tp);

> +

> +static void odp_timer_pool_con(odp_timer_pool *this,

> +                            const char *_n,

> +                            odp_buffer_pool_t _bp,

> +                            uint64_t _r,

> +                            uint64_t _mint,

> +                            uint64_t _maxt,

> +                            uint32_t _mt,

> +                            bool _s,

> +                            odp_timer_clk_src_t _cs)

> +{

> +     priority_queue_con(&this->pq, _mt);

> +     this->cur_tick = 0;

> +     this->shared = _s;

> +     this->name = strdup(_n);

> +     this->buf_pool = _bp;

> +     this->resolution_ns = _r;

> +     this->min_tmo_tck = odp_timer_ns_to_tick(this, _mint);

> +     this->max_tmo_tck = odp_timer_ns_to_tick(this, _maxt);

> +     this->min_tick = this->cur_tick + this->min_tmo_tck;

> +     this->max_tick = this->cur_tick + this->max_tmo_tck;

> +     this->num_alloc = 0;

> +     this->max_timers = _mt;

> +     this->first_free = 0;

> +     this->clk_src = _cs;

> +     this->timers = malloc(sizeof(odp_timer) * this->max_timers);

> +     if (this->timers == NULL)

> +             ODP_ABORT("%s: malloc failed\n", _n);

> +     uint32_t i;

> +     for (i = 0; i < this->max_timers; i++)

> +             odp_timer_con(&this->timers[i]);

> +     for (i = 0; i < this->max_timers; i++)

> +             set_next_free(&this->timers[i], i + 1);

> +     odp_ticketlock_init(&this->lock);

> +     if (this->clk_src == ODP_CLOCK_CPU)

> +             timer_init(this);

> +     /* Make sure timer pool initialisation is globally observable */

> +     /* before we return a pointer to it */

> +     odp_sync_stores();

> +}

>

> -     return tmo;

> +static odp_timer_pool *odp_timer_pool_new(

> +     const char *_n,

> +     odp_buffer_pool_t _bp,

> +     uint64_t _r,

> +     uint64_t _mint,

> +     uint64_t _maxt,

> +     uint32_t _mt,

> +     bool _s,

> +     odp_timer_clk_src_t _cs)

> +{

> +     odp_timer_pool *this = malloc(sizeof(odp_timer_pool));

> +     if (odp_unlikely(this == NULL))

> +             ODP_ABORT("%s: timer pool malloc failed\n", _n);

> +     odp_timer_pool_con(this, _n, _bp, _r, _mint, _maxt, _mt, _s, _cs);

> +     return this;

>  }

>

> -/**

> - * Search and delete tmo entry from timeout list

> - * return -1 : on error.. handle not in list

> - *           0 : success

> - */

> -static int find_and_del_tmo(timeout_t **tmo, odp_timer_tmo_t handle)

> +static void odp_timer_pool_des(odp_timer_pool *this)

>  {

> -     timeout_t *cur, *prev;

> -     prev = NULL;

> +     if (this->shared)

> +             odp_ticketlock_lock(&this->lock);

> +     if (this->num_alloc != 0) {

> +             /* It's a programming error to attempt to destroy a */

> +             /* timer pool which is still in use */

> +             ODP_ABORT("%s: timers in use\n", this->name);

> +     }

> +     if (this->clk_src == ODP_CLOCK_CPU)

> +             timer_exit(this);

> +     uint32_t i;

> +     for (i = 0; i < this->max_timers; i++)

> +             odp_timer_des(&this->timers[i]);

> +     free(this->timers);

> +     priority_queue_des(&this->pq);

> +     odp_sync_stores();

> +}

>

> -     for (cur = *tmo; cur != NULL; prev = cur, cur = cur->next) {

> -             if (cur->tmo_buf == handle) {

> -                     if (prev == NULL)

> -                             *tmo = cur->next;

> -                     else

> -                             prev->next = cur->next;

> +static void odp_timer_pool_del(odp_timer_pool *this)

> +{

> +     odp_timer_pool_des(this);

> +     free(this);

> +}

>

> -                     break;

> +static inline odp_timer *timer_alloc(odp_timer_pool *this,

> +                                  odp_queue_t queue,

> +                                  void *user_ptr,

> +                                  odp_buffer_t tmo_buf)

> +{

> +     odp_timer *tim = ODP_TIMER_INVALID;

> +     if (odp_likely(this->shared))

> +             odp_ticketlock_lock(&this->lock);

> +     if (odp_likely(this->num_alloc < this->max_timers)) {

> +             this->num_alloc++;

> +             /* Remove first unused timer from free list */

> +             assert(this->first_free != this->max_timers);

> +             tim = &this->timers[this->first_free];

> +             this->first_free = get_next_free(tim);

> +             /* Insert timer into priority queue */

> +             if (odp_unlikely(!pq_register_element(&this->pq,

> +                                                   &tim->pqelem))) {

> +                     /* Unexpected internal error */

> +                     abort();

>               }

> +             /* Create timer */

> +             setup(tim, queue, user_ptr, tmo_buf);

> +     } else {

> +             errno = ENFILE; /* Reusing 'file table overflow' */

>       }

> -

> -     if (!cur)

> -             /* couldn't find tmo in list */

> -             return -1;

> -

> -     /* application to free tmo_buf provided by absolute_tmo call */

> -     return 0;

> +     if (odp_likely(this->shared))

> +             odp_ticketlock_unlock(&this->lock);

> +     return tim;

>  }

>

> -int odp_timer_cancel_tmo(odp_timer_t timer_hdl, odp_timer_tmo_t tmo)

> +static inline void timer_free(odp_timer_pool *this, odp_timer *tim)

>  {

> -     int id;

> -     int tick_idx;

> -     timeout_t *cancel_tmo;

> -     odp_timeout_hdr_t *tmo_hdr;

> -     tick_t *tick;

> -

> -     /* get id */

> -     id = (int)timer_hdl - 1;

> -

> -     tmo_hdr = odp_timeout_hdr((odp_timeout_t) tmo);

> -     /* get tmo_buf to cancel */

> -     cancel_tmo = &tmo_hdr->meta;

> +     if (odp_likely(this->shared))

> +             odp_ticketlock_lock(&this->lock);

> +     if (odp_unlikely(tim->queue == ODP_QUEUE_INVALID))

> +             ODP_ABORT("Invalid timer %p\n", tim);

> +     /* Destroy timer */

> +     odp_buffer_t buf = teardown(tim);

> +     /* Remove timer from priority queue */

> +     pq_unregister_element(&this->pq, &tim->pqelem);

> +     /* Insert timer into free list */

> +     set_next_free(tim, this->first_free);

> +     this->first_free = tim - &this->timers[0];

> +     assert(this->num_alloc != 0);

> +     this->num_alloc--;

> +     if (odp_likely(this->shared))

> +             odp_ticketlock_unlock(&this->lock);

> +     if (buf != ODP_BUFFER_INVALID)

> +             odp_buffer_free(buf);

> +}

>

> -     tick_idx = cancel_tmo->tick;

> -     tick = &odp_timer.timer[id].tick[tick_idx];

> +/***********************************************************************

> *******

> + * Operations on timers

> + * reset/reset_w_buf/cancel timer, return timeout

> +

> *************************************************************************

> ****/

>

> -     odp_spinlock_lock(&tick->lock);

> -     /* search and delete tmo from tick list */

> -     if (find_and_del_tmo(&tick->list, tmo) != 0) {

> -             odp_spinlock_unlock(&tick->lock);

> -             ODP_DBG("Couldn't find the tmo (%d) in tick list\n",

> (int)tmo);

> -             return -1;

> +static inline void timer_expire(odp_timer *tim)

> +{

> +     assert(tim->req_tmo != INVALID_PRIORITY);

> +     /* Timer expired, is there actually any timeout event */

> +     /* we can enqueue? */

> +     if (odp_likely(tim->tmo_buf != ODP_BUFFER_INVALID)) {

> +             /* Swap out timeout buffer */

> +             odp_buffer_t buf = tim->tmo_buf;

> +             tim->tmo_buf = ODP_BUFFER_INVALID;

> +             if (odp_likely(!tim->user_buf)) {

> +                     odp_timeout_hdr_t *tmo_hdr =

> +                             odp_tmo_to_hdr(odp_timeout_from_buffer(buf));

> +                     /* Copy tag and requested expiration tick from timer

> */

> +                     tmo_hdr->tag = tim->tag;

> +                     tmo_hdr->expiration = tim->req_tmo;

> +             }

> +             /* Else don't touch user-defined buffer */

> +             int rc = odp_queue_enq(tim->queue, buf);

> +             if (odp_unlikely(rc != 0))

> +                     ODP_ABORT("Failed to enqueue timeout buffer (%d)\n",

> +                               rc);

> +             /* Mark timer as inactive */

> +             tim->req_tmo = INVALID_PRIORITY;

>       }

> -     odp_spinlock_unlock(&tick->lock);

> -

> -     return 0;

> +     /* No, timeout event already enqueued or unavailable */

> +     /* Keep timer active, odp_timer_return_tmo() will patch up */

>  }

>

> -static void notify_function(union sigval sigval)

> +static odp_timer_set_t timer_reset(odp_timer_pool *tp,

> +                                odp_timer *tim,

> +                                uint64_t abs_tck)

>  {

> -     uint64_t cur_tick;

> -     timeout_t *tmo;

> -     tick_t *tick;

> -     timer_ring_t *timer;

> +     assert(tim->user_buf == false);

> +     if (odp_unlikely(abs_tck < tp->min_tick))

> +             return ODP_TIMER_SET_TOOEARLY;

> +     if (odp_unlikely(abs_tck > tp->max_tick))

> +             return ODP_TIMER_SET_TOOLATE;

> +

> +     if (odp_likely(tp->shared))

> +             odp_ticketlock_lock(&tp->lock);

> +

> +     if (odp_unlikely(tim->queue == ODP_QUEUE_INVALID))

> +             ODP_ABORT("Invalid timer %p\n", tim);

> +     if (odp_unlikely(tim->user_buf))

> +             ODP_ABORT("Timer %p has user buffer\n", tim);

> +     /* Increase timer tag to make any pending timeout stale */

> +     tim->tag++;

> +     /* Save requested timeout */

> +     tim->req_tmo = abs_tck;

> +     /* Update timer position in priority queue */

> +     pq_reset_element(&tp->pq, &tim->pqelem, abs_tck);

> +

> +     if (odp_likely(tp->shared))

> +             odp_ticketlock_unlock(&tp->lock);

> +     return ODP_TIMER_SET_SUCCESS;

> +}

>

> -     timer = sigval.sival_ptr;

> +static odp_timer_set_t timer_reset_w_buf(odp_timer_pool *tp,

> +             odp_timer *tim,

> +             uint64_t abs_tck,

> +             odp_buffer_t user_buf)

> +{

> +     if (odp_unlikely(abs_tck < tp->min_tick))

> +             return ODP_TIMER_SET_TOOEARLY;

> +     if (odp_unlikely(abs_tck > tp->max_tick))

> +             return ODP_TIMER_SET_TOOLATE;

> +

> +     if (odp_likely(tp->shared))

> +             odp_ticketlock_lock(&tp->lock);

> +

> +     if (odp_unlikely(tim->queue == ODP_QUEUE_INVALID))

> +             ODP_ABORT("Invalid timer %p\n", tim);

> +     /* Increase timer tag to make any pending timeout stale */

> +     tim->tag++;

> +     /* Save requested timeout */

> +     tim->req_tmo = abs_tck;

> +     /* Set flag indicating presence of user defined buffer */

> +     tim->user_buf = true;

> +     /* Swap in new buffer, save any old buffer pointer */

> +     odp_buffer_t old_buf = tim->tmo_buf;

> +     tim->tmo_buf = user_buf;

> +     /* Update timer position in priority queue */

> +     pq_reset_element(&tp->pq, &tim->pqelem, abs_tck);

> +

> +     if (odp_likely(tp->shared))

> +             odp_ticketlock_unlock(&tp->lock);

> +

> +     /* Free old buffer if present */

> +     if (odp_unlikely(old_buf != ODP_BUFFER_INVALID))

> +             odp_buffer_free(old_buf);

> +     return ODP_TIMER_SET_SUCCESS;

> +}

>

> -     if (timer->active == 0) {

> -             ODP_DBG("Timer (%u) not active\n", timer->timer_hdl);

> -             return;

> +static inline void timer_cancel(odp_timer_pool *tp,

> +                             odp_timer *tim)

> +{

> +     odp_buffer_t old_buf = ODP_BUFFER_INVALID;

> +     if (odp_likely(tp->shared))

> +             odp_ticketlock_lock(&tp->lock);

> +

> +     if (odp_unlikely(tim->queue == ODP_QUEUE_INVALID))

> +             ODP_ABORT("Invalid timer %p\n", tim);

> +     if (odp_unlikely(tim->user_buf)) {

> +             /* Swap out old user buffer */

> +             old_buf = tim->tmo_buf;

> +             tim->tmo_buf = ODP_BUFFER_INVALID;

> +             /* tim->user_buf stays true */

>       }

> +     /* Else a normal timer (no user-defined buffer) */

> +     /* Increase timer tag to make any pending timeout stale */

> +     tim->tag++;

> +     /* Clear requested timeout, mark timer inactive */

> +     tim->req_tmo = INVALID_PRIORITY;

> +     /* Remove timer from the priority queue */

> +     pq_deactivate_element(&tp->pq, &tim->pqelem);

> +

> +     if (odp_likely(tp->shared))

> +             odp_ticketlock_unlock(&tp->lock);

> +     /* Free user-defined buffer if present */

> +     if (odp_unlikely(old_buf != ODP_BUFFER_INVALID))

> +             odp_buffer_free(old_buf);

> +}

>

> -     /* ODP_DBG("Tick\n"); */

> -

> -     cur_tick = timer->cur_tick++;

> -

> -     odp_sync_stores();

> +static inline void timer_return(odp_timer_pool *tp,

> +                             odp_timer *tim,

> +                             odp_timer_tmo_t tmo,

> +                             const odp_timeout_hdr_t *tmo_hdr)

> +{

> +     odp_buffer_t tmo_buf = odp_buffer_from_timeout(tmo);

> +     if (odp_likely(tp->shared))

> +             odp_ticketlock_lock(&tp->lock);

> +     if (odp_unlikely(tim->user_buf))

> +             ODP_ABORT("Timer %p has user-defined buffer\n", tim);

> +     if (odp_likely(tmo_hdr->gencnt == tim->gencnt)) {

> +             assert(tim->tmo_buf == ODP_BUFFER_INVALID);

> +             /* Save returned buffer for use when timer expires next time

> */

> +             tim->tmo_buf = tmo_buf;

> +             tmo_buf = ODP_BUFFER_INVALID;

> +             /* Check if timer is active and should have expired */

> +             if (odp_unlikely(tim->req_tmo != INVALID_PRIORITY &&

> +                              tim->req_tmo <= tp->cur_tick)) {

> +                     /* Expire timer now since we have restored the timeout

> +                        buffer */

> +                     timer_expire(tim);

> +             }

> +             /* Else timer inactive or expires in the future */

> +     }

> +     /* Else timeout orphaned, free buffer later */

> +     if (odp_likely(tp->shared))

> +             odp_ticketlock_unlock(&tp->lock);

> +     if (odp_unlikely(tmo_buf != ODP_BUFFER_INVALID))

> +             odp_buffer_free(tmo_buf);

> +}

>

> -     tick = &timer->tick[cur_tick % MAX_TICKS];

> +/* Not public, so not declared in odp_timer.h, but externally visible and

> + * must be declared somewhere */

> +unsigned odp_timer_pool_expire(odp_timer_pool_t tpid, uint64_t tick);

>

> -     while ((tmo = rem_tmo(tick)) != NULL) {

> -             odp_queue_t  queue;

> -             odp_buffer_t buf;

> +unsigned odp_timer_pool_expire(odp_timer_pool_t tpid, uint64_t tick)

> +{

> +     if (odp_likely(tpid->shared))

> +             odp_ticketlock_lock(&tpid->lock);

> +

> +     unsigned nexp = 0;

> +     odp_timer_t tim;

> +     tpid->cur_tick = tick;

> +     tpid->min_tick = tick + tpid->min_tmo_tck;

> +     tpid->max_tick = tick + tpid->max_tmo_tck;

> +     while ((tim = (odp_timer_t)pq_release_element(&tpid->pq, tick)) !=

> +            ODP_TIMER_INVALID) {

> +             assert(get_prio(&tim->pqelem) <= tick);

> +             timer_expire(tim);

> +             nexp++;

> +     }

>

> -             queue = tmo->queue;

> -             buf   = tmo->buf;

> +     if (odp_likely(tpid->shared))

> +             odp_ticketlock_unlock(&tpid->lock);

> +     return nexp;

> +}

>

> -             if (buf != tmo->tmo_buf)

> -                     odp_buffer_free(tmo->tmo_buf);

> +/***********************************************************************

> *******

> + * POSIX timer support

> + * Functions that use Linux/POSIX per-process timers and related

> facilities

> +

> *************************************************************************

> ****/

>

> -             odp_queue_enq(queue, buf);

> -     }

> +static void timer_notify(sigval_t sigval)

> +{

> +     odp_timer_pool *tp = (odp_timer_pool *)sigval.sival_ptr;

> +     uint64_t new_tick = tp->cur_tick + 1;

> +     (void)odp_timer_pool_expire(tp, new_tick);

>  }

>

> -static void timer_start(timer_ring_t *timer)

> +static void timer_init(odp_timer_pool *tp)

>  {

>       struct sigevent   sigev;

>       struct itimerspec ispec;

>       uint64_t res, sec, nsec;

>

> -     ODP_DBG("\nTimer (%u) starts\n", timer->timer_hdl);

> +     ODP_DBG("Creating POSIX timer for timer pool %s, period %"

> +             PRIu64" ns\n", tp->name, tp->resolution_ns);

>

>       memset(&sigev, 0, sizeof(sigev));

>       memset(&ispec, 0, sizeof(ispec));

>

>       sigev.sigev_notify          = SIGEV_THREAD;

> -     sigev.sigev_notify_function = notify_function;

> -     sigev.sigev_value.sival_ptr = timer;

> +     sigev.sigev_notify_function = timer_notify;

> +     sigev.sigev_value.sival_ptr = tp;

>

> -     if (timer_create(CLOCK_MONOTONIC, &sigev, &timer->timerid)) {

> -             ODP_DBG("Timer create failed\n");

> -             return;

> -     }

> +     if (timer_create(CLOCK_MONOTONIC, &sigev, &tp->timerid))

> +             ODP_ABORT("timer_create() returned error %s\n",

> +                       strerror(errno));

>

> -     res  = timer->resolution_ns;

> +     res  = tp->resolution_ns;

>       sec  = res / ODP_TIME_SEC;

> -     nsec = res - sec*ODP_TIME_SEC;

> +     nsec = res - sec * ODP_TIME_SEC;

>

>       ispec.it_interval.tv_sec  = (time_t)sec;

>       ispec.it_interval.tv_nsec = (long)nsec;

>       ispec.it_value.tv_sec     = (time_t)sec;

>       ispec.it_value.tv_nsec    = (long)nsec;

>

> -     if (timer_settime(timer->timerid, 0, &ispec, NULL)) {

> -             ODP_DBG("Timer set failed\n");

> -             return;

> -     }

> -

> -     return;

> +     if (timer_settime(tp->timerid, 0, &ispec, NULL))

> +             ODP_ABORT("timer_settime() returned error %s\n",

> +                       strerror(errno));

>  }

>

> -int odp_timer_init_global(void)

> +static void timer_exit(odp_timer_pool *tp)

>  {

> -     ODP_DBG("Timer init ...");

> -

> -     memset(&odp_timer, 0, sizeof(timer_global_t));

> -

> -     odp_spinlock_init(&odp_timer.lock);

> -

> -     ODP_DBG("done\n");

> -

> -     return 0;

> +     if (timer_delete(tp->timerid) != 0)

> +             ODP_ABORT("timer_delete() returned error %s\n",

> +                       strerror(errno));

>  }

>

> -int odp_timer_disarm_all(void)

> +/***********************************************************************

> *******

> + * Public API functions

> + * Some parameter checks and error messages

> + * No modifications of internal state

> +

> *************************************************************************

> ****/

> +odp_timer_pool_t

> +odp_timer_pool_create(const char *name,

> +                   odp_buffer_pool_t buf_pool,

> +                   uint64_t resolution_ns,

> +                   uint64_t min_timeout,

> +                   uint64_t max_timeout,

> +                   uint32_t num_timers,

> +                   bool shared,

> +                   odp_timer_clk_src_t clk_src)

>  {

> -     int timers;

> -     struct itimerspec ispec;

> -

> -     odp_spinlock_lock(&odp_timer.lock);

> -

> -     timers = odp_timer.num_timers;

> -

> -     ispec.it_interval.tv_sec  = 0;

> -     ispec.it_interval.tv_nsec = 0;

> -     ispec.it_value.tv_sec     = 0;

> -     ispec.it_value.tv_nsec    = 0;

> -

> -     for (; timers >= 0; timers--) {

> -             if (timer_settime(odp_timer.timer[timers].timerid,

> -                               0, &ispec, NULL)) {

> -                     ODP_DBG("Timer reset failed\n");

> -                     odp_spinlock_unlock(&odp_timer.lock);

> -                     return -1;

> -             }

> -             odp_timer.num_timers--;

> -     }

> -

> -     odp_spinlock_unlock(&odp_timer.lock);

> -

> -     return 0;

> +     /* Verify that buffer pool can be used for timeouts */

> +     odp_buffer_t buf = odp_buffer_alloc(buf_pool);

> +     if (buf == ODP_BUFFER_INVALID)

> +             ODP_ABORT("%s: Failed to allocate buffer\n", name);

> +     if (odp_buffer_type(buf) != ODP_BUFFER_TYPE_TIMEOUT)

> +             ODP_ABORT("%s: Buffer pool wrong type\n", name);

> +     odp_buffer_free(buf);

> +     odp_timer_pool_t tp = odp_timer_pool_new(name, buf_pool,

> resolution_ns,

> +                           min_timeout, max_timeout, num_timers,

> +                           shared, clk_src);

> +     return tp;

>  }
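
A hedged creation example for the call above ('buf_pool' is assumed to be
a buffer pool of type ODP_BUFFER_TYPE_TIMEOUT created elsewhere):

    /* 1 ms resolution, timeouts between 10 ms and 10 s, up to 1000
     * timers, shared between threads, driven by the CPU clock source */
    odp_timer_pool_t tp =
        odp_timer_pool_create("tmo_pool", buf_pool,
                              1000 * ODP_TIME_USEC,   /* resolution  */
                              10000 * ODP_TIME_USEC,  /* min timeout */
                              10 * ODP_TIME_SEC,      /* max timeout */
                              1000, true, ODP_CLOCK_CPU);
    odp_timer_pool_start();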

>

> -odp_timer_t odp_timer_create(const char *name, odp_buffer_pool_t pool,

> -                          uint64_t resolution_ns, uint64_t min_ns,

> -                          uint64_t max_ns)

> +void odp_timer_pool_start(void)

>  {

> -     uint32_t id;

> -     timer_ring_t *timer;

> -     odp_timer_t timer_hdl;

> -     int i;

> -     uint64_t max_ticks;

> -     (void) name;

> -

> -     if (resolution_ns < MIN_RES)

> -             resolution_ns = MIN_RES;

> -

> -     if (resolution_ns > MAX_RES)

> -             resolution_ns = MAX_RES;

> -

> -     max_ticks = max_ns / resolution_ns;

> -

> -     if (max_ticks > MAX_TICKS) {

> -             ODP_DBG("Maximum timeout too long: %"PRIu64" ticks\n",

> -                     max_ticks);

> -             return ODP_TIMER_INVALID;

> -     }

> -

> -     if (min_ns < resolution_ns) {

> -             ODP_DBG("Min timeout %"PRIu64" ns < resolution %"PRIu64"

> ns\n",

> -                     min_ns, resolution_ns);

> -             return ODP_TIMER_INVALID;

> -     }

> -

> -     odp_spinlock_lock(&odp_timer.lock);

> -

> -     if (odp_timer.num_timers >= NUM_TIMERS) {

> -             odp_spinlock_unlock(&odp_timer.lock);

> -             ODP_DBG("All timers allocated\n");

> -             return ODP_TIMER_INVALID;

> -     }

> -

> -     for (id = 0; id < NUM_TIMERS; id++) {

> -             if (odp_timer.timer[id].allocated == 0)

> -                     break;

> -     }

> -

> -     timer = &odp_timer.timer[id];

> -     timer->allocated = 1;

> -     odp_timer.num_timers++;

> -

> -     odp_spinlock_unlock(&odp_timer.lock);

> -

> -     timer_hdl = id + 1;

> -

> -     timer->timer_hdl     = timer_hdl;

> -     timer->pool          = pool;

> -     timer->resolution_ns = resolution_ns;

> -     timer->max_ticks     = MAX_TICKS;

> -

> -     for (i = 0; i < MAX_TICKS; i++) {

> -             odp_spinlock_init(&timer->tick[i].lock);

> -             timer->tick[i].list = NULL;

> -     }

> -

> -     timer->active = 1;

> -     odp_sync_stores();

> -

> -     timer_start(timer);

> +     /* Nothing to do here, timer pools are started by the create call

> */

> +}

>

> -     return timer_hdl;

> +void odp_timer_pool_destroy(odp_timer_pool_t tpid)

> +{

> +     odp_timer_pool_del(tpid);

>  }

>

> -odp_timer_tmo_t odp_timer_absolute_tmo(odp_timer_t timer_hdl, uint64_t

> tmo_tick,

> -                                    odp_queue_t queue, odp_buffer_t buf)

> +uint64_t odp_timer_tick_to_ns(odp_timer_pool_t tpid, uint64_t ticks)

>  {

> -     int id;

> -     uint64_t tick;

> -     uint64_t cur_tick;

> -     timeout_t *new_tmo;

> -     odp_buffer_t tmo_buf;

> -     odp_timeout_hdr_t *tmo_hdr;

> -     timer_ring_t *timer;

> +     return ticks * tpid->resolution_ns;

> +}

>

> -     id = (int)timer_hdl - 1;

> -     timer = &odp_timer.timer[id];

> +uint64_t odp_timer_ns_to_tick(odp_timer_pool_t tpid, uint64_t ns)

> +{

> +     return (uint64_t)(ns / tpid->resolution_ns);

> +}
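
A worked example of the two conversions above, assuming the pool was
created with a 1 ms (1000000 ns) resolution; note that ns-to-tick
truncates:

    uint64_t tck = odp_timer_ns_to_tick(tp, 2500000); /* 2.5 ms -> 2 ticks     */
    uint64_t ns  = odp_timer_tick_to_ns(tp, tck);     /* 2 ticks -> 2000000 ns */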

>

> -     cur_tick = timer->cur_tick;

> -     if (tmo_tick <= cur_tick) {

> -             ODP_DBG("timeout too close\n");

> -             return ODP_TIMER_TMO_INVALID;

> -     }

> +uint64_t odp_timer_current_tick(odp_timer_pool_t tpid)

> +{

> +     return tpid->cur_tick;

> +}

>

> -     if ((tmo_tick - cur_tick) > MAX_TICKS) {

> -             ODP_DBG("timeout too far: cur %"PRIu64" tmo %"PRIu64"\n",

> -                     cur_tick, tmo_tick);

> -             return ODP_TIMER_TMO_INVALID;

> +uintptr_t odp_timer_pool_query_conf(odp_timer_pool_t tpid,

> +                                 odp_timer_pool_conf_t item)

> +{

> +     switch (item) {

> +     case ODP_TIMER_NAME:

> +             return (uintptr_t)(tpid->name);

> +     case ODP_TIMER_RESOLUTION:

> +             return tpid->resolution_ns;

> +     case ODP_TIMER_MIN_TICKS:

> +             return tpid->min_tmo_tck;

> +     case ODP_TIMER_MAX_TICKS:

> +             return tpid->max_tmo_tck;

> +     case ODP_TIMER_NUM_TIMERS:

> +             return tpid->max_timers;

> +     case ODP_TIMER_SHARED:

> +             return tpid->shared;

> +     default:

> +             return 0;

>       }

> +}
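
Example queries against the items handled in the switch above ('tp' as
before):

    const char *nm   = (const char *)odp_timer_pool_query_conf(tp, ODP_TIMER_NAME);
    uintptr_t res_ns = odp_timer_pool_query_conf(tp, ODP_TIMER_RESOLUTION);
    uintptr_t max_tk = odp_timer_pool_query_conf(tp, ODP_TIMER_MAX_TICKS);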

>

> -     tick = tmo_tick % MAX_TICKS;

> -

> -     tmo_buf = odp_buffer_alloc(timer->pool);

> -     if (tmo_buf == ODP_BUFFER_INVALID) {

> -             ODP_DBG("tmo buffer alloc failed\n");

> -             return ODP_TIMER_TMO_INVALID;

> +odp_timer_t odp_timer_alloc(odp_timer_pool_t tpid,

> +                         odp_queue_t queue,

> +                         void *user_ptr)

> +{

> +     /* We check this because ODP_QUEUE_INVALID is used */

> +     /* to indicate a free timer */

> +     if (odp_unlikely(queue == ODP_QUEUE_INVALID))

> +             ODP_ABORT("%s: Invalid queue handle\n", tpid->name);

> +     odp_buffer_t tmo_buf = odp_buffer_alloc(tpid->buf_pool);

> +     if (odp_likely(tmo_buf != ODP_BUFFER_INVALID)) {

> +             odp_timer *tim = timer_alloc(tpid, queue, user_ptr,

> tmo_buf);

> +             if (odp_likely(tim != ODP_TIMER_INVALID)) {

> +                     /* Success */

> +                     assert(tim->queue != ODP_QUEUE_INVALID);

> +                     return tim;

> +             }

> +             odp_buffer_free(tmo_buf);

>       }

> +     /* Else failed to allocate timeout event */

> +     /* errno set by odp_buffer_alloc() or timer_alloc() */

> +     return ODP_TIMER_INVALID;

> +}
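
Typical pairing of the alloc/free calls (sketch; 'queue' and 'flow_ctx'
are application-side names):

    /* One timer per flow; its timeouts go to 'queue' and carry
     * 'flow_ctx' as the user pointer */
    odp_timer_t tim = odp_timer_alloc(tp, queue, flow_ctx);
    if (tim == ODP_TIMER_INVALID)
        ODP_ERR("out of timers or timeout buffers, errno %d\n", errno);
    /* ... arm with odp_timer_set_rel(), consume timeouts ... */
    odp_timer_free(tim);  /* any still-pending timeout becomes orphaned */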

>

> -     tmo_hdr = odp_timeout_hdr((odp_timeout_t) tmo_buf);

> -     new_tmo = &tmo_hdr->meta;

> -

> -     new_tmo->timer_id = id;

> -     new_tmo->tick     = (int)tick;

> -     new_tmo->tmo_tick = tmo_tick;

> -     new_tmo->queue    = queue;

> -     new_tmo->tmo_buf  = tmo_buf;

> -

> -     if (buf != ODP_BUFFER_INVALID)

> -             new_tmo->buf = buf;

> -     else

> -             new_tmo->buf = tmo_buf;

> -

> -     add_tmo(&timer->tick[tick], new_tmo);

> -

> -     return tmo_buf;

> +void odp_timer_free(odp_timer_t tim)

> +{

> +     odp_timer_pool *tp = (odp_timer_pool *)get_pq(&tim->pqelem);

> +     timer_free(tp, tim);

>  }

>

> -uint64_t odp_timer_tick_to_ns(odp_timer_t timer_hdl, uint64_t ticks)

> +odp_timer_set_t odp_timer_set_abs_w_buf(odp_timer_t tim,

> +                                     uint64_t abs_tck,

> +                                     odp_buffer_t user_buf)

>  {

> -     uint32_t id;

> +     odp_timer_pool *tp = (odp_timer_pool *)get_pq(&tim->pqelem);

> +     odp_timer_set_t rc = timer_reset_w_buf(tp, tim, abs_tck, user_buf);

> +     return rc;

> +}

>

> -     id = timer_hdl - 1;

> -     return ticks * odp_timer.timer[id].resolution_ns;

> +odp_timer_set_t odp_timer_set_abs(odp_timer_t tim, uint64_t abs_tck)

> +{

> +     odp_timer_pool *tp = (odp_timer_pool *)get_pq(&tim->pqelem);

> +     odp_timer_set_t rc = timer_reset(tp, tim, abs_tck);

> +     return rc;

>  }

>

> -uint64_t odp_timer_ns_to_tick(odp_timer_t timer_hdl, uint64_t ns)

> +odp_timer_set_t odp_timer_set_rel_w_buf(odp_timer_t tim,

> +                                     uint64_t rel_tck,

> +                                     odp_buffer_t user_buf)

>  {

> -     uint32_t id;

> +     odp_timer_pool *tp = (odp_timer_pool *)get_pq(&tim->pqelem);

> +     odp_timer_set_t rc = timer_reset_w_buf(tp, tim, tp->cur_tick +

> rel_tck,

> +                                            user_buf);

> +     return rc;

> +}

>

> -     id = timer_hdl - 1;

> -     return ns / odp_timer.timer[id].resolution_ns;

> +odp_timer_set_t odp_timer_set_rel(odp_timer_t tim, uint64_t rel_tck)

> +{

> +     odp_timer_pool *tp = (odp_timer_pool *)get_pq(&tim->pqelem);

> +     odp_timer_set_t rc = timer_reset(tp, tim, tp->cur_tick + rel_tck);

> +     return rc;

>  }

>

> -uint64_t odp_timer_resolution(odp_timer_t timer_hdl)

> +void odp_timer_cancel(odp_timer_t tim)

>  {

> -     uint32_t id;

> +     odp_timer_pool *tp = (odp_timer_pool *)get_pq(&tim->pqelem);

> +     timer_cancel(tp, tim);

> +}

>

> -     id = timer_hdl - 1;

> -     return odp_timer.timer[id].resolution_ns;

> +void odp_timer_return_tmo(odp_timer_tmo_t tmo)

> +{

> +     const odp_timeout_hdr_t *tmo_hdr = odp_tmo_to_hdr(tmo);

> +     odp_timer *parent_tim = tmo_hdr->timer;

> +     odp_timer_pool *tp = (odp_timer_pool *)get_pq(&parent_tim->pqelem);

> +     timer_return(tp, parent_tim, tmo, tmo_hdr);

>  }

>

> -uint64_t odp_timer_maximum_tmo(odp_timer_t timer_hdl)

> +odp_timer_tmo_status_t odp_timer_tmo_status(odp_timer_tmo_t tmo)

>  {

> -     uint32_t id;

> +     const odp_timeout_hdr_t *tmo_hdr = odp_tmo_to_hdr(tmo);

> +     odp_timer *parent_tim = tmo_hdr->timer;

>

> -     id = timer_hdl - 1;

> -     return odp_timer.timer[id].max_ticks;

> +     if (odp_unlikely(tmo_hdr->gencnt != parent_tim->gencnt)) {

> +             /* Generation counters differ => timer has been freed */

> +             return ODP_TMO_ORPHAN;

> +     }

> +     /* Else generation counters match => parent timer exists */

> +

> +     if (odp_likely(parent_tim->tag == tmo_hdr->tag))

> +             return ODP_TMO_FRESH;

> +     else

> +             return ODP_TMO_STALE;

>  }

>

> -uint64_t odp_timer_current_tick(odp_timer_t timer_hdl)

> +odp_timer_t odp_timer_handle(odp_timer_tmo_t tmo)

>  {

> -     uint32_t id;

> +     odp_timeout_hdr_t *tmo_hdr = odp_tmo_to_hdr(tmo);

> +     odp_timer_t parent_tim = tmo_hdr->timer;

> +     if (odp_likely(tmo_hdr->gencnt == parent_tim->gencnt))

> +             return parent_tim;

> +     else

> +             return ODP_TIMER_INVALID;

> +}

>

> -     id = timer_hdl - 1;

> -     return odp_timer.timer[id].cur_tick;

> +uint64_t odp_timer_expiration(odp_timer_tmo_t tmo)

> +{

> +     odp_timeout_hdr_t *tmo_hdr = odp_tmo_to_hdr(tmo);

> +     return tmo_hdr->expiration;

>  }

>

> -odp_timeout_t odp_timeout_from_buffer(odp_buffer_t buf)

> +void *odp_timer_userptr(odp_timer_tmo_t tmo)

>  {

> -     return (odp_timeout_t) buf;

> +     odp_timeout_hdr_t *tmo_hdr = odp_tmo_to_hdr(tmo);

> +     return tmo_hdr->user_ptr;

>  }

>

> -uint64_t odp_timeout_tick(odp_timeout_t tmo)

> +int odp_timer_init_global(void)

>  {

> -     odp_timeout_hdr_t *tmo_hdr = odp_timeout_hdr(tmo);

> -     return tmo_hdr->meta.tmo_tick;

> +     return 0;

>  }

> diff --git a/test/api_test/odp_timer_ping.c

> b/test/api_test/odp_timer_ping.c

> index 7406a45..2617b5c 100644

> --- a/test/api_test/odp_timer_ping.c

> +++ b/test/api_test/odp_timer_ping.c

> @@ -20,6 +20,8 @@

>   *    Otherwise timeout may happen bcz of slow nw speed

>   */

>

> +#include <assert.h>

> +#include <stdlib.h>

>  #include <unistd.h>

>  #include <fcntl.h>

>  #include <errno.h>

> @@ -41,14 +43,15 @@

>  #define MSG_POOL_SIZE         (4*1024*1024)

>  #define BUF_SIZE             8

>  #define PING_CNT     10

> -#define PING_THRD    2       /* Send and Rx Ping thread */

> +#define PING_THRD    2       /* send_ping and rx_ping threads */

>

>  /* Nanoseconds */

>  #define RESUS        10000

>  #define MINUS        10000

>  #define MAXUS        10000000

>

> -static odp_timer_t test_timer_ping;

> +static odp_timer_pool_t tp;

> +static odp_timer_t test_timer_ping = ODP_TIMER_INVALID;

>  static odp_timer_tmo_t test_ping_tmo;

>

>  #define PKTSIZE      64

> @@ -128,15 +131,7 @@ static int listen_to_pingack(void)

>                                        (socklen_t *)&len);

>                       if (bytes > 0) {

>                               /* pkt rxvd therefore cancel the timeout */

> -                             if (odp_timer_cancel_tmo(test_timer_ping,

> -                                                      test_ping_tmo) != 0) {

> -                                     ODP_ERR("cancel_tmo failed ..exiting

> listner thread\n");

> -                                     /* avoid exiting from here even if tmo

> -                                      * failed for current ping,

> -                                      * allow subsequent ping_rx request */

> -                                     err = -1;

> -

> -                             }

> +                             odp_timer_cancel(test_timer_ping);

>                               /* cruel bad hack used for sender, listner ipc..

>                                * euwww.. FIXME ..

>                                */

> @@ -160,7 +155,6 @@ static int send_ping_request(struct sockaddr_in

> *addr)

>

>       uint64_t tick;

>       odp_queue_t queue;

> -     odp_buffer_t buf;

>

>       int err = 0;

>

> @@ -184,8 +178,16 @@ static int send_ping_request(struct sockaddr_in

> *addr)

>

>       /* get the ping queue */

>       queue = odp_queue_lookup("ping_timer_queue");

> +     test_timer_ping = odp_timer_alloc(tp, queue, NULL);

> +     if (test_timer_ping == ODP_TIMER_INVALID) {

> +             ODP_ERR("Failed to allocate timer.\n");

> +             err = -1;

> +             goto err;

> +     }

>

>       for (i = 0; i < PING_CNT; i++) {

> +             odp_buffer_t buf;

> +             odp_timer_tmo_t tmo;

>               /* prepare icmp pkt */

>               bzero(&pckt, sizeof(pckt));

>               pckt.hdr.type = ICMP_ECHO;

> @@ -209,12 +211,10 @@ static int send_ping_request(struct sockaddr_in

> *addr)

>               printf(" icmp_sent msg_cnt %d\n", i);

>

>               /* arm the timer */

> -             tick = odp_timer_current_tick(test_timer_ping);

> +             tick = odp_timer_current_tick(tp);

>

>               tick += 1000;

> -             test_ping_tmo = odp_timer_absolute_tmo(test_timer_ping,

> tick,

> -                                                    queue,

> -                                                    ODP_BUFFER_INVALID);

> +             odp_timer_set_abs(test_timer_ping, tick);

>               /* wait for timeout event */

>               while ((buf = odp_queue_deq(queue)) == ODP_BUFFER_INVALID) {

>                       /* flag true means ack rxvd.. a cruel hack as I

> @@ -229,17 +229,28 @@ static int send_ping_request(struct sockaddr_in

> *addr)

>                               break;

>                       }

>               }

> +             assert(odp_buffer_type(buf) == ODP_BUFFER_TYPE_TIMEOUT);

> +             tmo = odp_timeout_from_buffer(buf);

>

> -             /* free tmo_buf for timeout case */

> -             if (buf != ODP_BUFFER_INVALID) {

> -                     ODP_DBG(" timeout msg_cnt [%i] \n", i);

> +             switch (odp_timer_tmo_status(tmo)) {

> +             case ODP_TMO_FRESH:

> +                     ODP_DBG(" timeout msg_cnt [%i]\n", i);

>                       /* so to avoid seg fault commented */

> -                     odp_buffer_free(buf);

>                       err = -1;

> +                     break;

> +             case ODP_TMO_STALE:

> +                     /* Ignore stale timeouts */

> +                     break;

> +             case ODP_TMO_ORPHAN:

> +                     ODP_ERR("Received orphaned timeout!\n");

> +                     abort();

>               }

> +             odp_timer_return_tmo(tmo);

>       }

>

>  err:

> +     if (test_timer_ping != ODP_TIMER_INVALID)

> +             odp_timer_free(test_timer_ping);

>       return err;

>  }

>

> @@ -340,9 +351,9 @@ int main(int argc ODP_UNUSED, char *argv[]

> ODP_UNUSED)

>       pool = odp_buffer_pool_create("msg_pool", pool_base, MSG_POOL_SIZE,

>                                     BUF_SIZE,

>                                     ODP_CACHE_LINE_SIZE,

> -                                   ODP_BUFFER_TYPE_RAW);

> +                                   ODP_BUFFER_TYPE_TIMEOUT);

>       if (pool == ODP_BUFFER_POOL_INVALID) {

> -             ODP_ERR("Pool create failed.\n");

> +             ODP_ERR("Buffer pool create failed.\n");

>               return -1;

>       }

>

> @@ -357,15 +368,19 @@ int main(int argc ODP_UNUSED, char *argv[]

> ODP_UNUSED)

>               return -1;

>       }

>

> -     test_timer_ping = odp_timer_create("ping_timer", pool,

> -                                        RESUS*ODP_TIME_USEC,

> -                                        MINUS*ODP_TIME_USEC,

> -                                        MAXUS*ODP_TIME_USEC);

> -

> -     if (test_timer_ping == ODP_TIMER_INVALID) {

> -             ODP_ERR("Timer create failed.\n");

> +     /*

> +      * Create timer pool

> +      */

> +     tp = odp_timer_pool_create("timer_pool", pool,

> +                                RESUS*ODP_TIME_USEC,

> +                                MINUS*ODP_TIME_USEC,

> +                                MAXUS*ODP_TIME_USEC,

> +                                1, false, ODP_CLOCK_CPU);

> +     if (tp == ODP_TIMER_POOL_INVALID) {

> +             ODP_ERR("Timer pool create failed.\n");

>               return -1;

>       }

> +     odp_timer_pool_start();

>

>       odp_shm_print_all();

>

> --

> 1.9.1

>

>

Savolainen, Petri (NSN - FI/Espoo) Oct. 6, 2014, 1:01 p.m. UTC | #9
> -----Original Message-----
> From: lng-odp-bounces@lists.linaro.org [mailto:lng-odp-
> bounces@lists.linaro.org] On Behalf Of ext Ola Liljedahl
> Sent: Thursday, October 02, 2014 6:23 PM
> To: lng-odp@lists.linaro.org
> Subject: [lng-odp] [PATCHv4] Timer API and and priority queue-based
> implementation
> 
> Signed-off-by: Ola Liljedahl <ola.liljedahl@linaro.org>
> ---
> Fixed review comments for v3 from Anders R.
> * Example code snippets use @code/@endcode.
> * Added some missing doxygen comments.
> * Updated some comments.
> * Reverted year in copyright notices.
> * Added odp_likely() hint.
> * Made some variables self-descriptive and removed redundant comments.
> Changed to use ticket locks instead of spin locks (ticket locks are more
> fair).
> Changed to use ODP_ABORT() which has become available since the last
> patch.
> 
>  example/timer/odp_timer_test.c                     | 125 +--
>  platform/linux-generic/Makefile.am                 |   1 +
>  platform/linux-generic/include/api/odp_timer.h     | 570 +++++++++++--
>  .../include/odp_priority_queue_internal.h          | 108 +++
>  .../linux-generic/include/odp_timer_internal.h     |  71 +-
>  platform/linux-generic/odp_priority_queue.c        | 283 +++++++
>  platform/linux-generic/odp_timer.c                 | 923 ++++++++++++++--
> -----
>  test/api_test/odp_timer_ping.c                     |  73 +-
>  8 files changed, 1648 insertions(+), 506 deletions(-)
>  create mode 100644 platform/linux-
> generic/include/odp_priority_queue_internal.h
>  create mode 100644 platform/linux-generic/odp_priority_queue.c
> 
> diff --git a/example/timer/odp_timer_test.c
> b/example/timer/odp_timer_test.c
> index 6e1715d..750d785 100644
> --- a/example/timer/odp_timer_test.c
> +++ b/example/timer/odp_timer_test.c
> @@ -41,67 +41,89 @@ typedef struct {
>  /** @private Barrier for test synchronisation */
>  static odp_barrier_t test_barrier;
> 
> -/** @private Timer handle*/
> -static odp_timer_t test_timer;
> +/** @private Timer pool handle */
> +static odp_timer_pool_t tp;
> 
> 
> +/** @private Timeout status ASCII strings */
> +static const char *const status2str[] = {
> +	"fresh", "stale", "orphaned"
> +};
> +
>  /** @private test timeout */
>  static void test_abs_timeouts(int thr, test_args_t *args)
>  {
> -	uint64_t tick;
>  	uint64_t period;
>  	uint64_t period_ns;
>  	odp_queue_t queue;
> -	odp_buffer_t buf;
> -	int num;
> +	int remain = args->tmo_count;
> +	odp_timer_t hdl;
> +	uint64_t tick;
> 
>  	ODP_DBG("  [%i] test_timeouts\n", thr);
> 
>  	queue = odp_queue_lookup("timer_queue");
> 
>  	period_ns = args->period_us*ODP_TIME_USEC;
> -	period    = odp_timer_ns_to_tick(test_timer, period_ns);
> +	period    = odp_timer_ns_to_tick(tp, period_ns);
> 
>  	ODP_DBG("  [%i] period %"PRIu64" ticks,  %"PRIu64" ns\n", thr,
>  		period, period_ns);
> 
> -	tick = odp_timer_current_tick(test_timer);
> -
> -	ODP_DBG("  [%i] current tick %"PRIu64"\n", thr, tick);
> -
> -	tick += period;
> +	ODP_DBG("  [%i] current tick %"PRIu64"\n", thr,
> +		odp_timer_current_tick(tp));
> 
> -	if (odp_timer_absolute_tmo(test_timer, tick, queue,
> ODP_BUFFER_INVALID)
> -	    == ODP_TIMER_TMO_INVALID){
> -		ODP_DBG("Timeout request failed\n");
> +	odp_timer_t test_timer;
> +	test_timer = odp_timer_alloc(tp, queue, NULL);
> +	if (test_timer == ODP_TIMER_INVALID) {
> +		ODP_ERR("Failed to allocate timer\n");
>  		return;
>  	}
> +	tick = odp_timer_current_tick(tp);
> +	hdl = test_timer;
> 
> -	num = args->tmo_count;
> -
> -	while (1) {
> -		odp_timeout_t tmo;
> -
> -		buf = odp_schedule_one(&queue, ODP_SCHED_WAIT);
> -
> -		tmo  = odp_timeout_from_buffer(buf);
> -		tick = odp_timeout_tick(tmo);
> -
> -		ODP_DBG("  [%i] timeout, tick %"PRIu64"\n", thr, tick);
> -
> -		odp_buffer_free(buf);
> -
> -		num--;
> -
> -		if (num == 0)
> -			break;
> +	while (remain != 0) {
> +		odp_buffer_t buf;
> +		odp_timer_tmo_t tmo;
> +		odp_timer_tmo_status_t stat;
> +		odp_timer_set_t rc;
> 
>  		tick += period;
> +		rc = odp_timer_set_abs(hdl, tick);
> +		if (odp_unlikely(rc != ODP_TIMER_SET_SUCCESS)) {
> +			ODP_ERR("odp_timer_set_abs() failed (%u)\n", rc);
> +			abort();
> +		}
> 
> -		odp_timer_absolute_tmo(test_timer, tick,
> -				       queue, ODP_BUFFER_INVALID);
> +		/* Get the next ready buffer/timeout */
> +		buf = odp_schedule_one(&queue, ODP_SCHED_WAIT);
> +		if (odp_unlikely(odp_buffer_type(buf) !=
> +				 ODP_BUFFER_TYPE_TIMEOUT)) {
> +			ODP_ERR("Unexpected buffer type received\n");
> +			abort();
> +		}
> +		tmo = odp_timeout_from_buffer(buf);
> +		stat = odp_timer_tmo_status(tmo);
> +		tick = odp_timer_expiration(tmo);
> +		hdl = odp_timer_handle(tmo);
> +		ODP_DBG("  [%i] timeout, tick %"PRIu64", status %s\n",
> +			thr, tick, status2str[stat]);
> +		/* if (stat == ODP_TMO_FRESH)  - do your thing! */
> +		if (odp_likely(stat == ODP_TMO_ORPHAN)) {
> +			/* Some other thread freed the corresponding
> +			   timer after the timeout was already
> +			   enqueued */
> +			/* Timeout handle is invalid, use our own timer */
> +			hdl = test_timer;
> +		}
> +		/* Return timeout to timer manager, regardless of status */
> +		odp_timer_return_tmo(tmo);
> +		remain--;
>  	}
> 
> +	odp_timer_cancel(test_timer);
> +	odp_timer_free(test_timer);

This thread is done. Should it free "hdl" instead of "test_timer" (since it may still be active)?
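
For illustration, a minimal sketch of the change this comment seems to suggest
(hypothetical, not part of the patch): release "hdl", the handle taken from the
last received timeout, rather than the handle this thread originally allocated.

	/* 'hdl' is whatever timer the last received timeout pointed at */
	odp_timer_cancel(hdl);
	odp_timer_free(hdl);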


> +
>  	if (odp_queue_sched_type(queue) == ODP_SCHED_SYNC_ATOMIC)
>  		odp_schedule_release_atomic();
>  }
> @@ -155,7 +177,6 @@ static void print_usage(void)
>  	printf("Options:\n");
>  	printf("  -c, --count <number>    core count, core IDs start from
> 1\n");
>  	printf("  -r, --resolution <us>   timeout resolution in usec\n");
> -	printf("  -m, --min <us>          minimum timeout in usec\n");
>  	printf("  -x, --max <us>          maximum timeout in usec\n");
>  	printf("  -p, --period <us>       timeout period in usec\n");
>  	printf("  -t, --timeouts <count>  timeout repeat count\n");
> @@ -190,14 +211,14 @@ static void parse_args(int argc, char *argv[],
> test_args_t *args)
>  	/* defaults */
>  	args->core_count    = 0; /* all cores */
>  	args->resolution_us = 10000;
> -	args->min_us        = args->resolution_us;
> +	args->min_us        = 0;
>  	args->max_us        = 10000000;
>  	args->period_us     = 1000000;
>  	args->tmo_count     = 30;
> 
>  	while (1) {
>  		opt = getopt_long(argc, argv, "+c:r:m:x:p:t:h",
> -				 longopts, &long_index);
> +				  longopts, &long_index);
> 
>  		if (opt == -1)
>  			break;	/* No more options */
> @@ -321,10 +342,25 @@ int main(int argc, char *argv[])
>  				      ODP_BUFFER_TYPE_TIMEOUT);
> 
>  	if (pool == ODP_BUFFER_POOL_INVALID) {
> -		ODP_ERR("Pool create failed.\n");
> +		ODP_ERR("Buffer pool create failed.\n");
>  		return -1;
>  	}
> 
> +	tp = odp_timer_pool_create("timer_pool", pool,
> +				   args.resolution_us*ODP_TIME_USEC,
> +				   args.min_us*ODP_TIME_USEC,
> +				   args.max_us*ODP_TIME_USEC,
> +				   num_workers, /* One timer per worker */
> +				   true,
> +				   ODP_CLOCK_CPU);
> +	if (tp == ODP_TIMER_POOL_INVALID) {
> +		ODP_ERR("Timer pool create failed.\n");
> +		return -1;
> +	}
> +	odp_timer_pool_start();
> +
> +	odp_shm_print_all();
> +
>  	/*
>  	 * Create a queue for timer test
>  	 */
> @@ -340,19 +376,6 @@ int main(int argc, char *argv[])
>  		return -1;
>  	}
> 
> -	test_timer = odp_timer_create("test_timer", pool,
> -				      args.resolution_us*ODP_TIME_USEC,
> -				      args.min_us*ODP_TIME_USEC,
> -				      args.max_us*ODP_TIME_USEC);
> -
> -	if (test_timer == ODP_TIMER_INVALID) {
> -		ODP_ERR("Timer create failed.\n");
> -		return -1;
> -	}
> -
> -
> -	odp_shm_print_all();
> -
>  	printf("CPU freq %"PRIu64" hz\n", odp_sys_cpu_hz());
>  	printf("Cycles vs nanoseconds:\n");
>  	ns = 0;
> diff --git a/platform/linux-generic/Makefile.am b/platform/linux-
> generic/Makefile.am
> index d076d50..71f923c 100644
> --- a/platform/linux-generic/Makefile.am
> +++ b/platform/linux-generic/Makefile.am
> @@ -59,6 +59,7 @@ __LIB__libodp_la_SOURCES = \
>  			   odp_packet_flags.c \
>  			   odp_packet_io.c \
>  			   odp_packet_socket.c \
> +			   odp_priority_queue.c \
>  			   odp_queue.c \
>  			   odp_ring.c \
>  			   odp_rwlock.c \
> diff --git a/platform/linux-generic/include/api/odp_timer.h
> b/platform/linux-generic/include/api/odp_timer.h
> index 01db839..82a1e05 100644
> --- a/platform/linux-generic/include/api/odp_timer.h
> +++ b/platform/linux-generic/include/api/odp_timer.h
> @@ -8,9 +8,193 @@
>  /**
>   * @file
>   *
> - * ODP timer
> + * ODP timer service
>   */
> 
> +/** Example #1 Retransmission timer (e.g. for reliable connections)
> + @code
> +
> +//Create timer pool for reliable connections
> +#define SEC 1000000000ULL //1s expressed in nanoseconds
> +odp_timer_pool_t tcp_tpid =
> +    odp_timer_pool_create("TCP",
> +			  buffer_pool,
> +			  1000000,//resolution 1ms
> +			  0,//min tmo
> +			  7200 * SEC,//max tmo length 2hours
> +			  40000,//num_timers
> +			  true,//shared
> +			  ODP_CLOCK_CPU
> +			 );
> +if (tcp_tpid == ODP_TIMER_POOL_INVALID)
> +{
> +	//Failed to create timer pool => fatal error
> +}
> +
> +
> +//Setting up a new connection
> +//Allocate retransmission timeout (identical for supervision timeout)
> +//The user pointer points back to the connection context
> +conn->ret_tim = odp_timer_alloc(tcp_tpid, queue, conn);
> +//Check if all resources were successfully allocated
> +if (conn->ret_tim == ODP_TIMER_INVALID)
> +{
> +	//Failed to allocate timer (nothing to free, the handle is invalid)
> +	//Tear down connection
> +	...
> +	return false;
> +}
> +//All necessary resources successfully allocated
> +//Compute initial retransmission length in timer ticks
> +conn->ret_len = odp_timer_ns_to_tick(tcp_tpid, 3 * SEC);//Per RFC1122
> +//Arm the timer
> +odp_timer_set_rel(conn->ret_tim, conn->ret_len);
> +return true;
> +
> +
> +//A packet for the connection has just been transmitted
> +//Reset the retransmission timer
> +odp_timer_set_rel(conn->ret_tim, conn->ret_len);
> +
> +
> +//A retransmission timeout buffer for the connection has been received
> +odp_timer_tmo_t tmo = odp_timeout_from_buffer(buf);
> +odp_timer_tmo_status_t stat = odp_timer_tmo_status(tmo);
> +//Check if timeout is fresh or stale, for stale timeouts we need to reset
> the
> +//timer
> +if (stat == ODP_TMO_FRESH) {
> +	//Fresh timeout, last transmitted packet not acked in time =>
> +	  retransmit
> +	//Get connection from timeout event
> +	conn = odp_timer_get_userptr(tmo);
> +	//Retransmit last packet (e.g. TCP segment)
> +	...
> +	//Re-arm timer using original delta value
> +	odp_timer_set_rel(conn->ret_tim, conn->ret_len);
> +} else if (stat == ODP_TMO_ORPHAN) {
> +	odp_free_buffer(buf);
> +	return;//Get out of here
> +} // else stat == ODP_TMO_STALE, do nothing
> +//Finished processing, return timeout
> +odp_timer_return_tmo(tmo);
> +
> + @endcode
> +*/
> +
> +/** Example #2 Periodic tick
> + @code
> +
> +//Create timer pool for periodic ticks
> +odp_timer_pool_t per_tpid =
> +    odp_timer_pool_create("periodic-tick",
> +			  buffer_pool,
> +			  1,//resolution 1ns
> +			  1,//minimum timeout length 1ns
> +			  1000000000,//maximum timeout length 1s
> +			  10,//num_timers
> +			  false,//not shared
> +			  ODP_CLOCK_CPU
> +			 );
> +if (per_tpid == ODP_TIMER_POOL_INVALID)
> +{
> +    //Failed to create timer pool => fatal error
> +}
> +
> +
> +//Allocate periodic timer
> +tim_1733 = odp_timer_alloc(per_tpid, queue, NULL);
> +//Check if all resources were successfully allocated
> +if (tim_1733 == ODP_TIMER_INVALID)
> +{
> +	//Failed to allocate timer (nothing to free, the handle is invalid)
> +	//Tear down other state
> +	...
> +	return false;
> +}
> +//All necessary resources successfully allocated
> +//Compute tick period in timer ticks
> +period_1733 = odp_timer_ns_to_tick(per_tpid, 1000000000U /
> 1733U);//1733Hz
> +//Compute when next tick should expire
> +next_1733 = odp_timer_current_tick(per_tpid) + period_1733;
> +//Arm the periodic timer
> +odp_timer_set_abs(tim_1733, next_1733);
> +return true;
> +
> +
> +
> +//A periodic timer timeout has been received
> +odp_timer_tmo_t tmo = odp_timeout_from_buffer(buf);
> +//Get status of timeout
> +odp_timer_tmo_status_t stat = odp_timer_tmo_status(tmo);
> +//We expect the timeout is always fresh since we are not calling set or
> cancel
> +on active or expired timers in this example
> +assert(stat == ODP_TMO_FRESH);
> +//Do processing driven by timeout *before*
> +...
> +do {
> +	//Compute when the timer should expire next
> +	next_1733 += period_1733;
> +	//Check that this is in the future
> +	if (odp_likely(next_1733 > odp_timer_current_tick(per_tpid)))
> +		break;//Yes, done
> +	//Else we missed a timeout
> +	//Optionally attempt some recovery and/or logging of the problem
> +	...
> +} while (0);
> +//Re-arm periodic timer
> +odp_timer_set_abs(tim_1733, next_1733);
> +//Or do processing driven by timeout *after*
> +...
> +odp_timer_return_tmo(tmo);
> +return;
> +
> + @endcode
> +*/
> +
> +/** Example #3 Tear down of flow
> + @code
> +//ctx points to flow context data structure owned by application
> +//Free the timer, cancelling any timeout
> +odp_timer_free(ctx->timer);//Any enqueued timeout will be made invalid
> +//Continue tearing down and eventually freeing context
> +...
> +return;
> +
> +//A timeout has been received, check status
> +odp_timer_tmo_t tmo = odp_timeout_from_buffer(buf);
> +switch (odp_timer_tmo_status(tmo))
> +{
> +    case ODP_TMO_FRESH :
> +	//A flow has timed out, tear it down
> +	//Find flow context from timeout
> +	ctx = (context *)odp_timer_get_userptr(tmo);
> +	//Free the supervision timer, any enqueued timeout will remain
> +	odp_timer_free(ctx->tim);
> +	//Free other flow related resources
> +	...
> +	//Free the timeout buffer
> +	odp_buffer_free(buf);
> +	//Flow torn down
> +	break;
> +    case ODP_TMO_STALE :
> +	//A stale timeout was received, return timeout and update timer
> +	odp_timer_return_tmo(tmo);
> +	break;
> +    case ODP_TMO_ORPHAN :
> +	//Orphaned timeout (from previously torn down flow)
> +	//No corresponding timer or flow context
> +	//Free the timeout buffer
> +	odp_buffer_free(buf);
> +	break;
> +}
> +
> + @endcode
> +*/
> +
>  #ifndef ODP_TIMER_H_
>  #define ODP_TIMER_H_
> 
> @@ -18,144 +202,408 @@
>  extern "C" {
>  #endif
> 
> +#include <stdlib.h>
>  #include <odp_std_types.h>
>  #include <odp_buffer.h>
>  #include <odp_buffer_pool.h>
>  #include <odp_queue.h>
> 
> +struct odp_timer_pool_s; /**< Forward declaration */
> +
> +/**
> +* ODP timer pool handle (platform dependent)
> +*/
> +typedef struct odp_timer_pool_s *odp_timer_pool_t;
> +
> +/**
> + * Invalid timer pool handle (platform dependent).
> + */
> +#define ODP_TIMER_POOL_INVALID NULL
> 
>  /**
> - * ODP timer handle
> + * Clock sources for timers in timer pool.
>   */
> -typedef uint32_t odp_timer_t;
> +typedef enum odp_timer_clk_src_e {
> +	/** Use CPU clock as clock source for timers */
> +	ODP_CLOCK_CPU,
> +	/** Use external clock as clock source for timers */
> +	ODP_CLOCK_EXT
> +	/* Platform dependent which other clock sources exist */
> +} odp_timer_clk_src_t;
> 
> -/** Invalid timer */
> -#define ODP_TIMER_INVALID 0
> +struct odp_timer_s; /**< Forward declaration */
> 
> +/**
> +* ODP timer handle (platform dependent).
> +*/
> +typedef struct odp_timer_s *odp_timer_t;
> 
>  /**
> - * ODP timeout handle
> + * Invalid timer handle (platform dependent).
>   */
> -typedef odp_buffer_t odp_timer_tmo_t;
> -
> -/** Invalid timeout */
> -#define ODP_TIMER_TMO_INVALID 0
> +#define ODP_TIMER_INVALID NULL
> 
> +/**
> + * Return values of timer set calls.
> + */
> +typedef enum odp_timer_set_e {
> +	/** Timer set operation successful */
> +	ODP_TIMER_SET_SUCCESS,
> +	/** Timer set operation failed, expiration too early */
> +	ODP_TIMER_SET_TOOEARLY,
> +	/** Timer set operation failed, expiration too late */
> +	ODP_TIMER_SET_TOOLATE
> +} odp_timer_set_t;
> 
>  /**
> - * Timeout notification
> + * Timeout event handle.
>   */
> -typedef odp_buffer_t odp_timeout_t;
> +typedef odp_buffer_t odp_timer_tmo_t;
> 
> +/**
> + * Status of a timeout event.
> + */
> +typedef enum odp_timer_tmo_status_e {
> +	/** Timeout is fresh, process it and return timeout */
> +	ODP_TMO_FRESH,
> +	/** Timer reset or cancelled, just return timeout  */
> +	ODP_TMO_STALE,
> +	/** Timer deleted, return or free timeout */
> +	ODP_TMO_ORPHAN
> +} odp_timer_tmo_status_t;
> 
>  /**
> - * Create a timer
> + * Create a timer pool
>   *
> - * Creates a new timer with requested properties.
> + * Create a new timer pool.
>   *
>   * @param name       Name
> - * @param pool       Buffer pool for allocating timeout notifications
> + * @param buf_pool   Buffer pool for allocating timeouts (and only
> timeouts)
>   * @param resolution Timeout resolution in nanoseconds
> - * @param min_tmo    Minimum timeout duration in nanoseconds
> - * @param max_tmo    Maximum timeout duration in nanoseconds
> + * @param min_tmo    Minimum relative timeout in nanoseconds
> + * @param max_tmo    Maximum relative timeout in nanoseconds
> + * @param num_timers Number of supported timers (minimum)
> + * @param shared     Shared or private timer pool.
> + *		   Operations on shared timers will include the necessary
> + *		   mutual exclusion, operations on private timers may not
> + *		   (mutual exclusion is the responsibility of the caller).
> + * @param clk_src    Clock source to use
>   *
> - * @return Timer handle if successful, otherwise ODP_TIMER_INVALID
> + * @return Timer pool handle if successful, otherwise
> ODP_TIMER_POOL_INVALID
> + * and errno set
>   */
> -odp_timer_t odp_timer_create(const char *name, odp_buffer_pool_t pool,
> -			     uint64_t resolution, uint64_t min_tmo,
> -			     uint64_t max_tmo);
> +odp_timer_pool_t
> +odp_timer_pool_create(const char *name,
> +		      odp_buffer_pool_t buf_pool,
> +		      uint64_t resolution,
> +		      uint64_t min_tmo,
> +		      uint64_t max_tmo,
> +		      uint32_t num_timers,
> +		      bool shared,
> +		      odp_timer_clk_src_t clk_src);
> +
> +/**
> + * Start a timer pool
> + *
> + * Start all created timer pools, enabling the allocation of timers.
> + * The purpose of this call is to coordinate the creation of multiple
> timer
> + * pools that may use the same underlying HW resources.
> + * This function may be called multiple times.
> + */
> +void odp_timer_pool_start(void);
> +
> +/**
> + * Destroy a timer pool
> + *
> + * Destroy a timer pool, freeing all resources.
> + * All timers must have been freed.
> + *
> + * @param tpid  Timer pool identifier
> + */
> +void odp_timer_pool_destroy(odp_timer_pool_t tpid);
> 
>  /**
>   * Convert timer ticks to nanoseconds
>   *
> - * @param timer Timer
> + * @param tpid  Timer pool identifier
>   * @param ticks Timer ticks
>   *
>   * @return Nanoseconds
>   */
> -uint64_t odp_timer_tick_to_ns(odp_timer_t timer, uint64_t ticks);
> +uint64_t odp_timer_tick_to_ns(odp_timer_pool_t tpid, uint64_t ticks);
> 
>  /**
>   * Convert nanoseconds to timer ticks
>   *
> - * @param timer Timer
> + * @param tpid  Timer pool identifier
>   * @param ns    Nanoseconds
>   *
>   * @return Timer ticks
>   */
> -uint64_t odp_timer_ns_to_tick(odp_timer_t timer, uint64_t ns);
> +uint64_t odp_timer_ns_to_tick(odp_timer_pool_t tpid, uint64_t ns);
> 
>  /**
> - * Timer resolution in nanoseconds
> + * Current tick value
>   *
> - * @param timer Timer
> + * @param tpid Timer pool identifier
>   *
> - * @return Resolution in nanoseconds
> + * @return Current time in timer ticks
> + */
> +uint64_t odp_timer_current_tick(odp_timer_pool_t tpid);
> +
> +/**
> + * ODP timer configurations
>   */
> -uint64_t odp_timer_resolution(odp_timer_t timer);
> +
> +typedef enum odp_timer_pool_conf_e {
> +	ODP_TIMER_NAME,      /**< Return name of timer pool */
> +	ODP_TIMER_RESOLUTION,/**< Return the timer resolution (in ns) */
> +	ODP_TIMER_MIN_TICKS, /**< Return the min supported rel timeout
> (ticks)*/
> +	ODP_TIMER_MAX_TICKS, /**< Return the max supported rel timeout
> (ticks)*/
> +	ODP_TIMER_NUM_TIMERS,/**< Return number of supported timers */
> +	ODP_TIMER_SHARED     /**< Return shared flag */
> +} odp_timer_pool_conf_t;
> 
>  /**
> - * Maximum timeout in timer ticks
> + * Query different timer pool configurations, e.g.
> + *  Timer resolution in nanoseconds
> + *  Maximum timeout in timer ticks
> + *  Number of supported timers
> + *  Shared or private timer pool
>   *
> - * @param timer Timer
> + * @param tpid Timer pool identifier
> + * @param item Configuration item being queried
>   *
> - * @return Maximum timeout in timer ticks
> + * @return the requested piece of information or 0 for unknown item.
>   */
> -uint64_t odp_timer_maximum_tmo(odp_timer_t timer);
> +uintptr_t odp_timer_pool_query_conf(odp_timer_pool_t tpid,
> +				    odp_timer_pool_conf_t item);


I had the same comment on v2:

It would be simpler to output all information (to *info) with one function call. For example,

typedef struct odp_timer_pool_info_s {
	const char *name;
	uint64_t resolution;
	uint64_t min_tmo;
	uint64_t max_tmo;
	uint32_t num_timers;
	bool     shared;
} odp_timer_pool_info_t;

int odp_timer_pool_info(odp_timer_pool_t tpid, odp_timer_pool_info_t *info);
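
For illustration, a sketch of how an application could consume that (the
struct and odp_timer_pool_info() above are the proposal, not the patch; 'tp'
is assumed to be a valid timer pool handle):

	odp_timer_pool_info_t info;
	if (odp_timer_pool_info(tp, &info) == 0) {
		/* One call returns all attributes instead of six separate
		 * odp_timer_pool_query_conf() calls */
		printf("%s: resolution %"PRIu64" ns, %"PRIu32" timers, %s\n",
		       info.name, info.resolution, info.num_timers,
		       info.shared ? "shared" : "private");
	}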



> 
>  /**
> - * Current timer tick
> + * Allocate a timer
>   *
> - * @param timer Timer
> + * Create a timer (allocating all necessary resources e.g. timeout event)
> from
> + * the timer pool.
>   *
> - * @return Current time in timer ticks
> + * @param tpid     Timer pool identifier
> + * @param queue    Destination queue for timeout notifications
> + * @param user_ptr User defined pointer or NULL (copied to timeouts)
> + *
> + * @return Timer handle if successful, otherwise ODP_TIMER_INVALID and
> + *	   errno set.
>   */
> -uint64_t odp_timer_current_tick(odp_timer_t timer);
> +odp_timer_t odp_timer_alloc(odp_timer_pool_t tpid,
> +			    odp_queue_t queue,
> +			    void *user_ptr);
> 
>  /**
> - * Request timeout with an absolute timer tick
> + * Free a timer
> + *
> + * Free (destroy) a timer, freeing all associated resources (e.g. default
> + * timeout event). An expired and enqueued timeout event will not be
> freed.
> + * It is the responsibility of the application to free this timeout when
> it
> + * is received.
>   *
> - * When tick reaches tmo_tick, the timer enqueues the timeout
> notification into
> - * the destination queue.
> + * @param tim      Timer handle
> + */
> +void odp_timer_free(odp_timer_t tim);

Need a success/fail return value? User would need to know if the timeout is still coming, or not. User cannot free the destination queue or stop scheduling before the remaining tmo has been received (and freed).
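
For illustration, the calling pattern such a return value would enable,
assuming a hypothetical variant where odp_timer_free() returns non-zero when
an expired timeout is still enqueued (this is not what the patch implements):

	if (odp_timer_free(tim) != 0) {
		/* A timeout for this timer is still in flight; keep
		 * receiving from the destination queue until the orphaned
		 * timeout has been seen and freed */
		drain_orphaned_tmo(queue);	/* hypothetical helper */
	}
	/* Only now is it safe to stop scheduling from 'queue' */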


> +
> +/**
> + * Set a timer (absolute time) with a user-defined timeout buffer
>   *
> - * @param timer    Timer
> - * @param tmo_tick Absolute timer tick value which triggers the timeout
> - * @param queue    Destination queue for the timeout notification
> - * @param buf      User defined timeout notification buffer. When
> - *                 ODP_BUFFER_INVALID, default timeout notification is
> used.
> + * Set (arm) the timer to expire at specific time. The user-defined
> + * buffer will be enqueued when the timer expires.
> + * Arming may fail (if the timer is in state EXPIRED), an earlier timeout

"(if the timer is in state EXPIRED)" internal to the implementation? Explanation of TOOLATE/TOOEARLY instead...

> + * will then be received. odp_timer_tmo_status() must be used to check if
> + * the received timeout is valid.

Can user call odp_timer_tmo_status() on a user defined buffer?? I guess not.
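
Assuming the answer is "no", a receive loop would have to tell the two cases
apart by buffer type before calling any tmo functions. A sketch ('queue' is
the timer's destination queue, the handle_*() calls are hypothetical
application hooks):

	odp_buffer_t buf = odp_schedule_one(&queue, ODP_SCHED_WAIT);
	if (odp_buffer_type(buf) == ODP_BUFFER_TYPE_TIMEOUT) {
		/* Default timeout event, the status check is meaningful */
		odp_timer_tmo_t tmo = odp_timeout_from_buffer(buf);
		if (odp_timer_tmo_status(tmo) == ODP_TMO_FRESH)
			handle_expiration(tmo);
		odp_timer_return_tmo(tmo);
	} else {
		/* User-defined buffer from odp_timer_set_*_w_buf(),
		 * owned and freed by the application */
		handle_user_buf(buf);
		odp_buffer_free(buf);
	}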

>   *
> - * @return Timeout handle if successful, otherwise ODP_TIMER_TMO_INVALID
> + * Note: any invalid parameters will be treated as programming errors and
> will
> + * cause the application to abort.
> + *
> + * @param tim      Timer
> + * @param abs_tck  Expiration time in absolute timer ticks
> + * @param user_buf The buffer to use as timeout event
> + *
> + * @return Success or failure code


@return ODP_TIMER_SET_XXX ..., and an explanation of what the user is expected to do for each of them
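
For illustration, the kind of handling the documentation could spell out for
each code (a sketch only; the recovery actions are application choices, and
'max_rel_tck' is assumed to be known from the pool configuration):

	switch (odp_timer_set_abs(tim, abs_tck)) {
	case ODP_TIMER_SET_SUCCESS:
		break;
	case ODP_TIMER_SET_TOOEARLY:
		/* Requested tick is already in the past (or closer than the
		 * resolution): treat it as an immediate expiration */
		handle_expired_now(tim);	/* hypothetical hook */
		break;
	case ODP_TIMER_SET_TOOLATE:
		/* Beyond the pool's max timeout: arm at the maximum and
		 * re-evaluate when that timeout is received */
		odp_timer_set_rel(tim, max_rel_tck);
		break;
	}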


>   */
> -odp_timer_tmo_t odp_timer_absolute_tmo(odp_timer_t timer, uint64_t
> tmo_tick,
> -				       odp_queue_t queue, odp_buffer_t buf);
> +odp_timer_set_t odp_timer_set_abs_w_buf(odp_timer_t tim,
> +					uint64_t abs_tck,
> +					odp_buffer_t user_buf);
> 
>  /**
> - * Cancel a timeout
> + * Set a timer with an absolute expiration time
> + *
> + * Set (arm) the timer to expire at a specific time.
> + * Arming may fail (if the timer is in state EXPIRED), an earlier timeout
> + * will then be received. odp_timer_tmo_status() must be used to check if
> + * the received timeout is valid.
> + *
> + * Note: any invalid parameters will be treated as programming errors and
> will
> + * cause the application to abort.
>   *
> - * @param timer Timer
> - * @param tmo   Timeout to cancel
> + * @param tim     Timer
> + * @param abs_tck Expiration time in absolute timer ticks
>   *
> - * @return 0 if successful
> + * @return Success or failure code

@return ODP_TIMER_SET_XXX ...


>   */
> -int odp_timer_cancel_tmo(odp_timer_t timer, odp_timer_tmo_t tmo);
> +odp_timer_set_t odp_timer_set_abs(odp_timer_t tim, uint64_t abs_tck);
> 
>  /**
> - * Convert buffer handle to timeout handle
> + * Set a timer with a relative expiration time and user-defined buffer.
>   *
> - * @param buf  Buffer handle
> + * Set (arm) the timer to expire at a relative future time.
> + * Arming may fail (if the timer is in state EXPIRED),
> + * an earlier timeout will then be received. odp_timer_tmo_status() must
> + * be used to check if the received timeout is valid.

odp_timer_tmo_status() on user defined buffer ...

>   *
> - * @return Timeout buffer handle
> + * Note: any invalid parameters will be treated as programming errors and
> will
> + * cause the application to abort.
> + *
> + * @param tim      Timer
> + * @param rel_tck  Expiration time in timer ticks relative to current
> time of
> + *		   the timer pool the timer belongs to
> + * @param user_buf The buffer to use as timeout event
> + *
> + * @return Success or failure code

@return ODP_TIMER_SET_XXX ...

>   */
> -odp_timeout_t odp_timeout_from_buffer(odp_buffer_t buf);
> +odp_timer_set_t odp_timer_set_rel_w_buf(odp_timer_t tim,
> +					uint64_t rel_tck,
> +					odp_buffer_t user_buf);
> +/**
> + * Set a timer with a relative expiration time
> + *
> + * Set (arm) the timer to expire at a relative future time.
> + * Arming may fail (if the timer is in state EXPIRED),
> + * an earlier timeout will then be received. odp_timer_tmo_status() must
> + * be used to check if the received timeout is valid.
> + *
> + * Note: any invalid parameters will be treated as programming errors and
> will
> + * cause the application to abort.
> + *
> + * @param tim     Timer
> + * @param rel_tck Expiration time in timer ticks relative to current time
> of
> + *		  the timer pool the timer belongs to
> + *
> + * @return Success or failure code

@return ODP_TIMER_SET_XXX ...

> + */
> +odp_timer_set_t odp_timer_set_rel(odp_timer_t tim, uint64_t rel_tck);
> 
>  /**
> - * Return absolute timeout tick
> + * Cancel a timer
> + *
> + * Cancel a timer, preventing future expiration and delivery.
> + *
> + * A timer that has already expired and been enqueued for delivery may be
> + * impossible to cancel and will instead be delivered to the destination
> queue.
> + * Use odp_timer_tmo_status() to check whether a received timeout is
> fresh or
> + * stale (cancelled). Stale timeouts will automatically be recycled.
> + *
> + * Note: any invalid parameters will be treated as programming errors and
> will
> + * cause the application to abort.
> + *
> + * @param tim    Timer handle
> + */
> +void odp_timer_cancel(odp_timer_t tim);


Need a success/fail return value? User would need to know if the timeout is still coming, or not...
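
For illustration, what a caller could do with such a return value, assuming a
hypothetical variant where odp_timer_cancel() returns non-zero when the
cancellation came too late (again, not what the patch implements):

	if (odp_timer_cancel(tim) != 0) {
		/* Too late to cancel: a timeout is already enqueued and
		 * will still arrive on the destination queue */
		expect_stale_tmo = true;	/* hypothetical application flag */
	}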


> +
> +/**
> + * Translate from buffer to timeout
> + *
> + * Return the timeout handle that corresponds to the specified buffer
> handle.
> + * The buffer must be of type ODP_BUFFER_TYPE_TIMEOUT.
> + *
> + * @param buf   Buffer handle to translate.
> + *
> + * @return      The corresponding timeout handle.
> + */
> +static inline odp_timer_tmo_t odp_timeout_from_buffer(odp_buffer_t buf)
> +{
> +	if (odp_unlikely(odp_buffer_type(buf) != ODP_BUFFER_TYPE_TIMEOUT)) {
> +		ODP_ERR("Buffer type %u not timeout\n", buf);
> +		abort();
> +	}
> +	/* In this implementation, timeout == buffer */
> +	return (odp_timer_tmo_t)buf;
> +}
> +
> +/**
> + * Translate from timeout to buffer
> + *
> + * Return the buffer handle that corresponds to the specified timeout
> handle.
> + *
> + * @param tmo   Timeout handle to translate.
> + *
> + * @return      The corresponding buffer handle.
> + */
> +static inline odp_buffer_t odp_buffer_from_timeout(odp_timer_tmo_t tmo)
> +{
> +	/* In this implementation, buffer == timeout */
> +	return (odp_buffer_t)tmo;
> +}
> +
> +/**
> + * Return timeout to timer
> + *
> + * Return a received timeout for reuse with the parent timer.
> + * Note: odp_timer_return_tmo() must be called on all received timeouts!
> + * (Excluding user defined timeout buffers).

And excluding ORPHANs?? For ORPHAN just free the tmo buffer.

> + * The timeout must not be accessed after this call, the semantics is
> + * equivalent to a free call.
> + *
> + * @param tmo    Timeout
> + */
> +void odp_timer_return_tmo(odp_timer_tmo_t tmo);
> +
> +/**
> + * Return fresh/stale/orphan status of timeout.
> + *
> + * Check a received timeout for orphaness (i.e. parent timer freed) and
> + * staleness (i.e. parent timer has been reset or cancelled after the
> timeout
> + * expired and was enqueued).
> + * If the timeout is fresh, it should be processed.
> + * If the timeout is stale or orphaned, it should be ignored.
> + * All timeouts must be returned using the odp_timer_return_tmo() call.

Except ORPHANs. Examples just free the buffer...

Maybe some instructions on what to do in these cases (sketched below the list):
- STALE
  - call odp_timer_return_tmo()
  - do not free the tmo buffer
- ORPHAN
  - do not call odp_timer_return_tmo()
  - free the tmo buffer 
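
For illustration, the dispatch that the list above implies, as a sketch (the
ORPHAN rule, return vs. free, is exactly what is being questioned here; the
process_timeout() call is a hypothetical application hook):

	switch (odp_timer_tmo_status(tmo)) {
	case ODP_TMO_FRESH:
		process_timeout(tmo);		/* do the work */
		odp_timer_return_tmo(tmo);	/* return, do not free */
		break;
	case ODP_TMO_STALE:
		odp_timer_return_tmo(tmo);	/* return, do not free */
		break;
	case ODP_TMO_ORPHAN:
		/* No parent timer left, free the buffer instead */
		odp_buffer_free(odp_buffer_from_timeout(tmo));
		break;
	}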


> + *
> + * @param tmo    Timeout
> + *
> + * @return One of ODP_TMO_FRESH, ODP_TMO_STALE or ODP_TMO_ORPHAN.
> + */
> +odp_timer_tmo_status_t odp_timer_tmo_status(odp_timer_tmo_t tmo);
> +
> +/**
> + * Get timer handle
> + *
> + * Return Handle of parent timer.
> + *
> + * @param tmo   Timeout
> + *
> + * @return Timer handle or ODP_TIMER_INVALID for orphaned timeouts.
> + *         Note that the parent timer could be freed by some other thread
> + *         at any time and thus the timeout becomes orphaned.
> + */
> +odp_timer_t odp_timer_handle(odp_timer_tmo_t tmo);
> +
> +/**
> + * Get expiration time
> + *
> + * Return (requested) expiration time of timeout.
> + *
> + * @param tmo   Timeout
> + *
> + * @return Expiration time
> + */
> +uint64_t odp_timer_expiration(odp_timer_tmo_t tmo);
> +
> +/**
> + * Get user pointer
> + *
> + * Return User pointer of timer associated with timeout.
> + * The user pointer is often used to point to some associated context.
>   *
> - * @param tmo Timeout buffer handle
> + * @param tmo   Timeout
>   *
> - * @return Absolute timeout tick
> + * @return User pointer
>   */
> -uint64_t odp_timeout_tick(odp_timeout_t tmo);
> +void *odp_timer_userptr(odp_timer_tmo_t tmo);
> 
>  #ifdef __cplusplus
>  }
> diff --git a/platform/linux-generic/include/odp_priority_queue_internal.h
> b/platform/linux-generic/include/odp_priority_queue_internal.h
> new file mode 100644
> index 0000000..7d7f3a2
> --- /dev/null
> +++ b/platform/linux-generic/include/odp_priority_queue_internal.h
> @@ -0,0 +1,108 @@
> +#ifndef _PRIORITY_QUEUE_H
> +#define _PRIORITY_QUEUE_H
> +
> +#include <assert.h>
> +#include <stddef.h>
> +#include <stdint.h>
> +#include <stdbool.h>
> +#include <odp_align.h>
> +
> +#define INVALID_INDEX ~0U
> +#define INVALID_PRIORITY ((pq_priority_t)~0ULL)
> +
> +typedef uint64_t pq_priority_t;
> +
> +struct heap_node;
> +
> +typedef struct priority_queue {
> +	uint32_t max_elems;/* Number of elements in heap */
> +	/* Number of registered elements (active + inactive) */
> +	uint32_t reg_elems;
> +	uint32_t num_elems;/* Number of active elements */
> +	struct heap_node *heap;
> +	struct heap_node *org_ptr;
> +} priority_queue ODP_ALIGNED(sizeof(uint64_t));
> +
> +/* The user gets a pointer to this structure */
> +typedef struct {
> +	/* Set when pq_element registered with priority queue */
> +	priority_queue *pq;
> +	uint32_t index;/* Index into heap array */
> +	pq_priority_t prio;
> +} pq_element;
> +
> +/*** Operations on pq_element ***/
> +
> +static inline void pq_element_con(pq_element *this)
> +{
> +	this->pq = NULL;
> +	this->index = INVALID_INDEX;
> +	this->prio = 0U;
> +}
> +
> +static inline void pq_element_des(pq_element *this)
> +{
> +	(void)this;
> +	assert(this->index == INVALID_INDEX);
> +}
> +
> +static inline priority_queue *get_pq(const pq_element *this)
> +{
> +	return this->pq;
> +}
> +
> +static inline pq_priority_t get_prio(const pq_element *this)
> +{
> +	return this->prio;
> +}
> +
> +static inline uint32_t get_index(const pq_element *this)
> +{
> +	return this->index;
> +}
> +
> +static inline bool is_active(const pq_element *this)
> +{
> +	return this->index != INVALID_INDEX;
> +}
> +
> +/*** Operations on priority_queue ***/
> +
> +extern uint32_t pq_smallest_child(priority_queue *, uint32_t,
> pq_priority_t);
> +extern void pq_bubble_down(priority_queue *, pq_element *);
> +extern void pq_bubble_up(priority_queue *, pq_element *);
> +
> +static inline bool valid_index(priority_queue *this, uint32_t idx)
> +{
> +	return idx < this->num_elems;
> +}
> +
> +extern void priority_queue_con(priority_queue *, uint32_t _max_elems);
> +extern void priority_queue_des(priority_queue *);
> +
> +/* Register pq_element with priority queue */
> +/* Return false if priority queue full */
> +extern bool pq_register_element(priority_queue *, pq_element *);
> +
> +/* Activate and add pq_element to priority queue */
> +/* Element must be disarmed */
> +extern void pq_activate_element(priority_queue *, pq_element *,
> pq_priority_t);
> +
> +/* Reset (increase) priority for pq_element */
> +/* Element may be active or inactive (released) */
> +extern void pq_reset_element(priority_queue *, pq_element *,
> pq_priority_t);
> +
> +/* Deactivate and remove element from priority queue */
> +/* Element may be active or inactive (released) */
> +extern void pq_deactivate_element(priority_queue *, pq_element *);
> +
> +/* Unregister pq_element */
> +extern void pq_unregister_element(priority_queue *, pq_element *);
> +
> +/* Return priority of first element (lowest numerical value) */
> +extern pq_priority_t pq_first_priority(const priority_queue *);
> +
> +/* Deactivate and return first element if its prio is <= threshold */
> +extern pq_element *pq_release_element(priority_queue *, pq_priority_t
> thresh);
> +
> +#endif /* _PRIORITY_QUEUE_H */
> diff --git a/platform/linux-generic/include/odp_timer_internal.h
> b/platform/linux-generic/include/odp_timer_internal.h
> index ad28f53..461f28c 100644
> --- a/platform/linux-generic/include/odp_timer_internal.h
> +++ b/platform/linux-generic/include/odp_timer_internal.h
> @@ -1,4 +1,4 @@
> -/* Copyright (c) 2013, Linaro Limited
> +/* Copyright (c) 2014, Linaro Limited
>   * All rights reserved.
>   *
>   * SPDX-License-Identifier:     BSD-3-Clause
> @@ -8,72 +8,51 @@
>  /**
>   * @file
>   *
> - * ODP timer timeout descriptor - implementation internal
> + * ODP timeout descriptor - implementation internal
>   */
> 
>  #ifndef ODP_TIMER_INTERNAL_H_
>  #define ODP_TIMER_INTERNAL_H_
> 
> -#ifdef __cplusplus
> -extern "C" {
> -#endif
> -
> -#include <odp_std_types.h>
> -#include <odp_queue.h>
> -#include <odp_buffer.h>
> +#include <odp_align.h>
> +#include <odp_debug.h>
>  #include <odp_buffer_internal.h>
>  #include <odp_buffer_pool_internal.h>
>  #include <odp_timer.h>
> 
> -struct timeout_t;
> -
> -typedef struct timeout_t {
> -	struct timeout_t *next;
> -	int               timer_id;
> -	int               tick;
> -	uint64_t          tmo_tick;
> -	odp_queue_t       queue;
> -	odp_buffer_t      buf;
> -	odp_buffer_t      tmo_buf;
> -} timeout_t;
> -
> -
> -struct odp_timeout_hdr_t;
> -
>  /**
> - * Timeout notification header
> + * Internal Timeout header
>   */
> -typedef struct odp_timeout_hdr_t {
> +typedef struct {
> +	/* common buffer header */
>  	odp_buffer_hdr_t buf_hdr;
> 
> -	timeout_t meta;
> -
> -	uint8_t buf_data[];
> +	/* Requested expiration time */
> +	uint64_t expiration;
> +	/* User ptr inherited from parent timer */
> +	void *user_ptr;
> +	/* Parent timer */
> +	odp_timer_t timer;
> +	/* Tag inherited from parent timer at time of expiration */
> +	uint32_t tag;
> +	/* Gen-cnt inherited from parent timer at time of creation */
> +	uint16_t gencnt;
> +	uint16_t pad;
> +	uint8_t buf_data[0];
>  } odp_timeout_hdr_t;
> 
> -
> -
>  ODP_STATIC_ASSERT(sizeof(odp_timeout_hdr_t) ==
> -	   ODP_OFFSETOF(odp_timeout_hdr_t, buf_data),
> -	   "ODP_TIMEOUT_HDR_T__SIZE_ERR");
> -
> +		  ODP_OFFSETOF(odp_timeout_hdr_t, buf_data),
> +		  "sizeof(odp_timeout_hdr_t) ==
> ODP_OFFSETOF(odp_timeout_hdr_t, buf_data)");
>  ODP_STATIC_ASSERT(sizeof(odp_timeout_hdr_t) % sizeof(uint64_t) == 0,
> -	   "ODP_TIMEOUT_HDR_T__SIZE_ERR2");
> -
> +		  "sizeof(odp_timeout_hdr_t) % sizeof(uint64_t) == 0");
> 
>  /**
> - * Return timeout header
> + * Return the timeout header
>   */
> -static inline odp_timeout_hdr_t *odp_timeout_hdr(odp_timeout_t tmo)
> +static inline odp_timeout_hdr_t *odp_timeout_hdr(odp_buffer_t buf)
>  {
> -	odp_buffer_hdr_t *buf_hdr = odp_buf_to_hdr((odp_buffer_t)tmo);
> -	return (odp_timeout_hdr_t *)(uintptr_t)buf_hdr;
> -}
> -
> -
> -
> -#ifdef __cplusplus
> +	return (odp_timeout_hdr_t *)odp_buf_to_hdr(buf);
>  }
> -#endif
> 
>  #endif
> diff --git a/platform/linux-generic/odp_priority_queue.c b/platform/linux-
> generic/odp_priority_queue.c
> new file mode 100644
> index 0000000..b72c26f
> --- /dev/null
> +++ b/platform/linux-generic/odp_priority_queue.c
> @@ -0,0 +1,283 @@
> +#define NDEBUG /* Enabled by default by ODP build system */
> +#include <assert.h>
> +#include <unistd.h>
> +#include <stdlib.h>
> +#include <string.h>
> +#include <strings.h>
> +#include <odp_hints.h>
> +#include <odp_align.h>
> +#include <odp_debug.h>
> +
> +#include "odp_priority_queue_internal.h"
> +
> +
> +#define NUM_CHILDREN 4
> +#define CHILD(n) (NUM_CHILDREN * (n) + 1)
> +#define PARENT(n) (((n) - 1) / NUM_CHILDREN)
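
For reference, a small standalone sketch of the 4-ary heap index arithmetic
used throughout this file (the macros are copied from the patch; the
assertions only illustrate the array layout):

	#include <assert.h>

	#define NUM_CHILDREN 4
	#define CHILD(n) (NUM_CHILDREN * (n) + 1)
	#define PARENT(n) (((n) - 1) / NUM_CHILDREN)

	int main(void)
	{
		/* Children of node n occupy indices 4n+1 .. 4n+4, so aligning
		 * &heap[1] (as priority_queue_con() does) keeps the root's
		 * four children in a single cache line */
		assert(CHILD(0) == 1 && PARENT(1) == 0 && PARENT(4) == 0);
		assert(CHILD(1) == 5 && PARENT(8) == 1);
		return 0;
	}
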
> +
> +/* Internal nodes in the array */
> +typedef struct heap_node {
> +	pq_element *elem;
> +	/* Copy of elem->prio so we avoid unnecessary dereferencing */
> +	pq_priority_t prio;
> +} heap_node;
> +
> +static void pq_assert_heap(priority_queue *this);
> +
> +#define ALIGNMENT(p) (1U << ((unsigned)ffs((int)p) - 1U))
> +
> +void priority_queue_con(priority_queue *this, uint32_t _max_elems)
> +{
> +	this->max_elems = _max_elems;
> +	this->reg_elems = 0;
> +	this->num_elems = 0;
> +	this->org_ptr = malloc((_max_elems + 64 / sizeof(heap_node)) *
> +			       sizeof(heap_node));
> +	if (odp_unlikely(this->org_ptr == NULL)) {
> +		ODP_ERR("malloc failed\n");
> +		abort();
> +	}
> +	this->heap = this->org_ptr;
> +	assert((size_t)&this->heap[1] % 8 == 0);
> +	/* Increment base address until first child (index 1) is cache line
> */
> +	/* aligned and thus all children (e.g. index 1-4) stored in the */
> +	/* same cache line. We are not interested in the alignment of */
> +	/* heap[0] as this is a lone node */
> +	while ((size_t)&this->heap[1] % ODP_CACHE_LINE_SIZE != 0) {
> +		/* Cast to ptr to struct member with the greatest alignment */
> +		/* requirement */
> +		this->heap = (heap_node *)((pq_priority_t *)this->heap + 1);
> +	}
> +	pq_assert_heap(this);
> +}
> +
> +void priority_queue_des(priority_queue *this)
> +{
> +	pq_assert_heap(this);
> +	free(this->org_ptr);
> +}
> +
> +#ifndef NDEBUG
> +static uint32_t
> +pq_assert_elem(priority_queue *this, uint32_t index, bool recurse)
> +{
> +	uint32_t num = 1;
> +	const pq_element *elem = this->heap[index].elem;
> +	assert(elem->index == index);
> +	assert(elem->prio == this->heap[index].prio);
> +	uint32_t child = CHILD(index);
> +	uint32_t i;
> +	for (i = 0; i < NUM_CHILDREN; i++, child++) {
> +		if (valid_index(this, child)) {
> +			assert(this->heap[child].elem != NULL);
> +			assert(this->heap[child].prio >= elem->prio);
> +			if (recurse)
> +				num += pq_assert_elem(this, child, recurse);
> +		}
> +	}
> +	return num;
> +}
> +#endif
> +
> +static void
> +pq_assert_heap(priority_queue *this)
> +{
> +	(void)this;
> +#ifndef NDEBUG
> +	uint32_t num = 0;
> +	if (odp_likely(this->num_elems != 0)) {
> +		assert(this->heap[0].elem != NULL);
> +		num += pq_assert_elem(this, 0, true);
> +	}
> +	assert(num == this->num_elems);
> +	unsigned i;
> +	for (i = 0; i < this->num_elems; i++) {
> +		assert(this->heap[i].elem != NULL);
> +		assert(this->heap[i].prio != INVALID_PRIORITY);
> +	}
> +#endif
> +}
> +
> +/* Bubble up to proper position */
> +void
> +pq_bubble_up(priority_queue *this, pq_element *elem)
> +{
> +	assert(this->heap[elem->index].elem == elem);
> +	assert(this->heap[elem->index].prio == elem->prio);
> +	uint32_t current = elem->index;
> +	pq_priority_t prio = elem->prio;
> +	assert(current == 0 || this->heap[PARENT(current)].elem != NULL);
> +	/* Move up into proper position */
> +	while (current != 0 && this->heap[PARENT(current)].prio > prio) {
> +		uint32_t parent = PARENT(current);
> +		assert(this->heap[parent].elem != NULL);
> +		/* Swap current with parent */
> +		/* 1) Move parent down */
> +		this->heap[current].elem = this->heap[parent].elem;
> +		this->heap[current].prio = this->heap[parent].prio;
> +		this->heap[current].elem->index = current;
> +		/* 2) Move current up to parent */
> +		this->heap[parent].elem = elem;
> +		this->heap[parent].prio = prio;
> +		this->heap[parent].elem->index = parent;
> +		/* Continue moving elem until it is in the right place */
> +		current = parent;
> +	}
> +	pq_assert_heap(this);
> +}
> +
> +/* Find the smallest child that is smaller than the specified priority */
> +/* Very hot function, can we decrease the number of cache misses? */
> +uint32_t pq_smallest_child(priority_queue *this,
> +			   uint32_t index,
> +			   pq_priority_t val)
> +{
> +	uint32_t smallest = index;
> +	uint32_t child = CHILD(index);
> +#if NUM_CHILDREN == 4
> +	/* Unroll loop when all children exist */
> +	if (odp_likely(valid_index(this, child + 3))) {
> +		if (this->heap[child + 0].prio < val)
> +			val = this->heap[smallest = child + 0].prio;
> +		if (this->heap[child + 1].prio < val)
> +			val = this->heap[smallest = child + 1].prio;
> +		if (this->heap[child + 2].prio < val)
> +			val = this->heap[smallest = child + 2].prio;
> +		if (this->heap[child + 3].prio < val)
> +			(void)this->heap[smallest = child + 3].prio;
> +		return smallest;
> +	}
> +#endif
> +	uint32_t i;
> +	for (i = 0; i < NUM_CHILDREN; i++) {
> +		if (odp_unlikely(!valid_index(this, child + i)))
> +			break;
> +		if (this->heap[child + i].prio < val) {
> +			smallest = child + i;
> +			val = this->heap[smallest].prio;
> +		}
> +	}
> +	return smallest;
> +}
> +
> +/* Very hot function, can it be optimised? */
> +void
> +pq_bubble_down(priority_queue *this, pq_element *elem)
> +{
> +	assert(this->heap[elem->index].elem == elem);
> +	assert(this->heap[elem->index].prio == elem->prio);
> +	uint32_t current = elem->index;
> +	pq_priority_t prio = elem->prio;
> +	for (;;) {
> +		uint32_t child = pq_smallest_child(this, current, prio);
> +		if (current == child) {
> +			/* No smaller child, we are done */
> +			pq_assert_heap(this);
> +			return;
> +		}
> +		/* Element larger than smaller child, must move down */
> +		assert(this->heap[child].elem != NULL);
> +		/* 1) Move child up to current */
> +		this->heap[current].elem = this->heap[child].elem;
> +		this->heap[current].prio = this->heap[child].prio;
> +		/* 2) Move current down to child */
> +		this->heap[child].elem = elem;
> +		this->heap[child].prio = prio;
> +		this->heap[child].elem->index = child;
> +
> +		this->heap[current].elem->index = current; /* cache misses! */
> +		/* Continue moving element until it is in the right place */
> +		current = child;
> +	}
> +}
> +
> +bool
> +pq_register_element(priority_queue *this, pq_element *elem)
> +{
> +	if (odp_likely(this->reg_elems < this->max_elems)) {
> +		elem->pq = this;
> +		this->reg_elems++;
> +		return true;
> +	}
> +	return false;
> +}
> +
> +void
> +pq_unregister_element(priority_queue *this, pq_element *elem)
> +{
> +	assert(elem->pq == this);
> +	if (is_active(elem))
> +		pq_deactivate_element(this, elem);
> +	this->reg_elems--;
> +}
> +
> +void
> +pq_activate_element(priority_queue *this, pq_element *elem, pq_priority_t
> prio)
> +{
> +	assert(elem->index == INVALID_INDEX);
> +	/* Insert element at end */
> +	uint32_t index = this->num_elems++;
> +	this->heap[index].elem = elem;
> +	this->heap[index].prio = prio;
> +	elem->index = index;
> +	elem->prio = prio;
> +	pq_bubble_up(this, elem);
> +}
> +
> +void
> +pq_deactivate_element(priority_queue *this, pq_element *elem)
> +{
> +	assert(elem->pq == this);
> +	if (odp_likely(is_active(elem))) {
> +		/* Swap element with last element */
> +		uint32_t current = elem->index;
> +		uint32_t last = --this->num_elems;
> +		if (odp_likely(last != current)) {
> +			/* Move last element to current */
> +			this->heap[current].elem = this->heap[last].elem;
> +			this->heap[current].prio = this->heap[last].prio;
> +			this->heap[current].elem->index = current;
> +			/* Bubble down old 'last' element to its proper place*/
> +			if (this->heap[current].prio < elem->prio)
> +				pq_bubble_up(this, this->heap[current].elem);
> +			else
> +				pq_bubble_down(this, this->heap[current].elem);
> +		}
> +		elem->index = INVALID_INDEX;
> +		pq_assert_heap(this);
> +	}
> +}
> +
> +void
> +pq_reset_element(priority_queue *this, pq_element *elem, pq_priority_t
> prio)
> +{
> +	assert(prio != INVALID_PRIORITY);
> +	if (odp_likely(is_active(elem))) {
> +		assert(prio >= elem->prio);
> +		elem->prio = prio;
> +		this->heap[elem->index].prio = prio;/* cache misses here! */
> +		pq_bubble_down(this, elem);
> +		pq_assert_heap(this);
> +	} else {
> +		pq_activate_element(this, elem, prio);
> +	}
> +}
> +
> +pq_priority_t pq_first_priority(const priority_queue *this)
> +{
> +	return this->num_elems != 0 ? this->heap[0].prio : INVALID_PRIORITY;
> +}
> +
> +pq_element *
> +pq_release_element(priority_queue *this, pq_priority_t threshold)
> +{
> +	if (odp_likely(this->num_elems != 0 &&
> +		       this->heap[0].prio <= threshold)) {
> +		pq_element *elem = this->heap[0].elem;
> +		/* Remove element from heap */
> +		pq_deactivate_element(this, elem);
> +		assert(elem->prio <= threshold);
> +		return elem;
> +	}
> +	return NULL;
> +}
> diff --git a/platform/linux-generic/odp_timer.c b/platform/linux-
> generic/odp_timer.c
> index 313c713..0e5071c 100644
> --- a/platform/linux-generic/odp_timer.c
> +++ b/platform/linux-generic/odp_timer.c
> @@ -4,428 +4,713 @@
>   * SPDX-License-Identifier:     BSD-3-Clause
>   */
> 
> -#include <odp_timer.h>
> -#include <odp_timer_internal.h>
> -#include <odp_time.h>
> -#include <odp_buffer_pool_internal.h>
> -#include <odp_internal.h>
> -#include <odp_atomic.h>
> -#include <odp_spinlock.h>
> -#include <odp_sync.h>
> -#include <odp_debug.h>
> -
> -#include <signal.h>
> -#include <time.h>
> +/**
> + * @file
> + *
> + * ODP timer service
> + *
> + */
> 
> +#include <assert.h>
> +#include <errno.h>
>  #include <string.h>
> -
> -#define NUM_TIMERS    1
> -#define MAX_TICKS     1024
> -#define MAX_RES       ODP_TIME_SEC
> -#define MIN_RES       (100*ODP_TIME_USEC)
> -
> -
> -typedef struct {
> -	odp_spinlock_t lock;
> -	timeout_t      *list;
> -} tick_t;
> -
> -typedef struct {
> -	int               allocated;
> -	volatile int      active;
> -	volatile uint64_t cur_tick;
> -	timer_t           timerid;
> -	odp_timer_t       timer_hdl;
> -	odp_buffer_pool_t pool;
> -	uint64_t          resolution_ns;
> -	uint64_t          max_ticks;
> -	tick_t            tick[MAX_TICKS];
> -
> -} timer_ring_t;
> -
> -typedef struct {
> -	odp_spinlock_t lock;
> -	int            num_timers;
> -	timer_ring_t   timer[NUM_TIMERS];
> -
> -} timer_global_t;
> -
> -/* Global */
> -static timer_global_t odp_timer;
> -
> -static void add_tmo(tick_t *tick, timeout_t *tmo)
> +#include <stdlib.h>
> +#include <time.h>
> +#include <signal.h>
> +#include "odp_std_types.h"
> +#include "odp_buffer.h"
> +#include "odp_buffer_pool.h"
> +#include "odp_queue.h"
> +#include "odp_hints.h"
> +#include "odp_sync.h"
> +#include "odp_ticketlock.h"
> +#include "odp_debug.h"
> +#include "odp_align.h"
> +#include "odp_shared_memory.h"
> +#include "odp_hints.h"
> +#include "odp_internal.h"
> +#include "odp_time.h"
> +#include "odp_timer.h"
> +#include "odp_timer_internal.h"
> +#include "odp_priority_queue_internal.h"
> +
> +/******************************************************************************
> + * Translation between timeout and timeout header
> + *****************************************************************************/
> +
> +static inline odp_timeout_hdr_t *odp_tmo_to_hdr(odp_timer_tmo_t tmo)
>  {
> -	odp_spinlock_lock(&tick->lock);
> -
> -	tmo->next  = tick->list;
> -	tick->list = tmo;
> +	odp_buffer_t buf = odp_buffer_from_timeout(tmo);
> +	odp_timeout_hdr_t *tmo_hdr = (odp_timeout_hdr_t *)odp_buf_to_hdr(buf);
> +	return tmo_hdr;
> +}
> 
> -	odp_spinlock_unlock(&tick->lock);
> +/******************************************************************************
> + * odp_timer abstract datatype
> + *****************************************************************************/
> +
> +typedef struct odp_timer_s {
> +	pq_element pqelem;/* Base class */
> +	uint64_t req_tmo;/* Requested timeout tick */
> +	odp_buffer_t tmo_buf;/* ODP_BUFFER_INVALID if timeout enqueued */
> +	odp_queue_t queue;/* ODP_QUEUE_INVALID if timer is free */
> +	uint32_t tag;/* Reusing tag as next pointer/index when timer is free */
> +	uint16_t gencnt;/* Smaller to make place for user_buf flag */
> +	unsigned int user_buf:1; /* User-defined buffer? */
> +} odp_timer;
> +
> +/* Constructor */
> +static inline void odp_timer_con(odp_timer *this)
> +{
> +	pq_element_con(&this->pqelem);
> +	this->tmo_buf = ODP_BUFFER_INVALID;
> +	this->queue = ODP_QUEUE_INVALID;
> +	this->gencnt = 0;
>  }
> 
> -static timeout_t *rem_tmo(tick_t *tick)
> +/* Destructor */
> +static inline void odp_timer_des(odp_timer *this)
>  {
> -	timeout_t *tmo;
> +	assert(this->tmo_buf == ODP_BUFFER_INVALID);
> +	assert(this->queue == ODP_QUEUE_INVALID);
> +	pq_element_des(&this->pqelem);
> +}
> 
> -	odp_spinlock_lock(&tick->lock);
> +/* Setup when timer is allocated */
> +static void setup(odp_timer *this,
> +		  odp_queue_t _q,
> +		  void *_up,
> +		  odp_buffer_t _tmo)
> +{
> +	this->req_tmo = INVALID_PRIORITY;
> +	this->tmo_buf = _tmo;
> +	this->queue = _q;
> +	this->tag = 0;
> +	this->user_buf = false;
> +	/* Initialise constant fields of timeout event */
> +	odp_timeout_hdr_t *tmo_hdr =
> +		odp_tmo_to_hdr(odp_timeout_from_buffer(this->tmo_buf));
> +	tmo_hdr->gencnt = this->gencnt;
> +	tmo_hdr->timer = this;
> +	tmo_hdr->user_ptr = _up;
> +	/* tmo_hdr->tag set at expiration time */
> +	/* tmo_hdr->expiration set at expiration time */
> +	assert(this->queue != ODP_QUEUE_INVALID);
> +}
> 
> -	tmo = tick->list;
> +/* Teardown when timer is freed */
> +static odp_buffer_t teardown(odp_timer *this)
> +{
> +	/* Increase generation count to make any pending timeout(s) orphaned */
> +	++this->gencnt;
> +	odp_buffer_t buf = this->tmo_buf;
> +	this->tmo_buf = ODP_BUFFER_INVALID;
> +	this->queue = ODP_QUEUE_INVALID;
> +	return buf;
> +}
> 
> -	if (tmo)
> -		tick->list = tmo->next;
> +static inline uint32_t get_next_free(odp_timer *this)
> +{
> +	assert(this->queue == ODP_QUEUE_INVALID);
> +	return this->tag;
> +}
> 
> -	odp_spinlock_unlock(&tick->lock);
> +static inline void set_next_free(odp_timer *this, uint32_t nf)
> +{
> +	assert(this->queue == ODP_QUEUE_INVALID);
> +	this->tag = nf;
> +}
> 
> -	if (tmo)
> -		tmo->next = NULL;
> +/******************************************************************************
> + * odp_timer_pool abstract datatype
> + * Includes alloc and free timer
> + *****************************************************************************/
> +
> +typedef struct odp_timer_pool_s {
> +	priority_queue pq;
> +	uint64_t cur_tick;/* Current tick value */
> +	uint64_t min_tick;/* Current expiration lower bound */
> +	uint64_t max_tick;/* Current expiration higher bound */
> +	bool shared;
> +	odp_ticketlock_t lock;
> +	const char *name;
> +	odp_buffer_pool_t buf_pool;
> +	uint64_t resolution_ns;
> +	uint64_t min_tmo_tck;
> +	uint64_t max_tmo_tck;
> +	odp_timer *timers;
> +	uint32_t num_alloc;/* Current number of allocated timers */
> +	uint32_t max_timers;/* Max number of timers */
> +	uint32_t first_free;/* 0..max_timers-1 => free timer */
> +	timer_t timerid;
> +	odp_timer_clk_src_t clk_src;
> +} odp_timer_pool;
> +
> +/* Forward declarations */
> +static void timer_init(odp_timer_pool *tp);
> +static void timer_exit(odp_timer_pool *tp);
> +
> +static void odp_timer_pool_con(odp_timer_pool *this,
> +			       const char *_n,
> +			       odp_buffer_pool_t _bp,
> +			       uint64_t _r,
> +			       uint64_t _mint,
> +			       uint64_t _maxt,
> +			       uint32_t _mt,
> +			       bool _s,
> +			       odp_timer_clk_src_t _cs)
> +{
> +	priority_queue_con(&this->pq, _mt);
> +	this->cur_tick = 0;
> +	this->shared = _s;
> +	this->name = strdup(_n);
> +	this->buf_pool = _bp;
> +	this->resolution_ns = _r;
> +	this->min_tmo_tck = odp_timer_ns_to_tick(this, _mint);
> +	this->max_tmo_tck = odp_timer_ns_to_tick(this, _maxt);
> +	this->min_tick = this->cur_tick + this->min_tmo_tck;
> +	this->max_tick = this->cur_tick + this->max_tmo_tck;
> +	this->num_alloc = 0;
> +	this->max_timers = _mt;
> +	this->first_free = 0;
> +	this->clk_src = _cs;
> +	this->timers = malloc(sizeof(odp_timer) * this->max_timers);
> +	if (this->timers == NULL)
> +		ODP_ABORT("%s: malloc failed\n", _n);
> +	uint32_t i;
> +	for (i = 0; i < this->max_timers; i++)
> +		odp_timer_con(&this->timers[i]);
> +	for (i = 0; i < this->max_timers; i++)
> +		set_next_free(&this->timers[i], i + 1);
> +	odp_ticketlock_init(&this->lock);
> +	if (this->clk_src == ODP_CLOCK_CPU)
> +		timer_init(this);
> +	/* Make sure timer pool initialisation is globally observable */
> +	/* before we return a pointer to it */
> +	odp_sync_stores();
> +}
> 
> -	return tmo;
> +static odp_timer_pool *odp_timer_pool_new(
> +	const char *_n,
> +	odp_buffer_pool_t _bp,
> +	uint64_t _r,
> +	uint64_t _mint,
> +	uint64_t _maxt,
> +	uint32_t _mt,
> +	bool _s,
> +	odp_timer_clk_src_t _cs)
> +{
> +	odp_timer_pool *this = malloc(sizeof(odp_timer_pool));


Should not use malloc, but ODP shm instead, if we want to support processes (in addition to pthreads)...
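For illustration only (editor's sketch, not part of the patch): with the shm header that this file already includes, the allocation could look roughly like the following, assuming an odp_shm_reserve(name, size, align) variant that returns a pointer:

	/* Sketch: place the timer pool in ODP shared memory so it is usable
	 * from processes as well as pthreads; error handling as in the patch */
	odp_timer_pool *this = odp_shm_reserve(_n, sizeof(odp_timer_pool),
					       ODP_CACHE_LINE_SIZE);
	if (odp_unlikely(this == NULL))
		ODP_ABORT("%s: timer pool shm reserve failed\n", _n);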


> +	if (odp_unlikely(this == NULL))
> +		ODP_ABORT("%s: timer pool malloc failed\n", _n);
> +	odp_timer_pool_con(this, _n, _bp, _r, _mint, _maxt, _mt, _s, _cs);
> +	return this;
>  }
> 
> -/**
> - * Search and delete tmo entry from timeout list
> - * return -1 : on error.. handle not in list
> - *		0 : success
> - */
> -static int find_and_del_tmo(timeout_t **tmo, odp_timer_tmo_t handle)
> +static void odp_timer_pool_des(odp_timer_pool *this)
>  {
> -	timeout_t *cur, *prev;
> -	prev = NULL;
> +	if (this->shared)
> +		odp_ticketlock_lock(&this->lock);
> +	if (this->num_alloc != 0) {
> +		/* It's a programming error to attempt to destroy a */
> +		/* timer pool which is still in use */
> +		ODP_ABORT("%s: timers in use\n", this->name);
> +	}
> +	if (this->clk_src == ODP_CLOCK_CPU)
> +		timer_exit(this);
> +	uint32_t i;
> +	for (i = 0; i < this->max_timers; i++)
> +		odp_timer_des(&this->timers[i]);
> +	free(this->timers);
> +	priority_queue_des(&this->pq);
> +	odp_sync_stores();
> +}
> 
> -	for (cur = *tmo; cur != NULL; prev = cur, cur = cur->next) {
> -		if (cur->tmo_buf == handle) {
> -			if (prev == NULL)
> -				*tmo = cur->next;
> -			else
> -				prev->next = cur->next;
> +static void odp_timer_pool_del(odp_timer_pool *this)
> +{
> +	odp_timer_pool_des(this);
> +	free(this);
> +}
> 
> -			break;
> +static inline odp_timer *timer_alloc(odp_timer_pool *this,
> +				     odp_queue_t queue,
> +				     void *user_ptr,
> +				     odp_buffer_t tmo_buf)
> +{
> +	odp_timer *tim = ODP_TIMER_INVALID;
> +	if (odp_likely(this->shared))
> +		odp_ticketlock_lock(&this->lock);
> +	if (odp_likely(this->num_alloc < this->max_timers)) {
> +		this->num_alloc++;
> +		/* Remove first unused timer from free list */
> +		assert(this->first_free != this->max_timers);
> +		tim = &this->timers[this->first_free];
> +		this->first_free = get_next_free(tim);
> +		/* Insert timer into priority queue */
> +		if (odp_unlikely(!pq_register_element(&this->pq,
> +						      &tim->pqelem))) {
> +			/* Unexpected internal error */
> +			abort();
>  		}
> +		/* Create timer */
> +		setup(tim, queue, user_ptr, tmo_buf);
> +	} else {
> +		errno = ENFILE; /* Reusing file table overflow */
>  	}
> -
> -	if (!cur)
> -		/* couldn't find tmo in list */
> -		return -1;
> -
> -	/* application to free tmo_buf provided by absolute_tmo call */
> -	return 0;
> +	if (odp_likely(this->shared))
> +		odp_ticketlock_unlock(&this->lock);
> +	return tim;
>  }
> 
> -int odp_timer_cancel_tmo(odp_timer_t timer_hdl, odp_timer_tmo_t tmo)
> +static inline void timer_free(odp_timer_pool *this, odp_timer *tim)
>  {
> -	int id;
> -	int tick_idx;
> -	timeout_t *cancel_tmo;
> -	odp_timeout_hdr_t *tmo_hdr;
> -	tick_t *tick;
> -
> -	/* get id */
> -	id = (int)timer_hdl - 1;
> -
> -	tmo_hdr = odp_timeout_hdr((odp_timeout_t) tmo);
> -	/* get tmo_buf to cancel */
> -	cancel_tmo = &tmo_hdr->meta;
> +	if (odp_likely(this->shared))
> +		odp_ticketlock_lock(&this->lock);
> +	if (odp_unlikely(tim->queue == ODP_QUEUE_INVALID))
> +		ODP_ABORT("Invalid timer %p\n", tim);
> +	/* Destroy timer */
> +	odp_buffer_t buf = teardown(tim);
> +	/* Remove timer from priority queue */
> +	pq_unregister_element(&this->pq, &tim->pqelem);
> +	/* Insert timer into free list */
> +	set_next_free(tim, this->first_free);
> +	this->first_free = tim - &this->timers[0];
> +	assert(this->num_alloc != 0);
> +	this->num_alloc--;
> +	if (odp_likely(this->shared))
> +		odp_ticketlock_unlock(&this->lock);
> +	if (buf != ODP_BUFFER_INVALID)
> +		odp_buffer_free(buf);
> +}
> 
> -	tick_idx = cancel_tmo->tick;
> -	tick = &odp_timer.timer[id].tick[tick_idx];
> +/******************************************************************************
> + * Operations on timers
> + * reset/reset_w_buf/cancel timer, return timeout
> + *****************************************************************************/
> 
> -	odp_spinlock_lock(&tick->lock);
> -	/* search and delete tmo from tick list */
> -	if (find_and_del_tmo(&tick->list, tmo) != 0) {
> -		odp_spinlock_unlock(&tick->lock);
> -		ODP_DBG("Couldn't find the tmo (%d) in tick list\n",
> (int)tmo);
> -		return -1;
> +static inline void timer_expire(odp_timer *tim)
> +{
> +	assert(tim->req_tmo != INVALID_PRIORITY);
> +	/* Timer expired, is there actually any timeout event */
> +	/* we can enqueue? */
> +	if (odp_likely(tim->tmo_buf != ODP_BUFFER_INVALID)) {
> +		/* Swap out timeout buffer */
> +		odp_buffer_t buf = tim->tmo_buf;
> +		tim->tmo_buf = ODP_BUFFER_INVALID;
> +		if (odp_likely(!tim->user_buf)) {
> +			odp_timeout_hdr_t *tmo_hdr =
> +				odp_tmo_to_hdr(odp_timeout_from_buffer(buf));
> +			/* Copy tag and requested expiration tick from timer */
> +			tmo_hdr->tag = tim->tag;
> +			tmo_hdr->expiration = tim->req_tmo;
> +		}
> +		/* Else don't touch user-defined buffer */
> +		int rc = odp_queue_enq(tim->queue, buf);
> +		if (odp_unlikely(rc != 0))
> +			ODP_ABORT("Failed to enqueue timeout buffer (%d)\n",
> +				  rc);
> +		/* Mark timer as inactive */
> +		tim->req_tmo = INVALID_PRIORITY;
>  	}
> -	odp_spinlock_unlock(&tick->lock);
> -
> -	return 0;
> +	/* No, timeout event already enqueued or unavailable */
> +	/* Keep timer active, odp_timer_return_tmo() will patch up */
>  }
> 
> -static void notify_function(union sigval sigval)
> +static odp_timer_set_t timer_reset(odp_timer_pool *tp,
> +				   odp_timer *tim,
> +				   uint64_t abs_tck)
>  {
> -	uint64_t cur_tick;
> -	timeout_t *tmo;
> -	tick_t *tick;
> -	timer_ring_t *timer;
> +	assert(tim->user_buf == false);
> +	if (odp_unlikely(abs_tck < tp->min_tick))
> +		return ODP_TIMER_SET_TOOEARLY;
> +	if (odp_unlikely(abs_tck > tp->max_tick))
> +		return ODP_TIMER_SET_TOOLATE;
> +
> +	if (odp_likely(tp->shared))
> +		odp_ticketlock_lock(&tp->lock);
> +
> +	if (odp_unlikely(tim->queue == ODP_QUEUE_INVALID))
> +		ODP_ABORT("Invalid timer %p\n", tim);
> +	if (odp_unlikely(tim->user_buf))
> +		ODP_ABORT("Timer %p has user buffer\n", tim);
> +	/* Increase timer tag to make any pending timeout stale */
> +	tim->tag++;
> +	/* Save requested timeout */
> +	tim->req_tmo = abs_tck;
> +	/* Update timer position in priority queue */
> +	pq_reset_element(&tp->pq, &tim->pqelem, abs_tck);
> +
> +	if (odp_likely(tp->shared))
> +		odp_ticketlock_unlock(&tp->lock);
> +	return ODP_TIMER_SET_SUCCESS;
> +}
> 
> -	timer = sigval.sival_ptr;
> +static odp_timer_set_t timer_reset_w_buf(odp_timer_pool *tp,
> +		odp_timer *tim,
> +		uint64_t abs_tck,
> +		odp_buffer_t user_buf)
> +{
> +	if (odp_unlikely(abs_tck < tp->min_tick))
> +		return ODP_TIMER_SET_TOOEARLY;
> +	if (odp_unlikely(abs_tck > tp->max_tick))
> +		return ODP_TIMER_SET_TOOLATE;
> +
> +	if (odp_likely(tp->shared))
> +		odp_ticketlock_lock(&tp->lock);
> +
> +	if (odp_unlikely(tim->queue == ODP_QUEUE_INVALID))
> +		ODP_ABORT("Invalid timer %p\n", tim);
> +	/* Increase timer tag to make any pending timeout stale */
> +	tim->tag++;
> +	/* Save requested timeout */
> +	tim->req_tmo = abs_tck;
> +	/* Set flag indicating presence of user defined buffer */
> +	tim->user_buf = true;
> +	/* Swap in new buffer, save any old buffer pointer */
> +	odp_buffer_t old_buf = tim->tmo_buf;
> +	tim->tmo_buf = user_buf;
> +	/* Update timer position in priority queue */
> +	pq_reset_element(&tp->pq, &tim->pqelem, abs_tck);
> +
> +	if (odp_likely(tp->shared))
> +		odp_ticketlock_unlock(&tp->lock);
> +
> +	/* Free old buffer if present */
> +	if (odp_unlikely(old_buf != ODP_BUFFER_INVALID))
> +		odp_buffer_free(old_buf);
> +	return ODP_TIMER_SET_SUCCESS;
> +}
> 
> -	if (timer->active == 0) {
> -		ODP_DBG("Timer (%u) not active\n", timer->timer_hdl);
> -		return;
> +static inline void timer_cancel(odp_timer_pool *tp,
> +				odp_timer *tim)
> +{
> +	odp_buffer_t old_buf = ODP_BUFFER_INVALID;
> +	if (odp_likely(tp->shared))
> +		odp_ticketlock_lock(&tp->lock);
> +
> +	if (odp_unlikely(tim->queue == ODP_QUEUE_INVALID))
> +		ODP_ABORT("Invalid timer %p\n", tim);
> +	if (odp_unlikely(tim->user_buf)) {
> +		/* Swap out old user buffer */
> +		old_buf = tim->tmo_buf;
> +		tim->tmo_buf = ODP_BUFFER_INVALID;
> +		/* tim->user_buf stays true */
>  	}
> +	/* Else a normal timer (no user-defined buffer) */
> +	/* Increase timer tag to make any pending timeout stale */
> +	tim->tag++;
> +	/* Clear requested timeout, mark timer inactive */
> +	tim->req_tmo = INVALID_PRIORITY;
> +	/* Remove timer from the priority queue */
> +	pq_deactivate_element(&tp->pq, &tim->pqelem);
> +
> +	if (odp_likely(tp->shared))
> +		odp_ticketlock_unlock(&tp->lock);
> +	/* Free user-defined buffer if present */
> +	if (odp_unlikely(old_buf != ODP_BUFFER_INVALID))
> +		odp_buffer_free(old_buf);
> +}
> 
> -	/* ODP_DBG("Tick\n"); */
> -
> -	cur_tick = timer->cur_tick++;
> -
> -	odp_sync_stores();
> +static inline void timer_return(odp_timer_pool *tp,
> +				odp_timer *tim,
> +				odp_timer_tmo_t tmo,
> +				const odp_timeout_hdr_t *tmo_hdr)
> +{
> +	odp_buffer_t tmo_buf = odp_buffer_from_timeout(tmo);
> +	if (odp_likely(tp->shared))
> +		odp_ticketlock_lock(&tp->lock);
> +	if (odp_unlikely(tim->user_buf))
> +		ODP_ABORT("Timer %p has user-defined buffer\n", tim);
> +	if (odp_likely(tmo_hdr->gencnt == tim->gencnt)) {
> +		assert(tim->tmo_buf == ODP_BUFFER_INVALID);
> +		/* Save returned buffer for use when timer expires next time */
> +		tim->tmo_buf = tmo_buf;
> +		tmo_buf = ODP_BUFFER_INVALID;
> +		/* Check if timer is active and should have expired */
> +		if (odp_unlikely(tim->req_tmo != INVALID_PRIORITY &&
> +				 tim->req_tmo <= tp->cur_tick)) {
> +			/* Expire timer now since we have restored the timeout
> +			   buffer */
> +			timer_expire(tim);
> +		}
> +		/* Else timer inactive or expires in the future */
> +	}
> +	/* Else timeout orphaned, free buffer later */
> +	if (odp_likely(tp->shared))
> +		odp_ticketlock_unlock(&tp->lock);
> +	if (odp_unlikely(tmo_buf != ODP_BUFFER_INVALID))
> +		odp_buffer_free(tmo_buf);
> +}
> 
> -	tick = &timer->tick[cur_tick % MAX_TICKS];
> +/* Non-public so not in odp_timer.h but externally visible, must declare
> + * somewhere */
> +unsigned odp_timer_pool_expire(odp_timer_pool_t tpid, uint64_t tick);
> 
> -	while ((tmo = rem_tmo(tick)) != NULL) {
> -		odp_queue_t  queue;
> -		odp_buffer_t buf;
> +unsigned odp_timer_pool_expire(odp_timer_pool_t tpid, uint64_t tick)
> +{
> +	if (odp_likely(tpid->shared))
> +		odp_ticketlock_lock(&tpid->lock);


One big lock (per timer pool) will scale poorly. Almost all functions take the same lock, and e.g. here it is held for as long as there are expired items in the priority queue (which may be many). The current timer implementation uses a lock per tick, so the average contention per lock should be low.
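To make the comparison concrete, a bucketed scheme along those lines could look roughly like this (editor's sketch only; the names are illustrative and not taken from either implementation):

	#define TIMER_LOCK_BUCKETS 1024 /* power of two, illustrative */

	typedef struct {
		odp_ticketlock_t lock;  /* protects only this bucket's timers */
		/* per-bucket list/heap of timers expiring on these ticks */
	} timer_bucket_t;

	static timer_bucket_t bucket[TIMER_LOCK_BUCKETS];

	/* Hash the expiration tick to a bucket so that concurrent set/cancel/
	 * expire calls mostly take different locks instead of one pool lock */
	static inline timer_bucket_t *bucket_for(uint64_t abs_tck)
	{
		return &bucket[abs_tck & (TIMER_LOCK_BUCKETS - 1)];
	}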


-Petri
Savolainen, Petri (NSN - FI/Espoo) Oct. 6, 2014, 1:35 p.m. UTC | #10
Hi,

E.g. an application may integrate other (non-ODP, non-real-time) SW into the main loop and poll that side when there is low traffic on the ODP side (e.g. when the ODP scheduler has been idle for >100 ms). Easy integration, no need to hassle with per-core idle timer restarts (on every packet).

I wouldn't expect this timeout to be very accurate. It's more about giving the application a chance to run (every now and then) when there is no other work.
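As a rough sketch of that usage (editor's illustration only; SCHED_TMO and the two helpers are hypothetical, and the encoding of the wait time is platform specific):

	for (;;) {
		odp_queue_t from;
		/* Wait up to ~100 ms for ODP work */
		odp_buffer_t buf = odp_schedule_one(&from, SCHED_TMO);
		if (buf != ODP_BUFFER_INVALID) {
			handle_odp_event(from, buf);  /* hypothetical helper */
			continue;
		}
		/* No ODP traffic for a while - give the non-ODP side a turn */
		poll_non_odp_side();                  /* hypothetical helper */
	}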

-Petri


From: ext Gilad Ben Yossef [mailto:giladb@ezchip.com]

Sent: Monday, October 06, 2014 3:45 PM
To: Savolainen, Petri (NSN - FI/Espoo); ext Ola Liljedahl
Cc: lng-odp@lists.linaro.org
Subject: RE: [lng-odp] [PATCHv4] Timer API and and priority queue-based implementation

Hi Petri,

I understand. Thanks for the explanation.

Just to explain where all these questions are coming from - I'm implementing an ODP platform on top of the EZchip NPS SDK. Every time I see something that would be difficult to implement on my platform, or would cost a lot of performance/power etc., I try to understand the requirement better.

Specifically here, odp_schedule(..) with core-specific timeouts doesn't gel well with our HW-based queue scheduler. Which is fine, but understanding where the requirement comes from can help me do the best possible thing here.

So, any idea what the application is that requires this, and for what purpose? A practical usage example?


Thanks,
Gilad

Gilad Ben-Yossef
Software Architect
EZchip Technologies Ltd.
37 Israel Pollak Ave, Kiryat Gat 82025 ,Israel
Tel: +972-4-959-6666 ext. 576, Fax: +972-8-681-1483
Mobile: +972-52-826-0388, US Mobile: +1-973-826-0388
Email: giladb@ezchip.com, Web: http://www.ezchip.com

"Ethernet always wins."
        — Andy Bechtolsheim

From: Savolainen, Petri (NSN - FI/Espoo) [mailto:petri.savolainen@nsn.com]

Sent: Monday, October 06, 2014 1:45 PM
To: ext Ola Liljedahl; Gilad Ben Yossef
Cc: lng-odp@lists.linaro.org
Subject: RE: [lng-odp] [PATCHv4] Timer API and and priority queue-based implementation

Hi,

The main point is to be able to use an ODP API “à la carte”. In this case, a user can use the scheduler API without being forced to use the timer API. The implementation underneath may run the schedule timeout in whichever way is optimal for the HW/implementation.

Also, the user has the option to call the scheduler with ODP_SCHED_WAIT and set up the timeout through the timer API.

-Petri


From: lng-odp-bounces@lists.linaro.org [mailto:lng-odp-bounces@lists.linaro.org] On Behalf Of ext Ola Liljedahl

Sent: Monday, October 06, 2014 12:37 PM
To: Gilad Ben Yossef
Cc: lng-odp@lists.linaro.org
Subject: Re: [lng-odp] [PATCHv4] Timer API and and priority queue-based implementation

Gilad,

Your suggestion makes sense. This is how a purely event-driven application would be designed.

Possibly the wait/timeout parameter to the odp_schedule calls is a legacy from the time before there was a timer API in ODP. Maybe Petri can s(c)hed some light on this.

I suspect there could be some performance benefits from specifying the timeout as an explicit parameter. If the scheduling timeout is implemented using a timer event facility (e.g. the ODP timer API), the application (or the ODP implementation, if it uses the same design) would have to reset that timer for every odp_schedule call; for a SW timer implementation this could add serious overhead. With an explicit timeout parameter, the scheduler implementation could instead read e.g. a cycle counter while (busy-)waiting for events to become available. This overhead should be lower and is also incurred only when the thread is idle and waiting for work.
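For example, the idle path could be as simple as the following sketch (editor's illustration; it assumes a cycle-counter read such as odp_time_get_cycles() plus a cycles-to-ns conversion, and try_deq_any() is a hypothetical internal helper):

	uint64_t start = odp_time_get_cycles();
	do {
		odp_buffer_t buf = try_deq_any(queue);  /* hypothetical */
		if (buf != ODP_BUFFER_INVALID)
			return buf;
		/* Only the idle path pays for reading the cycle counter */
	} while (odp_time_cycles_to_ns(odp_time_get_cycles() - start) < wait_ns);
	return ODP_BUFFER_INVALID;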

The current API does not prevent an implementation from using timer events internally and does not limit an application from using the timer API for timeouts. It does add a little bit of implementation complexity. What is the best trade-off?

-- Ola

On 6 October 2014 08:22, Gilad Ben Yossef <giladb@ezchip.com> wrote:

Another one of my stupid questions, I'm afraid.  :-)
If we have a timer implemented as an event pushed to a queue which can be scheduled like any other queue (which is a good thing, I think), why do our schedule APIs need a timeout?
I mean, if you want a timeout, just add a scheduled timer queue and send yourself timeout events. That's how I would implement the schedule timeouts internally anyway (running a native timer on a core that does packet processing stops it from enjoying Linux full NOHZ CPU isolation, so we really don't want timers there...).
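For reference, with the API in this patch that pattern would look roughly like the following (editor's sketch; timeout status checks and error handling omitted, the two helpers are hypothetical):

	/* One self-armed wakeup timer per worker, delivered to a scheduled queue */
	odp_timer_t wakeup = odp_timer_alloc(tp, queue, NULL);
	uint64_t period = odp_timer_ns_to_tick(tp, 100000 * ODP_TIME_USEC);
	odp_timer_set_rel(wakeup, period);

	for (;;) {
		odp_buffer_t buf = odp_schedule_one(&queue, ODP_SCHED_WAIT);
		if (odp_buffer_type(buf) == ODP_BUFFER_TYPE_TIMEOUT) {
			/* Our own timeout: do the idle-time work, then re-arm */
			odp_timer_return_tmo(odp_timeout_from_buffer(buf));
			odp_timer_set_rel(wakeup, period);
			do_idle_work();        /* hypothetical */
		} else {
			handle_packet(buf);    /* hypothetical */
		}
	}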
Anything I've missed?
Thanks,
Gilad
Gilad Ben-Yossef
Software Architect
EZchip Technologies Ltd.
37 Israel Pollak Ave, Kiryat Gat 82025 ,Israel
Tel: +972-4-959-6666 ext. 576, Fax: +972-8-681-1483
Mobile: +972-52-826-0388, US Mobile: +1-973-826-0388
Email: giladb@ezchip.com, Web: http://www.ezchip.com

"Ethernet always wins."
        — Andy Bechtolsheim


> -----Original Message-----

> From: lng-odp-bounces@lists.linaro.org [mailto:lng-odp-bounces@lists.linaro.org] On Behalf Of Ola Liljedahl

> Sent: Thursday, October 02, 2014 6:23 PM

> To: lng-odp@lists.linaro.org

> Subject: [lng-odp] [PATCHv4] Timer API and and priority queue-based

> implementation

>

> Signed-off-by: Ola Liljedahl <ola.liljedahl@linaro.org>

> ---

> Fixed review comments for v3 from Anders R.

> * Example code snippets use @code/@endcode.

> * Added some missing doxygen comments.

> * Updated some comments.

> * Reverted year in copyright notices.

> * Added odp_likely() hint.

> * Made some variables self-descriptive and removed redundant comments.

> Changed to use ticket locks instead of spin locks (ticket locks are more

> fair).

> Changed to use ODP_ABORT() which has become available since the last

> patch.

>

>  example/timer/odp_timer_test.c                     | 125 +--

>  platform/linux-generic/Makefile.am                 |   1 +

>  platform/linux-generic/include/api/odp_timer.h     | 570 +++++++++++--

>  .../include/odp_priority_queue_internal.h          | 108 +++

>  .../linux-generic/include/odp_timer_internal.h     |  71 +-

>  platform/linux-generic/odp_priority_queue.c        | 283 +++++++

>  platform/linux-generic/odp_timer.c                 | 923 ++++++++++++++-

> ------

>  test/api_test/odp_timer_ping.c                     |  73 +-

>  8 files changed, 1648 insertions(+), 506 deletions(-)

>  create mode 100644 platform/linux-

> generic/include/odp_priority_queue_internal.h

>  create mode 100644 platform/linux-generic/odp_priority_queue.c

>

> diff --git a/example/timer/odp_timer_test.c

> b/example/timer/odp_timer_test.c

> index 6e1715d..750d785 100644

> --- a/example/timer/odp_timer_test.c

> +++ b/example/timer/odp_timer_test.c

> @@ -41,67 +41,89 @@ typedef struct {

>  /** @private Barrier for test synchronisation */

>  static odp_barrier_t test_barrier;

>

> -/** @private Timer handle*/

> -static odp_timer_t test_timer;

> +/** @private Timer pool handle */

> +static odp_timer_pool_t tp;

>

>

> +/** @private Timeout status ASCII strings */

> +static const char *const status2str[] = {

> +     "fresh", "stale", "orphaned"

> +};

> +

>  /** @private test timeout */

>  static void test_abs_timeouts(int thr, test_args_t *args)

>  {

> -     uint64_t tick;

>       uint64_t period;

>       uint64_t period_ns;

>       odp_queue_t queue;

> -     odp_buffer_t buf;

> -     int num;

> +     int remain = args->tmo_count;

> +     odp_timer_t hdl;

> +     uint64_t tick;

>

>       ODP_DBG("  [%i] test_timeouts\n", thr);

>

>       queue = odp_queue_lookup("timer_queue");

>

>       period_ns = args->period_us*ODP_TIME_USEC;

> -     period    = odp_timer_ns_to_tick(test_timer, period_ns);

> +     period    = odp_timer_ns_to_tick(tp, period_ns);

>

>       ODP_DBG("  [%i] period %"PRIu64" ticks,  %"PRIu64" ns\n", thr,

>               period, period_ns);

>

> -     tick = odp_timer_current_tick(test_timer);

> -

> -     ODP_DBG("  [%i] current tick %"PRIu64"\n", thr, tick);

> -

> -     tick += period;

> +     ODP_DBG("  [%i] current tick %"PRIu64"\n", thr,

> +             odp_timer_current_tick(tp));

>

> -     if (odp_timer_absolute_tmo(test_timer, tick, queue,

> ODP_BUFFER_INVALID)

> -         == ODP_TIMER_TMO_INVALID){

> -             ODP_DBG("Timeout request failed\n");

> +     odp_timer_t test_timer;

> +     test_timer = odp_timer_alloc(tp, queue, NULL);

> +     if (test_timer == ODP_TIMER_INVALID) {

> +             ODP_ERR("Failed to allocate timer\n");

>               return;

>       }

> +     tick = odp_timer_current_tick(tp);

> +     hdl = test_timer;

>

> -     num = args->tmo_count;

> -

> -     while (1) {

> -             odp_timeout_t tmo;

> -

> -             buf = odp_schedule_one(&queue, ODP_SCHED_WAIT);

> -

> -             tmo  = odp_timeout_from_buffer(buf);

> -             tick = odp_timeout_tick(tmo);

> -

> -             ODP_DBG("  [%i] timeout, tick %"PRIu64"\n", thr, tick);

> -

> -             odp_buffer_free(buf);

> -

> -             num--;

> -

> -             if (num == 0)

> -                     break;

> +     while (remain != 0) {

> +             odp_buffer_t buf;

> +             odp_timer_tmo_t tmo;

> +             odp_timer_tmo_status_t stat;

> +             odp_timer_set_t rc;

>

>               tick += period;

> +             rc = odp_timer_set_abs(hdl, tick);

> +             if (odp_unlikely(rc != ODP_TIMER_SET_SUCCESS)) {

> +                     ODP_ERR("odp_timer_set_abs() failed (%u)\n", rc);

> +                     abort();

> +             }

>

> -             odp_timer_absolute_tmo(test_timer, tick,

> -                                    queue, ODP_BUFFER_INVALID);

> +             /* Get the next ready buffer/timeout */

> +             buf = odp_schedule_one(&queue, ODP_SCHED_WAIT);

> +             if (odp_unlikely(odp_buffer_type(buf) !=

> +                              ODP_BUFFER_TYPE_TIMEOUT)) {

> +                     ODP_ERR("Unexpected buffer type received\n");

> +                     abort();

> +             }

> +             tmo = odp_timeout_from_buffer(buf);

> +             stat = odp_timer_tmo_status(tmo);

> +             tick = odp_timer_expiration(tmo);

> +             hdl = odp_timer_handle(tmo);

> +             ODP_DBG("  [%i] timeout, tick %"PRIu64", status %s\n",

> +                     thr, tick, status2str[stat]);

> +             /* if (stat == ODP_TMO_FRESH)  - do your thing! */

> +             if (odp_likely(stat == ODP_TMO_ORPHAN)) {

> +                     /* Some other thread freed the corresponding

> +                        timer after the timeout was already

> +                        enqueued */

> +                     /* Timeout handle is invalid, use our own timer */

> +                     hdl = test_timer;

> +             }

> +             /* Return timeout to timer manager, regardless of status */

> +             odp_timer_return_tmo(tmo);

> +             remain--;

>       }

>

> +     odp_timer_cancel(test_timer);

> +     odp_timer_free(test_timer);

> +

>       if (odp_queue_sched_type(queue) == ODP_SCHED_SYNC_ATOMIC)

>               odp_schedule_release_atomic();

>  }

> @@ -155,7 +177,6 @@ static void print_usage(void)

>       printf("Options:\n");

>       printf("  -c, --count <number>    core count, core IDs start from

> 1\n");

>       printf("  -r, --resolution <us>   timeout resolution in usec\n");

> -     printf("  -m, --min <us>          minimum timeout in usec\n");

>       printf("  -x, --max <us>          maximum timeout in usec\n");

>       printf("  -p, --period <us>       timeout period in usec\n");

>       printf("  -t, --timeouts <count>  timeout repeat count\n");

> @@ -190,14 +211,14 @@ static void parse_args(int argc, char *argv[],

> test_args_t *args)

>       /* defaults */

>       args->core_count    = 0; /* all cores */

>       args->resolution_us = 10000;

> -     args->min_us        = args->resolution_us;

> +     args->min_us        = 0;

>       args->max_us        = 10000000;

>       args->period_us     = 1000000;

>       args->tmo_count     = 30;

>

>       while (1) {

>               opt = getopt_long(argc, argv, "+c:r:m:x:p:t:h",

> -                              longopts, &long_index);

> +                               longopts, &long_index);

>

>               if (opt == -1)

>                       break;  /* No more options */

> @@ -321,10 +342,25 @@ int main(int argc, char *argv[])

>                                     ODP_BUFFER_TYPE_TIMEOUT);

>

>       if (pool == ODP_BUFFER_POOL_INVALID) {

> -             ODP_ERR("Pool create failed.\n");

> +             ODP_ERR("Buffer pool create failed.\n");

>               return -1;

>       }

>

> +     tp = odp_timer_pool_create("timer_pool", pool,

> +                                args.resolution_us*ODP_TIME_USEC,

> +                                args.min_us*ODP_TIME_USEC,

> +                                args.max_us*ODP_TIME_USEC,

> +                                num_workers, /* One timer per worker */

> +                                true,

> +                                ODP_CLOCK_CPU);

> +     if (tp == ODP_TIMER_POOL_INVALID) {

> +             ODP_ERR("Timer pool create failed.\n");

> +             return -1;

> +     }

> +     odp_timer_pool_start();

> +

> +     odp_shm_print_all();

> +

>       /*

>        * Create a queue for timer test

>        */

> @@ -340,19 +376,6 @@ int main(int argc, char *argv[])

>               return -1;

>       }

>

> -     test_timer = odp_timer_create("test_timer", pool,

> -                                   args.resolution_us*ODP_TIME_USEC,

> -                                   args.min_us*ODP_TIME_USEC,

> -                                   args.max_us*ODP_TIME_USEC);

> -

> -     if (test_timer == ODP_TIMER_INVALID) {

> -             ODP_ERR("Timer create failed.\n");

> -             return -1;

> -     }

> -

> -

> -     odp_shm_print_all();

> -

>       printf("CPU freq %"PRIu64" hz\n", odp_sys_cpu_hz());

>       printf("Cycles vs nanoseconds:\n");

>       ns = 0;

> diff --git a/platform/linux-generic/Makefile.am b/platform/linux-

> generic/Makefile.am

> index d076d50..71f923c 100644

> --- a/platform/linux-generic/Makefile.am

> +++ b/platform/linux-generic/Makefile.am

> @@ -59,6 +59,7 @@ __LIB__libodp_la_SOURCES = \

>                          odp_packet_flags.c \

>                          odp_packet_io.c \

>                          odp_packet_socket.c \

> +                        odp_priority_queue.c \

>                          odp_queue.c \

>                          odp_ring.c \

>                          odp_rwlock.c \

> diff --git a/platform/linux-generic/include/api/odp_timer.h

> b/platform/linux-generic/include/api/odp_timer.h

> index 01db839..82a1e05 100644

> --- a/platform/linux-generic/include/api/odp_timer.h

> +++ b/platform/linux-generic/include/api/odp_timer.h

> @@ -8,9 +8,193 @@

>  /**

>   * @file

>   *

> - * ODP timer

> + * ODP timer service

>   */

>

> +/** Example #1 Retransmission timer (e.g. for reliable connections)

> + @code

> +

> +//Create timer pool for reliable connections

> +#define SEC 1000000000ULL //1s expressed in nanoseconds

> +odp_timer_pool_t tcp_tpid =

> +    odp_timer_pool_create("TCP",

> +                       buffer_pool,

> +                       1000000,//resolution 1ms

> +                       0,//min tmo

> +                       7200 * SEC,//max tmo length 2hours

> +                       40000,//num_timers

> +                       true,//shared

> +                       ODP_CLOCK_CPU

> +                      );

> +if (tcp_tpid == ODP_TIMER_POOL_INVALID)

> +{

> +     //Failed to create timer pool => fatal error

> +}

> +

> +

> +//Setting up a new connection

> +//Allocate retransmission timeout (identical for supervision timeout)

> +//The user pointer points back to the connection context

> +conn->ret_tim = odp_timer_alloc(tcp_tpid, queue, conn);

> +//Check if all resources were successfully allocated

> +if (conn->ret_tim == ODP_TIMER_INVALID)

> +{

> +     //Failed to allocate all resources for connection => tear down

> +     //Destroy timeout

> +     odp_timer_free(conn->ret_tim);

> +     //Tear down connection

> +     ...

> +     return false;

> +}

> +//All necessary resources successfully allocated

> +//Compute initial retransmission length in timer ticks

> +conn->ret_len = odp_timer_ns_to_tick(tcp_tpid, 3 * SEC);//Per RFC1122

> +//Arm the timer

> +odp_timer_set_rel(conn->ret_tim, conn->ret_len);

> +return true;

> +

> +

> +//A packet for the connection has just been transmitted

> +//Reset the retransmission timer

> +odp_timer_set_rel(conn->ret_tim, conn->ret_len);

> +

> +

> +//A retransmission timeout buffer for the connection has been received

> +odp_timer_tmo_t tmo = odp_timeout_from_buffer(buf);

> +odp_timer_tmo_status_t stat = odp_timer_tmo_status(tmo);

> +//Check if timeout is fresh or stale, for stale timeouts we need to

> reset the

> +//timer

> +if (stat == ODP_TMO_FRESH) {

> +     //Fresh timeout, last transmitted packet not acked in time =>

> +       retransmit

> +     //Get connection from timeout event

> +     conn = odp_timer_get_userptr(tmo);

> +     //Retransmit last packet (e.g. TCP segment)

> +     ...

> +     //Re-arm timer using original delta value

> +     odp_timer_set_rel(conn->ret_tim, conn->ret_len);

> +} else if (stat == ODP_TMO_ORPHAN) {

> +     odp_free_buffer(buf);

> +     return;//Get out of here

> +} // else stat == ODP_TMO_STALE, do nothing

> +//Finished processing, return timeout

> +odp_timer_return_tmo(tmo);

> +

> + @endcode

> +*/

> +

> +/** Example #2 Periodic tick

> + @code

> +

> +//Create timer pool for periodic ticks

> +odp_timer_pool_t per_tpid =

> +    odp_timer_pool_create("periodic-tick",

> +                       buffer_pool,

> +                       1,//resolution 1ns

> +                       1,//minimum timeout length 1ns

> +                       1000000000,//maximum timeout length 1s

> +                       10,//num_timers

> +                       false,//not shared

> +                       ODP_CLOCK_CPU

> +                      );

> +if (per_tpid == ODP_TIMER_POOL_INVALID)

> +{

> +    //Failed to create timer pool => fatal error

> +}

> +

> +

> +//Allocate periodic timer

> +tim_1733 = odp_timer_alloc(per_tpid, queue, NULL);

> +//Check if all resources were successfully allocated

> +if (tim_1733 == ODP_TIMER_INVALID)

> +{

> +     //Failed to allocate all resources => tear down

> +     //Destroy timeout

> +     odp_timer_free(tim_1733);

> +     //Tear down other state

> +     ...

> +     return false;

> +}

> +//All necessary resources successfully allocated

> +//Compute tick period in timer ticks

> +period_1733 = odp_timer_ns_to_tick(per_tpid, 1000000000U /

> 1733U);//1733Hz

> +//Compute when next tick should expire

> +next_1733 = odp_timer_current_tick(per_tpid) + period_1733;

> +//Arm the periodic timer

> +odp_timer_set_abs(tim_1733, next_1733);

> +return true;

> +

> +

> +

> +//A periodic timer timeout has been received

> +odp_timer_tmo_t tmo = odp_timeout_from_buffer(buf);

> +//Get status of timeout

> +odp_timer_tmo_status_t stat = odp_timer_tmo_status(tmo);

> +//We expect the timeout is always fresh since we are not calling set or

> cancel

> +on active or expired timers in this example

> +assert(stat == ODP_TMO_FRESH);

> +//Do processing driven by timeout *before*

> +...

> +do {

> +     //Compute when the timer should expire next

> +     next_1733 += period_1733;

> +     //Check that this is in the future

> +     if (likely(next_1733 > odp_timer_current_tick(per_tpid)))

> +     break;//Yes, done

> +     //Else we missed a timeout

> +     //Optionally attempt some recovery and/or logging of the problem

> +     ...

> +} while (0);

> +//Re-arm periodic timer

> +odp_timer_set_abs(tim_1733, next_1733);

> +//Or do processing driven by timeout *after*

> +...

> +odp_timer_return_tmo(tmo);

> +return;

> +

> + @endcode

> +*/

> +

> +/** Example #3 Tear down of flow

> + @code

> +//ctx points to flow context data structure owned by application

> +//Free the timer, cancelling any timeout

> +odp_timer_free(ctx->timer);//Any enqueued timeout will be made invalid

> +//Continue tearing down and eventually freeing context

> +...

> +return;

> +

> +//A timeout has been received, check status

> +odp_timer_tmo_t tmo = odp_timeout_from_buffer(buf);

> +switch (odp_timer_tmo_status(tmo))

> +{

> +    case ODP_TMO_FRESH :

> +     //A flow has timed out, tear it down

> +     //Find flow context from timeout

> +     ctx = (context *)odp_timer_get_userptr(tmo);

> +     //Free the supervision timer, any enqueued timeout will remain

> +     odp_timer_free(ctx->tim);

> +     //Free other flow related resources

> +     ...

> +     //Free the timeout buffer

> +     odp_buffer_free(buf);

> +     //Flow torn down

> +     break;

> +    case ODP_TMO_STALE :

> +     //A stale timeout was received, return timeout and update timer

> +     odp_timer_return_tmo(tmo);

> +     break;

> +    case ODP_TMO_ORPHAN :

> +     //Orphaned timeout (from previously torn down flow)

> +     //No corresponding timer or flow context

> +     //Free the timeout buffer

> +     odp_buffer_free(buf);

> +     break;

> +}

> +

> + @endcode

> +*/

> +

>  #ifndef ODP_TIMER_H_

>  #define ODP_TIMER_H_

>

> @@ -18,144 +202,408 @@

>  extern "C" {

>  #endif

>

> +#include <stdlib.h>

>  #include <odp_std_types.h>

>  #include <odp_buffer.h>

>  #include <odp_buffer_pool.h>

>  #include <odp_queue.h>

>

> +struct odp_timer_pool_s; /**< Forward declaration */

> +

> +/**

> +* ODP timer pool handle (platform dependent)

> +*/

> +typedef struct odp_timer_pool_s *odp_timer_pool_t;

> +

> +/**

> + * Invalid timer pool handle (platform dependent).

> + */

> +#define ODP_TIMER_POOL_INVALID NULL

>

>  /**

> - * ODP timer handle

> + * Clock sources for timers in timer pool.

>   */

> -typedef uint32_t odp_timer_t;

> +typedef enum odp_timer_clk_src_e {

> +     /** Use CPU clock as clock source for timers */

> +     ODP_CLOCK_CPU,

> +     /** Use external clock as clock source for timers */

> +     ODP_CLOCK_EXT

> +     /* Platform dependent which other clock sources exist */

> +} odp_timer_clk_src_t;

>

> -/** Invalid timer */

> -#define ODP_TIMER_INVALID 0

> +struct odp_timer_s; /**< Forward declaration */

>

> +/**

> +* ODP timer handle (platform dependent).

> +*/

> +typedef struct odp_timer_s *odp_timer_t;

>

>  /**

> - * ODP timeout handle

> + * Invalid timer handle (platform dependent).

>   */

> -typedef odp_buffer_t odp_timer_tmo_t;

> -

> -/** Invalid timeout */

> -#define ODP_TIMER_TMO_INVALID 0

> +#define ODP_TIMER_INVALID NULL

>

> +/**

> + * Return values of timer set calls.

> + */

> +typedef enum odp_timer_set_e {

> +     /** Timer set operation successful */

> +     ODP_TIMER_SET_SUCCESS,

> +     /** Timer set operation failed, expiration too early */

> +     ODP_TIMER_SET_TOOEARLY,

> +     /** Timer set operation failed, expiration too late */

> +     ODP_TIMER_SET_TOOLATE

> +} odp_timer_set_t;

>

>  /**

> - * Timeout notification

> + * Timeout event handle.

>   */

> -typedef odp_buffer_t odp_timeout_t;

> +typedef odp_buffer_t odp_timer_tmo_t;

>

> +/**

> + * Status of a timeout event.

> + */

> +typedef enum odp_timer_tmo_status_e {

> +     /** Timeout is fresh, process it and return timeout */

> +     ODP_TMO_FRESH,

> +     /** Timer reset or cancelled, just return timeout  */

> +     ODP_TMO_STALE,

> +     /** Timer deleted, return or free timeout */

> +     ODP_TMO_ORPHAN

> +} odp_timer_tmo_status_t;

>

>  /**

> - * Create a timer

> + * Create a timer pool

>   *

> - * Creates a new timer with requested properties.

> + * Create a new timer pool.

>   *

>   * @param name       Name

> - * @param pool       Buffer pool for allocating timeout notifications

> + * @param buf_pool   Buffer pool for allocating timeouts (and only

> timeouts)

>   * @param resolution Timeout resolution in nanoseconds

> - * @param min_tmo    Minimum timeout duration in nanoseconds

> - * @param max_tmo    Maximum timeout duration in nanoseconds

> + * @param min_tmo    Minimum relative timeout in nanoseconds

> + * @param max_tmo    Maximum relative timeout in nanoseconds

> + * @param num_timers Number of supported timers (minimum)

> + * @param shared     Shared or private timer pool.

> + *              Operations on shared timers will include the necessary

> + *              mutual exclusion, operations on private timers may not

> + *              (mutual exclusion is the responsibility of the caller).

> + * @param clk_src    Clock source to use

>   *

> - * @return Timer handle if successful, otherwise ODP_TIMER_INVALID

> + * @return Timer pool handle if successful, otherwise

> ODP_TIMER_POOL_INVALID

> + * and errno set

>   */

> -odp_timer_t odp_timer_create(const char *name, odp_buffer_pool_t pool,

> -                          uint64_t resolution, uint64_t min_tmo,

> -                          uint64_t max_tmo);

> +odp_timer_pool_t

> +odp_timer_pool_create(const char *name,

> +                   odp_buffer_pool_t buf_pool,

> +                   uint64_t resolution,

> +                   uint64_t min_tmo,

> +                   uint64_t max_tmo,

> +                   uint32_t num_timers,

> +                   bool shared,

> +                   odp_timer_clk_src_t clk_src);

> +

> +/**

> + * Start a timer pool

> + *

> + * Start all created timer pools, enabling the allocation of timers.

> + * The purpose of this call is to coordinate the creation of multiple

> timer

> + * pools that may use the same underlying HW resources.

> + * This function may be called multiple times.

> + */

> +void odp_timer_pool_start(void);

> +

> +/**

> + * Destroy a timer pool

> + *

> + * Destroy a timer pool, freeing all resources.

> + * All timers must have been freed.

> + *

> + * @param tpid  Timer pool identifier

> + */

> +void odp_timer_pool_destroy(odp_timer_pool_t tpid);

>

>  /**

>   * Convert timer ticks to nanoseconds

>   *

> - * @param timer Timer

> + * @param tpid  Timer pool identifier

>   * @param ticks Timer ticks

>   *

>   * @return Nanoseconds

>   */

> -uint64_t odp_timer_tick_to_ns(odp_timer_t timer, uint64_t ticks);

> +uint64_t odp_timer_tick_to_ns(odp_timer_pool_t tpid, uint64_t ticks);

>

>  /**

>   * Convert nanoseconds to timer ticks

>   *

> - * @param timer Timer

> + * @param tpid  Timer pool identifier

>   * @param ns    Nanoseconds

>   *

>   * @return Timer ticks

>   */

> -uint64_t odp_timer_ns_to_tick(odp_timer_t timer, uint64_t ns);

> +uint64_t odp_timer_ns_to_tick(odp_timer_pool_t tpid, uint64_t ns);

>

>  /**

> - * Timer resolution in nanoseconds

> + * Current tick value

>   *

> - * @param timer Timer

> + * @param tpid Timer pool identifier

>   *

> - * @return Resolution in nanoseconds

> + * @return Current time in timer ticks

> + */

> +uint64_t odp_timer_current_tick(odp_timer_pool_t tpid);

> +

> +/**

> + * ODP timer configurations

>   */

> -uint64_t odp_timer_resolution(odp_timer_t timer);

> +

> +typedef enum odp_timer_pool_conf_e {

> +     ODP_TIMER_NAME,      /**< Return name of timer pool */

> +     ODP_TIMER_RESOLUTION,/**< Return the timer resolution (in ns) */

> +     ODP_TIMER_MIN_TICKS, /**< Return the min supported rel timeout

> (ticks)*/

> +     ODP_TIMER_MAX_TICKS, /**< Return the max supported rel timeout

> (ticks)*/

> +     ODP_TIMER_NUM_TIMERS,/**< Return number of supported timers */

> +     ODP_TIMER_SHARED     /**< Return shared flag */

> +} odp_timer_pool_conf_t;

>

>  /**

> - * Maximum timeout in timer ticks

> + * Query different timer pool configurations, e.g.

> + *  Timer resolution in nanoseconds

> + *  Maximum timeout in timer ticks

> + *  Number of supported timers

> + *  Shared or private timer pool

>   *

> - * @param timer Timer

> + * @param tpid Timer pool identifier

> + * @param item Configuration item being queried

>   *

> - * @return Maximum timeout in timer ticks

> + * @return the requested piece of information or 0 for unknown item.

>   */

> -uint64_t odp_timer_maximum_tmo(odp_timer_t timer);

> +uintptr_t odp_timer_pool_query_conf(odp_timer_pool_t tpid,

> +                                 odp_timer_pool_conf_t item);

>

>  /**

> - * Current timer tick

> + * Allocate a timer

>   *

> - * @param timer Timer

> + * Create a timer (allocating all necessary resources e.g. timeout

> event) from

> + * the timer pool.

>   *

> - * @return Current time in timer ticks

> + * @param tpid     Timer pool identifier

> + * @param queue    Destination queue for timeout notifications

> + * @param user_ptr User defined pointer or NULL (copied to timeouts)

> + *

> + * @return Timer handle if successful, otherwise ODP_TIMER_INVALID and

> + *      errno set.

>   */

> -uint64_t odp_timer_current_tick(odp_timer_t timer);

> +odp_timer_t odp_timer_alloc(odp_timer_pool_t tpid,

> +                         odp_queue_t queue,

> +                         void *user_ptr);

>

>  /**

> - * Request timeout with an absolute timer tick

> + * Free a timer

> + *

> + * Free (destroy) a timer, freeing all associated resources (e.g.

> default

> + * timeout event). An expired and enqueued timeout event will not be

> freed.

> + * It is the responsibility of the application to free this timeout when

> it

> + * is received.

>   *

> - * When tick reaches tmo_tick, the timer enqueues the timeout

> notification into

> - * the destination queue.

> + * @param tim      Timer handle

> + */

> +void odp_timer_free(odp_timer_t tim);

> +

> +/**

> + * Set a timer (absolute time) with a user-defined timeout buffer

>   *

> - * @param timer    Timer

> - * @param tmo_tick Absolute timer tick value which triggers the timeout

> - * @param queue    Destination queue for the timeout notification

> - * @param buf      User defined timeout notification buffer. When

> - *                 ODP_BUFFER_INVALID, default timeout notification is

> used.

> + * Set (arm) the timer to expire at specific time. The user-defined

> + * buffer will be enqueued when the timer expires.

> + * Arming may fail (if the timer is in state EXPIRED), an earlier

> timeout

> + * will then be received. odp_timer_tmo_status() must be used to check

> if

> + * the received timeout is valid.

>   *

> - * @return Timeout handle if successful, otherwise ODP_TIMER_TMO_INVALID

> + * Note: any invalid parameters will be treated as programming errors

> and will

> + * cause the application to abort.

> + *

> + * @param tim      Timer

> + * @param abs_tck  Expiration time in absolute timer ticks

> + * @param user_buf The buffer to use as timeout event

> + *

> + * @return Success or failure code

>   */

> -odp_timer_tmo_t odp_timer_absolute_tmo(odp_timer_t timer, uint64_t

> tmo_tick,

> -                                    odp_queue_t queue, odp_buffer_t buf);

> +odp_timer_set_t odp_timer_set_abs_w_buf(odp_timer_t tim,

> +                                     uint64_t abs_tck,

> +                                     odp_buffer_t user_buf);

>

>  /**

> - * Cancel a timeout

> + * Set a timer with an absolute expiration time

> + *

> + * Set (arm) the timer to expire at a specific time.

> + * Arming may fail (if the timer is in state EXPIRED), an earlier

> timeout

> + * will then be received. odp_timer_tmo_status() must be used to check

> if

> + * the received timeout is valid.

> + *

> + * Note: any invalid parameters will be treated as programming errors

> and will

> + * cause the application to abort.

>   *

> - * @param timer Timer

> - * @param tmo   Timeout to cancel

> + * @param tim     Timer

> + * @param abs_tck Expiration time in absolute timer ticks

>   *

> - * @return 0 if successful

> + * @return Success or failure code

>   */

> -int odp_timer_cancel_tmo(odp_timer_t timer, odp_timer_tmo_t tmo);

> +odp_timer_set_t odp_timer_set_abs(odp_timer_t tim, uint64_t abs_tck);

>

>  /**

> - * Convert buffer handle to timeout handle

> + * Set a timer with a relative expiration time and user-defined buffer.

>   *

> - * @param buf  Buffer handle

> + * Set (arm) the timer to expire at a relative future time.

> + * Arming may fail (if the timer is in state EXPIRED),

> + * an earlier timeout will then be received. odp_timer_tmo_status() must

> + * be used to check if the received timeout is valid.

>   *

> - * @return Timeout buffer handle

> + * Note: any invalid parameters will be treated as programming errors

> and will

> + * cause the application to abort.

> + *

> + * @param tim      Timer

> + * @param rel_tck  Expiration time in timer ticks relative to current

> time of

> + *              the timer pool the timer belongs to

> + * @param user_buf The buffer to use as timeout event

> + *

> + * @return Success or failure code

>   */

> -odp_timeout_t odp_timeout_from_buffer(odp_buffer_t buf);

> +odp_timer_set_t odp_timer_set_rel_w_buf(odp_timer_t tim,

> +                                     uint64_t rel_tck,

> +                                     odp_buffer_t user_buf);

> +/**

> + * Set a timer with a relative expiration time

> + *

> + * Set (arm) the timer to expire at a relative future time.

> + * Arming may fail (if the timer is in state EXPIRED),

> + * an earlier timeout will then be received. odp_timer_tmo_status() must

> + * be used to check if the received timeout is valid.

> + *

> + * Note: any invalid parameters will be treated as programming errors

> and will

> + * cause the application to abort.

> + *

> + * @param tim     Timer

> + * @param rel_tck Expiration time in timer ticks relative to current

> time of

> + *             the timer pool the timer belongs to

> + *

> + * @return Success or failure code

> + */

> +odp_timer_set_t odp_timer_set_rel(odp_timer_t tim, uint64_t rel_tck);

>

>  /**

> - * Return absolute timeout tick

> + * Cancel a timer

> + *

> + * Cancel a timer, preventing future expiration and delivery.

> + *

> + * A timer that has already expired and been enqueued for delivery may

> be

> + * impossible to cancel and will instead be delivered to the destination

> queue.

> + * Use odp_timer_tmo_status() to check whether a received timeout is

> fresh or

> + * stale (cancelled). Stale timeouts will automatically be recycled.

> + *

> + * Note: any invalid parameters will be treated as programming errors

> and will

> + * cause the application to abort.

> + *

> + * @param tim    Timer handle

> + */

> +void odp_timer_cancel(odp_timer_t tim);

> +

> +/**

> + * Translate from buffer to timeout

> + *

> + * Return the timeout handle that corresponds to the specified buffer

> handle.

> + * The buffer must be of time ODP_BUFFER_TYPE_TIMEOUT.

> + *

> + * @param buf   Buffer handle to translate.

> + *

> + * @return      The corresponding timeout handle.

> + */

> +static inline odp_timer_tmo_t odp_timeout_from_buffer(odp_buffer_t buf)

> +{

> +     if (odp_unlikely(odp_buffer_type(buf) != ODP_BUFFER_TYPE_TIMEOUT))

> {

> +             ODP_ERR("Buffer type %u not timeout\n", buf);

> +             abort();

> +     }

> +     /* In this implementation, timeout == buffer */

> +     return (odp_timer_tmo_t)buf;

> +}

> +

> +/**

> + * Translate from timeout to buffer

> + *

> + * Return the buffer handle that corresponds to the specified timeout

> handle.

> + *

> + * @param tmo   Timeout handle to translate.

> + *

> + * @return      The corresponding buffer handle.

> + */

> +static inline odp_buffer_t odp_buffer_from_timeout(odp_timer_tmo_t tmo)

> +{

> +     /* In this implementation, buffer == timeout */

> +     return (odp_buffer_t)tmo;

> +}

> +

> +/**

> + * Return timeout to timer

> + *

> + * Return a received timeout for reuse with the parent timer.

> + * Note: odp_timer_return_tmo() must be called on all received timeouts!

> + * (Excluding user defined timeout buffers).

> + * The timeout must not be accessed after this call, the semantics is

> + * equivalent to a free call.

> + *

> + * @param tmo    Timeout

> + */

> +void odp_timer_return_tmo(odp_timer_tmo_t tmo);

> +

> +/**

> + * Return fresh/stale/orphan status of timeout.

> + *

> + * Check a received timeout for orphaness (i.e. parent timer freed) and

> + * staleness (i.e. parent timer has been reset or cancelled after the

> timeout

> + * expired and was enqueued).

> + * If the timeout is fresh, it should be processed.

> + * If the timeout is stale or orphaned, it should be ignored.

> + * All timeouts must be returned using the odp_timer_return_tmo() call.

> + *

> + * @param tmo    Timeout

> + *

> + * @return One of ODP_TMO_FRESH, ODP_TMO_STALE or ODP_TMO_ORPHAN.

> + */

> +odp_timer_tmo_status_t odp_timer_tmo_status(odp_timer_tmo_t tmo);

> +

> +/**

> + * Get timer handle

> + *

> + * Return Handle of parent timer.

> + *

> + * @param tmo   Timeout

> + *

> + * @return Timer handle or ODP_TIMER_INVALID for orphaned timeouts.

> + *         Note that the parent timer could be freed by some other

> thread

> + *         at any time and thus the timeout becomes orphaned.

> + */

> +odp_timer_t odp_timer_handle(odp_timer_tmo_t tmo);

> +

> +/**

> + * Get expiration time

> + *

> + * Return (requested) expiration time of timeout.

> + *

> + * @param tmo   Timeout

> + *

> + * @return Expiration time

> + */

> +uint64_t odp_timer_expiration(odp_timer_tmo_t tmo);

> +

> +/**

> + * Get user pointer

> + *

> + * Return User pointer of timer associated with timeout.

> + * The user pointer is often used to point to some associated context.

>   *

> - * @param tmo Timeout buffer handle

> + * @param tmo   Timeout

>   *

> - * @return Absolute timeout tick

> + * @return User pointer

>   */

> -uint64_t odp_timeout_tick(odp_timeout_t tmo);

> +void *odp_timer_userptr(odp_timer_tmo_t tmo);

>

>  #ifdef __cplusplus

>  }

> diff --git a/platform/linux-generic/include/odp_priority_queue_internal.h

> b/platform/linux-generic/include/odp_priority_queue_internal.h

> new file mode 100644

> index 0000000..7d7f3a2

> --- /dev/null

> +++ b/platform/linux-generic/include/odp_priority_queue_internal.h

> @@ -0,0 +1,108 @@

> +#ifndef _PRIORITY_QUEUE_H

> +#define _PRIORITY_QUEUE_H

> +

> +#include <assert.h>

> +#include <stddef.h>

> +#include <stdint.h>

> +#include <stdbool.h>

> +#include <odp_align.h>

> +

> +#define INVALID_INDEX ~0U

> +#define INVALID_PRIORITY ((pq_priority_t)~0ULL)

> +

> +typedef uint64_t pq_priority_t;

> +

> +struct heap_node;

> +

> +typedef struct priority_queue {

> +     uint32_t max_elems;/* Number of elements in heap */

> +     /* Number of registered elements (active + inactive) */

> +     uint32_t reg_elems;

> +     uint32_t num_elems;/* Number of active elements */

> +     struct heap_node *heap;

> +     struct heap_node *org_ptr;

> +} priority_queue ODP_ALIGNED(sizeof(uint64_t));

> +

> +/* The user gets a pointer to this structure */

> +typedef struct {

> +     /* Set when pq_element registered with priority queue */

> +     priority_queue *pq;

> +     uint32_t index;/* Index into heap array */

> +     pq_priority_t prio;

> +} pq_element;

> +

> +/*** Operations on pq_element ***/

> +

> +static inline void pq_element_con(pq_element *this)

> +{

> +     this->pq = NULL;

> +     this->index = INVALID_INDEX;

> +     this->prio = 0U;

> +}

> +

> +static inline void pq_element_des(pq_element *this)

> +{

> +     (void)this;

> +     assert(this->index == INVALID_INDEX);

> +}

> +

> +static inline priority_queue *get_pq(const pq_element *this)

> +{

> +     return this->pq;

> +}

> +

> +static inline pq_priority_t get_prio(const pq_element *this)

> +{

> +     return this->prio;

> +}

> +

> +static inline uint32_t get_index(const pq_element *this)

> +{

> +     return this->index;

> +}

> +

> +static inline bool is_active(const pq_element *this)

> +{

> +     return this->index != INVALID_INDEX;

> +}

> +

> +/*** Operations on priority_queue ***/

> +

> +extern uint32_t pq_smallest_child(priority_queue *, uint32_t,

> pq_priority_t);

> +extern void pq_bubble_down(priority_queue *, pq_element *);

> +extern void pq_bubble_up(priority_queue *, pq_element *);

> +

> +static inline bool valid_index(priority_queue *this, uint32_t idx)

> +{

> +     return idx < this->num_elems;

> +}

> +

> +extern void priority_queue_con(priority_queue *, uint32_t _max_elems);

> +extern void priority_queue_des(priority_queue *);

> +

> +/* Register pq_element with priority queue */

> +/* Return false if priority queue full */

> +extern bool pq_register_element(priority_queue *, pq_element *);

> +

> +/* Activate and add pq_element to priority queue */

> +/* Element must be disarmed */

> +extern void pq_activate_element(priority_queue *, pq_element *,

> pq_priority_t);

> +

> +/* Reset (increase) priority for pq_element */

> +/* Element may be active or inactive (released) */

> +extern void pq_reset_element(priority_queue *, pq_element *,

> pq_priority_t);

> +

> +/* Deactivate and remove element from priority queue */

> +/* Element may be active or inactive (released) */

> +extern void pq_deactivate_element(priority_queue *, pq_element *);

> +

> +/* Unregister pq_element */

> +extern void pq_unregister_element(priority_queue *, pq_element *);

> +

> +/* Return priority of first element (lowest numerical value) */

> +extern pq_priority_t pq_first_priority(const priority_queue *);

> +

> +/* Deactivate and return first element if it's prio is <= threshold */

> +extern pq_element *pq_release_element(priority_queue *, pq_priority_t

> thresh);

> +

> +#endif /* _PRIORITY_QUEUE_H */
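
A sketch of the call sequence this internal API seems intended for
(my reading of the declarations above, not tested):

    priority_queue pq;
    pq_element e;

    priority_queue_con(&pq, 128);             /* capacity 128 elements */
    pq_element_con(&e);
    if (pq_register_element(&pq, &e)) {
            pq_activate_element(&pq, &e, 42); /* priority 42 */
            pq_element *first = pq_release_element(&pq, 100);
            assert(first == &e);              /* 42 <= 100, now inactive */
            pq_unregister_element(&pq, &e);
    }
    pq_element_des(&e);
    priority_queue_des(&pq);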

> diff --git a/platform/linux-generic/include/odp_timer_internal.h

> b/platform/linux-generic/include/odp_timer_internal.h

> index ad28f53..461f28c 100644

> --- a/platform/linux-generic/include/odp_timer_internal.h

> +++ b/platform/linux-generic/include/odp_timer_internal.h

> @@ -1,4 +1,4 @@

> -/* Copyright (c) 2013, Linaro Limited

> +/* Copyright (c) 2014, Linaro Limited

>   * All rights reserved.

>   *

>   * SPDX-License-Identifier:     BSD-3-Clause

> @@ -8,72 +8,51 @@

>  /**

>   * @file

>   *

> - * ODP timer timeout descriptor - implementation internal

> + * ODP timeout descriptor - implementation internal

>   */

>

>  #ifndef ODP_TIMER_INTERNAL_H_

>  #define ODP_TIMER_INTERNAL_H_

>

> -#ifdef __cplusplus

> -extern "C" {

> -#endif

> -

> -#include <odp_std_types.h>

> -#include <odp_queue.h>

> -#include <odp_buffer.h>

> +#include <odp_align.h>

> +#include <odp_debug.h>

>  #include <odp_buffer_internal.h>

>  #include <odp_buffer_pool_internal.h>

>  #include <odp_timer.h>

>

> -struct timeout_t;

> -

> -typedef struct timeout_t {

> -     struct timeout_t *next;

> -     int               timer_id;

> -     int               tick;

> -     uint64_t          tmo_tick;

> -     odp_queue_t       queue;

> -     odp_buffer_t      buf;

> -     odp_buffer_t      tmo_buf;

> -} timeout_t;

> -

> -

> -struct odp_timeout_hdr_t;

> -

>  /**

> - * Timeout notification header

> + * Internal Timeout header

>   */

> -typedef struct odp_timeout_hdr_t {

> +typedef struct {

> +     /* common buffer header */

>       odp_buffer_hdr_t buf_hdr;

>

> -     timeout_t meta;

> -

> -     uint8_t buf_data[];

> +     /* Requested expiration time */

> +     uint64_t expiration;

> +     /* User ptr inherited from parent timer */

> +     void *user_ptr;

> +     /* Parent timer */

> +     odp_timer_t timer;

> +     /* Tag inherited from parent timer at time of expiration */

> +     uint32_t tag;

> +     /* Gen-cnt inherited from parent timer at time of creation */

> +     uint16_t gencnt;

> +     uint16_t pad;

> +     uint8_t buf_data[0];

>  } odp_timeout_hdr_t;

>

> -

> -

>  ODP_STATIC_ASSERT(sizeof(odp_timeout_hdr_t) ==

> -        ODP_OFFSETOF(odp_timeout_hdr_t, buf_data),

> -        "ODP_TIMEOUT_HDR_T__SIZE_ERR");

> -

> +               ODP_OFFSETOF(odp_timeout_hdr_t, buf_data),

> +               "sizeof(odp_timeout_hdr_t) ==

> ODP_OFFSETOF(odp_timeout_hdr_t, buf_data)");

>  ODP_STATIC_ASSERT(sizeof(odp_timeout_hdr_t) % sizeof(uint64_t) == 0,

> -        "ODP_TIMEOUT_HDR_T__SIZE_ERR2");

> -

> +               "sizeof(odp_timeout_hdr_t) % sizeof(uint64_t) == 0");

>

>  /**

> - * Return timeout header

> + * Return the timeout header

>   */

> -static inline odp_timeout_hdr_t *odp_timeout_hdr(odp_timeout_t tmo)

> +static inline odp_timeout_hdr_t *odp_timeout_hdr(odp_buffer_t buf)

>  {

> -     odp_buffer_hdr_t *buf_hdr = odp_buf_to_hdr((odp_buffer_t)tmo);

> -     return (odp_timeout_hdr_t *)(uintptr_t)buf_hdr;

> -}

> -

> -

> -

> -#ifdef __cplusplus

> +     return (odp_timeout_hdr_t *)odp_buf_to_hdr(buf);

>  }

> -#endif

>

>  #endif

> diff --git a/platform/linux-generic/odp_priority_queue.c

> b/platform/linux-generic/odp_priority_queue.c

> new file mode 100644

> index 0000000..b72c26f

> --- /dev/null

> +++ b/platform/linux-generic/odp_priority_queue.c

> @@ -0,0 +1,283 @@

> +#define NDEBUG /* Enabled by default by ODP build system */

> +#include <assert.h>

> +#include <unistd.h>

> +#include <stdlib.h>

> +#include <string.h>

> +#include <strings.h>

> +#include <odp_hints.h>

> +#include <odp_align.h>

> +#include <odp_debug.h>

> +

> +#include "odp_priority_queue_internal.h"

> +

> +

> +#define NUM_CHILDREN 4

> +#define CHILD(n) (NUM_CHILDREN * (n) + 1)

> +#define PARENT(n) (((n) - 1) / NUM_CHILDREN)
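
With NUM_CHILDREN == 4 the children of node n are nodes 4n+1 .. 4n+4 and
PARENT() inverts that, e.g. CHILD(0) = 1 (the root's children are 1..4),
CHILD(1) = 5 (node 1's children are 5..8) and PARENT(5) .. PARENT(8) all
give 1.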

> +

> +/* Internal nodes in the array */

> +typedef struct heap_node {

> +     pq_element *elem;

> +     /* Copy of elem->prio so we avoid unnecessary dereferencing */

> +     pq_priority_t prio;

> +} heap_node;

> +

> +static void pq_assert_heap(priority_queue *this);

> +

> +#define ALIGNMENT(p) (1U << ((unsigned)ffs((int)p) - 1U))

> +

> +void priority_queue_con(priority_queue *this, uint32_t _max_elems)

> +{

> +     this->max_elems = _max_elems;

> +     this->reg_elems = 0;

> +     this->num_elems = 0;

> +     this->org_ptr = malloc((_max_elems + 64 / sizeof(heap_node)) *

> +                            sizeof(heap_node));

> +     if (odp_unlikely(this->org_ptr == NULL)) {

> +             ODP_ERR("malloc failed\n");

> +             abort();

> +     }

> +     this->heap = this->org_ptr;

> +     assert((size_t)&this->heap[1] % 8 == 0);

> +     /* Increment base address until first child (index 1) is cache line

> */

> +     /* aligned and thus all children (e.g. index 1-4) stored in the */

> +     /* same cache line. We are not interested in the alignment of */

> +     /* heap[0] as this is a lone node */

> +     while ((size_t)&this->heap[1] % ODP_CACHE_LINE_SIZE != 0) {

> +             /* Cast to ptr to struct member with the greatest alignment

> */

> +             /* requirement */

> +             this->heap = (heap_node *)((pq_priority_t *)this->heap + 1);

> +     }

> +     pq_assert_heap(this);

> +}
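
On a typical 64-bit target sizeof(heap_node) is 16 (a pointer plus a
64-bit priority), so once &heap[1] is cache-line aligned the four
children heap[1..4] share a single 64-byte line. The extra
64/sizeof(heap_node) nodes in the malloc above provide the (at most 56
bytes of) slack that the alignment loop can consume.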

> +

> +void priority_queue_des(priority_queue *this)

> +{

> +     pq_assert_heap(this);

> +     free(this->org_ptr);

> +}

> +

> +#ifndef NDEBUG

> +static uint32_t

> +pq_assert_elem(priority_queue *this, uint32_t index, bool recurse)

> +{

> +     uint32_t num = 1;

> +     const pq_element *elem = this->heap[index].elem;

> +     assert(elem->index == index);

> +     assert(elem->prio == this->heap[index].prio);

> +     uint32_t child = CHILD(index);

> +     uint32_t i;

> +     for (i = 0; i < NUM_CHILDREN; i++, child++) {

> +             if (valid_index(this, child)) {

> +                     assert(this->heap[child].elem != NULL);

> +                     assert(this->heap[child].prio >= elem->prio);

> +                     if (recurse)

> +                             num += pq_assert_elem(this, child, recurse);

> +             }

> +     }

> +     return num;

> +}

> +#endif

> +

> +static void

> +pq_assert_heap(priority_queue *this)

> +{

> +     (void)this;

> +#ifndef NDEBUG

> +     uint32_t num = 0;

> +     if (odp_likely(this->num_elems != 0)) {

> +             assert(this->heap[0].elem != NULL);

> +             num += pq_assert_elem(this, 0, true);

> +     }

> +     assert(num == this->num_elems);

> +     unsigned i;

> +     for (i = 0; i < this->num_elems; i++) {

> +             assert(this->heap[i].elem != NULL);

> +             assert(this->heap[i].prio != INVALID_PRIORITY);

> +     }

> +#endif

> +}

> +

> +/* Bubble up to proper position */

> +void

> +pq_bubble_up(priority_queue *this, pq_element *elem)

> +{

> +     assert(this->heap[elem->index].elem == elem);

> +     assert(this->heap[elem->index].prio == elem->prio);

> +     uint32_t current = elem->index;

> +     pq_priority_t prio = elem->prio;

> +     assert(current == 0 || this->heap[PARENT(current)].elem != NULL);

> +     /* Move up into proper position */

> +     while (current != 0 && this->heap[PARENT(current)].prio > prio) {

> +             uint32_t parent = PARENT(current);

> +             assert(this->heap[parent].elem != NULL);

> +             /* Swap current with parent */

> +             /* 1) Move parent down */

> +             this->heap[current].elem = this->heap[parent].elem;

> +             this->heap[current].prio = this->heap[parent].prio;

> +             this->heap[current].elem->index = current;

> +             /* 2) Move current up to parent */

> +             this->heap[parent].elem = elem;

> +             this->heap[parent].prio = prio;

> +             this->heap[parent].elem->index = parent;

> +             /* Continue moving elem until it is in the right place */

> +             current = parent;

> +     }

> +     pq_assert_heap(this);

> +}

> +

> +/* Find the smallest child that is smaller than the specified priority

> */

> +/* Very hot function, can we decrease the number of cache misses? */

> +uint32_t pq_smallest_child(priority_queue *this,

> +                        uint32_t index,

> +                        pq_priority_t val)

> +{

> +     uint32_t smallest = index;

> +     uint32_t child = CHILD(index);

> +#if NUM_CHILDREN == 4

> +     /* Unroll loop when all children exist */

> +     if (odp_likely(valid_index(this, child + 3))) {

> +             if (this->heap[child + 0].prio < val)

> +                     val = this->heap[smallest = child + 0].prio;

> +             if (this->heap[child + 1].prio < val)

> +                     val = this->heap[smallest = child + 1].prio;

> +             if (this->heap[child + 2].prio < val)

> +                     val = this->heap[smallest = child + 2].prio;

> +             if (this->heap[child + 3].prio < val)

> +                     (void)this->heap[smallest = child + 3].prio;

> +             return smallest;

> +     }

> +#endif

> +     uint32_t i;

> +     for (i = 0; i < NUM_CHILDREN; i++) {

> +             if (odp_unlikely(!valid_index(this, child + i)))

> +                     break;

> +             if (this->heap[child + i].prio < val) {

> +                     smallest = child + i;

> +                     val = this->heap[smallest].prio;

> +             }

> +     }

> +     return smallest;

> +}

> +

> +/* Very hot function, can it be optimised? */

> +void

> +pq_bubble_down(priority_queue *this, pq_element *elem)

> +{

> +     assert(this->heap[elem->index].elem == elem);

> +     assert(this->heap[elem->index].prio == elem->prio);

> +     uint32_t current = elem->index;

> +     pq_priority_t prio = elem->prio;

> +     for (;;) {

> +             uint32_t child = pq_smallest_child(this, current, prio);

> +             if (current == child) {

> +                     /* No smaller child, we are done */

> +                     pq_assert_heap(this);

> +                     return;

> +             }

> +             /* Element larger than smaller child, must move down */

> +             assert(this->heap[child].elem != NULL);

> +             /* 1) Move child up to current */

> +             this->heap[current].elem = this->heap[child].elem;

> +             this->heap[current].prio = this->heap[child].prio;

> +             /* 2) Move current down to child */

> +             this->heap[child].elem = elem;

> +             this->heap[child].prio = prio;

> +             this->heap[child].elem->index = child;

> +

> +             this->heap[current].elem->index = current; /* cache misses!

> */

> +             /* Continue moving element until it is in the right place */

> +             current = child;

> +     }

> +}

> +

> +bool

> +pq_register_element(priority_queue *this, pq_element *elem)

> +{

> +     if (odp_likely(this->reg_elems < this->max_elems)) {

> +             elem->pq = this;

> +             this->reg_elems++;

> +             return true;

> +     }

> +     return false;

> +}

> +

> +void

> +pq_unregister_element(priority_queue *this, pq_element *elem)

> +{

> +     assert(elem->pq == this);

> +     if (is_active(elem))

> +             pq_deactivate_element(this, elem);

> +     this->reg_elems--;

> +}

> +

> +void

> +pq_activate_element(priority_queue *this, pq_element *elem,

> pq_priority_t prio)

> +{

> +     assert(elem->index == INVALID_INDEX);

> +     /* Insert element at end */

> +     uint32_t index = this->num_elems++;

> +     this->heap[index].elem = elem;

> +     this->heap[index].prio = prio;

> +     elem->index = index;

> +     elem->prio = prio;

> +     pq_bubble_up(this, elem);

> +}

> +

> +void

> +pq_deactivate_element(priority_queue *this, pq_element *elem)

> +{

> +     assert(elem->pq == this);

> +     if (odp_likely(is_active(elem))) {

> +             /* Swap element with last element */

> +             uint32_t current = elem->index;

> +             uint32_t last = --this->num_elems;

> +             if (odp_likely(last != current)) {

> +                     /* Move last element to current */

> +                     this->heap[current].elem = this->heap[last].elem;

> +                     this->heap[current].prio = this->heap[last].prio;

> +                     this->heap[current].elem->index = current;

> +                     /* Bubble down old 'last' element to its proper

> place*/

> +                     if (this->heap[current].prio < elem->prio)

> +                             pq_bubble_up(this, this->heap[current].elem);

> +                     else

> +                             pq_bubble_down(this, this->heap[current].elem);

> +             }

> +             elem->index = INVALID_INDEX;

> +             pq_assert_heap(this);

> +     }

> +}

> +

> +void

> +pq_reset_element(priority_queue *this, pq_element *elem, pq_priority_t

> prio)

> +{

> +     assert(prio != INVALID_PRIORITY);

> +     if (odp_likely(is_active(elem))) {

> +             assert(prio >= elem->prio);

> +             elem->prio = prio;

> +             this->heap[elem->index].prio = prio;/* cache misses here! */

> +             pq_bubble_down(this, elem);

> +             pq_assert_heap(this);

> +     } else {

> +             pq_activate_element(this, elem, prio);

> +     }

> +}

> +

> +pq_priority_t pq_first_priority(const priority_queue *this)

> +{

> +     return this->num_elems != 0 ? this->heap[0].prio :

> INVALID_PRIORITY;

> +}

> +

> +pq_element *

> +pq_release_element(priority_queue *this, pq_priority_t threshold)

> +{

> +     if (odp_likely(this->num_elems != 0 &&

> +                    this->heap[0].prio <= threshold)) {

> +             pq_element *elem = this->heap[0].elem;

> +             /* Remove element from heap */

> +             pq_deactivate_element(this, elem);

> +             assert(elem->prio <= threshold);

> +             return elem;

> +     }

> +     return NULL;

> +}

> diff --git a/platform/linux-generic/odp_timer.c b/platform/linux-

> generic/odp_timer.c

> index 313c713..0e5071c 100644

> --- a/platform/linux-generic/odp_timer.c

> +++ b/platform/linux-generic/odp_timer.c

> @@ -4,428 +4,713 @@

>   * SPDX-License-Identifier:     BSD-3-Clause

>   */

>

> -#include <odp_timer.h>

> -#include <odp_timer_internal.h>

> -#include <odp_time.h>

> -#include <odp_buffer_pool_internal.h>

> -#include <odp_internal.h>

> -#include <odp_atomic.h>

> -#include <odp_spinlock.h>

> -#include <odp_sync.h>

> -#include <odp_debug.h>

> -

> -#include <signal.h>

> -#include <time.h>

> +/**

> + * @file

> + *

> + * ODP timer service

> + *

> + */

>

> +#include <assert.h>

> +#include <errno.h>

>  #include <string.h>

> -

> -#define NUM_TIMERS    1

> -#define MAX_TICKS     1024

> -#define MAX_RES       ODP_TIME_SEC

> -#define MIN_RES       (100*ODP_TIME_USEC)

> -

> -

> -typedef struct {

> -     odp_spinlock_t lock;

> -     timeout_t      *list;

> -} tick_t;

> -

> -typedef struct {

> -     int               allocated;

> -     volatile int      active;

> -     volatile uint64_t cur_tick;

> -     timer_t           timerid;

> -     odp_timer_t       timer_hdl;

> -     odp_buffer_pool_t pool;

> -     uint64_t          resolution_ns;

> -     uint64_t          max_ticks;

> -     tick_t            tick[MAX_TICKS];

> -

> -} timer_ring_t;

> -

> -typedef struct {

> -     odp_spinlock_t lock;

> -     int            num_timers;

> -     timer_ring_t   timer[NUM_TIMERS];

> -

> -} timer_global_t;

> -

> -/* Global */

> -static timer_global_t odp_timer;

> -

> -static void add_tmo(tick_t *tick, timeout_t *tmo)

> +#include <stdlib.h>

> +#include <time.h>

> +#include <signal.h>

> +#include "odp_std_types.h"

> +#include "odp_buffer.h"

> +#include "odp_buffer_pool.h"

> +#include "odp_queue.h"

> +#include "odp_hints.h"

> +#include "odp_sync.h"

> +#include "odp_ticketlock.h"

> +#include "odp_debug.h"

> +#include "odp_align.h"

> +#include "odp_shared_memory.h"

> +#include "odp_hints.h"

> +#include "odp_internal.h"

> +#include "odp_time.h"

> +#include "odp_timer.h"

> +#include "odp_timer_internal.h"

> +#include "odp_priority_queue_internal.h"

> +

> +/***********************************************************************

> *******

> + * Translation between timeout and timeout header

> +

> *************************************************************************

> ****/

> +

> +static inline odp_timeout_hdr_t *odp_tmo_to_hdr(odp_timer_tmo_t tmo)

>  {

> -     odp_spinlock_lock(&tick->lock);

> -

> -     tmo->next  = tick->list;

> -     tick->list = tmo;

> +     odp_buffer_t buf = odp_buffer_from_timeout(tmo);

> +     odp_timeout_hdr_t *tmo_hdr = (odp_timeout_hdr_t

> *)odp_buf_to_hdr(buf);

> +     return tmo_hdr;

> +}

>

> -     odp_spinlock_unlock(&tick->lock);

> +/***********************************************************************

> *******

> + * odp_timer abstract datatype

> +

> *************************************************************************

> ****/

> +

> +typedef struct odp_timer_s {

> +     pq_element pqelem;/* Base class */

> +     uint64_t req_tmo;/* Requested timeout tick */

> +     odp_buffer_t tmo_buf;/* ODP_BUFFER_INVALID if timeout enqueued */

> +     odp_queue_t queue;/* ODP_QUEUE_INVALID if timer is free */

> +     uint32_t tag;/* Reusing tag as next pointer/index when timer is

> free */

> +     uint16_t gencnt;/* Smaller to make place for user_buf flag */

> +     unsigned int user_buf:1; /* User-defined buffer? */

> +} odp_timer;
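
The gencnt/tag pair is what lets odp_timer_tmo_status() classify a
received timeout even after its parent timer has been freed and the slot
recycled; conceptually (this mirrors the check implemented further down):

    if (tmo_hdr->gencnt != tim->gencnt)
            return ODP_TMO_ORPHAN;  /* timer freed since timeout was created */
    return tmo_hdr->tag == tim->tag ? ODP_TMO_FRESH
                                    : ODP_TMO_STALE; /* timer reset/cancelled */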

> +

> +/* Constructor */

> +static inline void odp_timer_con(odp_timer *this)

> +{

> +     pq_element_con(&this->pqelem);

> +     this->tmo_buf = ODP_BUFFER_INVALID;

> +     this->queue = ODP_QUEUE_INVALID;

> +     this->gencnt = 0;

>  }

>

> -static timeout_t *rem_tmo(tick_t *tick)

> +/* Destructor */

> +static inline void odp_timer_des(odp_timer *this)

>  {

> -     timeout_t *tmo;

> +     assert(this->tmo_buf == ODP_BUFFER_INVALID);

> +     assert(this->queue == ODP_QUEUE_INVALID);

> +     pq_element_des(&this->pqelem);

> +}

>

> -     odp_spinlock_lock(&tick->lock);

> +/* Setup when timer is allocated */

> +static void setup(odp_timer *this,

> +               odp_queue_t _q,

> +               void *_up,

> +               odp_buffer_t _tmo)

> +{

> +     this->req_tmo = INVALID_PRIORITY;

> +     this->tmo_buf = _tmo;

> +     this->queue = _q;

> +     this->tag = 0;

> +     this->user_buf = false;

> +     /* Initialise constant fields of timeout event */

> +     odp_timeout_hdr_t *tmo_hdr =

> +             odp_tmo_to_hdr(odp_timeout_from_buffer(this->tmo_buf));

> +     tmo_hdr->gencnt = this->gencnt;

> +     tmo_hdr->timer = this;

> +     tmo_hdr->user_ptr = _up;

> +     /* tmo_hdr->tag set at expiration time */

> +     /* tmo_hdr->expiration set at expiration time */

> +     assert(this->queue != ODP_QUEUE_INVALID);

> +}

>

> -     tmo = tick->list;

> +/* Teardown when timer is freed */

> +static odp_buffer_t teardown(odp_timer *this)

> +{

> +     /* Increase generation count to make any pending timeout(s)

> orphaned */

> +     ++this->gencnt;

> +     odp_buffer_t buf = this->tmo_buf;

> +     this->tmo_buf = ODP_BUFFER_INVALID;

> +     this->queue = ODP_QUEUE_INVALID;

> +     return buf;

> +}

>

> -     if (tmo)

> -             tick->list = tmo->next;

> +static inline uint32_t get_next_free(odp_timer *this)

> +{

> +     assert(this->queue == ODP_QUEUE_INVALID);

> +     return this->tag;

> +}

>

> -     odp_spinlock_unlock(&tick->lock);

> +static inline void set_next_free(odp_timer *this, uint32_t nf)

> +{

> +     assert(this->queue == ODP_QUEUE_INVALID);

> +     this->tag = nf;

> +}

>

> -     if (tmo)

> -             tmo->next = NULL;

> +/***********************************************************************

> *******

> + * odp_timer_pool abstract datatype

> + * Includes alloc and free timer

> +

> *************************************************************************

> ****/

> +

> +typedef struct odp_timer_pool_s {

> +     priority_queue pq;

> +     uint64_t cur_tick;/* Current tick value */

> +     uint64_t min_tick;/* Current expiration lower bound */

> +     uint64_t max_tick;/* Current expiration higher bound */

> +     bool shared;

> +     odp_ticketlock_t lock;

> +     const char *name;

> +     odp_buffer_pool_t buf_pool;

> +     uint64_t resolution_ns;

> +     uint64_t min_tmo_tck;

> +     uint64_t max_tmo_tck;

> +     odp_timer *timers;

> +     uint32_t num_alloc;/* Current number of allocated timers */

> +     uint32_t max_timers;/* Max number of timers */

> +     uint32_t first_free;/* 0..max_timers-1 => free timer */

> +     timer_t timerid;

> +     odp_timer_clk_src_t clk_src;

> +} odp_timer_pool;

> +

> +/* Forward declarations */

> +static void timer_init(odp_timer_pool *tp);

> +static void timer_exit(odp_timer_pool *tp);

> +

> +static void odp_timer_pool_con(odp_timer_pool *this,

> +                            const char *_n,

> +                            odp_buffer_pool_t _bp,

> +                            uint64_t _r,

> +                            uint64_t _mint,

> +                            uint64_t _maxt,

> +                            uint32_t _mt,

> +                            bool _s,

> +                            odp_timer_clk_src_t _cs)

> +{

> +     priority_queue_con(&this->pq, _mt);

> +     this->cur_tick = 0;

> +     this->shared = _s;

> +     this->name = strdup(_n);

> +     this->buf_pool = _bp;

> +     this->resolution_ns = _r;

> +     this->min_tmo_tck = odp_timer_ns_to_tick(this, _mint);

> +     this->max_tmo_tck = odp_timer_ns_to_tick(this, _maxt);

> +     this->min_tick = this->cur_tick + this->min_tmo_tck;

> +     this->max_tick = this->cur_tick + this->max_tmo_tck;

> +     this->num_alloc = 0;

> +     this->max_timers = _mt;

> +     this->first_free = 0;

> +     this->clk_src = _cs;

> +     this->timers = malloc(sizeof(odp_timer) * this->max_timers);

> +     if (this->timers == NULL)

> +             ODP_ABORT("%s: malloc failed\n", _n);

> +     uint32_t i;

> +     for (i = 0; i < this->max_timers; i++)

> +             odp_timer_con(&this->timers[i]);

> +     for (i = 0; i < this->max_timers; i++)

> +             set_next_free(&this->timers[i], i + 1);

> +     odp_ticketlock_init(&this->lock);

> +     if (this->clk_src == ODP_CLOCK_CPU)

> +             timer_init(this);

> +     /* Make sure timer pool initialisation is globally observable */

> +     /* before we return a pointer to it */

> +     odp_sync_stores();

> +}

>

> -     return tmo;

> +static odp_timer_pool *odp_timer_pool_new(

> +     const char *_n,

> +     odp_buffer_pool_t _bp,

> +     uint64_t _r,

> +     uint64_t _mint,

> +     uint64_t _maxt,

> +     uint32_t _mt,

> +     bool _s,

> +     odp_timer_clk_src_t _cs)

> +{

> +     odp_timer_pool *this = malloc(sizeof(odp_timer_pool));

> +     if (odp_unlikely(this == NULL))

> +             ODP_ABORT("%s: timer pool malloc failed\n", _n);

> +     odp_timer_pool_con(this, _n, _bp, _r, _mint, _maxt, _mt, _s, _cs);

> +     return this;

>  }

>

> -/**

> - * Search and delete tmo entry from timeout list

> - * return -1 : on error.. handle not in list

> - *           0 : success

> - */

> -static int find_and_del_tmo(timeout_t **tmo, odp_timer_tmo_t handle)

> +static void odp_timer_pool_des(odp_timer_pool *this)

>  {

> -     timeout_t *cur, *prev;

> -     prev = NULL;

> +     if (this->shared)

> +             odp_ticketlock_lock(&this->lock);

> +     if (this->num_alloc != 0) {

> +             /* It's a programming error to attempt to destroy a */

> +             /* timer pool which is still in use */

> +             ODP_ABORT("%s: timers in use\n", this->name);

> +     }

> +     if (this->clk_src == ODP_CLOCK_CPU)

> +             timer_exit(this);

> +     uint32_t i;

> +     for (i = 0; i < this->max_timers; i++)

> +             odp_timer_des(&this->timers[i]);

> +     free(this->timers);

> +     priority_queue_des(&this->pq);

> +     odp_sync_stores();

> +}

>

> -     for (cur = *tmo; cur != NULL; prev = cur, cur = cur->next) {

> -             if (cur->tmo_buf == handle) {

> -                     if (prev == NULL)

> -                             *tmo = cur->next;

> -                     else

> -                             prev->next = cur->next;

> +static void odp_timer_pool_del(odp_timer_pool *this)

> +{

> +     odp_timer_pool_des(this);

> +     free(this);

> +}

>

> -                     break;

> +static inline odp_timer *timer_alloc(odp_timer_pool *this,

> +                                  odp_queue_t queue,

> +                                  void *user_ptr,

> +                                  odp_buffer_t tmo_buf)

> +{

> +     odp_timer *tim = ODP_TIMER_INVALID;

> +     if (odp_likely(this->shared))

> +             odp_ticketlock_lock(&this->lock);

> +     if (odp_likely(this->num_alloc < this->max_timers)) {

> +             this->num_alloc++;

> +             /* Remove first unused timer from free list */

> +             assert(this->first_free != this->max_timers);

> +             tim = &this->timers[this->first_free];

> +             this->first_free = get_next_free(tim);

> +             /* Insert timer into priority queue */

> +             if (odp_unlikely(!pq_register_element(&this->pq,

> +                                                   &tim->pqelem))) {

> +                     /* Unexpected internal error */

> +                     abort();

>               }

> +             /* Create timer */

> +             setup(tim, queue, user_ptr, tmo_buf);

> +     } else {

> +             errno = ENFILE; /* Reusing "file table overflow" */

>       }

> -

> -     if (!cur)

> -             /* couldn't find tmo in list */

> -             return -1;

> -

> -     /* application to free tmo_buf provided by absolute_tmo call */

> -     return 0;

> +     if (odp_likely(this->shared))

> +             odp_ticketlock_unlock(&this->lock);

> +     return tim;

>  }

>

> -int odp_timer_cancel_tmo(odp_timer_t timer_hdl, odp_timer_tmo_t tmo)

> +static inline void timer_free(odp_timer_pool *this, odp_timer *tim)

>  {

> -     int id;

> -     int tick_idx;

> -     timeout_t *cancel_tmo;

> -     odp_timeout_hdr_t *tmo_hdr;

> -     tick_t *tick;

> -

> -     /* get id */

> -     id = (int)timer_hdl - 1;

> -

> -     tmo_hdr = odp_timeout_hdr((odp_timeout_t) tmo);

> -     /* get tmo_buf to cancel */

> -     cancel_tmo = &tmo_hdr->meta;

> +     if (odp_likely(this->shared))

> +             odp_ticketlock_lock(&this->lock);

> +     if (odp_unlikely(tim->queue == ODP_QUEUE_INVALID))

> +             ODP_ABORT("Invalid timer %p\n", tim);

> +     /* Destroy timer */

> +     odp_buffer_t buf = teardown(tim);

> +     /* Remove timer from priority queue */

> +     pq_unregister_element(&this->pq, &tim->pqelem);

> +     /* Insert timer into free list */

> +     set_next_free(tim, this->first_free);

> +     this->first_free = (uint32_t)(tim - &this->timers[0]);

> +     assert(this->num_alloc != 0);

> +     this->num_alloc--;

> +     if (odp_likely(this->shared))

> +             odp_ticketlock_unlock(&this->lock);

> +     if (buf != ODP_BUFFER_INVALID)

> +             odp_buffer_free(buf);

> +}

>

> -     tick_idx = cancel_tmo->tick;

> -     tick = &odp_timer.timer[id].tick[tick_idx];

> +/***********************************************************************

> *******

> + * Operations on timers

> + * reset/reset_w_buf/cancel timer, return timeout

> +

> *************************************************************************

> ****/

>

> -     odp_spinlock_lock(&tick->lock);

> -     /* search and delete tmo from tick list */

> -     if (find_and_del_tmo(&tick->list, tmo) != 0) {

> -             odp_spinlock_unlock(&tick->lock);

> -             ODP_DBG("Couldn't find the tmo (%d) in tick list\n",

> (int)tmo);

> -             return -1;

> +static inline void timer_expire(odp_timer *tim)

> +{

> +     assert(tim->req_tmo != INVALID_PRIORITY);

> +     /* Timer expired, is there actually any timeout event */

> +     /* we can enqueue? */

> +     if (odp_likely(tim->tmo_buf != ODP_BUFFER_INVALID)) {

> +             /* Swap out timeout buffer */

> +             odp_buffer_t buf = tim->tmo_buf;

> +             tim->tmo_buf = ODP_BUFFER_INVALID;

> +             if (odp_likely(!tim->user_buf)) {

> +                     odp_timeout_hdr_t *tmo_hdr =

> +                             odp_tmo_to_hdr(odp_timeout_from_buffer(buf));

> +                     /* Copy tag and requested expiration tick from timer

> */

> +                     tmo_hdr->tag = tim->tag;

> +                     tmo_hdr->expiration = tim->req_tmo;

> +             }

> +             /* Else don't touch user-defined buffer */

> +             int rc = odp_queue_enq(tim->queue, buf);

> +             if (odp_unlikely(rc != 0))

> +                     ODP_ABORT("Failed to enqueue timeout buffer (%d)\n",

> +                               rc);

> +             /* Mark timer as inactive */

> +             tim->req_tmo = INVALID_PRIORITY;

>       }

> -     odp_spinlock_unlock(&tick->lock);

> -

> -     return 0;

> +     /* No, timeout event already enqueued or unavailable */

> +     /* Keep timer active, odp_timer_return_tmo() will patch up */

>  }

>

> -static void notify_function(union sigval sigval)

> +static odp_timer_set_t timer_reset(odp_timer_pool *tp,

> +                                odp_timer *tim,

> +                                uint64_t abs_tck)

>  {

> -     uint64_t cur_tick;

> -     timeout_t *tmo;

> -     tick_t *tick;

> -     timer_ring_t *timer;

> +     assert(tim->user_buf == false);

> +     if (odp_unlikely(abs_tck < tp->min_tick))

> +             return ODP_TIMER_SET_TOOEARLY;

> +     if (odp_unlikely(abs_tck > tp->max_tick))

> +             return ODP_TIMER_SET_TOOLATE;

> +

> +     if (odp_likely(tp->shared))

> +             odp_ticketlock_lock(&tp->lock);

> +

> +     if (odp_unlikely(tim->queue == ODP_QUEUE_INVALID))

> +             ODP_ABORT("Invalid timer %p\n", tim);

> +     if (odp_unlikely(tim->user_buf))

> +             ODP_ABORT("Timer %p has user buffer\n", tim);

> +     /* Increase timer tag to make any pending timeout stale */

> +     tim->tag++;

> +     /* Save requested timeout */

> +     tim->req_tmo = abs_tck;

> +     /* Update timer position in priority queue */

> +     pq_reset_element(&tp->pq, &tim->pqelem, abs_tck);

> +

> +     if (odp_likely(tp->shared))

> +             odp_ticketlock_unlock(&tp->lock);

> +     return ODP_TIMER_SET_SUCCESS;

> +}

>

> -     timer = sigval.sival_ptr;

> +static odp_timer_set_t timer_reset_w_buf(odp_timer_pool *tp,

> +             odp_timer *tim,

> +             uint64_t abs_tck,

> +             odp_buffer_t user_buf)

> +{

> +     if (odp_unlikely(abs_tck < tp->min_tick))

> +             return ODP_TIMER_SET_TOOEARLY;

> +     if (odp_unlikely(abs_tck > tp->max_tick))

> +             return ODP_TIMER_SET_TOOLATE;

> +

> +     if (odp_likely(tp->shared))

> +             odp_ticketlock_lock(&tp->lock);

> +

> +     if (odp_unlikely(tim->queue == ODP_QUEUE_INVALID))

> +             ODP_ABORT("Invalid timer %p\n", tim);

> +     /* Increase timer tag to make any pending timeout stale */

> +     tim->tag++;

> +     /* Save requested timeout */

> +     tim->req_tmo = abs_tck;

> +     /* Set flag indicating presence of user defined buffer */

> +     tim->user_buf = true;

> +     /* Swap in new buffer, save any old buffer pointer */

> +     odp_buffer_t old_buf = tim->tmo_buf;

> +     tim->tmo_buf = user_buf;

> +     /* Update timer position in priority queue */

> +     pq_reset_element(&tp->pq, &tim->pqelem, abs_tck);

> +

> +     if (odp_likely(tp->shared))

> +             odp_ticketlock_unlock(&tp->lock);

> +

> +     /* Free old buffer if present */

> +     if (odp_unlikely(old_buf != ODP_BUFFER_INVALID))

> +             odp_buffer_free(old_buf);

> +     return ODP_TIMER_SET_SUCCESS;

> +}

>

> -     if (timer->active == 0) {

> -             ODP_DBG("Timer (%u) not active\n", timer->timer_hdl);

> -             return;

> +static inline void timer_cancel(odp_timer_pool *tp,

> +                             odp_timer *tim)

> +{

> +     odp_buffer_t old_buf = ODP_BUFFER_INVALID;

> +     if (odp_likely(tp->shared))

> +             odp_ticketlock_lock(&tp->lock);

> +

> +     if (odp_unlikely(tim->queue == ODP_QUEUE_INVALID))

> +             ODP_ABORT("Invalid timer %p\n", tim);

> +     if (odp_unlikely(tim->user_buf)) {

> +             /* Swap out old user buffer */

> +             old_buf = tim->tmo_buf;

> +             tim->tmo_buf = ODP_BUFFER_INVALID;

> +             /* tim->user_buf stays true */

>       }

> +     /* Else a normal timer (no user-defined buffer) */

> +     /* Increase timer tag to make any pending timeout stale */

> +     tim->tag++;

> +     /* Clear requested timeout, mark timer inactive */

> +     tim->req_tmo = INVALID_PRIORITY;

> +     /* Remove timer from the priority queue */

> +     pq_deactivate_element(&tp->pq, &tim->pqelem);

> +

> +     if (odp_likely(tp->shared))

> +             odp_ticketlock_unlock(&tp->lock);

> +     /* Free user-defined buffer if present */

> +     if (odp_unlikely(old_buf != ODP_BUFFER_INVALID))

> +             odp_buffer_free(old_buf);

> +}

>

> -     /* ODP_DBG("Tick\n"); */

> -

> -     cur_tick = timer->cur_tick++;

> -

> -     odp_sync_stores();

> +static inline void timer_return(odp_timer_pool *tp,

> +                             odp_timer *tim,

> +                             odp_timer_tmo_t tmo,

> +                             const odp_timeout_hdr_t *tmo_hdr)

> +{

> +     odp_buffer_t tmo_buf = odp_buffer_from_timeout(tmo);

> +     if (odp_likely(tp->shared))

> +             odp_ticketlock_lock(&tp->lock);

> +     if (odp_unlikely(tim->user_buf))

> +             ODP_ABORT("Timer %p has user-defined buffer\n", tim);

> +     if (odp_likely(tmo_hdr->gencnt == tim->gencnt)) {

> +             assert(tim->tmo_buf == ODP_BUFFER_INVALID);

> +             /* Save returned buffer for use when timer expires next time

> */

> +             tim->tmo_buf = tmo_buf;

> +             tmo_buf = ODP_BUFFER_INVALID;

> +             /* Check if timer is active and should have expired */

> +             if (odp_unlikely(tim->req_tmo != INVALID_PRIORITY &&

> +                              tim->req_tmo <= tp->cur_tick)) {

> +                     /* Expire timer now since we have restored the timeout

> +                        buffer */

> +                     timer_expire(tim);

> +             }

> +             /* Else timer inactive or expires in the future */

> +     }

> +     /* Else timeout orphaned, free buffer later */

> +     if (odp_likely(tp->shared))

> +             odp_ticketlock_unlock(&tp->lock);

> +     if (odp_unlikely(tmo_buf != ODP_BUFFER_INVALID))

> +             odp_buffer_free(tmo_buf);

> +}

>

> -     tick = &timer->tick[cur_tick % MAX_TICKS];

> +/* Non-public so not in odp_timer.h but externally visible, must declare

> + * somewhere */

> +unsigned odp_timer_pool_expire(odp_timer_pool_t tpid, uint64_t tick);

>

> -     while ((tmo = rem_tmo(tick)) != NULL) {

> -             odp_queue_t  queue;

> -             odp_buffer_t buf;

> +unsigned odp_timer_pool_expire(odp_timer_pool_t tpid, uint64_t tick)

> +{

> +     if (odp_likely(tpid->shared))

> +             odp_ticketlock_lock(&tpid->lock);

> +

> +     unsigned nexp = 0;

> +     odp_timer_t tim;

> +     tpid->cur_tick = tick;

> +     tpid->min_tick = tick + tpid->min_tmo_tck;

> +     tpid->max_tick = tick + tpid->max_tmo_tck;

> +     while ((tim = (odp_timer_t)pq_release_element(&tpid->pq, tick)) !=

> +            ODP_TIMER_INVALID) {

> +             assert(get_prio(&tim->pqelem) <= tick);

> +             timer_expire(tim);

> +             nexp++;

> +     }

>

> -             queue = tmo->queue;

> -             buf   = tmo->buf;

> +     if (odp_likely(tpid->shared))

> +             odp_ticketlock_unlock(&tpid->lock);

> +     return nexp;

> +}
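
odp_timer_pool_expire() is the single tick-processing entry point; the
POSIX timer below drives it once per resolution period, but a platform
with its own tick source could call it directly, e.g. (sketch,
my_read_tick() is made up):

    static void my_tick_handler(odp_timer_pool_t tp)
    {
            uint64_t tick = my_read_tick();         /* monotonic tick count */
            (void)odp_timer_pool_expire(tp, tick);  /* returns #expired timers */
    }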

>

> -             if (buf != tmo->tmo_buf)

> -                     odp_buffer_free(tmo->tmo_buf);

> +/***********************************************************************

> *******

> + * POSIX timer support

> + * Functions that use Linux/POSIX per-process timers and related

> facilities

> +

> *************************************************************************

> ****/

>

> -             odp_queue_enq(queue, buf);

> -     }

> +static void timer_notify(sigval_t sigval)

> +{

> +     odp_timer_pool *tp = (odp_timer_pool *)sigval.sival_ptr;

> +     uint64_t new_tick = tp->cur_tick + 1;

> +     (void)odp_timer_pool_expire(tp, new_tick);

>  }

>

> -static void timer_start(timer_ring_t *timer)

> +static void timer_init(odp_timer_pool *tp)

>  {

>       struct sigevent   sigev;

>       struct itimerspec ispec;

>       uint64_t res, sec, nsec;

>

> -     ODP_DBG("\nTimer (%u) starts\n", timer->timer_hdl);

> +     ODP_DBG("Creating POSIX timer for timer pool %s, period %"

> +             PRIu64" ns\n", tp->name, tp->resolution_ns);

>

>       memset(&sigev, 0, sizeof(sigev));

>       memset(&ispec, 0, sizeof(ispec));

>

>       sigev.sigev_notify          = SIGEV_THREAD;

> -     sigev.sigev_notify_function = notify_function;

> -     sigev.sigev_value.sival_ptr = timer;

> +     sigev.sigev_notify_function = timer_notify;

> +     sigev.sigev_value.sival_ptr = tp;

>

> -     if (timer_create(CLOCK_MONOTONIC, &sigev, &timer->timerid)) {

> -             ODP_DBG("Timer create failed\n");

> -             return;

> -     }

> +     if (timer_create(CLOCK_MONOTONIC, &sigev, &tp->timerid))

> +             ODP_ABORT("timer_create() returned error %s\n",

> +                       strerror(errno));

>

> -     res  = timer->resolution_ns;

> +     res  = tp->resolution_ns;

>       sec  = res / ODP_TIME_SEC;

> -     nsec = res - sec*ODP_TIME_SEC;

> +     nsec = res - sec * ODP_TIME_SEC;

>

>       ispec.it_interval.tv_sec  = (time_t)sec;

>       ispec.it_interval.tv_nsec = (long)nsec;

>       ispec.it_value.tv_sec     = (time_t)sec;

>       ispec.it_value.tv_nsec    = (long)nsec;

>

> -     if (timer_settime(timer->timerid, 0, &ispec, NULL)) {

> -             ODP_DBG("Timer set failed\n");

> -             return;

> -     }

> -

> -     return;

> +     if (timer_settime(tp->timerid, 0, &ispec, NULL))

> +             ODP_ABORT("timer_settime() returned error %s\n",

> +                       strerror(errno));

>  }

>

> -int odp_timer_init_global(void)

> +static void timer_exit(odp_timer_pool *tp)

>  {

> -     ODP_DBG("Timer init ...");

> -

> -     memset(&odp_timer, 0, sizeof(timer_global_t));

> -

> -     odp_spinlock_init(&odp_timer.lock);

> -

> -     ODP_DBG("done\n");

> -

> -     return 0;

> +     if (timer_delete(tp->timerid) != 0)

> +             ODP_ABORT("timer_delete() returned error %s\n",

> +                       strerror(errno));

>  }

>

> -int odp_timer_disarm_all(void)

> +/***********************************************************************

> *******

> + * Public API functions

> + * Some parameter checks and error messages

> + * No modifications of internal state

> +

> *************************************************************************

> ****/

> +odp_timer_pool_t

> +odp_timer_pool_create(const char *name,

> +                   odp_buffer_pool_t buf_pool,

> +                   uint64_t resolution_ns,

> +                   uint64_t min_timeout,

> +                   uint64_t max_timeout,

> +                   uint32_t num_timers,

> +                   bool shared,

> +                   odp_timer_clk_src_t clk_src)

>  {

> -     int timers;

> -     struct itimerspec ispec;

> -

> -     odp_spinlock_lock(&odp_timer.lock);

> -

> -     timers = odp_timer.num_timers;

> -

> -     ispec.it_interval.tv_sec  = 0;

> -     ispec.it_interval.tv_nsec = 0;

> -     ispec.it_value.tv_sec     = 0;

> -     ispec.it_value.tv_nsec    = 0;

> -

> -     for (; timers >= 0; timers--) {

> -             if (timer_settime(odp_timer.timer[timers].timerid,

> -                               0, &ispec, NULL)) {

> -                     ODP_DBG("Timer reset failed\n");

> -                     odp_spinlock_unlock(&odp_timer.lock);

> -                     return -1;

> -             }

> -             odp_timer.num_timers--;

> -     }

> -

> -     odp_spinlock_unlock(&odp_timer.lock);

> -

> -     return 0;

> +     /* Verify that buffer pool can be used for timeouts */

> +     odp_buffer_t buf = odp_buffer_alloc(buf_pool);

> +     if (buf == ODP_BUFFER_INVALID)

> +             ODP_ABORT("%s: Failed to allocate buffer\n", name);

> +     if (odp_buffer_type(buf) != ODP_BUFFER_TYPE_TIMEOUT)

> +             ODP_ABORT("%s: Buffer pool wrong type\n", name);

> +     odp_buffer_free(buf);

> +     odp_timer_pool_t tp = odp_timer_pool_new(name, buf_pool,

> resolution_ns,

> +                           min_timeout, max_timeout, num_timers,

> +                           shared, clk_src);

> +     return tp;

>  }
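
Example of creating a pool with this signature (values are illustrative;
tmo_pool is assumed to be a buffer pool of type ODP_BUFFER_TYPE_TIMEOUT):

    odp_timer_pool_t tp;
    tp = odp_timer_pool_create("net_timers", tmo_pool,
                               1000000,        /* 1 ms resolution */
                               10000000,       /* min timeout 10 ms */
                               10000000000ULL, /* max timeout 10 s */
                               1000,           /* up to 1000 timers */
                               true,           /* shared between threads */
                               ODP_CLOCK_CPU);
    odp_timer_pool_start();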

>

> -odp_timer_t odp_timer_create(const char *name, odp_buffer_pool_t pool,

> -                          uint64_t resolution_ns, uint64_t min_ns,

> -                          uint64_t max_ns)

> +void odp_timer_pool_start(void)

>  {

> -     uint32_t id;

> -     timer_ring_t *timer;

> -     odp_timer_t timer_hdl;

> -     int i;

> -     uint64_t max_ticks;

> -     (void) name;

> -

> -     if (resolution_ns < MIN_RES)

> -             resolution_ns = MIN_RES;

> -

> -     if (resolution_ns > MAX_RES)

> -             resolution_ns = MAX_RES;

> -

> -     max_ticks = max_ns / resolution_ns;

> -

> -     if (max_ticks > MAX_TICKS) {

> -             ODP_DBG("Maximum timeout too long: %"PRIu64" ticks\n",

> -                     max_ticks);

> -             return ODP_TIMER_INVALID;

> -     }

> -

> -     if (min_ns < resolution_ns) {

> -             ODP_DBG("Min timeout %"PRIu64" ns < resolution %"PRIu64"

> ns\n",

> -                     min_ns, resolution_ns);

> -             return ODP_TIMER_INVALID;

> -     }

> -

> -     odp_spinlock_lock(&odp_timer.lock);

> -

> -     if (odp_timer.num_timers >= NUM_TIMERS) {

> -             odp_spinlock_unlock(&odp_timer.lock);

> -             ODP_DBG("All timers allocated\n");

> -             return ODP_TIMER_INVALID;

> -     }

> -

> -     for (id = 0; id < NUM_TIMERS; id++) {

> -             if (odp_timer.timer[id].allocated == 0)

> -                     break;

> -     }

> -

> -     timer = &odp_timer.timer[id];

> -     timer->allocated = 1;

> -     odp_timer.num_timers++;

> -

> -     odp_spinlock_unlock(&odp_timer.lock);

> -

> -     timer_hdl = id + 1;

> -

> -     timer->timer_hdl     = timer_hdl;

> -     timer->pool          = pool;

> -     timer->resolution_ns = resolution_ns;

> -     timer->max_ticks     = MAX_TICKS;

> -

> -     for (i = 0; i < MAX_TICKS; i++) {

> -             odp_spinlock_init(&timer->tick[i].lock);

> -             timer->tick[i].list = NULL;

> -     }

> -

> -     timer->active = 1;

> -     odp_sync_stores();

> -

> -     timer_start(timer);

> +     /* Nothing to do here, timer pools are started by the create call

> */

> +}

>

> -     return timer_hdl;

> +void odp_timer_pool_destroy(odp_timer_pool_t tpid)

> +{

> +     odp_timer_pool_del(tpid);

>  }

>

> -odp_timer_tmo_t odp_timer_absolute_tmo(odp_timer_t timer_hdl, uint64_t

> tmo_tick,

> -                                    odp_queue_t queue, odp_buffer_t buf)

> +uint64_t odp_timer_tick_to_ns(odp_timer_pool_t tpid, uint64_t ticks)

>  {

> -     int id;

> -     uint64_t tick;

> -     uint64_t cur_tick;

> -     timeout_t *new_tmo;

> -     odp_buffer_t tmo_buf;

> -     odp_timeout_hdr_t *tmo_hdr;

> -     timer_ring_t *timer;

> +     return ticks * tpid->resolution_ns;

> +}

>

> -     id = (int)timer_hdl - 1;

> -     timer = &odp_timer.timer[id];

> +uint64_t odp_timer_ns_to_tick(odp_timer_pool_t tpid, uint64_t ns)

> +{

> +     return (uint64_t)(ns / tpid->resolution_ns);

> +}

>

> -     cur_tick = timer->cur_tick;

> -     if (tmo_tick <= cur_tick) {

> -             ODP_DBG("timeout too close\n");

> -             return ODP_TIMER_TMO_INVALID;

> -     }

> +uint64_t odp_timer_current_tick(odp_timer_pool_t tpid)

> +{

> +     return tpid->cur_tick;

> +}
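
The conversions are a plain multiply/divide by the pool resolution, so
nanosecond values are truncated to whole ticks; with a 1 ms resolution:

    odp_timer_ns_to_tick(tp, 10000000) == 10        /* 10 ms -> 10 ticks */
    odp_timer_tick_to_ns(tp, 10)       == 10000000
    odp_timer_ns_to_tick(tp, 1500000)  == 1         /* 1.5 ms rounds down */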

>

> -     if ((tmo_tick - cur_tick) > MAX_TICKS) {

> -             ODP_DBG("timeout too far: cur %"PRIu64" tmo %"PRIu64"\n",

> -                     cur_tick, tmo_tick);

> -             return ODP_TIMER_TMO_INVALID;

> +uintptr_t odp_timer_pool_query_conf(odp_timer_pool_t tpid,

> +                                 odp_timer_pool_conf_t item)

> +{

> +     switch (item) {

> +     case ODP_TIMER_NAME:

> +             return (uintptr_t)(tpid->name);

> +     case ODP_TIMER_RESOLUTION:

> +             return tpid->resolution_ns;

> +     case ODP_TIMER_MIN_TICKS:

> +             return tpid->min_tmo_tck;

> +     case ODP_TIMER_MAX_TICKS:

> +             return tpid->max_tmo_tck;

> +     case ODP_TIMER_NUM_TIMERS:

> +             return tpid->max_timers;

> +     case ODP_TIMER_SHARED:

> +             return tpid->shared;

> +     default:

> +             return 0;

>       }

> +}

>

> -     tick = tmo_tick % MAX_TICKS;

> -

> -     tmo_buf = odp_buffer_alloc(timer->pool);

> -     if (tmo_buf == ODP_BUFFER_INVALID) {

> -             ODP_DBG("tmo buffer alloc failed\n");

> -             return ODP_TIMER_TMO_INVALID;

> +odp_timer_t odp_timer_alloc(odp_timer_pool_t tpid,

> +                         odp_queue_t queue,

> +                         void *user_ptr)

> +{

> +     /* We check this because ODP_QUEUE_INVALID is used */

> +     /* to indicate a free timer */

> +     if (odp_unlikely(queue == ODP_QUEUE_INVALID))

> +             ODP_ABORT("%s: Invalid queue handle\n", tpid->name);

> +     odp_buffer_t tmo_buf = odp_buffer_alloc(tpid->buf_pool);

> +     if (odp_likely(tmo_buf != ODP_BUFFER_INVALID)) {

> +             odp_timer *tim = timer_alloc(tpid, queue, user_ptr,

> tmo_buf);

> +             if (odp_likely(tim != ODP_TIMER_INVALID)) {

> +                     /* Success */

> +                     assert(tim->queue != ODP_QUEUE_INVALID);

> +                     return tim;

> +             }

> +             odp_buffer_free(tmo_buf);

>       }

> +     /* Else failed to allocate timeout event */

> +     /* errno set by odp_buffer_alloc() or timer_alloc() */

> +     return ODP_TIMER_INVALID;

> +}

>

> -     tmo_hdr = odp_timeout_hdr((odp_timeout_t) tmo_buf);

> -     new_tmo = &tmo_hdr->meta;

> -

> -     new_tmo->timer_id = id;

> -     new_tmo->tick     = (int)tick;

> -     new_tmo->tmo_tick = tmo_tick;

> -     new_tmo->queue    = queue;

> -     new_tmo->tmo_buf  = tmo_buf;

> -

> -     if (buf != ODP_BUFFER_INVALID)

> -             new_tmo->buf = buf;

> -     else

> -             new_tmo->buf = tmo_buf;

> -

> -     add_tmo(&timer->tick[tick], new_tmo);

> -

> -     return tmo_buf;

> +void odp_timer_free(odp_timer_t tim)

> +{

> +     odp_timer_pool *tp = (odp_timer_pool *)get_pq(&tim->pqelem);

> +     timer_free(tp, tim);

>  }

>

> -uint64_t odp_timer_tick_to_ns(odp_timer_t timer_hdl, uint64_t ticks)

> +odp_timer_set_t odp_timer_set_abs_w_buf(odp_timer_t tim,

> +                                     uint64_t abs_tck,

> +                                     odp_buffer_t user_buf)

>  {

> -     uint32_t id;

> +     odp_timer_pool *tp = (odp_timer_pool *)get_pq(&tim->pqelem);

> +     odp_timer_set_t rc = timer_reset_w_buf(tp, tim, abs_tck, user_buf);

> +     return rc;

> +}

>

> -     id = timer_hdl - 1;

> -     return ticks * odp_timer.timer[id].resolution_ns;

> +odp_timer_set_t odp_timer_set_abs(odp_timer_t tim, uint64_t abs_tck)

> +{

> +     odp_timer_pool *tp = (odp_timer_pool *)get_pq(&tim->pqelem);

> +     odp_timer_set_t rc = timer_reset(tp, tim, abs_tck);

> +     return rc;

>  }

>

> -uint64_t odp_timer_ns_to_tick(odp_timer_t timer_hdl, uint64_t ns)

> +odp_timer_set_t odp_timer_set_rel_w_buf(odp_timer_t tim,

> +                                     uint64_t rel_tck,

> +                                     odp_buffer_t user_buf)

>  {

> -     uint32_t id;

> +     odp_timer_pool *tp = (odp_timer_pool *)get_pq(&tim->pqelem);

> +     odp_timer_set_t rc = timer_reset_w_buf(tp, tim, tp->cur_tick +

> rel_tck,

> +                                            user_buf);

> +     return rc;

> +}

>

> -     id = timer_hdl - 1;

> -     return ns / odp_timer.timer[id].resolution_ns;

> +odp_timer_set_t odp_timer_set_rel(odp_timer_t tim, uint64_t rel_tck)

> +{

> +     odp_timer_pool *tp = (odp_timer_pool *)get_pq(&tim->pqelem);

> +     odp_timer_set_t rc = timer_reset(tp, tim, tp->cur_tick + rel_tck);

> +     return rc;

>  }

>

> -uint64_t odp_timer_resolution(odp_timer_t timer_hdl)

> +void odp_timer_cancel(odp_timer_t tim)

>  {

> -     uint32_t id;

> +     odp_timer_pool *tp = (odp_timer_pool *)get_pq(&tim->pqelem);

> +     timer_cancel(tp, tim);

> +}
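
Putting the per-timer calls together (sketch; queue and my_ctx are
application-side, error paths abbreviated):

    odp_timer_t tim = odp_timer_alloc(tp, queue, my_ctx);
    if (tim != ODP_TIMER_INVALID) {
            uint64_t tck = odp_timer_ns_to_tick(tp, 50000000); /* 50 ms */
            if (odp_timer_set_rel(tim, tck) != ODP_TIMER_SET_SUCCESS)
                    ODP_ERR("timeout out of range\n");
            /* ... later, when the timeout is no longer wanted ... */
            odp_timer_cancel(tim);
            odp_timer_free(tim);
    }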

>

> -     id = timer_hdl - 1;

> -     return odp_timer.timer[id].resolution_ns;

> +void odp_timer_return_tmo(odp_timer_tmo_t tmo)

> +{

> +     const odp_timeout_hdr_t *tmo_hdr = odp_tmo_to_hdr(tmo);

> +     odp_timer *parent_tim = tmo_hdr->timer;

> +     odp_timer_pool *tp = (odp_timer_pool *)get_pq(&parent_tim->pqelem);

> +     timer_return(tp, parent_tim, tmo, tmo_hdr);

>  }

>

> -uint64_t odp_timer_maximum_tmo(odp_timer_t timer_hdl)

> +odp_timer_tmo_status_t odp_timer_tmo_status(odp_timer_tmo_t tmo)

>  {

> -     uint32_t id;

> +     const odp_timeout_hdr_t *tmo_hdr = odp_tmo_to_hdr(tmo);

> +     odp_timer *parent_tim = tmo_hdr->timer;

>

> -     id = timer_hdl - 1;

> -     return odp_timer.timer[id].max_ticks;

> +     if (odp_unlikely(tmo_hdr->gencnt != parent_tim->gencnt)) {

> +             /* Generation counters differ => timer has been freed */

> +             return ODP_TMO_ORPHAN;

> +     }

> +     /* Else generation counters match => parent timer exists */

> +

> +     if (odp_likely(parent_tim->tag == tmo_hdr->tag))

> +             return ODP_TMO_FRESH;

> +     else

> +             return ODP_TMO_STALE;

>  }

>

> -uint64_t odp_timer_current_tick(odp_timer_t timer_hdl)

> +odp_timer_t odp_timer_handle(odp_timer_tmo_t tmo)

>  {

> -     uint32_t id;

> +     odp_timeout_hdr_t *tmo_hdr = odp_tmo_to_hdr(tmo);

> +     odp_timer_t parent_tim = tmo_hdr->timer;

> +     if (odp_likely(tmo_hdr->gencnt == parent_tim->gencnt))

> +             return parent_tim;

> +     else

> +             return ODP_TIMER_INVALID;

> +}

>

> -     id = timer_hdl - 1;

> -     return odp_timer.timer[id].cur_tick;

> +uint64_t odp_timer_expiration(odp_timer_tmo_t tmo)

> +{

> +     odp_timeout_hdr_t *tmo_hdr = odp_tmo_to_hdr(tmo);

> +     return tmo_hdr->expiration;

>  }

>

> -odp_timeout_t odp_timeout_from_buffer(odp_buffer_t buf)

> +void *odp_timer_userptr(odp_timer_tmo_t tmo)

>  {

> -     return (odp_timeout_t) buf;

> +     odp_timeout_hdr_t *tmo_hdr = odp_tmo_to_hdr(tmo);

> +     return tmo_hdr->user_ptr;

>  }

>

> -uint64_t odp_timeout_tick(odp_timeout_t tmo)

> +int odp_timer_init_global(void)

>  {

> -     odp_timeout_hdr_t *tmo_hdr = odp_timeout_hdr(tmo);

> -     return tmo_hdr->meta.tmo_tick;

> +     return 0;

>  }

> diff --git a/test/api_test/odp_timer_ping.c

> b/test/api_test/odp_timer_ping.c

> index 7406a45..2617b5c 100644

> --- a/test/api_test/odp_timer_ping.c

> +++ b/test/api_test/odp_timer_ping.c

> @@ -20,6 +20,8 @@

>   *    Otherwise timeout may happen bcz of slow nw speed

>   */

>

> +#include <assert.h>

> +#include <stdlib.h>

>  #include <unistd.h>

>  #include <fcntl.h>

>  #include <errno.h>

> @@ -41,14 +43,15 @@

>  #define MSG_POOL_SIZE         (4*1024*1024)

>  #define BUF_SIZE             8

>  #define PING_CNT     10

> -#define PING_THRD    2       /* Send and Rx Ping thread */

> +#define PING_THRD    2       /* send_ping and rx_ping threads */

>

>  /* Nanoseconds */

>  #define RESUS        10000

>  #define MINUS        10000

>  #define MAXUS        10000000

>

> -static odp_timer_t test_timer_ping;

> +static odp_timer_pool_t tp;

> +static odp_timer_t test_timer_ping = ODP_TIMER_INVALID;

>  static odp_timer_tmo_t test_ping_tmo;

>

>  #define PKTSIZE      64

> @@ -128,15 +131,7 @@ static int listen_to_pingack(void)

>                                        (socklen_t *)&len);

>                       if (bytes > 0) {

>                               /* pkt rxvd therefore cancel the timeout */

> -                             if (odp_timer_cancel_tmo(test_timer_ping,

> -                                                      test_ping_tmo) != 0) {

> -                                     ODP_ERR("cancel_tmo failed ..exiting

> listner thread\n");

> -                                     /* avoid exiting from here even if tmo

> -                                      * failed for current ping,

> -                                      * allow subsequent ping_rx request */

> -                                     err = -1;

> -

> -                             }

> +                             odp_timer_cancel(test_timer_ping);

>                               /* cruel bad hack used for sender, listner ipc..

>                                * euwww.. FIXME ..

>                                */

> @@ -160,7 +155,6 @@ static int send_ping_request(struct sockaddr_in

> *addr)

>

>       uint64_t tick;

>       odp_queue_t queue;

> -     odp_buffer_t buf;

>

>       int err = 0;

>

> @@ -184,8 +178,16 @@ static int send_ping_request(struct sockaddr_in

> *addr)

>

>       /* get the ping queue */

>       queue = odp_queue_lookup("ping_timer_queue");

> +     test_timer_ping = odp_timer_alloc(tp, queue, NULL);

> +     if (test_timer_ping == ODP_TIMER_INVALID) {

> +             ODP_ERR("Failed to allocate timer.\n");

> +             err = -1;

> +             goto err;

> +     }

>

>       for (i = 0; i < PING_CNT; i++) {

> +             odp_buffer_t buf;

> +             odp_timer_tmo_t tmo;

>               /* prepare icmp pkt */

>               bzero(&pckt, sizeof(pckt));

>               pckt.hdr.type = ICMP_ECHO;

> @@ -209,12 +211,10 @@ static int send_ping_request(struct sockaddr_in

> *addr)

>               printf(" icmp_sent msg_cnt %d\n", i);

>

>               /* arm the timer */

> -             tick = odp_timer_current_tick(test_timer_ping);

> +             tick = odp_timer_current_tick(tp);

>

>               tick += 1000;

> -             test_ping_tmo = odp_timer_absolute_tmo(test_timer_ping,

> tick,

> -                                                    queue,

> -                                                    ODP_BUFFER_INVALID);

> +             odp_timer_set_abs(test_timer_ping, tick);

>               /* wait for timeout event */

>               while ((buf = odp_queue_deq(queue)) == ODP_BUFFER_INVALID) {

>                       /* flag true means ack rxvd.. a cruel hack as I

> @@ -229,17 +229,28 @@ static int send_ping_request(struct sockaddr_in

> *addr)

>                               break;

>                       }

>               }

> +             assert(odp_buffer_type(buf) == ODP_BUFFER_TYPE_TIMEOUT);

> +             tmo = odp_timeout_from_buffer(buf);

>

> -             /* free tmo_buf for timeout case */

> -             if (buf != ODP_BUFFER_INVALID) {

> -                     ODP_DBG(" timeout msg_cnt [%i] \n", i);

> +             switch (odp_timer_tmo_status(tmo)) {

> +             case ODP_TMO_FRESH:

> +                     ODP_DBG(" timeout msg_cnt [%i]\n", i);

>                       /* so to avoid seg fault commented */

> -                     odp_buffer_free(buf);

>                       err = -1;

> +                     break;

> +             case ODP_TMO_STALE:

> +                     /* Ignore stale timeouts */

> +                     break;

> +             case ODP_TMO_ORPHAN:

> +                     ODP_ERR("Received orphaned timeout!\n");

> +                     abort();

>               }

> +             odp_timer_return_tmo(tmo);

>       }

>

>  err:

> +     if (test_timer_ping != ODP_TIMER_INVALID)

> +             odp_timer_free(test_timer_ping);

>       return err;

>  }

>

> @@ -340,9 +351,9 @@ int main(int argc ODP_UNUSED, char *argv[]

> ODP_UNUSED)

>       pool = odp_buffer_pool_create("msg_pool", pool_base, MSG_POOL_SIZE,

>                                     BUF_SIZE,

>                                     ODP_CACHE_LINE_SIZE,

> -                                   ODP_BUFFER_TYPE_RAW);

> +                                   ODP_BUFFER_TYPE_TIMEOUT);

>       if (pool == ODP_BUFFER_POOL_INVALID) {

> -             ODP_ERR("Pool create failed.\n");

> +             ODP_ERR("Buffer pool create failed.\n");

>               return -1;

>       }

>

> @@ -357,15 +368,19 @@ int main(int argc ODP_UNUSED, char *argv[]

> ODP_UNUSED)

>               return -1;

>       }

>

> -     test_timer_ping = odp_timer_create("ping_timer", pool,

> -                                        RESUS*ODP_TIME_USEC,

> -                                        MINUS*ODP_TIME_USEC,

> -                                        MAXUS*ODP_TIME_USEC);

> -

> -     if (test_timer_ping == ODP_TIMER_INVALID) {

> -             ODP_ERR("Timer create failed.\n");

> +     /*

> +      * Create timer pool

> +      */

> +     tp = odp_timer_pool_create("timer_pool", pool,

> +                                RESUS*ODP_TIME_USEC,

> +                                MINUS*ODP_TIME_USEC,

> +                                MAXUS*ODP_TIME_USEC,

> +                                1, false, ODP_CLOCK_CPU);

> +     if (tp == ODP_TIMER_POOL_INVALID) {

> +             ODP_ERR("Timer pool create failed.\n");

>               return -1;

>       }

> +     odp_timer_pool_start();

>

>       odp_shm_print_all();

>

> --

> 1.9.1

>

>

Ola Liljedahl Oct. 6, 2014, 9:34 p.m. UTC | #11
Wow some more in-depth review.

In this mail thread, gmail shows the subject prefix as PATCHv3 but I
generated the patch from git with the prefix PATCHv4 and I think the
individual message on the mailing list archive shows the subject using
PATCHv4. Is it gmail playing tricks on me?

On 6 October 2014 15:01, Savolainen, Petri (NSN - FI/Espoo) <
petri.savolainen@nsn.com> wrote:

> >
> > +     odp_timer_cancel(test_timer);
> > +     odp_timer_free(test_timer);
>
> This thread is done. Should it free "hdl", instead of "test_timer" (since
> it may be still active).
>
Either or. Freeing test_timer (which might be active) creates an opportunity
for testing other paths through the code. Sometimes this design causes
timeouts received by the other threads to be orphaned. The example works
either way, but it perhaps becomes a bit less obvious. We also need
a real timer test program which tests all legal situations. If I write
this, I might simplify the example.


> +uintptr_t odp_timer_pool_query_conf(odp_timer_pool_t tpid,
> > +                                 odp_timer_pool_conf_t item);
>
>
> I had the same comment on v2:
>
> It would be simpler to output all information (to *info) with one function
> call. For example,
>
> typedef struct odp_timer_pool_info_s {
>         const char *name;
>         uint64_t resolution;
>         uint64_t min_tmo;
>         uint64_t max_tmo;
>         uint32_t num_timers;
>         bool     shared
> } odp_timer_pool_info_t;
>
> int odp_timer_pool_info(odp_timer_pool_t tpid, odp_timer_pool_info_t
> *info);
>
>
And I decided not to change it before. If this is a show stopper for you, I
guess I have to change it...
ACCEPTED
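
For reference, a minimal usage sketch of the proposed info call (the struct
and odp_timer_pool_info() are the proposal quoted above, not yet in the
patch; a return value of 0 meaning success is an assumption):

#include <stdio.h>
#include <inttypes.h>
#include <odp.h>

/* Sketch only: odp_timer_pool_info_t/odp_timer_pool_info() are the proposal
 * above; 0 == success is assumed. */
static void print_timer_pool_info(odp_timer_pool_t tpool)
{
        odp_timer_pool_info_t info;

        if (odp_timer_pool_info(tpool, &info) == 0)
                printf("pool %s: res %" PRIu64 " ns, tmo %" PRIu64 "-%" PRIu64
                       " ns, %" PRIu32 " timers, %s\n",
                       info.name, info.resolution, info.min_tmo, info.max_tmo,
                       info.num_timers, info.shared ? "shared" : "private");
}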


> + */
> > +void odp_timer_free(odp_timer_t tim);
>
> Need a success/fail return value? User would need to know if the timeout
> is still coming, or not. User cannot free the destination queue or stop
> scheduling before the remaining tmo has been received (and freed).
>
Well this makes sense even though I am not sure it is the only solution. I
could tolerate a return value here because timer_free is less performance
sensitive. I hope I don't regret this (is there some situation where the
timer has no associated unexpired timeout so we are led to believe that the
timeout is instead outstanding and supposedly will be received at some
later time? I guess we will have to make sure this situation cannot occur).
ACCEPTED



>
> > + * will then be received. odp_timer_tmo_status() must be used to check
> if
> > + * the received timeout is valid.
>
> Can user call odp_timer_tmo_status() on a user defined buffer?? I guess
> not.
>
No, user-defined buffers are not necessarily odp_timer_tmo_t buffers. It is
not possible to cancel a timer with a user-defined timeout buffer *if* the
timer has already expired. You can attempt to cancel the timer and if it
has not expired, the user-defined buffer will be freed.
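
To make the user-defined buffer case concrete, a sketch (the set-with-buffer
function name below is hypothetical; the patch documents a user_buf parameter
for the absolute set variant but the exact prototype is not quoted here):

#include <stdint.h>
#include <odp.h>

/* Sketch: arming a timer with a user-defined buffer as the timeout event.
 * odp_timer_set_abs_w_buf() is a hypothetical name for the set-with-buffer
 * variant described in the doxygen above. */
static void arm_with_user_buf(odp_timer_pool_t tpool, odp_timer_t tim,
                              odp_buffer_t user_buf)
{
        uint64_t abs_tck = odp_timer_current_tick(tpool) + 1000;

        odp_timer_set_abs_w_buf(tim, abs_tck, user_buf);

        /* Later, if the event is no longer wanted: */
        odp_timer_cancel(tim);
        /* If the timer had not yet expired, the implementation frees
         * user_buf; if it had, user_buf arrives on the destination queue
         * and must not be passed to odp_timer_tmo_status(). */
}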


>
> >   *
> > - * @return Timeout handle if successful, otherwise ODP_TIMER_TMO_INVALID
> > + * Note: any invalid parameters will be treated as programming errors
> and
> > will
> > + * cause the application to abort.
> > + *
> > + * @param tim      Timer
> > + * @param abs_tck  Expiration time in absolute timer ticks
> > + * @param user_buf The buffer to use as timeout event
> > + *
> > + * @return Success or failure code
>
>
> @return ODP_TIMER_SET_XXX ..., and explanation what user is expected to do
> on those
>
The timer implementation does not care what the application does if the
timeout is too early or too late. But I could add a comment on what seems
useful for the application to do.
ACCEPTED

Mentioning the symbolic names of the return values is done elsewhere in
odp_timer.h but not by some other ODP functions (e.g.
odp_buffer_type(), odp_queue_sched_type()), so we do not seem to have any
rule for this.
ACCEPTED
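
A sketch of what checking the return value could look like (the enumerator
names are assumptions made up for illustration; only the odp_timer_set_t type
and the too-early/too-late cases appear in the patch and the discussion):

#include <stdint.h>
#include <odp.h>

/* Sketch: react to the odp_timer_set_abs() return code. The ODP_TIMER_SET_*
 * names are hypothetical. */
static void arm_timer(odp_timer_pool_t tpool, odp_timer_t tim,
                      uint64_t ticks_from_now)
{
        uint64_t abs_tck = odp_timer_current_tick(tpool) + ticks_from_now;

        switch (odp_timer_set_abs(tim, abs_tck)) {
        case ODP_TIMER_SET_SUCCESS:
                break;
        case ODP_TIMER_SET_TOOEARLY:
                /* Below the pool's minimum timeout: e.g. treat as an
                 * immediate expiration in the application. */
                break;
        case ODP_TIMER_SET_TOOLATE:
                /* Beyond the pool's maximum timeout: e.g. re-arm in
                 * shorter steps. */
                break;
        }
}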


>
> >
>
> >   */
> > -int odp_timer_cancel_tmo(odp_timer_t timer, odp_timer_tmo_t tmo);
> > +odp_timer_set_t odp_timer_set_abs(odp_timer_t tim, uint64_t abs_tck);
> >
> >  /**
> > - * Convert buffer handle to timeout handle
> > + * Set a timer with a relative expiration time and user-defined buffer.
> >   *
> > - * @param buf  Buffer handle
> > + * Set (arm) the timer to expire at a relative future time.
> > + * Arming may fail (if the timer is in state EXPIRED),
> > + * an earlier timeout will then be received. odp_timer_tmo_status() must
> > + * be used to check if the received timeout is valid.
>
> odp_timer_tmo_status()on user defined buffer ...
>
Sorry, copy&paste error.
ACCEPTED



> >
> >  /**
> > - * Return absolute timeout tick
> > + * Cancel a timer
> > + *
> > + * Cancel a timer, preventing future expiration and delivery.
> > + *
> > + * A timer that has already expired and been enqueued for delivery may
> be
> > + * impossible to cancel and will instead be delivered to the destination
> > queue.
> > + * Use odp_timer_tmo_status() the check whether a received timeout is
> > fresh or
> > + * stale (cancelled). Stale timeouts will automatically be recycled.
> > + *
> > + * Note: any invalid parameters will be treated as programming errors
> and
> > will
> > + * cause the application to abort.
> > + *
> > + * @param tim    Timer handle
> > + */
> > +void odp_timer_cancel(odp_timer_t tim);
>
>
> Need a success/fail return value? User would need to know if the timeout
> is still coming, or not...
>
Why? When (if) the timeout is received, odp_timer_tmo_status() will tell
the application if the timeout is fresh or stale (or orphaned). Which use
case requires the application to immediately know if the timeout was
successfully cancelled?

There is a reason for the set and cancel functions not returning any status
codes relating to the potential and eventual success of those operations
(the tooearly/toolate indications are (hopefully) benign exceptions, these
situations only require checking the passed parameter against the current
tick which hopefully can be read with little overhead). I want to enable
asynchronous implementations of the timer service (e.g. it could be running
on a dedicated core or far away on the SoC). Returning a status code
indicating the success of e.g. cancel would require a synchronous
implementation, the actual timer manager would have to return a status
value to the timer call and the application would have to block waiting for
this status value. This would add latency to these latency-critical
operations.

Thus the Timer API takes an optimistic approach and you have to handle the
spurious failed set and cancel operations when those timeouts are received.


> > + * Return timeout to timer
> > + *
> > + * Return a received timeout for reuse with the parent timer.
> > + * Note: odp_timer_return_tmo() must be called on all received timeouts!
> > + * (Excluding user defined timeout buffers).
>
> And excluding ORPHANs?? For ORPHAN just free the tmo buffer.
>
The application could call odp_buffer_free() (as documented by
ODP_TMO_ORPHAN) itself but odp_timer_return_tmo() will also do this. Both
alternatives are legal and documented. The comment here does create a minor
contradiction; I should align it with what is documented elsewhere.
ACCEPTED


>
> > + */
> > +void odp_timer_return_tmo(odp_timer_tmo_t tmo);
> > +
> > +/**
> > + * Return fresh/stale/orphan status of timeout.
> > + *
> > + * Check a received timeout for orphaness (i.e. parent timer freed) and
> > + * staleness (i.e. parent timer has been reset or cancelled after the
> > timeout
> > + * expired and was enqueued).
> > + * If the timeout is fresh, it should be processed.
> > + * If the timeout is stale or orphaned, it should be ignored.
> > + * All timeouts must be returned using the odp_timer_return_tmo() call.
>
> Except ORPHANs. Examples just free the buffer...
>
This alternative is documented so the example is following the rules. I am
unifying the comments.
ACCEPTED


>
> Maybe some instructions what to do in these cases:
> - STALE
>   - call odp_timer_return_tmo()
>   - do not free the tmo buffer
> - ORPHAN
>   - do not call odp_timer_return_tmo()
>   - free the tmo buffer
>
Yes this is documented now in odp_timer_return_tmo() and also in the
odp_timer_tmo_t type definition.
ACCEPTED
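
In code, the handling rules above boil down to something like this sketch
(mirroring the switch in the updated ping test, but following the
STALE/ORPHAN rules listed here):

#include <odp.h>

/* Sketch: per-status handling of a received timeout event. */
static void handle_timeout_event(odp_buffer_t buf)
{
        odp_timer_tmo_t tmo = odp_timeout_from_buffer(buf);

        switch (odp_timer_tmo_status(tmo)) {
        case ODP_TMO_FRESH:
                /* Valid expiration: act on it, then hand the timeout back
                 * to its parent timer. */
                odp_timer_return_tmo(tmo);
                break;
        case ODP_TMO_STALE:
                /* Timer was reset/cancelled after this expiration: ignore,
                 * but still return the timeout (do not free the buffer). */
                odp_timer_return_tmo(tmo);
                break;
        case ODP_TMO_ORPHAN:
                /* Parent timer has been freed: just free the buffer
                 * (odp_timer_return_tmo() is documented to work too). */
                odp_buffer_free(buf);
                break;
        }
}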



> +{
> > +     odp_timer_pool *this = malloc(sizeof(odp_timer_pool));
>
>
> Should not use malloc, but ODP shm instead, if want to support processes
> (in addition to pthreads)...
>
Then I think we need to introduce a malloc-like allocator for shared
memory. I don't think each and every component should be creating their own
shared memory regions. That's a lot of work for just one malloc() call.
There is also another use of malloc() in linux-generic
(odp_packet_socket.c). And more than a dozen usages of static data which I
assume also prevent process-mode.

$ nm lib/.libs/libodp.a | grep " [bBCdD] "
0000000000000000 b local_chunk
0000000000000080 C pool_entry_ptr
0000000000000000 b pool_tbl
0000000000000000 b global
0000000000000000 b pktio_tbl
0000000000000020 b raw_sockets
0000000000000000 b raw_sockets_lock
0000000000000000 b queue_tbl
0000000000000010 b odp_ring_list
0000000000000000 b qlock
0000000000000000 b sched
0000000000000000 b sched_local
0000000000000000 b odp_shm_tbl
0000000000000000 b odp_system_info
0000000000000000 b num_threads
0000000000000000 b odp_this_thread
0000000000000020 b odp_thread_tbl

Create odp_malloc() (that works with ODP process mode) and I will use it in
the timer implementation.
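
For illustration, a sketch of the shm-based variant (assuming linux-generic's
odp_shm_reserve(name, size, align) helper that returns a pointer, and the
internal odp_timer_pool struct from the patch; not code from the patch
itself):

#include <stdio.h>
#include <string.h>
#include <odp.h>
/* plus the internal timer header for the odp_timer_pool struct (assumed) */

/* Sketch: allocate the timer pool object from ODP shared memory instead of
 * the process heap. odp_shm_reserve(name, size, align) returning a pointer
 * is an assumption based on the existing linux-generic API. */
static odp_timer_pool *timer_pool_alloc(const char *name)
{
        char shm_name[32];
        odp_timer_pool *tpool;

        snprintf(shm_name, sizeof(shm_name), "tp_%s", name);
        tpool = odp_shm_reserve(shm_name, sizeof(odp_timer_pool),
                                ODP_CACHE_LINE_SIZE);
        if (tpool == NULL)
                return NULL; /* caller decides how to fail */
        memset(tpool, 0, sizeof(odp_timer_pool));
        return tpool;
}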

> +unsigned odp_timer_pool_expire(odp_timer_pool_t tpid, uint64_t tick)
> > +{
> > +     if (odp_likely(tpid->shared))
> > +             odp_ticketlock_lock(&tpid->lock);
>
>
> One big lock (per timer pool) will scale poorly. Almost all functions use
> the same lock and e.g. here it's locked as long as there are tmo items in
> the pq queue (which may be many). Current timer implementation uses a lock
> per tick (so that average contention per lock should be low).
>
It is a software implementation; we cannot expect it to scale linearly, and
any serious ODP platform ought to have HW support for timers. Also I don't
expect timers to expire frequently, so the lock time in
odp_timer_pool_expire() should normally be short. One could temporarily
release the lock after N expired timeouts if one is worried that the global
timer lock will be held for too long.

All timers use the same priority queue data structure and thus the same lock.
One could distribute timers over different priority queues (each with its own
lock); this would decrease contention. I would like to see a situation
where contention actually becomes a problem before we speculate too much
about alternative designs. And other alternatives can be envisioned.
E.g. run all timer management on a separate core and communicate with that
core using some simple lock-less request queue in shared memory. There
would be no locks and applications do not have to access any internal timer
state. But speculating in potential solutions before we know the actual
problem is not the best way to spend our effort now me thinks.
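
A rough sketch of the first alternative above (several priority queues, each
with its own ticket lock; the priority queue itself and the timer-index
mapping are stand-ins for internals of the patch):

#include <stdint.h>
#include <odp.h>

#define NUM_PQ 8  /* number of independent priority queues, illustrative */

/* Sketch of the lock-splitting idea only: each timer is pinned to one of
 * NUM_PQ priority queues, each protected by its own ticket lock, so
 * set/cancel calls on different timers mostly take different locks. */
struct timer_pq {
        odp_ticketlock_t lock;
        /* priority_queue_t pq;  -- per-queue heap from the patch */
};

static struct timer_pq timer_pqs[NUM_PQ];

static inline struct timer_pq *timer_to_pq(uint32_t timer_index)
{
        /* Any cheap, stable mapping works; here index mod NUM_PQ */
        return &timer_pqs[timer_index % NUM_PQ];
}

static void timer_set_abs_split(uint32_t timer_index, uint64_t abs_tck)
{
        struct timer_pq *tpq = timer_to_pq(timer_index);

        odp_ticketlock_lock(&tpq->lock);
        /* ...reset the timer's element in tpq->pq to expire at abs_tck... */
        (void)abs_tck;
        odp_ticketlock_unlock(&tpq->lock);
}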


>
> -Petri
>
>
>
Savolainen, Petri (NSN - FI/Espoo) Oct. 9, 2014, 2:03 p.m. UTC | #12
>

>  /**

> - * Return absolute timeout tick

> + * Cancel a timer

> + *

> + * Cancel a timer, preventing future expiration and delivery.

> + *

> + * A timer that has already expired and been enqueued for delivery may be

> + * impossible to cancel and will instead be delivered to the destination

> queue.

> + * Use odp_timer_tmo_status() the check whether a received timeout is

> fresh or

> + * stale (cancelled). Stale timeouts will automatically be recycled.

> + *

> + * Note: any invalid parameters will be treated as programming errors and

> will

> + * cause the application to abort.

> + *

> + * @param tim    Timer handle

> + */

> +void odp_timer_cancel(odp_timer_t tim);



Need a success/fail return value? User would need to know if the timeout is still coming, or not...
Why? When (if) the timeout is received, odp_timer_tmo_status() will tell the application if the timeout is fresh or stale (or orphaned). Which use case requires the application to immediately know if the timeout was successfully cancelled?

For example, if I have a number of re-transmission timers. Outgoing packets and incoming ack packets are handled in the same atomic queue. Also timeouts would be sent to the same queue. Mostly (99.99% of the packets) I’ll receive the ack packet before the tmo expires. So I’ll cancel the timer during ack packet processing. Now if cancel() does not tell me whether the operation was successful (usually it is), how would I know when I can reuse the timer for some other packet?

If cancel failed, it’s OK - I’ll receive a stale tmo later and there I can mark the timer reusable again. If cancel succeeded, I don’t get a confirmation of that, ever. I don’t want the timer send me a stale tmo on every cancel, since that would increase per packet event rate 50% (from 2 to 3).

So, the cancel status is needed. Right?


There is a reason for the set and cancel functions not returning any status codes relating to the potential and eventual success of those operations (the tooearly/toolate indications are (hopefully) benign exceptions, these situations only require checking the passed parameter against the current tick which hopefully can be read with little overhead). I want to enable asynchronous implementations of the timer service (e.g. it could be running on a dedicated core or far away on the SoC). Returning a status code indicating the success of e.g. cancel would require a synchronous implementation, the actual timer manager would have to return a status value to the timer call and the application would have to block waiting for this status value. This would add latency to these latency-critical operations.

Thus the Timer API takes an optimistic approach and you have to handle the spurious failed set and cancel operations when those timeouts are received.

Sure, but you have to consider the whole system. If the API lacks some necessary synchronization feature, the effort is just pushed from the implementation to the application, which may not be able to solve it, or it may be more costly due to a lower amount of SoC status information (on application level).

The API should be targeted at HW that has a “proper” level of HW acceleration. An application will also run on cheap HW, but then you’ll get “cheap” performance - and that’s fine. Again, on more specialized HW, the API should not limit the performance. If a SoC has put down HW for managing robust and fast timer cancel operations, I should be able to enjoy that.


> +{

> +     odp_timer_pool *this = malloc(sizeof(odp_timer_pool));


Should not use malloc, but ODP shm instead, if want to support processes (in addition to pthreads)...
Then I think we need to introduce a malloc-like allocator for shared memory. I don't think each and every component should be creating their own shared memory regions. That's a lot of work for just one malloc() call. There is also another use of malloc() in linux-generic (odp_packet_socket.c). And more than a dozen usages of static data which I assume also prevent process-mode.

$ nm lib/.libs/libodp.a | grep " [bBCdD] "

0000000000000000 b local_chunk

0000000000000080 C pool_entry_ptr

0000000000000000 b pool_tbl

0000000000000000 b global

0000000000000000 b pktio_tbl

0000000000000020 b raw_sockets

0000000000000000 b raw_sockets_lock

0000000000000000 b queue_tbl

0000000000000010 b odp_ring_list

0000000000000000 b qlock

0000000000000000 b sched

0000000000000000 b sched_local

0000000000000000 b odp_shm_tbl

0000000000000000 b odp_system_info

0000000000000000 b num_threads

0000000000000000 b odp_this_thread

0000000000000020 b odp_thread_tbl
Create odp_malloc() (that works with ODP process mode) and I will use it in the timer implementation.

Just a quick repeat from another mail thread. odp_malloc is not high priority. Also, a heap allocator should not be needed here as you are doing fixed-size allocations. Process support is not strictly required, but it’s just a matter of time before someone wants to use timers with processes…



> +unsigned odp_timer_pool_expire(odp_timer_pool_t tpid, uint64_t tick)

> +{

> +     if (odp_likely(tpid->shared))

> +             odp_ticketlock_lock(&tpid->lock);


One big lock (per timer pool) will scale poorly. Almost all functions use the same lock and e.g. here it's locked as long as there are tmo items in the pq queue (which may be many). Current timer implementation uses a lock per tick (so that average contention per lock should be low).
It is a software implementation, we cannot expect it to scale linearly, any serious ODP platform ought to have HW support for timers. Also I don't expect timers to expire frequently so the lock time in odp_timer_pool_expire() should normally be short. One could temporarily release the lock after N expired timeouts if one is worried that the global timer lock will be held for too long time.

All timers use the same data priority queue and thus the same lock. One could distribute timers over different priority queues (each with its own lock), this would decrease contention. I would like to see a situation where contention starts to become a problem before we speculate too much in different alternative designs. And other alternatives can be envisioned. E.g. run all timer management on a separate core and communicate with that core using some simple lock-less request queue in shared memory. There would be no locks and applications do not have to access any internal timer state. But speculating in potential solutions before we know the actual problem is not the best way to spend our effort now me thinks.

Yeah, it’s for reference, but “one big lock” is not the design pattern I’d promote. It just kills performance on multicore. We are tied to system calls in some places (e.g. sockets for packet IO), but when we are not - we should target good performance and scaling. E.g. if a user runs a retransmission timer per packet and cancels 100% of those, it’s millions of set/cancel operations per second on e.g. tens of cores. I’d want to be socket/system call limited, not timer set/cancel limited (which is under our control).

-Petri
Ola Liljedahl Oct. 9, 2014, 3:10 p.m. UTC | #13
>
>
> Need a success/fail return value? User would need to know if the timeout
> is still coming, or not...
>
>  Why? When (if) the timeout is received, odp_timer_tmo_status() will tell
> the application is the timeout is fresh or stale (or orphaned). Which use
> case requires the application to immediately know if the timeout was
> successfully cancelled?
>
>
>
> For example, if I have a number of re-transmission timers. Outgoing
> packets and incoming ack packets are handled in the same atomic queue. Also
> timeouts would be sent to the same queue. Mostly (99.99% of the packets)
> I’ll receive the ack packet before the tmo expires. So I’ll cancel timer
> during ack packet processing. Now if cancel() does not tell me whether the
> operation was successful (usually it is), how I’d know when I can reuse the
> timer for some other packet?
>
>
>
> If cancel failed, it’s OK - I’ll receive a stale tmo later and there I can
> mark the timer reusable again. If cancel succeeded, I don’t get a
> confirmation of that, ever. I don’t want the timer send me a stale tmo on
> every cancel, since that would increase per packet event rate 50% (from 2
> to 3).
>
>
>
> So, the cancel status is needed. Right?
>
Probably we mean different things by cancel failing. In my API and
implementation, cancel will always succeed in the sense that any
outstanding timeout will never be seen as a fresh timeout. The only
question is whether we will be able to prevent the timeout from being
received and we can't do that if the timer has already expired. But a sent
timeout will be detected as stale.

The timer can be reused and reset even if the cancel operation "failed". It
is only the last set or cancel operation that defines the state of the
timer and the freshness of any received timeouts. When the timeout is
returned it will be re-associated with the timer and if the timer has
already expired, the timeout will be enqueued for immediate delivery.

In your specific example, the application should just (re-) set the
re-transmission timer whenever it receives another packet. No need to first
cancel the timer and check any return code. I did specify something like
that in one of my first proposals; this put responsibility on the
application to remember the status of previous ODP operations. With the help of
Bill, I realized we didn't have to expose this to the application, keep it
hidden under the API and enable more implementation choices (e.g.
asynchronous HW or SW implementations that do not have to perform the
operation immediately and return a success code which the application has
to stall waiting for).

Sorry if this was not clear. Probably something in the documentation needs
to be enhanced to convey this message better.
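
Concretely, the intended usage for the retransmission case is roughly this
sketch (only functions from the patch are used; the per-flow bookkeeping is
left out):

#include <stdint.h>
#include <odp.h>

/* Sketch of the re-set pattern: the last set/cancel always wins, so the ack
 * handler can simply cancel, and the next outgoing packet re-arms the same
 * timer without waiting for any stale timeout to drain. Illustrative only. */
static void on_packet_sent(odp_timer_pool_t tpool, odp_timer_t rexmit_tim,
                           uint64_t rexmit_ticks)
{
        odp_timer_set_abs(rexmit_tim,
                          odp_timer_current_tick(tpool) + rexmit_ticks);
}

static void on_ack_received(odp_timer_t rexmit_tim)
{
        /* May be "too late" if the timer already expired; the enqueued
         * timeout will then be reported as ODP_TMO_STALE and ignored. */
        odp_timer_cancel(rexmit_tim);
}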


>
>
>
> There is a reason for the set and cancel functions not returning any
> status codes relating to the potential and eventual success of those
> operations (the tooearly/toolate indications are (hopefully) benign
> exceptions, these situations only require checking the passed parameter
> against the current tick which hopefully can be read with little overhead).
> I want to enable asynchronous implementations of the timer service (e.g. it
> could be running on a dedicated core or far away on the SoC). Returning a
> status code indicating the success of e.g. cancel would require a
> synchronous implementation, the actual timer manager would have to return a
> status value to the timer call and the application would have to block
> waiting for this status value. This would add latency to these
> latency-critical operations.
>
>
>
> Thus the Timer API takes an optimistic approach and you have to handle the
> spurious failed set and cancel operations when those timeouts are received.
>
>
>
> Sure, but you have to consider the whole system. If the API lacks some
> necessary synchronization feature, the effort is just pushed from
> implementation to the application, which may not be able solve it or it may
> be more costly due to lower amount of SoC status information (on
> application level).
>
>
>
> API should be targeted for HW that has “proper” level of HW acceleration.
> An application will run also on cheap HW, but then you’ll get “cheap”
> performance - and that’s fine. Again on more specialized HW, the API should
> not limit the performance. If a SoC has put down HW for managing robust and
> fast timer cancel operations, I should be able enjoy from that.
>
And I hope this will be possible, persons responsible for HW-accelerated
implementations of ODP need to speak out if they think the API is poor for
them. I haven't heard or seen any such complaint.

Just by keeping the API abstract should allow for different HW
implementations. If there is HW that cannot reliably cancel e.g. expired
timeouts, it can always be complemented by SW to keep track of stale and
orphaned timeouts. That's why there are calls to check for timeout status
for a received timeout and to return a timeout (regardless of status).



>
>
>
>
>   > +{
> > +     odp_timer_pool *this = malloc(sizeof(odp_timer_pool));
>
>   Should not use malloc, but ODP shm instead, if want to support
> processes (in addition to pthreads)...
>
>  Then I think we need to introduce a malloc-like allocator for shared
> memory. I don't think each and every component should be creating their own
> shared memory regions. That's a lot of work for just one malloc() call.
> There is also another use of malloc() in linux-generic
> (odp_packet_socket.c). And more than a dozen usages of static data which I
> assume also prevent process-mode.
>
> $ nm lib/.libs/libodp.a | grep " [bBCdD] "
>
> 0000000000000000 b local_chunk
>
> 0000000000000080 C pool_entry_ptr
>
> 0000000000000000 b pool_tbl
>
> 0000000000000000 b global
>
> 0000000000000000 b pktio_tbl
>
> 0000000000000020 b raw_sockets
>
> 0000000000000000 b raw_sockets_lock
>
> 0000000000000000 b queue_tbl
>
> 0000000000000010 b odp_ring_list
>
> 0000000000000000 b qlock
>
> 0000000000000000 b sched
>
> 0000000000000000 b sched_local
>
> 0000000000000000 b odp_shm_tbl
>
> 0000000000000000 b odp_system_info
>
> 0000000000000000 b num_threads
>
> 0000000000000000 b odp_this_thread
>
> 0000000000000020 b odp_thread_tbl
>
>  Create odp_malloc() (that works with ODP process mode) and I will use it
> in the timer implementation.
>
>
>
> Just quick repeat from another mail thread. Odp_malloc is not high
> priority. Also a heap allocator should not be needed here as you are doing
> fixed size allocations. Process support is not strictly required, but it’s
> just matter of time that someone wants use timers with processes…
>
I can add this support in an additional patch. We are talking enhancements
here, not fundamental features. I'd rather get the API approved and then
incrementally work on enhancements than immediately aim for an undefined
and subjective goal of perfection.




>
>
>
>
>
>
>   > +unsigned odp_timer_pool_expire(odp_timer_pool_t tpid, uint64_t tick)
> > +{
> > +     if (odp_likely(tpid->shared))
> > +             odp_ticketlock_lock(&tpid->lock);
>
>   One big lock (per timer pool) will scale poorly. Almost all functions
> use the same lock and e.g. here it's locked as long as there are tmo items
> in the pq queue (which may be many). Current timer implementation uses a
> lock per tick (so that average contention per lock should be low).
>
>  It is a software implementation, we cannot expect it to scale linearly,
> any serious ODP platform ought to have HW support for timers. Also I don't
> expect timers to expire frequently so the lock time in
> odp_timer_pool_expire() should normally be short. One could temporarily
> release the lock after N expired timeouts if one is worried that the global
> timer lock will be held for too long time.
>
>
>
> All timers use the same data priority queue and thus the same lock. One
> could distribute timers over different priority queues (each with its own
> lock), this would decrease contention. I would like to see a situation
> where contention starts to become a problem before we speculate too much in
> different alternative designs. And other alternatives can be envisioned.
> E.g. run all timer management on a separate core and communicate with that
> core using some simple lock-less request queue in shared memory. There
> would be no locks and applications do not have to access any internal timer
> state. But speculating in potential solutions before we know the actual
> problem is not the best way to spend our effort now me thinks.
>
>
>
> Yeah, it’s for reference but “one big lock” is not the design pattern I’d
> promote. It just kills performance on multicore. We are tied to system
> calls in some places (e.g. sockets for packet IO), but when we are not - we
> should target for good performance and scaling. E.g. if user runs a
> retransmission timer per packet, and cancels 100% of those. It’s millions
> of set/cancel operations per second on a e.g. tens of cores. I’d want to be
> socket/system call limited, not timer set/cancel limited (which is under
> our control).
>
Again I can send updates that enhance scalability. For the priority
queue-based implementation, I can think of different schemes, e.g.
spreading timers randomly over different priority queues or moving timers
between pq's based on the expiration time. Each pq would use a separate
lock. But with enough cores, it could make sense to keep timer management
in one thread/core and skip the locks. It's not just the lock but all the
data that is referenced by the application when making timer calls that
will limit performance. The 4-ary priority queue (basically a complete
4-ary tree) is very good at decreasing the amount of data that is accessed
for different operations but a dozen accesses to cache lines stored in the
cache of some other CPU will still hurt. Offloading all timer operations to
a separate core would make much better use of the CPU caches me thinks.



>
> -Petri
>
>
>
Savolainen, Petri (NSN - FI/Espoo) Oct. 10, 2014, 8:05 a.m. UTC | #14
From: ext Ola Liljedahl [mailto:ola.liljedahl@linaro.org]

Sent: Thursday, October 09, 2014 6:10 PM
To: Savolainen, Petri (NSN - FI/Espoo)
Cc: lng-odp@lists.linaro.org
Subject: Re: [lng-odp] [PATCHv4] Timer API and and priority queue-based implementation


Need a success/fail return value? User would need to know if the timeout is still coming, or not...
Why? When (if) the timeout is received, odp_timer_tmo_status() will tell the application if the timeout is fresh or stale (or orphaned). Which use case requires the application to immediately know if the timeout was successfully cancelled?

For example, if I have a number of re-transmission timers. Outgoing packets and incoming ack packets are handled in the same atomic queue. Also timeouts would be sent to the same queue. Mostly (99.99% of the packets) I’ll receive the ack packet before the tmo expires. So I’ll cancel timer during ack packet processing. Now if cancel() does not tell me whether the operation was successful (usually it is), how I’d know when I can reuse the timer for some other packet?

If cancel failed, it’s OK - I’ll receive a stale tmo later and there I can mark the timer reusable again. If cancel succeeded, I don’t get a confirmation of that, ever. I don’t want the timer send me a stale tmo on every cancel, since that would increase per packet event rate 50% (from 2 to 3).

So, the cancel status is needed. Right?
Probably we mean different things by cancel failing. In my API and implementation, cancel will always succeed in the sense that any outstanding timeout will never be seen as a fresh timeout. The only question is whether we will be able to prevent the timeout from being received and we can't do that if the timer has already expired. But a sent timeout will be detected as stale.

The timer can be reused and reset even if the cancel operation "failed". It is only the last set or cancel operating that defines the state of the timer and the freshness of any received timeouts. When the timeout is returned it will be re-associated with the timer and if the timer has already expired, the timeout will be enqueued for immediate delivery.

In your specific example, the application should just (re-) set the re-transmission timer whenever it receives another packet. No need to first cancel the timer and check any return code. I did specify something like that in one of my first proposals, this put responsibility on the application to remember status of previous ODP operations. With the help of Bill, I realized we didn't have to expose this to the application, keep it hidden under the API and enable more implementation choices (e.g. asynchronous HW or SW implementations that do not have to perform the operation immediately and return a success code which the application has to stall waiting for).

Sorry if this was not clear. Probably something in the documentation needs to be enhanced to convey this message better.

There are many packets/timers in-flight (e.g. 1000). One timer per outgoing packet. E.g. a packet+ack roundtrip could be 10us and retransmit timeout 10ms.

I’d pick a timer and set it on packet output, and cancel it when the ack is received. I have to cancel it here (not reset) because I cannot predict when the timer is needed for another outgoing packet. Now, in the rare case the cancel fails (the ack was received too close to tmo expiration), I would not notice that, but would mark the timer ready for reuse. The tmo is now marked stale but not yet in my queue. On the next outgoing packet (<1us later) I’ll reuse the same timer (reset it for 10ms). 10us later, I receive the ack for the packet and cancel the same timer, and so on. Maybe I’ll cancel/reset the same timer multiple times before the stale tmo would travel through the timer HW/queues/scheduling back to me (and the status check function magic).

If cancel() would return me a status code, I would not try to reuse the timer before the (stale) tmo is received.



There is a reason for the set and cancel functions not returning any status codes relating to the potential and eventual success of those operations (the tooearly/toolate indications are (hopefully) benign exceptions, these situations only require checking the passed parameter against the current tick which hopefully can be read with little overhead). I want to enable asynchronous implementations of the timer service (e.g. it could be running on a dedicated core or far away on the SoC). Returning a status code indicating the success of e.g. cancel would require a synchronous implementation, the actual timer manager would have to return a status value to the timer call and the application would have to block waiting for this status value. This would add latency to these latency-critical operations.

Thus the Timer API takes an optimistic approach and you have to handle the spurious failed set and cancel operations when those timeouts are received.

Sure, but you have to consider the whole system. If the API lacks some necessary synchronization feature, the effort is just pushed from implementation to the application, which may not be able solve it or it may be more costly due to lower amount of SoC status information (on application level).

API should be targeted for HW that has “proper” level of HW acceleration. An application will run also on cheap HW, but then you’ll get “cheap” performance - and that’s fine. Again on more specialized HW, the API should not limit the performance. If a SoC has put down HW for managing robust and fast timer cancel operations, I should be able enjoy from that.
And I hope this will be possible, persons responsible for HW-accelerated implementations of ODP need to speak out if they think the API is poor for them. I haven't heard or seen any such complaint.

Just by keeping the API abstract should allow for different HW implementations. If there is HW that cannot reliably cancel e.g. expired timeouts, it can always be complemented by SW to keep track of stale and orphaned timeouts. That's why there are calls to check for timeout status for a received timeout and to return a timeout (regardless of status).

The same is possible if cancel() returns a status. An implementation can also decide to always “fail” and make the user receive the stale tmo. Another implementation would be able to cancel it synchronously (mostly succeeding), and performance would be e.g. 30% better due to the lower event rate (e.g. 2 packets vs 2 packets + 1 stale tmo per packet).




> +{

> +     odp_timer_pool *this = malloc(sizeof(odp_timer_pool));

Should not use malloc, but ODP shm instead, if want to support processes (in addition to pthreads)...
Then I think we need to introduce a malloc-like allocator for shared memory. I don't think each and every component should be creating their own shared memory regions. That's a lot of work for just one malloc() call. There is also another use of malloc() in linux-generic (odp_packet_socket.c). And more than a dozen usages of static data which I assume also prevent process-mode.

$ nm lib/.libs/libodp.a | grep " [bBCdD] "

0000000000000000 b local_chunk

0000000000000080 C pool_entry_ptr

0000000000000000 b pool_tbl

0000000000000000 b global

0000000000000000 b pktio_tbl

0000000000000020 b raw_sockets

0000000000000000 b raw_sockets_lock

0000000000000000 b queue_tbl

0000000000000010 b odp_ring_list

0000000000000000 b qlock

0000000000000000 b sched

0000000000000000 b sched_local

0000000000000000 b odp_shm_tbl

0000000000000000 b odp_system_info

0000000000000000 b num_threads

0000000000000000 b odp_this_thread

0000000000000020 b odp_thread_tbl
Create odp_malloc() (that works with ODP process mode) and I will use it in the timer implementation.

Just quick repeat from another mail thread. Odp_malloc is not high priority. Also a heap allocator should not be needed here as you are doing fixed size allocations. Process support is not strictly required, but it’s just matter of time that someone wants use timers with processes…
I can add this support in an additional patch. We are talking enhancements here, not fundamental features. I rather get the API approved and then incrementally work with enhancements than immediately aim for an undefined and subjective goal of perfection.

That’s OK.




> +unsigned odp_timer_pool_expire(odp_timer_pool_t tpid, uint64_t tick)

> +{

> +     if (odp_likely(tpid->shared))

> +             odp_ticketlock_lock(&tpid->lock);

One big lock (per timer pool) will scale poorly. Almost all functions use the same lock and e.g. here it's locked as long as there are tmo items in the pq queue (which may be many). Current timer implementation uses a lock per tick (so that average contention per lock should be low).
It is a software implementation, we cannot expect it to scale linearly, any serious ODP platform ought to have HW support for timers. Also I don't expect timers to expire frequently so the lock time in odp_timer_pool_expire() should normally be short. One could temporarily release the lock after N expired timeouts if one is worried that the global timer lock will be held for too long time.

All timers use the same data priority queue and thus the same lock. One could distribute timers over different priority queues (each with its own lock), this would decrease contention. I would like to see a situation where contention starts to become a problem before we speculate too much in different alternative designs. And other alternatives can be envisioned. E.g. run all timer management on a separate core and communicate with that core using some simple lock-less request queue in shared memory. There would be no locks and applications do not have to access any internal timer state. But speculating in potential solutions before we know the actual problem is not the best way to spend our effort now me thinks.

Yeah, it’s for reference but “one big lock” is not the design pattern I’d promote. It just kills performance on multicore. We are tied to system calls in some places (e.g. sockets for packet IO), but when we are not - we should target for good performance and scaling. E.g. if user runs a retransmission timer per packet, and cancels 100% of those. It’s millions of set/cancel operations per second on a e.g. tens of cores. I’d want to be socket/system call limited, not timer set/cancel limited (which is under our control).
Again I can send updates that enhance scalability. For the priority queue-based implementation, I can think of different schemes, e.g. spreading timers randomly over different priority queues or moving timers between pq's based on the expiration time. Each pq would use a separate lock. But with enough cores, it could make sense to keep timer management in one thread/core and skip the locks. It's not just the lock but all the data that is referenced by the application when making timer calls that will limit performance. The 4-ary priority queue (basically a complete 4-ary tree) is very good at decreasing the amount of data that is accessed for different operations but a dozen accesses to cache lines stored in the cache of some other CPU will still hurt. Offloading all timer operations to a separate core would make much better use of the CPU caches me thinks.

That’s OK. Although, core-local timers won’t get you away from synchronization issues. Due to core load balancing, it’s likely that a timer set on one core will be cancelled/reset on a different core.


-Petri
Ola Liljedahl Oct. 10, 2014, 1:33 p.m. UTC | #15
On 10 October 2014 10:05, Savolainen, Petri (NSN - FI/Espoo) <
petri.savolainen@nsn.com> wrote:

>
>
>
> *From:* ext Ola Liljedahl [mailto:ola.liljedahl@linaro.org]
> *Sent:* Thursday, October 09, 2014 6:10 PM
> *To:* Savolainen, Petri (NSN - FI/Espoo)
> *Cc:* lng-odp@lists.linaro.org
> *Subject:* Re: [lng-odp] [PATCHv4] Timer API and and priority queue-based
> implementation
>
>
>
>
> Need a success/fail return value? User would need to know if the timeout
> is still coming, or not...
>
>  Why? When (if) the timeout is received, odp_timer_tmo_status() will tell
> the application is the timeout is fresh or stale (or orphaned). Which use
> case requires the application to immediately know if the timeout was
> successfully cancelled?
>
>
>
> For example, if I have a number of re-transmission timers. Outgoing
> packets and incoming ack packets are handled in the same atomic queue. Also
> timeouts would be sent to the same queue. Mostly (99.99% of the packets)
> I’ll receive the ack packet before the tmo expires. So I’ll cancel timer
> during ack packet processing. Now if cancel() does not tell me whether the
> operation was successful (usually it is), how I’d know when I can reuse the
> timer for some other packet?
>
>
>
> If cancel failed, it’s OK - I’ll receive a stale tmo later and there I can
> mark the timer reusable again. If cancel succeeded, I don’t get a
> confirmation of that, ever. I don’t want the timer send me a stale tmo on
> every cancel, since that would increase per packet event rate 50% (from 2
> to 3).
>
>
>
> So, the cancel status is needed. Right?
>
>  Probably we mean different things by cancel failing. In my API and
> implementation, cancel will always succeed in the sense that any
> outstanding timeout will never be seen as a fresh timeout. The only
> question is whether we will be able to prevent the timeout from being
> received and we can't do that if the timer has already expired. But a sent
> timeout will be detected as stale.
>
>
>
> The timer can be reused and reset even if the cancel operation "failed".
> It is only the last set or cancel operating that defines the state of the
> timer and the freshness of any received timeouts. When the timeout is
> returned it will be re-associated with the timer and if the timer has
> already expired, the timeout will be enqueued for immediate delivery.
>
>
>
> In your specific example, the application should just (re-) set the
> re-transmission timer whenever it receives another packet. No need to first
> cancel the timer and check any return code. I did specify something like
> that in one of my first proposals, this put responsibility on the
> application to remember status of previous ODP operations. With the help of
> Bill, I realized we didn't have to expose this to the application, keep it
> hidden under the API and enable more implementation choices (e.g.
> asynchronous HW or SW implementations that do not have to perform the
> operation immediately and return a success code which the application has
> to stall waiting for).
>
>
>
> Sorry if this was not clear. Probably something in the documentation needs
> to be enhanced to convey this message better.
>
>
>
> There are many packets/timers in-flight (e.g. 1000). One timer per
> outgoing packet. E.g. a packet+ack roundtrip could be 10us and retransmit
> timeout 10ms.
>
>
>
> I’d pick a timer and set it on packet output, and cancel it when ack is
> received. I have to cancel it here (not reset) because I cannot predict
> when the timer is needed for another outgoing packet. Now, in the rare case
> the cancel would fail (ack was received too close to tmo expiration). I
> would not notice that, but mark the timer ready for reuse. The tmo is now
> marked stale but not yet in my queue. On next outgoing packet (<1us) later
> I’ll reuse the same timer (reset it for 10ms). 10us later, I receive ack
> for the packet and cancel the same timer, and so on. Maybe I’ll
> cancel/reset the same timer multiple times before the stale tmo would
> travel through the timer HW/queues/scheduling back to me (and the status
> check function magic).
>
It is always the latest set or cancel operation that is active. You can
cancel that timer (even if it is "too late"), should the timeout be
received, it will be considered stale. At some later time, you could (re-)
set the timer. And cancel it. And reset it. The original timeout (if
enqueued) is still considered stale (e.g. it contains the wrong expiration
time). As soon as the stale timeout is received and returned, the latest
set operation will re-evaluated and potentially the timer has expired and
then the timeout will immediately be enqueued. Otherwise the timeout will
just be re-associated with the timer, waiting for the next expiration (if
any has been requested). There is no problem to continue to operate on a
timer even when there might be a stale timeout pending somewhere outside
the reach of the timer manager (e.g. on a queue or being processed by some
other core but not yet returned).

I don't see a problem with your use case and the functionality the timer
API is providing. There must be something lacking in the description.
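
Spelled out as a sketch (a single timer t; the comments state what each
delivered timeout would report):

#include <stdint.h>
#include <odp.h>

/* Sketch: a single timer t being cancelled and immediately reused while a
 * stale timeout may still be in flight. Comments note what
 * odp_timer_tmo_status() reports for each delivered timeout. */
static void reuse_example(odp_timer_pool_t tpool, odp_timer_t t)
{
        uint64_t now = odp_timer_current_tick(tpool);

        odp_timer_set_abs(t, now + 10000); /* arm for packet A             */
        odp_timer_cancel(t);               /* ack for A; maybe "too late"  */
        odp_timer_set_abs(t, now + 20000); /* reuse for packet B at once   */
        odp_timer_cancel(t);               /* ack for B                    */
        /* If the expiration for A had already been enqueued, that timeout
         * is reported as ODP_TMO_STALE when it is finally received, and
         * odp_timer_return_tmo() re-associates it with t, where it waits
         * for whatever the latest set (if any) requested. No fresh timeout
         * is ever delivered for a superseded set or cancel. */
}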



>
> If cancel() would return me a status code, I would not try to reuse the
> timer before the (stale) tmo is received.
>
>
>
>
>
>
>
> There is a reason for the set and cancel functions not returning any
> status codes relating to the potential and eventual success of those
> operations (the tooearly/toolate indications are (hopefully) benign
> exceptions, these situations only require checking the passed parameter
> against the current tick which hopefully can be read with little overhead).
> I want to enable asynchronous implementations of the timer service (e.g. it
> could be running on a dedicated core or far away on the SoC). Returning a
> status code indicating the success of e.g. cancel would require a
> synchronous implementation, the actual timer manager would have to return a
> status value to the timer call and the application would have to block
> waiting for this status value. This would add latency to these
> latency-critical operations.
>
>
>
> Thus the Timer API takes an optimistic approach and you have to handle the
> spurious failed set and cancel operations when those timeouts are received.
>
>
>
> Sure, but you have to consider the whole system. If the API lacks some
> necessary synchronization feature, the effort is just pushed from
> implementation to the application, which may not be able solve it or it may
> be more costly due to lower amount of SoC status information (on
> application level).
>
>
>
> API should be targeted for HW that has “proper” level of HW acceleration.
> An application will run also on cheap HW, but then you’ll get “cheap”
> performance - and that’s fine. Again on more specialized HW, the API should
> not limit the performance. If a SoC has put down HW for managing robust and
> fast timer cancel operations, I should be able enjoy from that.
>
>  And I hope this will be possible, persons responsible for HW-accelerated
> implementations of ODP need to speak out if they think the API is poor for
> them. I haven't heard or seen any such complaint.
>
>
>
> Just by keeping the API abstract should allow for different HW
> implementations. If there is HW that cannot reliably cancel e.g. expired
> timeouts, it can always be complemented by SW to keep track of stale and
> orphaned timeouts. That's why there are calls to check for timeout status
> for a received timeout and to return a timeout (regardless of status).
>
>
>
> The same is possible if cancel() returns status. Implementation can also
> decide to always “fail” and make the user to receive the stale tmo. Another
> implementation would be able to cancel it in sync (mostly succeed) and
>  performance would be e.g. 30% better due to lower event rate (e.g. 2
> packets vs 2 packets + 1 stale tmo per packet).
>
If cancel has a return value, this return value must be trusted by the
application. If the implementation decides to always "fail" (because it
doesn't want to wait for the actual result of the cancel operation as this
would stall the caller in that operation), the timeout must always be
delivered even when it could have been cancelled before expiration. I want
to avoid this contract with the application as it seems costly either way
and I can't understand why the application needs to know. See the previous
discussion.



>
>
> That’s OK. Although, core local timers won’t get you away from
> synchronizations issues. Due to core load balancing, it’s likely that a
> timer set on one core will be cancel/reset on a different core.
>
I was thinking of a design where all timers irrespective of client core
would be managed by a separate core. That timer manager core would be the
only one which accesses the internal data structures, so no locks are needed.
I also think this would be good for cache utilization, both in the timer
manager core and the clients. Then there would be communication between the
timer manager core and the clients, something light-weight, e.g. a simple
ring for requests to the timer manager. Responses (e.g. for
odp_timer_alloc) could be returned through a normal ODP queue used
internally by the timer implementation.

Some tests I have made indicate 1M-2M timer operations per second (mainly
set and cancel with some expirations, timer alloc and free operations as
well) for a 1.7GHz A15 when not using locks. This is with 100K or 1M timers
I think. I can return with more detailed performance numbers.
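
For the record, one possible shape of the offload design described above, as
a hypothetical sketch (the request format and the single-producer/
single-consumer ring are invented for illustration):

#include <stdint.h>
#include <odp.h>

/* Hypothetical sketch of the "timer manager on its own core" idea: each
 * client posts requests into its own single-producer/single-consumer ring
 * in shared memory; the manager core is the only thread touching the timer
 * state, so no locks are needed on that state. */
enum timer_req_op { TIMER_REQ_SET_ABS, TIMER_REQ_CANCEL, TIMER_REQ_FREE };

struct timer_req {
        enum timer_req_op op;
        odp_timer_t tim;
        uint64_t abs_tck;       /* used by TIMER_REQ_SET_ABS */
};

#define REQ_RING_SIZE 1024      /* power of two, illustrative */

struct req_ring {
        volatile uint32_t head; /* written by the client  */
        volatile uint32_t tail; /* written by the manager */
        struct timer_req req[REQ_RING_SIZE];
};

/* Client side: post a request; returns 0 if the ring is full. */
static int post_req(struct req_ring *r, const struct timer_req *req)
{
        uint32_t head = r->head;

        if (head - r->tail == REQ_RING_SIZE)
                return 0;
        r->req[head % REQ_RING_SIZE] = *req;
        __sync_synchronize(); /* publish the request before the index */
        r->head = head + 1;
        return 1;
}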



>
>
>
> -Petri
>
>
>
Savolainen, Petri (NSN - FI/Espoo) Oct. 13, 2014, 2:24 p.m. UTC | #16
From: ext Ola Liljedahl [mailto:ola.liljedahl@linaro.org]

Sent: Friday, October 10, 2014 4:34 PM
To: Savolainen, Petri (NSN - FI/Espoo)
Cc: lng-odp@lists.linaro.org
Subject: Re: [lng-odp] [PATCHv4] Timer API and and priority queue-based implementation

On 10 October 2014 10:05, Savolainen, Petri (NSN - FI/Espoo) <petri.savolainen@nsn.com> wrote:


From: ext Ola Liljedahl [mailto:ola.liljedahl@linaro.org]

Sent: Thursday, October 09, 2014 6:10 PM
To: Savolainen, Petri (NSN - FI/Espoo)
Cc: lng-odp@lists.linaro.org
Subject: Re: [lng-odp] [PATCHv4] Timer API and and priority queue-based implementation


Need a success/fail return value? User would need to know if the timeout is still coming, or not...
Why? When (if) the timeout is received, odp_timer_tmo_status() will tell the application if the timeout is fresh or stale (or orphaned). Which use case requires the application to immediately know if the timeout was successfully cancelled?

For example, if I have a number of re-transmission timers. Outgoing packets and incoming ack packets are handled in the same atomic queue. Also timeouts would be sent to the same queue. Mostly (99.99% of the packets) I’ll receive the ack packet before the tmo expires. So I’ll cancel timer during ack packet processing. Now if cancel() does not tell me whether the operation was successful (usually it is), how I’d know when I can reuse the timer for some other packet?

If cancel failed, it’s OK - I’ll receive a stale tmo later and there I can mark the timer reusable again. If cancel succeeded, I don’t get a confirmation of that, ever. I don’t want the timer send me a stale tmo on every cancel, since that would increase per packet event rate 50% (from 2 to 3).

So, the cancel status is needed. Right?
Probably we mean different things by cancel failing. In my API and implementation, cancel will always succeed in the sense that any outstanding timeout will never be seen as a fresh timeout. The only question is whether we will be able to prevent the timeout from being received and we can't do that if the timer has already expired. But a sent timeout will be detected as stale.

The timer can be reused and reset even if the cancel operation "failed". It is only the last set or cancel operating that defines the state of the timer and the freshness of any received timeouts. When the timeout is returned it will be re-associated with the timer and if the timer has already expired, the timeout will be enqueued for immediate delivery.

In your specific example, the application should just (re-)set the re-transmission timer whenever it receives another packet. There is no need to first cancel the timer and check any return code. I did specify something like that in one of my first proposals, but it put responsibility on the application to remember the status of previous ODP operations. With the help of Bill, I realized we didn't have to expose this to the application; we can keep it hidden under the API and enable more implementation choices (e.g. asynchronous HW or SW implementations that do not have to perform the operation immediately and return a success code which the application would have to stall waiting for).

Sorry if this was not clear. Probably something in the documentation needs to be enhanced to convey this message better.

There are many packets/timers in-flight (e.g. 1000). One timer per outgoing packet. E.g. a packet+ack roundtrip could be 10us and retransmit timeout 10ms.

I’d pick a timer and set it on packet output, and cancel it when the ack is received. I have to cancel it here (not reset) because I cannot predict when the timer is needed for another outgoing packet. Now, in the rare case that the cancel would fail (the ack was received too close to tmo expiration), I would not notice that, but would mark the timer ready for reuse. The tmo is now marked stale but not yet in my queue. On the next outgoing packet (<1us later) I’ll reuse the same timer (reset it for 10ms). 10us later, I receive the ack for that packet and cancel the same timer, and so on. Maybe I’ll cancel/reset the same timer multiple times before the stale tmo travels through the timer HW/queues/scheduling back to me (and the status check function magic).
It is always the latest set or cancel operation that is active. You can cancel that timer (even if it is "too late"); should the timeout be received, it will be considered stale. At some later time, you could (re-)set the timer. And cancel it. And reset it. The original timeout (if enqueued) is still considered stale (e.g. it contains the wrong expiration time). As soon as the stale timeout is received and returned, the latest set operation will be re-evaluated; potentially the timer has expired, and then the timeout will immediately be enqueued. Otherwise the timeout will just be re-associated with the timer, waiting for the next expiration (if any has been requested). There is no problem continuing to operate on a timer even when there might be a stale timeout pending somewhere outside the reach of the timer manager (e.g. on a queue or being processed by some other core but not yet returned).

I don't see a problem with your use case and the functionality the timer API is providing. There must be something lacking in the description.
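
For illustration, a minimal sketch of the pattern described above, written
against the API in this patch (odp_timer_set_rel, odp_timer_tmo_status,
odp_timer_userptr, odp_timer_return_tmo); conn_t, send_segment() and the
ret_tim/ret_len fields are assumptions borrowed from Example #1 in
odp_timer.h, not part of the API.

#include <odp.h>

/* Assumed application types/helpers (not part of ODP) */
typedef struct {
	odp_timer_t ret_tim; /* retransmission timer for this connection */
	uint64_t ret_len;    /* retransmission delta in timer ticks */
} conn_t;

static void send_segment(conn_t *conn); /* retransmits the last segment */

/* On every transmitted packet: just (re-)arm the timer, no cancel needed */
static void on_packet_sent(conn_t *conn)
{
	odp_timer_set_rel(conn->ret_tim, conn->ret_len);
}

/* Handling of a received timeout buffer for the connection */
static void on_retrans_timeout(odp_buffer_t buf)
{
	odp_timer_tmo_t tmo = odp_timeout_from_buffer(buf);
	conn_t *conn;

	switch (odp_timer_tmo_status(tmo)) {
	case ODP_TMO_FRESH:
		/* No ack arrived before expiration => retransmit and re-arm */
		conn = odp_timer_userptr(tmo);
		send_segment(conn);
		odp_timer_set_rel(conn->ret_tim, conn->ret_len);
		break;
	case ODP_TMO_STALE:
		/* Timer was reset or cancelled after this timeout was
		 * enqueued; returning it below re-binds it to the latest
		 * set operation */
		break;
	case ODP_TMO_ORPHAN:
		/* The parent timer has already been freed */
		break;
	}
	/* Always give the timeout back to the timer manager */
	odp_timer_return_tmo(tmo);
}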


So in principle, the API guarantees that the application can call set() and cancel() in an infinite loop, without calling tmo_status() in between. It does not matter whether cancel calls succeed (tmo is not delivered) or are late (a stale tmo would be delivered). It’s also guaranteed that nothing blows up (in timer HW, queue/scheduling HW, buffer management HW, etc.) in such an infinite loop. All things that were allocated in a set() call are freed in the cancel() call (by default nothing was allocated). There’s nothing accumulating in the timer or queue manager HW, etc.

In other words, it sounds like the API forces all implementations to do generation counting between timer set()/cancel()/tmo_status() calls in SW - at least I don’t know of any timer HW doing this. In practice, you’d need a SW lock between set() and tmo_status() to synchronize which call sets the timeout request into the HW. For example, synchronization is needed to ensure that there’s only one copy of the tmo buffer descriptor in HW queues, etc.

One of the main goals of ODP is to avoid SW locking in the fast path. Now, it sounds like this fast path API forces SW locking on all implementations - also on those that could avoid it (by returning the cancel success/too-late status).

A use case would be the previous example turned into multi-core. Instead of one atomic queue we’d have two. The first queue is used for outgoing packets and timer set() for those. The second queue would process incoming ack packets and call timer cancel() for those. It would also process tmo messages. Free timers (either cancelled or timed out) would be returned to the output side (through a HW poll queue, pool, etc. - no locks there). Now the same timer can be set() on the first queue (core) in parallel with the tmo_status() processing (for a stale tmo) on the second queue (core). Those cores need to sync (with a SW lock) to avoid a race in timer state management.

If cancel would return a status, the application would return the timer to the other side only after it’s really free (= cancel succeeded, or a good/stale tmo was received).


If cancel() would return me a status code, I would not try to reuse the timer before the (stale) tmo is received.
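
For comparison, a rough sketch of the variant argued for here - a cancel that
reports whether it beat the expiration. Note that this is NOT the API in the
posted patch; cancel_status_t, timer_cancel_with_status(), recycle_timer()
and the conn fields are all hypothetical.

#include <stdbool.h>

typedef enum {
	CANCEL_OK,      /* timeout will not be delivered */
	CANCEL_TOOLATE  /* already expired/enqueued; a stale tmo will arrive */
} cancel_status_t;

typedef struct {
	void *ret_tim;        /* hypothetical timer handle */
	bool await_stale_tmo; /* set while a stale tmo is still in flight */
} conn_t;

cancel_status_t timer_cancel_with_status(void *tim); /* hypothetical */
void recycle_timer(void *tim);                       /* hypothetical */

/* Ack-processing path on the second (ack) queue */
static void on_ack(conn_t *conn)
{
	if (timer_cancel_with_status(conn->ret_tim) == CANCEL_OK) {
		/* Timer is free immediately; hand it back to the output side */
		recycle_timer(conn->ret_tim);
	} else {
		/* A stale tmo is still in flight; recycle the timer only
		 * when that tmo has been received and returned */
		conn->await_stale_tmo = true;
	}
}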



There is a reason for the set and cancel functions not returning any status codes relating to the potential and eventual success of those operations (the too-early/too-late indications are (hopefully) benign exceptions; these situations only require checking the passed parameter against the current tick, which hopefully can be read with little overhead). I want to enable asynchronous implementations of the timer service (e.g. it could be running on a dedicated core or far away on the SoC). Returning a status code indicating the success of e.g. cancel would require a synchronous implementation; the actual timer manager would have to return a status value to the timer call and the application would have to block waiting for this status value. This would add latency to these latency-critical operations.

Thus the Timer API takes an optimistic approach and you have to handle the spurious failed set and cancel operations when those timeouts are received.

Sure, but you have to consider the whole system. If the API lacks some necessary synchronization feature, the effort is just pushed from the implementation to the application, which may not be able to solve it, or it may be more costly due to the lower amount of SoC status information available (at the application level).

The API should be targeted at HW that has a “proper” level of HW acceleration. An application will also run on cheap HW, but then you’ll get “cheap” performance - and that’s fine. Again, on more specialized HW the API should not limit the performance. If a SoC has put down HW for managing robust and fast timer cancel operations, I should be able to benefit from that.
And I hope this will be possible; people responsible for HW-accelerated implementations of ODP need to speak out if they think the API is poor for them. I haven't heard or seen any such complaint.

Just keeping the API abstract should allow for different HW implementations. If there is HW that cannot reliably cancel e.g. expired timeouts, it can always be complemented by SW to keep track of stale and orphaned timeouts. That's why there are calls to check the status of a received timeout and to return a timeout (regardless of status).

The same is possible if cancel() returns a status. An implementation can also decide to always “fail” and make the user receive the stale tmo. Another implementation would be able to cancel it synchronously (mostly succeeding), and performance would be e.g. 30% better due to the lower event rate (e.g. 2 packets vs 2 packets + 1 stale tmo per packet).
If cancel has a return value, this return value must be trusted by the application. If the implementation decides to always "fail" (because it doesn't want to wait for the actual result of the cancel operation, as this would stall the caller in that operation), the timeout must always be delivered even when it could have been cancelled before expiration. I want to avoid this contract with the application as it seems costly either way, and I can't understand why the application needs to know. See the previous discussion.

The implementation has a choice of how to synchronize cancel: either synchronously and return success, asynchronously and return fail, or a combination (only fail when it’s “too late”).

When you abstract too much, the implementation is forced to fix multicore synchronization issues in SW. The application would run its part of the state machine with very little overhead (e.g. reuse a timer only when the implementation tells it that it’s reusable => much lower overhead/better scaling than the implementation taking that lock). See the previous use case.


That’s OK. Although, core-local timers won’t get you away from synchronization issues. Due to core load balancing, it’s likely that a timer set on one core will be cancelled/reset on a different core.
I was thinking of a design where all timers, irrespective of client core, would be managed by a separate core. That timer manager core would be the only one accessing the internal data structures, so no locks would be needed. I also think this would be good for cache utilization, both in the timer manager core and in the clients. There would then be communication between the timer manager core and the clients, something light-weight, e.g. a simple ring for requests to the timer manager. Responses (e.g. for odp_timer_alloc) could be returned through a normal ODP queue used internally by the timer implementation.

Some tests I have made indicate 1M-2M timer operations per second (mainly set and cancel with some expirations, timer alloc and free operations as well) for a 1.7GHz A15 when not using locks. This is with 100K or 1M timers I think. I can return with more detailed performance numbers.

1M operations/sec would translate to less than 1 Gbps of small packets. So a single core could already be a performance bottleneck with linux-generic (with a timer per packet and about ten cores). So maybe it is better to keep it distributed.

-Petri
Ola Liljedahl Oct. 22, 2014, 1 p.m. UTC | #17
On 13 October 2014 16:24, Savolainen, Petri (NSN - FI/Espoo) <
petri.savolainen@nsn.com> wrote:

>
>
>
>
> *From:* ext Ola Liljedahl [mailto:ola.liljedahl@linaro.org]
> *Sent:* Friday, October 10, 2014 4:34 PM
> *To:* Savolainen, Petri (NSN - FI/Espoo)
> *Cc:* lng-odp@lists.linaro.org
> *Subject:* Re: [lng-odp] [PATCHv4] Timer API and and priority queue-based
> implementation
>
>
>
> On 10 October 2014 10:05, Savolainen, Petri (NSN - FI/Espoo) <
> petri.savolainen@nsn.com> wrote:
>
>
>
>
>
> *From:* ext Ola Liljedahl [mailto:ola.liljedahl@linaro.org]
> *Sent:* Thursday, October 09, 2014 6:10 PM
> *To:* Savolainen, Petri (NSN - FI/Espoo)
> *Cc:* lng-odp@lists.linaro.org
> *Subject:* Re: [lng-odp] [PATCHv4] Timer API and and priority queue-based
> implementation
>
>
>
>
> Need a success/fail return value? User would need to know if the timeout
> is still coming, or not...
>
>  Why? When (if) the timeout is received, odp_timer_tmo_status() will tell
> the application is the timeout is fresh or stale (or orphaned). Which use
> case requires the application to immediately know if the timeout was
> successfully cancelled?
>
>
>
> For example, if I have a number of re-transmission timers. Outgoing
> packets and incoming ack packets are handled in the same atomic queue. Also
> timeouts would be sent to the same queue. Mostly (99.99% of the packets)
> I’ll receive the ack packet before the tmo expires. So I’ll cancel timer
> during ack packet processing. Now if cancel() does not tell me whether the
> operation was successful (usually it is), how I’d know when I can reuse the
> timer for some other packet?
>
>
>
> If cancel failed, it’s OK - I’ll receive a stale tmo later and there I can
> mark the timer reusable again. If cancel succeeded, I don’t get a
> confirmation of that, ever. I don’t want the timer send me a stale tmo on
> every cancel, since that would increase per packet event rate 50% (from 2
> to 3).
>
>
>
> So, the cancel status is needed. Right?
>
>  Probably we mean different things by cancel failing. In my API and
> implementation, cancel will always succeed in the sense that any
> outstanding timeout will never be seen as a fresh timeout. The only
> question is whether we will be able to prevent the timeout from being
> received and we can't do that if the timer has already expired. But a sent
> timeout will be detected as stale.
>
>
>
> The timer can be reused and reset even if the cancel operation "failed".
> It is only the last set or cancel operating that defines the state of the
> timer and the freshness of any received timeouts. When the timeout is
> returned it will be re-associated with the timer and if the timer has
> already expired, the timeout will be enqueued for immediate delivery.
>
>
>
> In your specific example, the application should just (re-) set the
> re-transmission timer whenever it receives another packet. No need to first
> cancel the timer and check any return code. I did specify something like
> that in one of my first proposals, this put responsibility on the
> application to remember status of previous ODP operations. With the help of
> Bill, I realized we didn't have to expose this to the application, keep it
> hidden under the API and enable more implementation choices (e.g.
> asynchronous HW or SW implementations that do not have to perform the
> operation immediately and return a success code which the application has
> to stall waiting for).
>
>
>
> Sorry if this was not clear. Probably something in the documentation needs
> to be enhanced to convey this message better.
>
>
>
> There are many packets/timers in-flight (e.g. 1000). One timer per
> outgoing packet. E.g. a packet+ack roundtrip could be 10us and retransmit
> timeout 10ms.
>
>
>
> I’d pick a timer and set it on packet output, and cancel it when ack is
> received. I have to cancel it here (not reset) because I cannot predict
> when the timer is needed for another outgoing packet. Now, in the rare case
> the cancel would fail (ack was received too close to tmo expiration). I
> would not notice that, but mark the timer ready for reuse. The tmo is now
> marked stale but not yet in my queue. On next outgoing packet (<1us) later
> I’ll reuse the same timer (reset it for 10ms). 10us later, I receive ack
> for the packet and cancel the same timer, and so on. Maybe I’ll
> cancel/reset the same timer multiple times before the stale tmo would
> travel through the timer HW/queues/scheduling back to me (and the status
> check function magic).
>
> It is always the latest set or cancel operation that is active. You can
> cancel that timer (even if it is "too late"), should the timeout be
> received, it will be considered stale. At some later time, you could (re-)
> set the timer. And cancel it. And reset it. The original timeout (if
> enqueued) is still considered stale (e.g. it contains the wrong expiration
> time). As soon as the stale timeout is received and returned, the latest
> set operation will re-evaluated and potentially the timer has expired and
> then the timeout will immediately be enqueued. Otherwise the timeout will
> just be re-associated with the timer, waiting for the next expiration (if
> any has been requested). There is no problem to continue to operate on a
> timer even when there might be a stale timeout pending somewhere outside
> the reach of the timer manager (e.g. on a queue or being processed by some
> other core but not yet returned).
>
>
>
> I don't see a problem with your use case and the functionality the timer
> API is providing. There must be something lacking in the description.
>
>
>
>
>
> So in principle, the API guarantees that application can call set() and
> cancel() in an infinite loop, without calling tmo_status() in between. It
> does not matter if cancel calls succeed (tmo is not delivered) or are late
> (stale tmo would be delivered). Also it’s guaranteed that nothing blows up
> (in timer HW, queue/scheduling HW, buffer management HW, etc) in such an
> infinite loop. All things that were allocated in a set() call is freed in
> the cancel() call (by default nothing was allocated). There’s nothing
> accumulating into the timer or queue manager HW, etc.
>
>
>
> In other words, it sounds like the API forces all implementations to do
> generation counting between timer set()/cancel()/tmo_status() calls in SW -
> at least I don’t know any timer HW doing this. In practice, you’d need a SW
> lock between set() and tmo_status() to synchronize which call sets the
> timeout request into the HW. For example, synchronization is needed to
> ensure that there’s only one copy of the tmo buffer descriptor in HW
> queues, etc.
>
>
>
> One of the main goals of ODP is to avoid SW locking in the fast path. Now,
> it sounds like that this fast path API forces SW locking on all
> implementations - also on those that could avoid it (by returning the
> cancel success/too late status).
>
I think the update of the tag and the expiration time can be performed
using atomic updates. I intend to prototype this ASAP (I have vacation next
week and then some travel; after that, or perhaps on a plane). Two atomic
updates are required to mark any expired timeouts stale and to set a new
expiration time (or to mark the timer as inactive for cancel operations).
Any other updates of internal timer data structures would be done
asynchronously by some internal timer thread or by the HW.
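
As a rough illustration only (not the actual implementation) of the tag idea,
assuming C11 atomics; sk_timer_t, sk_timeout_t and the field names are made
up for the sketch:

#include <stdatomic.h>
#include <stdint.h>

#define TICK_INACTIVE UINT64_MAX

typedef struct {
	_Atomic uint32_t tag;     /* bumped by every set/cancel operation */
	_Atomic uint64_t exp_tck; /* requested expiration, TICK_INACTIVE if unset */
} sk_timer_t;

typedef struct {
	uint32_t tag;     /* tag copied from the timer when it expired */
	uint64_t exp_tck; /* expiration the timeout was generated for */
} sk_timeout_t;

/* set/reset: bump the tag (making any in-flight timeout stale), then store
 * the new expiration time - two atomic updates, no lock */
static void sk_timer_set_abs(sk_timer_t *tim, uint64_t abs_tck)
{
	atomic_fetch_add_explicit(&tim->tag, 1, memory_order_relaxed);
	atomic_store_explicit(&tim->exp_tck, abs_tck, memory_order_release);
}

/* cancel: same two updates, but the expiration becomes "inactive" */
static void sk_timer_cancel(sk_timer_t *tim)
{
	atomic_fetch_add_explicit(&tim->tag, 1, memory_order_relaxed);
	atomic_store_explicit(&tim->exp_tck, TICK_INACTIVE,
			      memory_order_release);
}

/* On timeout reception: the timeout is fresh only if no set/cancel has
 * happened since it was generated */
static int sk_tmo_is_fresh(const sk_timer_t *tim, const sk_timeout_t *tmo)
{
	return atomic_load_explicit(&tim->tag, memory_order_acquire)
	       == tmo->tag;
}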



>
>
> A use case would be the previous example turned into multi-core. Instead
> of one atomic queue - we’d have two. The first queue is used for outgoing
> packets and timer set() for those. The second queue would process incoming
> ack packets and call  timer cancel() for those. It would process also tmo
> messages. Free timers (either cancelled or timeouted) would be returned to
> the output side (through HW poll queue, pool, etc … no locks there). Now
> the same timer can be set() on the first queue (core)  in parallel with the
> tmo_status() processing (for a stale tmo) on the second queue (core). Those
> cores need to sync (with a SW lock) to avoid race in timer state
> management.
>
>
>
> If cancel would return status, application would return the timer to the
> other side only after it’s really free (= cancel succeeded or good/stale
> tmo was received).
>
I can change the API to return a status code from cancel and save any
asynchronous experimentation for later. It shouldn't be too difficult to
update applications later if the API should change and the return value
disappear; that is less work to do.



>
>
>
>
> If cancel() would return me a status code, I would not try to reuse the
> timer before the (stale) tmo is received.
>
>
>
>
>
>
>
> There is a reason for the set and cancel functions not returning any
> status codes relating to the potential and eventual success of those
> operations (the tooearly/toolate indications are (hopefully) benign
> exceptions, these situations only require checking the passed parameter
> against the current tick which hopefully can be read with little overhead).
> I want to enable asynchronous implementations of the timer service (e.g. it
> could be running on a dedicated core or far away on the SoC). Returning a
> status code indicating the success of e.g. cancel would require a
> synchronous implementation, the actual timer manager would have to return a
> status value to the timer call and the application would have to block
> waiting for this status value. This would add latency to these
> latency-critical operations.
>
>
>
> Thus the Timer API takes an optimistic approach and you have to handle the
> spurious failed set and cancel operations when those timeouts are received.
>
>
>
> Sure, but you have to consider the whole system. If the API lacks some
> necessary synchronization feature, the effort is just pushed from
> implementation to the application, which may not be able solve it or it may
> be more costly due to lower amount of SoC status information (on
> application level).
>
>
>
> API should be targeted for HW that has “proper” level of HW acceleration.
> An application will run also on cheap HW, but then you’ll get “cheap”
> performance - and that’s fine. Again on more specialized HW, the API should
> not limit the performance. If a SoC has put down HW for managing robust and
> fast timer cancel operations, I should be able enjoy from that.
>
>  And I hope this will be possible, persons responsible for HW-accelerated
> implementations of ODP need to speak out if they think the API is poor for
> them. I haven't heard or seen any such complaint.
>
>
>
> Just by keeping the API abstract should allow for different HW
> implementations. If there is HW that cannot reliably cancel e.g. expired
> timeouts, it can always be complemented by SW to keep track of stale and
> orphaned timeouts. That's why there are calls to check for timeout status
> for a received timeout and to return a timeout (regardless of status).
>
>
>
> The same is possible if cancel() returns status. Implementation can also
> decide to always “fail” and make the user to receive the stale tmo. Another
> implementation would be able to cancel it in sync (mostly succeed) and
>  performance would be e.g. 30% better due to lower event rate (e.g. 2
> packets vs 2 packets + 1 stale tmo per packet).
>
>  If cancel has a return value, this return value must be trusted by the
> application. If the implementation decides to always "fail" (because it
> doesn't want to wait for the actual result of the cancel operation as this
> would stall the operation in that operation), the timeout must always be
> delivered even when it could have been cancelled before expiration. I want
> to avoid this contract with the application as it seems costly either way
> and I can't understand why the application needs to know. See the previous
> discussion.
>
>
>
> The implementation has the choice how to synchronize cancel. Either
> synchronously and return success, asynchronously and return fail, or in
> combination (only fail when it’s “too late”).
>
>
>
> When you abstract too much, implementation is forced to fix multicore
> synchronization issues in SW. Application would run it’s part of the state
> machine with very little overhead (e.g. reuse timer only when
> implementation tells it’s reusable => much lower overhead/better scaling
> than implementation taking that lock). See the previous use case.
>
If this doesn't have other consequences for the application...


>
>
>
>
> That’s OK. Although, core local timers won’t get you away from
> synchronizations issues. Due to core load balancing, it’s likely that a
> timer set on one core will be cancel/reset on a different core.
>
>  I was thinking of a design where all timers irrespective of client core
> would be managed by a separate core. That timer manager core would be the
> only one which accesses the internal data structures so no need for locks,
> I also think this would be good for cache utilization, both in the timer
> manager core and the clients. Then there would be communication between the
> timer manager core and the clients, something light-weight, e.g. a simple
> ring for requests to the timer manager. Responses (e.g. for
> odp_timer_alloc) could be returned through a normal ODP queue used
> internally by the timer implementation.
>
>
>
> Some tests I have made indicate 1M-2M timer operations per second (mainly
> set and cancel with some expirations, timer alloc and free operations as
> well) for a 1.7GHz A15 when not using locks. This is with 100K or 1M timers
> I think. I can return with more detailed performance numbers.
>
>
>
> 1M operations/sec would translate less than 1Gbps small packet. So single
> core could be a performance bottleneck already with linux-generic (with
> timer per packet, and about ten cores). So maybe better to keep it
> distributed.
>
I have ported my stress test/performance benchmark to the original timer API
so I can compare performance. Unfortunately the original implementation does
not work very well; I had to add some global spin locks in order to avoid
segfaults. Another problem with the original implementation is that the
POSIX itimer thread takes increasingly longer to execute and thus doesn't
finish before the next tick, so the kernel creates a new thread. This makes
the system unresponsive; if you are lucky the program crashes, which kills
all of those itimer threads and you get your machine back. The fact that the
original implementation puts timeouts in 1024 different lists also leaks out
through the API to the application and becomes the de facto max timeout (not
the max that the application specified).

I am using a 1000ms (!) tick period for the original timer; the PQ timer is
using a 1ms tick, so it has more overhead. I am measuring ns/operation
(average of set (~28%), reset (~42%) and cancel (~28%)) on a 2.0GHz A15,
taking the best measurements out of many runs. I have subtracted 100ns (up
to 33K timeouts), 125ns (for 100K), 190ns (for 333K) and 210ns (for 1M
timeouts) to compensate for the application overhead (e.g. calling rand_r
multiple times), which was measured when not making the timer calls.

timeouts      org       PQ   (ns/operation)
    1000      570      214
    3333      580      224
   10000      622      227
   33333     1296      322
  100000        -      498
  333333        -      528
 1000000        -      565

The original implementation cannot handle 100000 concurrent timeouts with a
1000ms tick period without the periodic "timer notification" taking too long
to finish (longer than the tick period). With shorter tick periods (1ms,
10ms, 100ms), this problem of course occurs for even smaller numbers of
timeouts. A 1ms tick period cannot reliably handle 2000 concurrent timeouts
on the 2GHz A15.

The PQ timer (which uses a 4-ary balanced heap/tree) does not show a perfect
log curve. It probably depends on there being a certain amount of constant
overhead (function calls, spin locks, etc.) and then some cache effects, with
increased degradation when going from 33K to 100K timeouts (working set too
large for the L2 cache?); after that it stabilizes again.
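
For reference, a simplified sketch of the 4-ary heap arithmetic used by the
priority queue (CHILD(n) = 4n + 1, PARENT(n) = (n - 1)/4, as in
odp_priority_queue.c); the real code stores pq_element pointers plus a cached
priority per node, whereas this sketch just sifts plain 64-bit keys:

#include <stdint.h>

#define NUM_CHILDREN 4
#define CHILD(n)  (NUM_CHILDREN * (n) + 1)
#define PARENT(n) (((n) - 1) / NUM_CHILDREN)

/* Sift the key at 'idx' down until no child is smaller (min-heap) */
static void sift_down(uint64_t heap[], uint32_t num_elems, uint32_t idx)
{
	for (;;) {
		uint32_t first = CHILD(idx);
		if (first >= num_elems)
			break; /* no children, we are done */
		/* Find the smallest of the (up to) four children */
		uint32_t last = first + NUM_CHILDREN;
		if (last > num_elems)
			last = num_elems;
		uint32_t smallest = first;
		for (uint32_t c = first + 1; c < last; c++)
			if (heap[c] < heap[smallest])
				smallest = c;
		if (heap[idx] <= heap[smallest])
			break; /* heap property restored */
		uint64_t tmp = heap[idx];
		heap[idx] = heap[smallest];
		heap[smallest] = tmp;
		idx = smallest;
	}
}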

Regarding multicore scalability, I have an idea on how to divide the timers
over separate priority queues, each protected by its own lock. The number
of priority queues could equal the number of cores. This is another
performance optimization we could implement later when needed. For ODP 1.0
we need the agreed API and a correct implementation.
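
A hedged sketch of that idea, using the internal types from this patch
(priority_queue, pq_element, pq_reset_element) plus ODP ticket locks;
NUM_PARTITIONS, timer_partition_t and the pointer hash are assumptions, not
part of the patch, and initialization (priority_queue_con,
odp_ticketlock_init) is omitted:

#include <stdint.h>
#include <odp_ticketlock.h>
#include "odp_priority_queue_internal.h"

#define NUM_PARTITIONS 8 /* e.g. one per core */

typedef struct {
	odp_ticketlock_t lock; /* protects this partition only */
	priority_queue   pq;   /* timers whose handles hash to this partition */
} timer_partition_t;

static timer_partition_t part[NUM_PARTITIONS];

static inline timer_partition_t *partition_of(const void *timer)
{
	/* Cheap pointer hash; any reasonably even spread will do */
	uintptr_t h = (uintptr_t)timer >> 6;
	return &part[h % NUM_PARTITIONS];
}

/* E.g. a (re)set operation only takes the lock of the owning partition */
static void partitioned_timer_reset(void *timer, pq_element *elem,
				    uint64_t abs_tck)
{
	timer_partition_t *p = partition_of(timer);

	odp_ticketlock_lock(&p->lock);
	pq_reset_element(&p->pq, elem, abs_tck);
	odp_ticketlock_unlock(&p->lock);
}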

-- Ola



>
>
> -Petri
>
>
>
diff mbox

Patch

diff --git a/example/timer/odp_timer_test.c b/example/timer/odp_timer_test.c
index 6e1715d..750d785 100644
--- a/example/timer/odp_timer_test.c
+++ b/example/timer/odp_timer_test.c
@@ -41,67 +41,89 @@  typedef struct {
 /** @private Barrier for test synchronisation */
 static odp_barrier_t test_barrier;
 
-/** @private Timer handle*/
-static odp_timer_t test_timer;
+/** @private Timer pool handle */
+static odp_timer_pool_t tp;
 
 
+/** @private Timeout status ASCII strings */
+static const char *const status2str[] = {
+	"fresh", "stale", "orphaned"
+};
+
 /** @private test timeout */
 static void test_abs_timeouts(int thr, test_args_t *args)
 {
-	uint64_t tick;
 	uint64_t period;
 	uint64_t period_ns;
 	odp_queue_t queue;
-	odp_buffer_t buf;
-	int num;
+	int remain = args->tmo_count;
+	odp_timer_t hdl;
+	uint64_t tick;
 
 	ODP_DBG("  [%i] test_timeouts\n", thr);
 
 	queue = odp_queue_lookup("timer_queue");
 
 	period_ns = args->period_us*ODP_TIME_USEC;
-	period    = odp_timer_ns_to_tick(test_timer, period_ns);
+	period    = odp_timer_ns_to_tick(tp, period_ns);
 
 	ODP_DBG("  [%i] period %"PRIu64" ticks,  %"PRIu64" ns\n", thr,
 		period, period_ns);
 
-	tick = odp_timer_current_tick(test_timer);
-
-	ODP_DBG("  [%i] current tick %"PRIu64"\n", thr, tick);
-
-	tick += period;
+	ODP_DBG("  [%i] current tick %"PRIu64"\n", thr,
+		odp_timer_current_tick(tp));
 
-	if (odp_timer_absolute_tmo(test_timer, tick, queue, ODP_BUFFER_INVALID)
-	    == ODP_TIMER_TMO_INVALID){
-		ODP_DBG("Timeout request failed\n");
+	odp_timer_t test_timer;
+	test_timer = odp_timer_alloc(tp, queue, NULL);
+	if (test_timer == ODP_TIMER_INVALID) {
+		ODP_ERR("Failed to allocate timer\n");
 		return;
 	}
+	tick = odp_timer_current_tick(tp);
+	hdl = test_timer;
 
-	num = args->tmo_count;
-
-	while (1) {
-		odp_timeout_t tmo;
-
-		buf = odp_schedule_one(&queue, ODP_SCHED_WAIT);
-
-		tmo  = odp_timeout_from_buffer(buf);
-		tick = odp_timeout_tick(tmo);
-
-		ODP_DBG("  [%i] timeout, tick %"PRIu64"\n", thr, tick);
-
-		odp_buffer_free(buf);
-
-		num--;
-
-		if (num == 0)
-			break;
+	while (remain != 0) {
+		odp_buffer_t buf;
+		odp_timer_tmo_t tmo;
+		odp_timer_tmo_status_t stat;
+		odp_timer_set_t rc;
 
 		tick += period;
+		rc = odp_timer_set_abs(hdl, tick);
+		if (odp_unlikely(rc != ODP_TIMER_SET_SUCCESS)) {
+			ODP_ERR("odp_timer_set_abs() failed (%u)\n", rc);
+			abort();
+		}
 
-		odp_timer_absolute_tmo(test_timer, tick,
-				       queue, ODP_BUFFER_INVALID);
+		/* Get the next ready buffer/timeout */
+		buf = odp_schedule_one(&queue, ODP_SCHED_WAIT);
+		if (odp_unlikely(odp_buffer_type(buf) !=
+				 ODP_BUFFER_TYPE_TIMEOUT)) {
+			ODP_ERR("Unexpected buffer type received\n");
+			abort();
+		}
+		tmo = odp_timeout_from_buffer(buf);
+		stat = odp_timer_tmo_status(tmo);
+		tick = odp_timer_expiration(tmo);
+		hdl = odp_timer_handle(tmo);
+		ODP_DBG("  [%i] timeout, tick %"PRIu64", status %s\n",
+			thr, tick, status2str[stat]);
+		/* if (stat == ODP_TMO_FRESH)  - do your thing! */
+		if (odp_likely(stat == ODP_TMO_ORPHAN)) {
+			/* Some other thread freed the corresponding
+			   timer after the timeout was already
+			   enqueued */
+			/* Timeout handle is invalid, use our own timer */
+			hdl = test_timer;
+		}
+		/* Return timeout to timer manager, regardless of status */
+		odp_timer_return_tmo(tmo);
+		remain--;
 	}
 
+	odp_timer_cancel(test_timer);
+	odp_timer_free(test_timer);
+
 	if (odp_queue_sched_type(queue) == ODP_SCHED_SYNC_ATOMIC)
 		odp_schedule_release_atomic();
 }
@@ -155,7 +177,6 @@  static void print_usage(void)
 	printf("Options:\n");
 	printf("  -c, --count <number>    core count, core IDs start from 1\n");
 	printf("  -r, --resolution <us>   timeout resolution in usec\n");
-	printf("  -m, --min <us>          minimum timeout in usec\n");
 	printf("  -x, --max <us>          maximum timeout in usec\n");
 	printf("  -p, --period <us>       timeout period in usec\n");
 	printf("  -t, --timeouts <count>  timeout repeat count\n");
@@ -190,14 +211,14 @@  static void parse_args(int argc, char *argv[], test_args_t *args)
 	/* defaults */
 	args->core_count    = 0; /* all cores */
 	args->resolution_us = 10000;
-	args->min_us        = args->resolution_us;
+	args->min_us        = 0;
 	args->max_us        = 10000000;
 	args->period_us     = 1000000;
 	args->tmo_count     = 30;
 
 	while (1) {
 		opt = getopt_long(argc, argv, "+c:r:m:x:p:t:h",
-				 longopts, &long_index);
+				  longopts, &long_index);
 
 		if (opt == -1)
 			break;	/* No more options */
@@ -321,10 +342,25 @@  int main(int argc, char *argv[])
 				      ODP_BUFFER_TYPE_TIMEOUT);
 
 	if (pool == ODP_BUFFER_POOL_INVALID) {
-		ODP_ERR("Pool create failed.\n");
+		ODP_ERR("Buffer pool create failed.\n");
 		return -1;
 	}
 
+	tp = odp_timer_pool_create("timer_pool", pool,
+				   args.resolution_us*ODP_TIME_USEC,
+				   args.min_us*ODP_TIME_USEC,
+				   args.max_us*ODP_TIME_USEC,
+				   num_workers, /* One timer per worker */
+				   true,
+				   ODP_CLOCK_CPU);
+	if (tp == ODP_TIMER_POOL_INVALID) {
+		ODP_ERR("Timer pool create failed.\n");
+		return -1;
+	}
+	odp_timer_pool_start();
+
+	odp_shm_print_all();
+
 	/*
 	 * Create a queue for timer test
 	 */
@@ -340,19 +376,6 @@  int main(int argc, char *argv[])
 		return -1;
 	}
 
-	test_timer = odp_timer_create("test_timer", pool,
-				      args.resolution_us*ODP_TIME_USEC,
-				      args.min_us*ODP_TIME_USEC,
-				      args.max_us*ODP_TIME_USEC);
-
-	if (test_timer == ODP_TIMER_INVALID) {
-		ODP_ERR("Timer create failed.\n");
-		return -1;
-	}
-
-
-	odp_shm_print_all();
-
 	printf("CPU freq %"PRIu64" hz\n", odp_sys_cpu_hz());
 	printf("Cycles vs nanoseconds:\n");
 	ns = 0;
diff --git a/platform/linux-generic/Makefile.am b/platform/linux-generic/Makefile.am
index d076d50..71f923c 100644
--- a/platform/linux-generic/Makefile.am
+++ b/platform/linux-generic/Makefile.am
@@ -59,6 +59,7 @@  __LIB__libodp_la_SOURCES = \
 			   odp_packet_flags.c \
 			   odp_packet_io.c \
 			   odp_packet_socket.c \
+			   odp_priority_queue.c \
 			   odp_queue.c \
 			   odp_ring.c \
 			   odp_rwlock.c \
diff --git a/platform/linux-generic/include/api/odp_timer.h b/platform/linux-generic/include/api/odp_timer.h
index 01db839..82a1e05 100644
--- a/platform/linux-generic/include/api/odp_timer.h
+++ b/platform/linux-generic/include/api/odp_timer.h
@@ -8,9 +8,193 @@ 
 /**
  * @file
  *
- * ODP timer
+ * ODP timer service
  */
 
+/** Example #1 Retransmission timer (e.g. for reliable connections)
+ @code
+
+//Create timer pool for reliable connections
+#define SEC 1000000000ULL //1s expressed in nanoseconds
+odp_timer_pool_t tcp_tpid =
+    odp_timer_pool_create("TCP",
+			  buffer_pool,
+			  1000000,//resolution 1ms
+			  0,//min tmo
+			  7200 * SEC,//max tmo length 2hours
+			  40000,//num_timers
+			  true,//shared
+			  ODP_CLOCK_CPU
+			 );
+if (tcp_tpid == ODP_TIMER_POOL_INVALID)
+{
+	//Failed to create timer pool => fatal error
+}
+
+
+//Setting up a new connection
+//Allocate retransmission timeout (identical for supervision timeout)
+//The user pointer points back to the connection context
+conn->ret_tim = odp_timer_alloc(tcp_tpid, queue, conn);
+//Check if all resources were successfully allocated
+if (conn->ret_tim == ODP_TIMER_INVALID)
+{
+	//Failed to allocate all resources for connection => tear down
+	//Destroy timeout
+	odp_timer_free(conn->ret_tim);
+	//Tear down connection
+	...
+	return false;
+}
+//All necessary resources successfully allocated
+//Compute initial retransmission length in timer ticks
+conn->ret_len = odp_timer_ns_to_tick(tcp_tpid, 3 * SEC);//Per RFC1122
+//Arm the timer
+odp_timer_set_rel(conn->ret_tim, conn->ret_len);
+return true;
+
+
+//A packet for the connection has just been transmitted
+//Reset the retransmission timer
+odp_timer_set_rel(conn->ret_tim, conn->ret_len);
+
+
+//A retransmission timeout buffer for the connection has been received
+odp_timer_tmo_t tmo = odp_timeout_from_buffer(buf);
+odp_timer_tmo_status_t stat = odp_timer_tmo_status(tmo);
+//Check if timeout is fresh or stale, for stale timeouts we need to reset the
+//timer
+if (stat == ODP_TMO_FRESH) {
+	//Fresh timeout, last transmitted packet not acked in time =>
+	//retransmit
+	//Get connection from timeout event
+	conn = odp_timer_get_userptr(tmo);
+	//Retransmit last packet (e.g. TCP segment)
+	...
+	//Re-arm timer using original delta value
+	odp_timer_set_rel(conn->ret_tim, conn->ret_len);
+} else if (stat == ODP_TMO_ORPHAN) {
+	odp_free_buffer(buf);
+	return;//Get out of here
+} // else stat == ODP_TMO_STALE, do nothing
+//Finished processing, return timeout
+odp_timer_return_tmo(tmo);
+
+ @endcode
+*/
+
+/** Example #2 Periodic tick
+ @code
+
+//Create timer pool for periodic ticks
+odp_timer_pool_t per_tpid =
+    odp_timer_pool_create("periodic-tick",
+			  buffer_pool,
+			  1,//resolution 1ns
+			  1,//minimum timeout length 1ns
+			  1000000000,//maximum timeout length 1s
+			  10,//num_timers
+			  false,//not shared
+			  ODP_CLOCK_CPU
+			 );
+if (per_tpid == ODP_TIMER_POOL_INVALID)
+{
+    //Failed to create timer pool => fatal error
+}
+
+
+//Allocate periodic timer
+tim_1733 = odp_timer_alloc(per_tpid, queue, NULL);
+//Check if all resources were successfully allocated
+if (tim_1733 == ODP_TIMER_INVALID)
+{
+	//Failed to allocate all resources => tear down
+	//Destroy timeout
+	odp_timer_free(tim_1733);
+	//Tear down other state
+	...
+	return false;
+}
+//All necessary resources successfully allocated
+//Compute tick period in timer ticks
+period_1733 = odp_timer_ns_to_tick(per_tpid, 1000000000U / 1733U);//1733Hz
+//Compute when next tick should expire
+next_1733 = odp_timer_current_tick(per_tpid) + period_1733;
+//Arm the periodic timer
+odp_timer_set_abs(tim_1733, next_1733);
+return true;
+
+
+
+//A periodic timer timeout has been received
+odp_timer_tmo_t tmo = odp_timeout_from_buffer(buf);
+//Get status of timeout
+odp_timer_tmo_status_t stat = odp_timer_tmo_status(tmo);
+//We expect the timeout is always fresh since we are not calling set or cancel
+//on active or expired timers in this example
+assert(stat == ODP_TMO_FRESH);
+//Do processing driven by timeout *before*
+...
+do {
+	//Compute when the timer should expire next
+	next_1733 += period_1733;
+	//Check that this is in the future
+	if (likely(next_1733 > odp_timer_current_tick(per_tpid)))
+	break;//Yes, done
+	//Else we missed a timeout
+	//Optionally attempt some recovery and/or logging of the problem
+	...
+} while (0);
+//Re-arm periodic timer
+odp_timer_set_abs(tim_1733, next_1733);
+//Or do processing driven by timeout *after*
+...
+odp_timer_return_tmo(tmo);
+return;
+
+ @endcode
+*/
+
+/** Example #3 Tear down of flow
+ @code
+//ctx points to flow context data structure owned by application
+//Free the timer, cancelling any timeout
+odp_timer_free(ctx->timer);//Any enqueued timeout will be made invalid
+//Continue tearing down and eventually freeing context
+...
+return;
+
+//A timeout has been received, check status
+odp_timer_tmo_t tmo = odp_timeout_from_buffer(buf);
+switch (odp_timer_tmo_status(tmo))
+{
+    case ODP_TMO_FRESH :
+	//A flow has timed out, tear it down
+	//Find flow context from timeout
+	ctx = (context *)odp_timer_get_userptr(tmo);
+	//Free the supervision timer, any enqueued timeout will remain
+	odp_timer_free(ctx->tim);
+	//Free other flow related resources
+	...
+	//Free the timeout buffer
+	odp_buffer_free(buf);
+	//Flow torn down
+	break;
+    case ODP_TMO_STALE :
+	//A stale timeout was received, return timeout and update timer
+	odp_timer_return_tmo(tmo);
+	break;
+    case ODP_TMO_ORPHAN :
+	//Orphaned timeout (from previously torn down flow)
+	//No corresponding timer or flow context
+	//Free the timeout buffer
+	odp_buffer_free(buf);
+	break;
+}
+
+ @endcode
+*/
+
 #ifndef ODP_TIMER_H_
 #define ODP_TIMER_H_
 
@@ -18,144 +202,408 @@ 
 extern "C" {
 #endif
 
+#include <stdlib.h>
 #include <odp_std_types.h>
 #include <odp_buffer.h>
 #include <odp_buffer_pool.h>
 #include <odp_queue.h>
 
+struct odp_timer_pool_s; /**< Forward declaration */
+
+/**
+* ODP timer pool handle (platform dependent)
+*/
+typedef struct odp_timer_pool_s *odp_timer_pool_t;
+
+/**
+ * Invalid timer pool handle (platform dependent).
+ */
+#define ODP_TIMER_POOL_INVALID NULL
 
 /**
- * ODP timer handle
+ * Clock sources for timers in timer pool.
  */
-typedef uint32_t odp_timer_t;
+typedef enum odp_timer_clk_src_e {
+	/** Use CPU clock as clock source for timers */
+	ODP_CLOCK_CPU,
+	/** Use external clock as clock source for timers */
+	ODP_CLOCK_EXT
+	/* Platform dependent which other clock sources exist */
+} odp_timer_clk_src_t;
 
-/** Invalid timer */
-#define ODP_TIMER_INVALID 0
+struct odp_timer_s; /**< Forward declaration */
 
+/**
+* ODP timer handle (platform dependent).
+*/
+typedef struct odp_timer_s *odp_timer_t;
 
 /**
- * ODP timeout handle
+ * Invalid timer handle (platform dependent).
  */
-typedef odp_buffer_t odp_timer_tmo_t;
-
-/** Invalid timeout */
-#define ODP_TIMER_TMO_INVALID 0
+#define ODP_TIMER_INVALID NULL
 
+/**
+ * Return values of timer set calls.
+ */
+typedef enum odp_timer_set_e {
+	/** Timer set operation successful */
+	ODP_TIMER_SET_SUCCESS,
+	/** Timer set operation failed, expiration too early */
+	ODP_TIMER_SET_TOOEARLY,
+	/** Timer set operation failed, expiration too late */
+	ODP_TIMER_SET_TOOLATE
+} odp_timer_set_t;
 
 /**
- * Timeout notification
+ * Timeout event handle.
  */
-typedef odp_buffer_t odp_timeout_t;
+typedef odp_buffer_t odp_timer_tmo_t;
 
+/**
+ * Status of a timeout event.
+ */
+typedef enum odp_timer_tmo_status_e {
+	/** Timeout is fresh, process it and return timeout */
+	ODP_TMO_FRESH,
+	/** Timer reset or cancelled, just return timeout  */
+	ODP_TMO_STALE,
+	/** Timer deleted, return or free timeout */
+	ODP_TMO_ORPHAN
+} odp_timer_tmo_status_t;
 
 /**
- * Create a timer
+ * Create a timer pool
  *
- * Creates a new timer with requested properties.
+ * Create a new timer pool.
  *
  * @param name       Name
- * @param pool       Buffer pool for allocating timeout notifications
+ * @param buf_pool   Buffer pool for allocating timeouts (and only timeouts)
  * @param resolution Timeout resolution in nanoseconds
- * @param min_tmo    Minimum timeout duration in nanoseconds
- * @param max_tmo    Maximum timeout duration in nanoseconds
+ * @param min_tmo    Minimum relative timeout in nanoseconds
+ * @param max_tmo    Maximum relative timeout in nanoseconds
+ * @param num_timers Number of supported timers (minimum)
+ * @param shared     Shared or private timer pool.
+ *		   Operations on shared timers will include the necessary
+ *		   mutual exclusion, operations on private timers may not
+ *		   (mutual exclusion is the responsibility of the caller).
+ * @param clk_src    Clock source to use
  *
- * @return Timer handle if successful, otherwise ODP_TIMER_INVALID
+ * @return Timer pool handle if successful, otherwise ODP_TIMER_POOL_INVALID
+ * and errno set
  */
-odp_timer_t odp_timer_create(const char *name, odp_buffer_pool_t pool,
-			     uint64_t resolution, uint64_t min_tmo,
-			     uint64_t max_tmo);
+odp_timer_pool_t
+odp_timer_pool_create(const char *name,
+		      odp_buffer_pool_t buf_pool,
+		      uint64_t resolution,
+		      uint64_t min_tmo,
+		      uint64_t max_tmo,
+		      uint32_t num_timers,
+		      bool shared,
+		      odp_timer_clk_src_t clk_src);
+
+/**
+ * Start a timer pool
+ *
+ * Start all created timer pools, enabling the allocation of timers.
+ * The purpose of this call is to coordinate the creation of multiple timer
+ * pools that may use the same underlying HW resources.
+ * This function may be called multiple times.
+ */
+void odp_timer_pool_start(void);
+
+/**
+ * Destroy a timer pool
+ *
+ * Destroy a timer pool, freeing all resources.
+ * All timers must have been freed.
+ *
+ * @param tpid  Timer pool identifier
+ */
+void odp_timer_pool_destroy(odp_timer_pool_t tpid);
 
 /**
  * Convert timer ticks to nanoseconds
  *
- * @param timer Timer
+ * @param tpid  Timer pool identifier
  * @param ticks Timer ticks
  *
  * @return Nanoseconds
  */
-uint64_t odp_timer_tick_to_ns(odp_timer_t timer, uint64_t ticks);
+uint64_t odp_timer_tick_to_ns(odp_timer_pool_t tpid, uint64_t ticks);
 
 /**
  * Convert nanoseconds to timer ticks
  *
- * @param timer Timer
+ * @param tpid  Timer pool identifier
  * @param ns    Nanoseconds
  *
  * @return Timer ticks
  */
-uint64_t odp_timer_ns_to_tick(odp_timer_t timer, uint64_t ns);
+uint64_t odp_timer_ns_to_tick(odp_timer_pool_t tpid, uint64_t ns);
 
 /**
- * Timer resolution in nanoseconds
+ * Current tick value
  *
- * @param timer Timer
+ * @param tpid Timer pool identifier
  *
- * @return Resolution in nanoseconds
+ * @return Current time in timer ticks
+ */
+uint64_t odp_timer_current_tick(odp_timer_pool_t tpid);
+
+/**
+ * ODP timer configurations
  */
-uint64_t odp_timer_resolution(odp_timer_t timer);
+
+typedef enum odp_timer_pool_conf_e {
+	ODP_TIMER_NAME,      /**< Return name of timer pool */
+	ODP_TIMER_RESOLUTION,/**< Return the timer resolution (in ns) */
+	ODP_TIMER_MIN_TICKS, /**< Return the min supported rel timeout (ticks)*/
+	ODP_TIMER_MAX_TICKS, /**< Return the max supported rel timeout (ticks)*/
+	ODP_TIMER_NUM_TIMERS,/**< Return number of supported timers */
+	ODP_TIMER_SHARED     /**< Return shared flag */
+} odp_timer_pool_conf_t;
 
 /**
- * Maximum timeout in timer ticks
+ * Query different timer pool configurations, e.g.
+ *  Timer resolution in nanoseconds
+ *  Maximum timeout in timer ticks
+ *  Number of supported timers
+ *  Shared or private timer pool
  *
- * @param timer Timer
+ * @param tpid Timer pool identifier
+ * @param item Configuration item being queried
  *
- * @return Maximum timeout in timer ticks
+ * @return the requested piece of information or 0 for unknown item.
  */
-uint64_t odp_timer_maximum_tmo(odp_timer_t timer);
+uintptr_t odp_timer_pool_query_conf(odp_timer_pool_t tpid,
+				    odp_timer_pool_conf_t item);
 
 /**
- * Current timer tick
+ * Allocate a timer
  *
- * @param timer Timer
+ * Create a timer (allocating all necessary resources e.g. timeout event) from
+ * the timer pool.
  *
- * @return Current time in timer ticks
+ * @param tpid     Timer pool identifier
+ * @param queue    Destination queue for timeout notifications
+ * @param user_ptr User defined pointer or NULL (copied to timeouts)
+ *
+ * @return Timer handle if successful, otherwise ODP_TIMER_INVALID and
+ *	   errno set.
  */
-uint64_t odp_timer_current_tick(odp_timer_t timer);
+odp_timer_t odp_timer_alloc(odp_timer_pool_t tpid,
+			    odp_queue_t queue,
+			    void *user_ptr);
 
 /**
- * Request timeout with an absolute timer tick
+ * Free a timer
+ *
+ * Free (destroy) a timer, freeing all associated resources (e.g. default
+ * timeout event). An expired and enqueued timeout event will not be freed.
+ * It is the responsibility of the application to free this timeout when it
+ * is received.
  *
- * When tick reaches tmo_tick, the timer enqueues the timeout notification into
- * the destination queue.
+ * @param tim      Timer handle
+ */
+void odp_timer_free(odp_timer_t tim);
+
+/**
+ * Set a timer (absolute time) with a user-defined timeout buffer
  *
- * @param timer    Timer
- * @param tmo_tick Absolute timer tick value which triggers the timeout
- * @param queue    Destination queue for the timeout notification
- * @param buf      User defined timeout notification buffer. When
- *                 ODP_BUFFER_INVALID, default timeout notification is used.
+ * Set (arm) the timer to expire at specific time. The user-defined
+ * buffer will be enqueued when the timer expires.
+ * Arming may fail (if the timer is in state EXPIRED), an earlier timeout
+ * will then be received. odp_timer_tmo_status() must be used to check if
+ * the received timeout is valid.
  *
- * @return Timeout handle if successful, otherwise ODP_TIMER_TMO_INVALID
+ * Note: any invalid parameters will be treated as programming errors and will
+ * cause the application to abort.
+ *
+ * @param tim      Timer
+ * @param abs_tck  Expiration time in absolute timer ticks
+ * @param user_buf The buffer to use as timeout event
+ *
+ * @return Success or failure code
  */
-odp_timer_tmo_t odp_timer_absolute_tmo(odp_timer_t timer, uint64_t tmo_tick,
-				       odp_queue_t queue, odp_buffer_t buf);
+odp_timer_set_t odp_timer_set_abs_w_buf(odp_timer_t tim,
+					uint64_t abs_tck,
+					odp_buffer_t user_buf);
 
 /**
- * Cancel a timeout
+ * Set a timer with an absolute expiration time
+ *
+ * Set (arm) the timer to expire at a specific time.
+ * Arming may fail (if the timer is in state EXPIRED), an earlier timeout
+ * will then be received. odp_timer_tmo_status() must be used to check if
+ * the received timeout is valid.
+ *
+ * Note: any invalid parameters will be treated as programming errors and will
+ * cause the application to abort.
  *
- * @param timer Timer
- * @param tmo   Timeout to cancel
+ * @param tim     Timer
+ * @param abs_tck Expiration time in absolute timer ticks
  *
- * @return 0 if successful
+ * @return Success or failure code
  */
-int odp_timer_cancel_tmo(odp_timer_t timer, odp_timer_tmo_t tmo);
+odp_timer_set_t odp_timer_set_abs(odp_timer_t tim, uint64_t abs_tck);
 
 /**
- * Convert buffer handle to timeout handle
+ * Set a timer with a relative expiration time and user-defined buffer.
  *
- * @param buf  Buffer handle
+ * Set (arm) the timer to expire at a relative future time.
+ * Arming may fail (if the timer is in state EXPIRED),
+ * an earlier timeout will then be received. odp_timer_tmo_status() must
+ * be used to check if the received timeout is valid.
  *
- * @return Timeout buffer handle
+ * Note: any invalid parameters will be treated as programming errors and will
+ * cause the application to abort.
+ *
+ * @param tim      Timer
+ * @param rel_tck  Expiration time in timer ticks relative to current time of
+ *		   the timer pool the timer belongs to
+ * @param user_buf The buffer to use as timeout event
+ *
+ * @return Success or failure code
  */
-odp_timeout_t odp_timeout_from_buffer(odp_buffer_t buf);
+odp_timer_set_t odp_timer_set_rel_w_buf(odp_timer_t tim,
+					uint64_t rel_tck,
+					odp_buffer_t user_buf);
+/**
+ * Set a timer with a relative expiration time
+ *
+ * Set (arm) the timer to expire at a relative future time.
+ * Arming may fail (if the timer is in state EXPIRED),
+ * an earlier timeout will then be received. odp_timer_tmo_status() must
+ * be used to check if the received timeout is valid.
+ *
+ * Note: any invalid parameters will be treated as programming errors and will
+ * cause the application to abort.
+ *
+ * @param tim     Timer
+ * @param rel_tck Expiration time in timer ticks relative to current time of
+ *		  the timer pool the timer belongs to
+ *
+ * @return Success or failure code
+ */
+odp_timer_set_t odp_timer_set_rel(odp_timer_t tim, uint64_t rel_tck);
 
 /**
- * Return absolute timeout tick
+ * Cancel a timer
+ *
+ * Cancel a timer, preventing future expiration and delivery.
+ *
+ * A timer that has already expired and been enqueued for delivery may be
+ * impossible to cancel and will instead be delivered to the destination queue.
+ * Use odp_timer_tmo_status() to check whether a received timeout is fresh or
+ * stale (cancelled). Stale timeouts will automatically be recycled.
+ *
+ * Note: any invalid parameters will be treated as programming errors and will
+ * cause the application to abort.
+ *
+ * @param tim    Timer handle
+ */
+void odp_timer_cancel(odp_timer_t tim);
+
+/**
+ * Translate from buffer to timeout
+ *
+ * Return the timeout handle that corresponds to the specified buffer handle.
+ * The buffer must be of type ODP_BUFFER_TYPE_TIMEOUT.
+ *
+ * @param buf   Buffer handle to translate.
+ *
+ * @return      The corresponding timeout handle.
+ */
+static inline odp_timer_tmo_t odp_timeout_from_buffer(odp_buffer_t buf)
+{
+	if (odp_unlikely(odp_buffer_type(buf) != ODP_BUFFER_TYPE_TIMEOUT)) {
+		ODP_ERR("Buffer type %u not timeout\n", buf);
+		abort();
+	}
+	/* In this implementation, timeout == buffer */
+	return (odp_timer_tmo_t)buf;
+}
+
+/**
+ * Translate from timeout to buffer
+ *
+ * Return the buffer handle that corresponds to the specified timeout handle.
+ *
+ * @param tmo   Timeout handle to translate.
+ *
+ * @return      The corresponding buffer handle.
+ */
+static inline odp_buffer_t odp_buffer_from_timeout(odp_timer_tmo_t tmo)
+{
+	/* In this implementation, buffer == timeout */
+	return (odp_buffer_t)tmo;
+}
+
+/**
+ * Return timeout to timer
+ *
+ * Return a received timeout for reuse with the parent timer.
+ * Note: odp_timer_return_tmo() must be called on all received timeouts!
+ * (Excluding user defined timeout buffers).
+ * The timeout must not be accessed after this call, the semantics is
+ * equivalent to a free call.
+ *
+ * @param tmo    Timeout
+ */
+void odp_timer_return_tmo(odp_timer_tmo_t tmo);
+
+/**
+ * Return fresh/stale/orphan status of timeout.
+ *
+ * Check a received timeout for orphaness (i.e. parent timer freed) and
+ * staleness (i.e. parent timer has been reset or cancelled after the timeout
+ * expired and was enqueued).
+ * If the timeout is fresh, it should be processed.
+ * If the timeout is stale or orphaned, it should be ignored.
+ * All timeouts must be returned using the odp_timer_return_tmo() call.
+ *
+ * @param tmo    Timeout
+ *
+ * @return One of ODP_TMO_FRESH, ODP_TMO_STALE or ODP_TMO_ORPHAN.
+ */
+odp_timer_tmo_status_t odp_timer_tmo_status(odp_timer_tmo_t tmo);
+
+/**
+ * Get timer handle
+ *
+ * Return Handle of parent timer.
+ *
+ * @param tmo   Timeout
+ *
+ * @return Timer handle or ODP_TIMER_INVALID for orphaned timeouts.
+ *         Note that the parent timer could be freed by some other thread
+ *         at any time and thus the timeout becomes orphaned.
+ */
+odp_timer_t odp_timer_handle(odp_timer_tmo_t tmo);
+
+/**
+ * Get expiration time
+ *
+ * Return (requested) expiration time of timeout.
+ *
+ * @param tmo   Timeout
+ *
+ * @return Expiration time
+ */
+uint64_t odp_timer_expiration(odp_timer_tmo_t tmo);
+
+/**
+ * Get user pointer
+ *
+ * Return User pointer of timer associated with timeout.
+ * The user pointer is often used to point to some associated context.
  *
- * @param tmo Timeout buffer handle
+ * @param tmo   Timeout
  *
- * @return Absolute timeout tick
+ * @return User pointer
  */
-uint64_t odp_timeout_tick(odp_timeout_t tmo);
+void *odp_timer_userptr(odp_timer_tmo_t tmo);
 
 #ifdef __cplusplus
 }
diff --git a/platform/linux-generic/include/odp_priority_queue_internal.h b/platform/linux-generic/include/odp_priority_queue_internal.h
new file mode 100644
index 0000000..7d7f3a2
--- /dev/null
+++ b/platform/linux-generic/include/odp_priority_queue_internal.h
@@ -0,0 +1,108 @@ 
+#ifndef _PRIORITY_QUEUE_H
+#define _PRIORITY_QUEUE_H
+
+#include <assert.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdbool.h>
+#include <odp_align.h>
+
+#define INVALID_INDEX ~0U
+#define INVALID_PRIORITY ((pq_priority_t)~0ULL)
+
+typedef uint64_t pq_priority_t;
+
+struct heap_node;
+
+typedef struct priority_queue {
+	uint32_t max_elems;/* Number of elements in heap */
+	/* Number of registered elements (active + inactive) */
+	uint32_t reg_elems;
+	uint32_t num_elems;/* Number of active elements */
+	struct heap_node *heap;
+	struct heap_node *org_ptr;
+} priority_queue ODP_ALIGNED(sizeof(uint64_t));
+
+/* The user gets a pointer to this structure */
+typedef struct {
+	/* Set when pq_element registered with priority queue */
+	priority_queue *pq;
+	uint32_t index;/* Index into heap array */
+	pq_priority_t prio;
+} pq_element;
+
+/*** Operations on pq_element ***/
+
+static inline void pq_element_con(pq_element *this)
+{
+	this->pq = NULL;
+	this->index = INVALID_INDEX;
+	this->prio = 0U;
+}
+
+static inline void pq_element_des(pq_element *this)
+{
+	(void)this;
+	assert(this->index == INVALID_INDEX);
+}
+
+static inline priority_queue *get_pq(const pq_element *this)
+{
+	return this->pq;
+}
+
+static inline pq_priority_t get_prio(const pq_element *this)
+{
+	return this->prio;
+}
+
+static inline uint32_t get_index(const pq_element *this)
+{
+	return this->index;
+}
+
+static inline bool is_active(const pq_element *this)
+{
+	return this->index != INVALID_INDEX;
+}
+
+/*** Operations on priority_queue ***/
+
+extern uint32_t pq_smallest_child(priority_queue *, uint32_t, pq_priority_t);
+extern void pq_bubble_down(priority_queue *, pq_element *);
+extern void pq_bubble_up(priority_queue *, pq_element *);
+
+static inline bool valid_index(priority_queue *this, uint32_t idx)
+{
+	return idx < this->num_elems;
+}
+
+extern void priority_queue_con(priority_queue *, uint32_t _max_elems);
+extern void priority_queue_des(priority_queue *);
+
+/* Register pq_element with priority queue */
+/* Return false if priority queue full */
+extern bool pq_register_element(priority_queue *, pq_element *);
+
+/* Activate and add pq_element to priority queue */
+/* Element must be disarmed */
+extern void pq_activate_element(priority_queue *, pq_element *, pq_priority_t);
+
+/* Reset (increase) priority for pq_element */
+/* Element may be active or inactive (released) */
+extern void pq_reset_element(priority_queue *, pq_element *, pq_priority_t);
+
+/* Deactivate and remove element from priority queue */
+/* Element may be active or inactive (released) */
+extern void pq_deactivate_element(priority_queue *, pq_element *);
+
+/* Unregister pq_element */
+extern void pq_unregister_element(priority_queue *, pq_element *);
+
+/* Return priority of first element (lowest numerical value) */
+extern pq_priority_t pq_first_priority(const priority_queue *);
+
+/* Deactivate and return first element if it's prio is <= threshold */
+extern pq_element *pq_release_element(priority_queue *, pq_priority_t thresh);
+
+#endif /* _PRIORITY_QUEUE_H */
diff --git a/platform/linux-generic/include/odp_timer_internal.h b/platform/linux-generic/include/odp_timer_internal.h
index ad28f53..461f28c 100644
--- a/platform/linux-generic/include/odp_timer_internal.h
+++ b/platform/linux-generic/include/odp_timer_internal.h
@@ -1,4 +1,4 @@ 
-/* Copyright (c) 2013, Linaro Limited
+/* Copyright (c) 2014, Linaro Limited
  * All rights reserved.
  *
  * SPDX-License-Identifier:     BSD-3-Clause
@@ -8,72 +8,51 @@ 
 /**
  * @file
  *
- * ODP timer timeout descriptor - implementation internal
+ * ODP timeout descriptor - implementation internal
  */
 
 #ifndef ODP_TIMER_INTERNAL_H_
 #define ODP_TIMER_INTERNAL_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#include <odp_std_types.h>
-#include <odp_queue.h>
-#include <odp_buffer.h>
+#include <odp_align.h>
+#include <odp_debug.h>
 #include <odp_buffer_internal.h>
 #include <odp_buffer_pool_internal.h>
 #include <odp_timer.h>
 
-struct timeout_t;
-
-typedef struct timeout_t {
-	struct timeout_t *next;
-	int               timer_id;
-	int               tick;
-	uint64_t          tmo_tick;
-	odp_queue_t       queue;
-	odp_buffer_t      buf;
-	odp_buffer_t      tmo_buf;
-} timeout_t;
-
-
-struct odp_timeout_hdr_t;
-
 /**
- * Timeout notification header
+ * Internal Timeout header
  */
-typedef struct odp_timeout_hdr_t {
+typedef struct {
+	/* common buffer header */
 	odp_buffer_hdr_t buf_hdr;
 
-	timeout_t meta;
-
-	uint8_t buf_data[];
+	/* Requested expiration time */
+	uint64_t expiration;
+	/* User ptr inherited from parent timer */
+	void *user_ptr;
+	/* Parent timer */
+	odp_timer_t timer;
+	/* Tag inherited from parent timer at time of expiration */
+	uint32_t tag;
+	/* Gen-cnt inherited from parent timer at time of creation */
+	uint16_t gencnt;
+	uint16_t pad;
+	uint8_t buf_data[0];
 } odp_timeout_hdr_t;
 
-
-
 ODP_STATIC_ASSERT(sizeof(odp_timeout_hdr_t) ==
-	   ODP_OFFSETOF(odp_timeout_hdr_t, buf_data),
-	   "ODP_TIMEOUT_HDR_T__SIZE_ERR");
-
+		  ODP_OFFSETOF(odp_timeout_hdr_t, buf_data),
+		  "sizeof(odp_timeout_hdr_t) == ODP_OFFSETOF(odp_timeout_hdr_t, buf_data)");
 ODP_STATIC_ASSERT(sizeof(odp_timeout_hdr_t) % sizeof(uint64_t) == 0,
-	   "ODP_TIMEOUT_HDR_T__SIZE_ERR2");
-
+		  "sizeof(odp_timeout_hdr_t) % sizeof(uint64_t) == 0");
 
 /**
- * Return timeout header
+ * Return the timeout header
  */
-static inline odp_timeout_hdr_t *odp_timeout_hdr(odp_timeout_t tmo)
+static inline odp_timeout_hdr_t *odp_timeout_hdr(odp_buffer_t buf)
 {
-	odp_buffer_hdr_t *buf_hdr = odp_buf_to_hdr((odp_buffer_t)tmo);
-	return (odp_timeout_hdr_t *)(uintptr_t)buf_hdr;
-}
-
-
-
-#ifdef __cplusplus
+	return (odp_timeout_hdr_t *)odp_buf_to_hdr(buf);
 }
-#endif
 
 #endif
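
For reference, a sketch of how the gencnt and tag fields above are intended to be
consumed when a timeout is received (it mirrors odp_timer_tmo_status() later in
this patch; the helper name and the parameters carrying the parent timer's current
values are illustrative):

#include <odp_timer.h>
#include "odp_timer_internal.h"

/* timer_gencnt/timer_tag stand for the parent timer's current generation
 * count and tag at the time the timeout is inspected */
static odp_timer_tmo_status_t classify_tmo(const odp_timeout_hdr_t *tmo_hdr,
					   uint16_t timer_gencnt,
					   uint32_t timer_tag)
{
	if (tmo_hdr->gencnt != timer_gencnt)
		return ODP_TMO_ORPHAN;	/* parent timer freed (possibly reused) */
	if (tmo_hdr->tag != timer_tag)
		return ODP_TMO_STALE;	/* timer reset/cancelled after expiration */
	return ODP_TMO_FRESH;		/* timeout matches the current arming */
}
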
diff --git a/platform/linux-generic/odp_priority_queue.c b/platform/linux-generic/odp_priority_queue.c
new file mode 100644
index 0000000..b72c26f
--- /dev/null
+++ b/platform/linux-generic/odp_priority_queue.c
@@ -0,0 +1,283 @@ 
+#define NDEBUG /* Enabled by default by ODP build system */
+#include <assert.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <string.h>
+#include <strings.h>
+#include <odp_hints.h>
+#include <odp_align.h>
+#include <odp_debug.h>
+
+#include "odp_priority_queue_internal.h"
+
+
+#define NUM_CHILDREN 4
+#define CHILD(n) (NUM_CHILDREN * (n) + 1)
+#define PARENT(n) (((n) - 1) / NUM_CHILDREN)
+
+/* Internal nodes in the array */
+typedef struct heap_node {
+	pq_element *elem;
+	/* Copy of elem->prio so we avoid unnecessary dereferencing */
+	pq_priority_t prio;
+} heap_node;
+
+static void pq_assert_heap(priority_queue *this);
+
+#define ALIGNMENT(p) (1U << ((unsigned)ffs((int)p) - 1U))
+
+void priority_queue_con(priority_queue *this, uint32_t _max_elems)
+{
+	this->max_elems = _max_elems;
+	this->reg_elems = 0;
+	this->num_elems = 0;
+	/* Allocate one cache line extra so that the heap base can be */
+	/* adjusted for alignment below */
+	this->org_ptr = malloc(_max_elems * sizeof(heap_node) +
+			       ODP_CACHE_LINE_SIZE);
+	if (odp_unlikely(this->org_ptr == NULL)) {
+		ODP_ERR("malloc failed\n");
+		abort();
+	}
+	this->heap = this->org_ptr;
+	assert((size_t)&this->heap[1] % 8 == 0);
+	/* Increment base address until first child (index 1) is cache line */
+	/* aligned and thus all children (e.g. index 1-4) stored in the */
+	/* same cache line. We are not interested in the alignment of */
+	/* heap[0] as this is a lone node */
+	while ((size_t)&this->heap[1] % ODP_CACHE_LINE_SIZE != 0) {
+		/* Cast to ptr to struct member with the greatest alignment */
+		/* requirement */
+		this->heap = (heap_node *)((pq_priority_t *)this->heap + 1);
+	}
+	pq_assert_heap(this);
+}
+
+void priority_queue_des(priority_queue *this)
+{
+	pq_assert_heap(this);
+	free(this->org_ptr);
+}
+
+#ifndef NDEBUG
+static uint32_t
+pq_assert_elem(priority_queue *this, uint32_t index, bool recurse)
+{
+	uint32_t num = 1;
+	const pq_element *elem = this->heap[index].elem;
+	assert(elem->index == index);
+	assert(elem->prio == this->heap[index].prio);
+	uint32_t child = CHILD(index);
+	uint32_t i;
+	for (i = 0; i < NUM_CHILDREN; i++, child++) {
+		if (valid_index(this, child)) {
+			assert(this->heap[child].elem != NULL);
+			assert(this->heap[child].prio >= elem->prio);
+			if (recurse)
+				num += pq_assert_elem(this, child, recurse);
+		}
+	}
+	return num;
+}
+#endif
+
+static void
+pq_assert_heap(priority_queue *this)
+{
+	(void)this;
+#ifndef NDEBUG
+	uint32_t num = 0;
+	if (odp_likely(this->num_elems != 0)) {
+		assert(this->heap[0].elem != NULL);
+		num += pq_assert_elem(this, 0, true);
+	}
+	assert(num == this->num_elems);
+	unsigned i;
+	for (i = 0; i < this->num_elems; i++) {
+		assert(this->heap[i].elem != NULL);
+		assert(this->heap[i].prio != INVALID_PRIORITY);
+	}
+#endif
+}
+
+/* Bubble up to proper position */
+void
+pq_bubble_up(priority_queue *this, pq_element *elem)
+{
+	assert(this->heap[elem->index].elem == elem);
+	assert(this->heap[elem->index].prio == elem->prio);
+	uint32_t current = elem->index;
+	pq_priority_t prio = elem->prio;
+	assert(current == 0 || this->heap[PARENT(current)].elem != NULL);
+	/* Move up into proper position */
+	while (current != 0 && this->heap[PARENT(current)].prio > prio) {
+		uint32_t parent = PARENT(current);
+		assert(this->heap[parent].elem != NULL);
+		/* Swap current with parent */
+		/* 1) Move parent down */
+		this->heap[current].elem = this->heap[parent].elem;
+		this->heap[current].prio = this->heap[parent].prio;
+		this->heap[current].elem->index = current;
+		/* 2) Move current up to parent */
+		this->heap[parent].elem = elem;
+		this->heap[parent].prio = prio;
+		this->heap[parent].elem->index = parent;
+		/* Continue moving elem until it is in the right place */
+		current = parent;
+	}
+	pq_assert_heap(this);
+}
+
+/* Find the smallest child that is smaller than the specified priority */
+/* Very hot function, can we decrease the number of cache misses? */
+uint32_t pq_smallest_child(priority_queue *this,
+			   uint32_t index,
+			   pq_priority_t val)
+{
+	uint32_t smallest = index;
+	uint32_t child = CHILD(index);
+#if NUM_CHILDREN == 4
+	/* Unroll loop when all children exist */
+	if (odp_likely(valid_index(this, child + 3))) {
+		if (this->heap[child + 0].prio < val)
+			val = this->heap[smallest = child + 0].prio;
+		if (this->heap[child + 1].prio < val)
+			val = this->heap[smallest = child + 1].prio;
+		if (this->heap[child + 2].prio < val)
+			val = this->heap[smallest = child + 2].prio;
+		if (this->heap[child + 3].prio < val)
+			smallest = child + 3; /* val not needed after last child */
+		return smallest;
+	}
+#endif
+	uint32_t i;
+	for (i = 0; i < NUM_CHILDREN; i++) {
+		if (odp_unlikely(!valid_index(this, child + i)))
+			break;
+		if (this->heap[child + i].prio < val) {
+			smallest = child + i;
+			val = this->heap[smallest].prio;
+		}
+	}
+	return smallest;
+}
+
+/* Very hot function, can it be optimised? */
+void
+pq_bubble_down(priority_queue *this, pq_element *elem)
+{
+	assert(this->heap[elem->index].elem == elem);
+	assert(this->heap[elem->index].prio == elem->prio);
+	uint32_t current = elem->index;
+	pq_priority_t prio = elem->prio;
+	for (;;) {
+		uint32_t child = pq_smallest_child(this, current, prio);
+		if (current == child) {
+			/* No smaller child, we are done */
+			pq_assert_heap(this);
+			return;
+		}
+		/* Element larger than smaller child, must move down */
+		assert(this->heap[child].elem != NULL);
+		/* 1) Move child up to current */
+		this->heap[current].elem = this->heap[child].elem;
+		this->heap[current].prio = this->heap[child].prio;
+		/* 2) Move current down to child */
+		this->heap[child].elem = elem;
+		this->heap[child].prio = prio;
+		this->heap[child].elem->index = child;
+
+		this->heap[current].elem->index = current; /* cache misses! */
+		/* Continue moving element until it is in the right place */
+		current = child;
+	}
+}
+
+bool
+pq_register_element(priority_queue *this, pq_element *elem)
+{
+	if (odp_likely(this->reg_elems < this->max_elems)) {
+		elem->pq = this;
+		this->reg_elems++;
+		return true;
+	}
+	return false;
+}
+
+void
+pq_unregister_element(priority_queue *this, pq_element *elem)
+{
+	assert(elem->pq == this);
+	if (is_active(elem))
+		pq_deactivate_element(this, elem);
+	this->reg_elems--;
+}
+
+void
+pq_activate_element(priority_queue *this, pq_element *elem, pq_priority_t prio)
+{
+	assert(elem->index == INVALID_INDEX);
+	/* Insert element at end */
+	uint32_t index = this->num_elems++;
+	this->heap[index].elem = elem;
+	this->heap[index].prio = prio;
+	elem->index = index;
+	elem->prio = prio;
+	pq_bubble_up(this, elem);
+}
+
+void
+pq_deactivate_element(priority_queue *this, pq_element *elem)
+{
+	assert(elem->pq == this);
+	if (odp_likely(is_active(elem))) {
+		/* Swap element with last element */
+		uint32_t current = elem->index;
+		uint32_t last = --this->num_elems;
+		if (odp_likely(last != current)) {
+			/* Move last element to current */
+			this->heap[current].elem = this->heap[last].elem;
+			this->heap[current].prio = this->heap[last].prio;
+			this->heap[current].elem->index = current;
+			/* Move the relocated 'last' element up or down to */
+			/* its proper place */
+			if (this->heap[current].prio < elem->prio)
+				pq_bubble_up(this, this->heap[current].elem);
+			else
+				pq_bubble_down(this, this->heap[current].elem);
+		}
+		elem->index = INVALID_INDEX;
+		pq_assert_heap(this);
+	}
+}
+
+void
+pq_reset_element(priority_queue *this, pq_element *elem, pq_priority_t prio)
+{
+	assert(prio != INVALID_PRIORITY);
+	if (odp_likely(is_active(elem))) {
+		assert(prio >= elem->prio);
+		elem->prio = prio;
+		this->heap[elem->index].prio = prio;/* cache misses here! */
+		pq_bubble_down(this, elem);
+		pq_assert_heap(this);
+	} else {
+		pq_activate_element(this, elem, prio);
+	}
+}
+
+pq_priority_t pq_first_priority(const priority_queue *this)
+{
+	return this->num_elems != 0 ? this->heap[0].prio : INVALID_PRIORITY;
+}
+
+pq_element *
+pq_release_element(priority_queue *this, pq_priority_t threshold)
+{
+	if (odp_likely(this->num_elems != 0 &&
+		       this->heap[0].prio <= threshold)) {
+		pq_element *elem = this->heap[0].elem;
+		/* Remove element from heap */
+		pq_deactivate_element(this, elem);
+		assert(elem->prio <= threshold);
+		return elem;
+	}
+	return NULL;
+}
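
For reference, a small sanity check of the 4-ary heap index arithmetic used above
(illustrative only; the macro definitions are repeated from this file, and the
cache line remark assumes 64-byte lines with 64-bit pointers as on linux-generic):

#include <assert.h>

#define NUM_CHILDREN 4
#define CHILD(n) (NUM_CHILDREN * (n) + 1)
#define PARENT(n) (((n) - 1) / NUM_CHILDREN)

static void heap_index_example(void)
{
	/* Node 0 is the root; its children are nodes 1..4, which is why the
	 * constructor aligns &heap[1]: four 16-byte heap_nodes fit exactly
	 * in one 64-byte cache line. */
	assert(CHILD(0) == 1);
	assert(CHILD(1) == 5);			/* children of node 1 are 5..8 */
	assert(PARENT(5) == 1 && PARENT(8) == 1);
	assert(PARENT(CHILD(7)) == 7);		/* PARENT inverts CHILD */
	assert(PARENT(CHILD(7) + NUM_CHILDREN - 1) == 7);
}
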
diff --git a/platform/linux-generic/odp_timer.c b/platform/linux-generic/odp_timer.c
index 313c713..0e5071c 100644
--- a/platform/linux-generic/odp_timer.c
+++ b/platform/linux-generic/odp_timer.c
@@ -4,428 +4,713 @@ 
  * SPDX-License-Identifier:     BSD-3-Clause
  */
 
-#include <odp_timer.h>
-#include <odp_timer_internal.h>
-#include <odp_time.h>
-#include <odp_buffer_pool_internal.h>
-#include <odp_internal.h>
-#include <odp_atomic.h>
-#include <odp_spinlock.h>
-#include <odp_sync.h>
-#include <odp_debug.h>
-
-#include <signal.h>
-#include <time.h>
+/**
+ * @file
+ *
+ * ODP timer service
+ *
+ */
 
+#include <assert.h>
+#include <errno.h>
 #include <string.h>
-
-#define NUM_TIMERS    1
-#define MAX_TICKS     1024
-#define MAX_RES       ODP_TIME_SEC
-#define MIN_RES       (100*ODP_TIME_USEC)
-
-
-typedef struct {
-	odp_spinlock_t lock;
-	timeout_t      *list;
-} tick_t;
-
-typedef struct {
-	int               allocated;
-	volatile int      active;
-	volatile uint64_t cur_tick;
-	timer_t           timerid;
-	odp_timer_t       timer_hdl;
-	odp_buffer_pool_t pool;
-	uint64_t          resolution_ns;
-	uint64_t          max_ticks;
-	tick_t            tick[MAX_TICKS];
-
-} timer_ring_t;
-
-typedef struct {
-	odp_spinlock_t lock;
-	int            num_timers;
-	timer_ring_t   timer[NUM_TIMERS];
-
-} timer_global_t;
-
-/* Global */
-static timer_global_t odp_timer;
-
-static void add_tmo(tick_t *tick, timeout_t *tmo)
+#include <stdlib.h>
+#include <time.h>
+#include <signal.h>
+#include "odp_std_types.h"
+#include "odp_buffer.h"
+#include "odp_buffer_pool.h"
+#include "odp_queue.h"
+#include "odp_hints.h"
+#include "odp_sync.h"
+#include "odp_ticketlock.h"
+#include "odp_debug.h"
+#include "odp_align.h"
+#include "odp_shared_memory.h"
+#include "odp_hints.h"
+#include "odp_internal.h"
+#include "odp_time.h"
+#include "odp_timer.h"
+#include "odp_timer_internal.h"
+#include "odp_priority_queue_internal.h"
+
+/******************************************************************************
+ * Translation between timeout and timeout header
+ *****************************************************************************/
+
+static inline odp_timeout_hdr_t *odp_tmo_to_hdr(odp_timer_tmo_t tmo)
 {
-	odp_spinlock_lock(&tick->lock);
-
-	tmo->next  = tick->list;
-	tick->list = tmo;
+	odp_buffer_t buf = odp_buffer_from_timeout(tmo);
+	odp_timeout_hdr_t *tmo_hdr = (odp_timeout_hdr_t *)odp_buf_to_hdr(buf);
+	return tmo_hdr;
+}
 
-	odp_spinlock_unlock(&tick->lock);
+/******************************************************************************
+ * odp_timer abstract datatype
+ *****************************************************************************/
+
+typedef struct odp_timer_s {
+	pq_element pqelem;/* Base class */
+	uint64_t req_tmo;/* Requested timeout tick */
+	odp_buffer_t tmo_buf;/* ODP_BUFFER_INVALID if timeout enqueued */
+	odp_queue_t queue;/* ODP_QUEUE_INVALID if timer is free */
+	uint32_t tag;/* Reusing tag as next pointer/index when timer is free */
+	uint16_t gencnt;/* Smaller to make place for user_buf flag */
+	unsigned int user_buf:1; /* User-defined buffer? */
+} odp_timer;
+
+/* Constructor */
+static inline void odp_timer_con(odp_timer *this)
+{
+	pq_element_con(&this->pqelem);
+	this->tmo_buf = ODP_BUFFER_INVALID;
+	this->queue = ODP_QUEUE_INVALID;
+	this->gencnt = 0;
 }
 
-static timeout_t *rem_tmo(tick_t *tick)
+/* Destructor */
+static inline void odp_timer_des(odp_timer *this)
 {
-	timeout_t *tmo;
+	assert(this->tmo_buf == ODP_BUFFER_INVALID);
+	assert(this->queue == ODP_QUEUE_INVALID);
+	pq_element_des(&this->pqelem);
+}
 
-	odp_spinlock_lock(&tick->lock);
+/* Setup when timer is allocated */
+static void setup(odp_timer *this,
+		  odp_queue_t _q,
+		  void *_up,
+		  odp_buffer_t _tmo)
+{
+	this->req_tmo = INVALID_PRIORITY;
+	this->tmo_buf = _tmo;
+	this->queue = _q;
+	this->tag = 0;
+	this->user_buf = false;
+	/* Initialise constant fields of timeout event */
+	odp_timeout_hdr_t *tmo_hdr =
+		odp_tmo_to_hdr(odp_timeout_from_buffer(this->tmo_buf));
+	tmo_hdr->gencnt = this->gencnt;
+	tmo_hdr->timer = this;
+	tmo_hdr->user_ptr = _up;
+	/* tmo_hdr->tag set at expiration time */
+	/* tmo_hdr->expiration set at expiration time */
+	assert(this->queue != ODP_QUEUE_INVALID);
+}
 
-	tmo = tick->list;
+/* Teardown when timer is freed */
+static odp_buffer_t teardown(odp_timer *this)
+{
+	/* Increase generation count to make any pending timeout(s) orphaned */
+	++this->gencnt;
+	odp_buffer_t buf = this->tmo_buf;
+	this->tmo_buf = ODP_BUFFER_INVALID;
+	this->queue = ODP_QUEUE_INVALID;
+	return buf;
+}
 
-	if (tmo)
-		tick->list = tmo->next;
+static inline uint32_t get_next_free(odp_timer *this)
+{
+	assert(this->queue == ODP_QUEUE_INVALID);
+	return this->tag;
+}
 
-	odp_spinlock_unlock(&tick->lock);
+static inline void set_next_free(odp_timer *this, uint32_t nf)
+{
+	assert(this->queue == ODP_QUEUE_INVALID);
+	this->tag = nf;
+}
 
-	if (tmo)
-		tmo->next = NULL;
+/******************************************************************************
+ * odp_timer_pool abstract datatype
+ * Includes timer alloc and free
+ *****************************************************************************/
+
+typedef struct odp_timer_pool_s {
+	priority_queue pq;
+	uint64_t cur_tick;/* Current tick value */
+	uint64_t min_tick;/* Current expiration lower bound */
+	uint64_t max_tick;/* Current expiration higher bound */
+	bool shared;
+	odp_ticketlock_t lock;
+	const char *name;
+	odp_buffer_pool_t buf_pool;
+	uint64_t resolution_ns;
+	uint64_t min_tmo_tck;
+	uint64_t max_tmo_tck;
+	odp_timer *timers;
+	uint32_t num_alloc;/* Current number of allocated timers */
+	uint32_t max_timers;/* Max number of timers */
+	uint32_t first_free;/* 0..max_timers-1 => free timer */
+	timer_t timerid;
+	odp_timer_clk_src_t clk_src;
+} odp_timer_pool;
+
+/* Forward declarations */
+static void timer_init(odp_timer_pool *tp);
+static void timer_exit(odp_timer_pool *tp);
+
+static void odp_timer_pool_con(odp_timer_pool *this,
+			       const char *_n,
+			       odp_buffer_pool_t _bp,
+			       uint64_t _r,
+			       uint64_t _mint,
+			       uint64_t _maxt,
+			       uint32_t _mt,
+			       bool _s,
+			       odp_timer_clk_src_t _cs)
+{
+	priority_queue_con(&this->pq, _mt);
+	this->cur_tick = 0;
+	this->shared = _s;
+	this->name = strdup(_n);
+	this->buf_pool = _bp;
+	this->resolution_ns = _r;
+	this->min_tmo_tck = odp_timer_ns_to_tick(this, _mint);
+	this->max_tmo_tck = odp_timer_ns_to_tick(this, _maxt);
+	this->min_tick = this->cur_tick + this->min_tmo_tck;
+	this->max_tick = this->cur_tick + this->max_tmo_tck;
+	this->num_alloc = 0;
+	this->max_timers = _mt;
+	this->first_free = 0;
+	this->clk_src = _cs;
+	this->timers = malloc(sizeof(odp_timer) * this->max_timers);
+	if (this->timers == NULL)
+		ODP_ABORT("%s: malloc failed\n", _n);
+	uint32_t i;
+	for (i = 0; i < this->max_timers; i++)
+		odp_timer_con(&this->timers[i]);
+	for (i = 0; i < this->max_timers; i++)
+		set_next_free(&this->timers[i], i + 1);
+	odp_ticketlock_init(&this->lock);
+	if (this->clk_src == ODP_CLOCK_CPU)
+		timer_init(this);
+	/* Make sure timer pool initialisation is globally observable */
+	/* before we return a pointer to it */
+	odp_sync_stores();
+}
 
-	return tmo;
+static odp_timer_pool *odp_timer_pool_new(
+	const char *_n,
+	odp_buffer_pool_t _bp,
+	uint64_t _r,
+	uint64_t _mint,
+	uint64_t _maxt,
+	uint32_t _mt,
+	bool _s,
+	odp_timer_clk_src_t _cs)
+{
+	odp_timer_pool *this = malloc(sizeof(odp_timer_pool));
+	if (odp_unlikely(this == NULL))
+		ODP_ABORT("%s: timer pool malloc failed\n", _n);
+	odp_timer_pool_con(this, _n, _bp, _r, _mint, _maxt, _mt, _s, _cs);
+	return this;
 }
 
-/**
- * Search and delete tmo entry from timeout list
- * return -1 : on error.. handle not in list
- *		0 : success
- */
-static int find_and_del_tmo(timeout_t **tmo, odp_timer_tmo_t handle)
+static void odp_timer_pool_des(odp_timer_pool *this)
 {
-	timeout_t *cur, *prev;
-	prev = NULL;
+	if (this->shared)
+		odp_ticketlock_lock(&this->lock);
+	if (this->num_alloc != 0) {
+		/* It's a programming error to attempt to destroy a */
+		/* timer pool which is still in use */
+		ODP_ABORT("%s: timers in use\n", this->name);
+	}
+	if (this->clk_src == ODP_CLOCK_CPU)
+		timer_exit(this);
+	uint32_t i;
+	for (i = 0; i < this->max_timers; i++)
+		odp_timer_des(&this->timers[i]);
+	free(this->timers);
+	priority_queue_des(&this->pq);
+	odp_sync_stores();
+}
 
-	for (cur = *tmo; cur != NULL; prev = cur, cur = cur->next) {
-		if (cur->tmo_buf == handle) {
-			if (prev == NULL)
-				*tmo = cur->next;
-			else
-				prev->next = cur->next;
+static void odp_timer_pool_del(odp_timer_pool *this)
+{
+	odp_timer_pool_des(this);
+	free(this);
+}
 
-			break;
+static inline odp_timer *timer_alloc(odp_timer_pool *this,
+				     odp_queue_t queue,
+				     void *user_ptr,
+				     odp_buffer_t tmo_buf)
+{
+	odp_timer *tim = ODP_TIMER_INVALID;
+	if (odp_likely(this->shared))
+		odp_ticketlock_lock(&this->lock);
+	if (odp_likely(this->num_alloc < this->max_timers)) {
+		this->num_alloc++;
+		/* Remove first unused timer from free list */
+		assert(this->first_free != this->max_timers);
+		tim = &this->timers[this->first_free];
+		this->first_free = get_next_free(tim);
+		/* Insert timer into priority queue */
+		if (odp_unlikely(!pq_register_element(&this->pq,
+						      &tim->pqelem))) {
+			/* Unexpected internal error */
+			abort();
 		}
+		/* Create timer */
+		setup(tim, queue, user_ptr, tmo_buf);
+	} else {
+		errno = ENFILE; /* Reusing 'file table overflow' */
 	}
-
-	if (!cur)
-		/* couldn't find tmo in list */
-		return -1;
-
-	/* application to free tmo_buf provided by absolute_tmo call */
-	return 0;
+	if (odp_likely(this->shared))
+		odp_ticketlock_unlock(&this->lock);
+	return tim;
 }
 
-int odp_timer_cancel_tmo(odp_timer_t timer_hdl, odp_timer_tmo_t tmo)
+static inline void timer_free(odp_timer_pool *this, odp_timer *tim)
 {
-	int id;
-	int tick_idx;
-	timeout_t *cancel_tmo;
-	odp_timeout_hdr_t *tmo_hdr;
-	tick_t *tick;
-
-	/* get id */
-	id = (int)timer_hdl - 1;
-
-	tmo_hdr = odp_timeout_hdr((odp_timeout_t) tmo);
-	/* get tmo_buf to cancel */
-	cancel_tmo = &tmo_hdr->meta;
+	if (odp_likely(this->shared))
+		odp_ticketlock_lock(&this->lock);
+	if (odp_unlikely(tim->queue == ODP_QUEUE_INVALID))
+		ODP_ABORT("Invalid timer %p\n", tim);
+	/* Destroy timer */
+	odp_buffer_t buf = teardown(tim);
+	/* Remove timer from priority queue */
+	pq_unregister_element(&this->pq, &tim->pqelem);
+	/* Insert timer into free list */
+	set_next_free(tim, this->first_free);
+	/* Pointer subtraction yields the element index directly */
+	this->first_free = (uint32_t)(tim - &this->timers[0]);
+	assert(this->num_alloc != 0);
+	this->num_alloc--;
+	if (odp_likely(this->shared))
+		odp_ticketlock_unlock(&this->lock);
+	if (buf != ODP_BUFFER_INVALID)
+		odp_buffer_free(buf);
+}
 
-	tick_idx = cancel_tmo->tick;
-	tick = &odp_timer.timer[id].tick[tick_idx];
+/******************************************************************************
+ * Operations on timers
+ * reset/reset_w_buf/cancel timer, return timeout
+ *****************************************************************************/
 
-	odp_spinlock_lock(&tick->lock);
-	/* search and delete tmo from tick list */
-	if (find_and_del_tmo(&tick->list, tmo) != 0) {
-		odp_spinlock_unlock(&tick->lock);
-		ODP_DBG("Couldn't find the tmo (%d) in tick list\n", (int)tmo);
-		return -1;
+static inline void timer_expire(odp_timer *tim)
+{
+	assert(tim->req_tmo != INVALID_PRIORITY);
+	/* Timer expired, is there actually any timeout event */
+	/* we can enqueue? */
+	if (odp_likely(tim->tmo_buf != ODP_BUFFER_INVALID)) {
+		/* Swap out timeout buffer */
+		odp_buffer_t buf = tim->tmo_buf;
+		tim->tmo_buf = ODP_BUFFER_INVALID;
+		if (odp_likely(!tim->user_buf)) {
+			odp_timeout_hdr_t *tmo_hdr =
+				odp_tmo_to_hdr(odp_timeout_from_buffer(buf));
+			/* Copy tag and requested expiration tick from timer */
+			tmo_hdr->tag = tim->tag;
+			tmo_hdr->expiration = tim->req_tmo;
+		}
+		/* Else don't touch user-defined buffer */
+		int rc = odp_queue_enq(tim->queue, buf);
+		if (odp_unlikely(rc != 0))
+			ODP_ABORT("Failed to enqueue timeout buffer (%d)\n",
+				  rc);
+		/* Mark timer as inactive */
+		tim->req_tmo = INVALID_PRIORITY;
 	}
-	odp_spinlock_unlock(&tick->lock);
-
-	return 0;
+	/* No, timeout event already enqueued or unavailable */
+	/* Keep timer active, odp_timer_return_tmo() will expire it when */
+	/* the timeout buffer is returned */
 }
 
-static void notify_function(union sigval sigval)
+static odp_timer_set_t timer_reset(odp_timer_pool *tp,
+				   odp_timer *tim,
+				   uint64_t abs_tck)
 {
-	uint64_t cur_tick;
-	timeout_t *tmo;
-	tick_t *tick;
-	timer_ring_t *timer;
+	assert(tim->user_buf == false);
+	if (odp_unlikely(abs_tck < tp->min_tick))
+		return ODP_TIMER_SET_TOOEARLY;
+	if (odp_unlikely(abs_tck > tp->max_tick))
+		return ODP_TIMER_SET_TOOLATE;
+
+	if (odp_likely(tp->shared))
+		odp_ticketlock_lock(&tp->lock);
+
+	if (odp_unlikely(tim->queue == ODP_QUEUE_INVALID))
+		ODP_ABORT("Invalid timer %p\n", tim);
+	if (odp_unlikely(tim->user_buf))
+		ODP_ABORT("Timer %p has user buffer\n", tim);
+	/* Increase timer tag to make any pending timeout stale */
+	tim->tag++;
+	/* Save requested timeout */
+	tim->req_tmo = abs_tck;
+	/* Update timer position in priority queue */
+	pq_reset_element(&tp->pq, &tim->pqelem, abs_tck);
+
+	if (odp_likely(tp->shared))
+		odp_ticketlock_unlock(&tp->lock);
+	return ODP_TIMER_SET_SUCCESS;
+}
 
-	timer = sigval.sival_ptr;
+static odp_timer_set_t timer_reset_w_buf(odp_timer_pool *tp,
+		odp_timer *tim,
+		uint64_t abs_tck,
+		odp_buffer_t user_buf)
+{
+	if (odp_unlikely(abs_tck < tp->min_tick))
+		return ODP_TIMER_SET_TOOEARLY;
+	if (odp_unlikely(abs_tck > tp->max_tick))
+		return ODP_TIMER_SET_TOOLATE;
+
+	if (odp_likely(tp->shared))
+		odp_ticketlock_lock(&tp->lock);
+
+	if (odp_unlikely(tim->queue == ODP_QUEUE_INVALID))
+		ODP_ABORT("Invalid timer %p\n", tim);
+	/* Increase timer tag to make any pending timeout stale */
+	tim->tag++;
+	/* Save requested timeout */
+	tim->req_tmo = abs_tck;
+	/* Set flag indicating presence of user defined buffer */
+	tim->user_buf = true;
+	/* Swap in new buffer, save any old buffer pointer */
+	odp_buffer_t old_buf = tim->tmo_buf;
+	tim->tmo_buf = user_buf;
+	/* Update timer position in priority queue */
+	pq_reset_element(&tp->pq, &tim->pqelem, abs_tck);
+
+	if (odp_likely(tp->shared))
+		odp_ticketlock_unlock(&tp->lock);
+
+	/* Free old buffer if present */
+	if (odp_unlikely(old_buf != ODP_BUFFER_INVALID))
+		odp_buffer_free(old_buf);
+	return ODP_TIMER_SET_SUCCESS;
+}
 
-	if (timer->active == 0) {
-		ODP_DBG("Timer (%u) not active\n", timer->timer_hdl);
-		return;
+static inline void timer_cancel(odp_timer_pool *tp,
+				odp_timer *tim)
+{
+	odp_buffer_t old_buf = ODP_BUFFER_INVALID;
+	if (odp_likely(tp->shared))
+		odp_ticketlock_lock(&tp->lock);
+
+	if (odp_unlikely(tim->queue == ODP_QUEUE_INVALID))
+		ODP_ABORT("Invalid timer %p\n", tim);
+	if (odp_unlikely(tim->user_buf)) {
+		/* Swap out old user buffer */
+		old_buf = tim->tmo_buf;
+		tim->tmo_buf = ODP_BUFFER_INVALID;
+		/* tim->user_buf stays true */
 	}
+	/* Else a normal timer (no user-defined buffer) */
+	/* Increase timer tag to make any pending timeout stale */
+	tim->tag++;
+	/* Clear requested timeout, mark timer inactive */
+	tim->req_tmo = INVALID_PRIORITY;
+	/* Remove timer from the priority queue */
+	pq_deactivate_element(&tp->pq, &tim->pqelem);
+
+	if (odp_likely(tp->shared))
+		odp_ticketlock_unlock(&tp->lock);
+	/* Free user-defined buffer if present */
+	if (odp_unlikely(old_buf != ODP_BUFFER_INVALID))
+		odp_buffer_free(old_buf);
+}
 
-	/* ODP_DBG("Tick\n"); */
-
-	cur_tick = timer->cur_tick++;
-
-	odp_sync_stores();
+static inline void timer_return(odp_timer_pool *tp,
+				odp_timer *tim,
+				odp_timer_tmo_t tmo,
+				const odp_timeout_hdr_t *tmo_hdr)
+{
+	odp_buffer_t tmo_buf = odp_buffer_from_timeout(tmo);
+	if (odp_likely(tp->shared))
+		odp_ticketlock_lock(&tp->lock);
+	if (odp_unlikely(tim->user_buf))
+		ODP_ABORT("Timer %p has user-defined buffer\n", tim);
+	if (odp_likely(tmo_hdr->gencnt == tim->gencnt)) {
+		assert(tim->tmo_buf == ODP_BUFFER_INVALID);
+		/* Save returned buffer for use when timer expires next time */
+		tim->tmo_buf = tmo_buf;
+		tmo_buf = ODP_BUFFER_INVALID;
+		/* Check if timer is active and should have expired */
+		if (odp_unlikely(tim->req_tmo != INVALID_PRIORITY &&
+				 tim->req_tmo <= tp->cur_tick)) {
+			/* Expire timer now since we have restored the timeout
+			   buffer */
+			timer_expire(tim);
+		}
+		/* Else timer inactive or expires in the future */
+	}
+	/* Else timeout orphaned, free buffer later */
+	if (odp_likely(tp->shared))
+		odp_ticketlock_unlock(&tp->lock);
+	if (odp_unlikely(tmo_buf != ODP_BUFFER_INVALID))
+		odp_buffer_free(tmo_buf);
+}
 
-	tick = &timer->tick[cur_tick % MAX_TICKS];
+/* Not part of the public API, hence not declared in odp_timer.h, but
+ * externally visible so it must be declared somewhere */
+unsigned odp_timer_pool_expire(odp_timer_pool_t tpid, uint64_t tick);
 
-	while ((tmo = rem_tmo(tick)) != NULL) {
-		odp_queue_t  queue;
-		odp_buffer_t buf;
+unsigned odp_timer_pool_expire(odp_timer_pool_t tpid, uint64_t tick)
+{
+	if (odp_likely(tpid->shared))
+		odp_ticketlock_lock(&tpid->lock);
+
+	unsigned nexp = 0;
+	odp_timer_t tim;
+	tpid->cur_tick = tick;
+	tpid->min_tick = tick + tpid->min_tmo_tck;
+	tpid->max_tick = tick + tpid->max_tmo_tck;
+	while ((tim = (odp_timer_t)pq_release_element(&tpid->pq, tick)) !=
+	       ODP_TIMER_INVALID) {
+		assert(get_prio(&tim->pqelem) <= tick);
+		timer_expire(tim);
+		nexp++;
+	}
 
-		queue = tmo->queue;
-		buf   = tmo->buf;
+	if (odp_likely(tpid->shared))
+		odp_ticketlock_unlock(&tpid->lock);
+	return nexp;
+}
 
-		if (buf != tmo->tmo_buf)
-			odp_buffer_free(tmo->tmo_buf);
+/******************************************************************************
+ * POSIX timer support
+ * Functions that use Linux/POSIX per-process timers and related facilities
+ *****************************************************************************/
 
-		odp_queue_enq(queue, buf);
-	}
+static void timer_notify(sigval_t sigval)
+{
+	odp_timer_pool *tp = (odp_timer_pool *)sigval.sival_ptr;
+	uint64_t new_tick = tp->cur_tick + 1;
+	(void)odp_timer_pool_expire(tp, new_tick);
 }
 
-static void timer_start(timer_ring_t *timer)
+static void timer_init(odp_timer_pool *tp)
 {
 	struct sigevent   sigev;
 	struct itimerspec ispec;
 	uint64_t res, sec, nsec;
 
-	ODP_DBG("\nTimer (%u) starts\n", timer->timer_hdl);
+	ODP_DBG("Creating POSIX timer for timer pool %s, period %"
+		PRIu64" ns\n", tp->name, tp->resolution_ns);
 
 	memset(&sigev, 0, sizeof(sigev));
 	memset(&ispec, 0, sizeof(ispec));
 
 	sigev.sigev_notify          = SIGEV_THREAD;
-	sigev.sigev_notify_function = notify_function;
-	sigev.sigev_value.sival_ptr = timer;
+	sigev.sigev_notify_function = timer_notify;
+	sigev.sigev_value.sival_ptr = tp;
 
-	if (timer_create(CLOCK_MONOTONIC, &sigev, &timer->timerid)) {
-		ODP_DBG("Timer create failed\n");
-		return;
-	}
+	if (timer_create(CLOCK_MONOTONIC, &sigev, &tp->timerid))
+		ODP_ABORT("timer_create() returned error %s\n",
+			  strerror(errno));
 
-	res  = timer->resolution_ns;
+	res  = tp->resolution_ns;
 	sec  = res / ODP_TIME_SEC;
-	nsec = res - sec*ODP_TIME_SEC;
+	nsec = res - sec * ODP_TIME_SEC;
 
 	ispec.it_interval.tv_sec  = (time_t)sec;
 	ispec.it_interval.tv_nsec = (long)nsec;
 	ispec.it_value.tv_sec     = (time_t)sec;
 	ispec.it_value.tv_nsec    = (long)nsec;
 
-	if (timer_settime(timer->timerid, 0, &ispec, NULL)) {
-		ODP_DBG("Timer set failed\n");
-		return;
-	}
-
-	return;
+	if (timer_settime(tp->timerid, 0, &ispec, NULL))
+		ODP_ABORT("timer_settime() returned error %s\n",
+			  strerror(errno));
 }
 
-int odp_timer_init_global(void)
+static void timer_exit(odp_timer_pool *tp)
 {
-	ODP_DBG("Timer init ...");
-
-	memset(&odp_timer, 0, sizeof(timer_global_t));
-
-	odp_spinlock_init(&odp_timer.lock);
-
-	ODP_DBG("done\n");
-
-	return 0;
+	if (timer_delete(tp->timerid) != 0)
+		ODP_ABORT("timer_delete() returned error %s\n",
+			  strerror(errno));
 }
 
-int odp_timer_disarm_all(void)
+/******************************************************************************
+ * Public API functions
+ * Parameter checking and error reporting
+ * No modification of internal state
+ *****************************************************************************/
+odp_timer_pool_t
+odp_timer_pool_create(const char *name,
+		      odp_buffer_pool_t buf_pool,
+		      uint64_t resolution_ns,
+		      uint64_t min_timeout,
+		      uint64_t max_timeout,
+		      uint32_t num_timers,
+		      bool shared,
+		      odp_timer_clk_src_t clk_src)
 {
-	int timers;
-	struct itimerspec ispec;
-
-	odp_spinlock_lock(&odp_timer.lock);
-
-	timers = odp_timer.num_timers;
-
-	ispec.it_interval.tv_sec  = 0;
-	ispec.it_interval.tv_nsec = 0;
-	ispec.it_value.tv_sec     = 0;
-	ispec.it_value.tv_nsec    = 0;
-
-	for (; timers >= 0; timers--) {
-		if (timer_settime(odp_timer.timer[timers].timerid,
-				  0, &ispec, NULL)) {
-			ODP_DBG("Timer reset failed\n");
-			odp_spinlock_unlock(&odp_timer.lock);
-			return -1;
-		}
-		odp_timer.num_timers--;
-	}
-
-	odp_spinlock_unlock(&odp_timer.lock);
-
-	return 0;
+	/* Verify that buffer pool can be used for timeouts */
+	odp_buffer_t buf = odp_buffer_alloc(buf_pool);
+	if (buf == ODP_BUFFER_INVALID)
+		ODP_ABORT("%s: Failed to allocate buffer\n", name);
+	if (odp_buffer_type(buf) != ODP_BUFFER_TYPE_TIMEOUT)
+		ODP_ABORT("%s: Buffer pool wrong type\n", name);
+	odp_buffer_free(buf);
+	odp_timer_pool_t tp = odp_timer_pool_new(name, buf_pool, resolution_ns,
+			      min_timeout, max_timeout, num_timers,
+			      shared, clk_src);
+	return tp;
 }
 
-odp_timer_t odp_timer_create(const char *name, odp_buffer_pool_t pool,
-			     uint64_t resolution_ns, uint64_t min_ns,
-			     uint64_t max_ns)
+void odp_timer_pool_start(void)
 {
-	uint32_t id;
-	timer_ring_t *timer;
-	odp_timer_t timer_hdl;
-	int i;
-	uint64_t max_ticks;
-	(void) name;
-
-	if (resolution_ns < MIN_RES)
-		resolution_ns = MIN_RES;
-
-	if (resolution_ns > MAX_RES)
-		resolution_ns = MAX_RES;
-
-	max_ticks = max_ns / resolution_ns;
-
-	if (max_ticks > MAX_TICKS) {
-		ODP_DBG("Maximum timeout too long: %"PRIu64" ticks\n",
-			max_ticks);
-		return ODP_TIMER_INVALID;
-	}
-
-	if (min_ns < resolution_ns) {
-		ODP_DBG("Min timeout %"PRIu64" ns < resolution %"PRIu64" ns\n",
-			min_ns, resolution_ns);
-		return ODP_TIMER_INVALID;
-	}
-
-	odp_spinlock_lock(&odp_timer.lock);
-
-	if (odp_timer.num_timers >= NUM_TIMERS) {
-		odp_spinlock_unlock(&odp_timer.lock);
-		ODP_DBG("All timers allocated\n");
-		return ODP_TIMER_INVALID;
-	}
-
-	for (id = 0; id < NUM_TIMERS; id++) {
-		if (odp_timer.timer[id].allocated == 0)
-			break;
-	}
-
-	timer = &odp_timer.timer[id];
-	timer->allocated = 1;
-	odp_timer.num_timers++;
-
-	odp_spinlock_unlock(&odp_timer.lock);
-
-	timer_hdl = id + 1;
-
-	timer->timer_hdl     = timer_hdl;
-	timer->pool          = pool;
-	timer->resolution_ns = resolution_ns;
-	timer->max_ticks     = MAX_TICKS;
-
-	for (i = 0; i < MAX_TICKS; i++) {
-		odp_spinlock_init(&timer->tick[i].lock);
-		timer->tick[i].list = NULL;
-	}
-
-	timer->active = 1;
-	odp_sync_stores();
-
-	timer_start(timer);
+	/* Nothing to do here, timer pools are started by the create call */
+}
 
-	return timer_hdl;
+void odp_timer_pool_destroy(odp_timer_pool_t tpid)
+{
+	odp_timer_pool_del(tpid);
 }
 
-odp_timer_tmo_t odp_timer_absolute_tmo(odp_timer_t timer_hdl, uint64_t tmo_tick,
-				       odp_queue_t queue, odp_buffer_t buf)
+uint64_t odp_timer_tick_to_ns(odp_timer_pool_t tpid, uint64_t ticks)
 {
-	int id;
-	uint64_t tick;
-	uint64_t cur_tick;
-	timeout_t *new_tmo;
-	odp_buffer_t tmo_buf;
-	odp_timeout_hdr_t *tmo_hdr;
-	timer_ring_t *timer;
+	return ticks * tpid->resolution_ns;
+}
 
-	id = (int)timer_hdl - 1;
-	timer = &odp_timer.timer[id];
+uint64_t odp_timer_ns_to_tick(odp_timer_pool_t tpid, uint64_t ns)
+{
+	return (uint64_t)(ns / tpid->resolution_ns);
+}
 
-	cur_tick = timer->cur_tick;
-	if (tmo_tick <= cur_tick) {
-		ODP_DBG("timeout too close\n");
-		return ODP_TIMER_TMO_INVALID;
-	}
+uint64_t odp_timer_current_tick(odp_timer_pool_t tpid)
+{
+	return tpid->cur_tick;
+}
 
-	if ((tmo_tick - cur_tick) > MAX_TICKS) {
-		ODP_DBG("timeout too far: cur %"PRIu64" tmo %"PRIu64"\n",
-			cur_tick, tmo_tick);
-		return ODP_TIMER_TMO_INVALID;
+uintptr_t odp_timer_pool_query_conf(odp_timer_pool_t tpid,
+				    odp_timer_pool_conf_t item)
+{
+	switch (item) {
+	case ODP_TIMER_NAME:
+		return (uintptr_t)(tpid->name);
+	case ODP_TIMER_RESOLUTION:
+		return tpid->resolution_ns;
+	case ODP_TIMER_MIN_TICKS:
+		return tpid->min_tmo_tck;
+	case ODP_TIMER_MAX_TICKS:
+		return tpid->max_tmo_tck;
+	case ODP_TIMER_NUM_TIMERS:
+		return tpid->max_timers;
+	case ODP_TIMER_SHARED:
+		return tpid->shared;
+	default:
+		return 0;
 	}
+}
 
-	tick = tmo_tick % MAX_TICKS;
-
-	tmo_buf = odp_buffer_alloc(timer->pool);
-	if (tmo_buf == ODP_BUFFER_INVALID) {
-		ODP_DBG("tmo buffer alloc failed\n");
-		return ODP_TIMER_TMO_INVALID;
+odp_timer_t odp_timer_alloc(odp_timer_pool_t tpid,
+			    odp_queue_t queue,
+			    void *user_ptr)
+{
+	/* We check this because ODP_QUEUE_INVALID is used */
+	/* to indicate a free timer */
+	if (odp_unlikely(queue == ODP_QUEUE_INVALID))
+		ODP_ABORT("%s: Invalid queue handle\n", tpid->name);
+	odp_buffer_t tmo_buf = odp_buffer_alloc(tpid->buf_pool);
+	if (odp_likely(tmo_buf != ODP_BUFFER_INVALID)) {
+		odp_timer *tim = timer_alloc(tpid, queue, user_ptr, tmo_buf);
+		if (odp_likely(tim != ODP_TIMER_INVALID)) {
+			/* Success */
+			assert(tim->queue != ODP_QUEUE_INVALID);
+			return tim;
+		}
+		odp_buffer_free(tmo_buf);
 	}
+	/* Else failed to allocate timeout event */
+	/* errno set by odp_buffer_alloc() or timer_alloc () */
+	return ODP_TIMER_INVALID;
+}
 
-	tmo_hdr = odp_timeout_hdr((odp_timeout_t) tmo_buf);
-	new_tmo = &tmo_hdr->meta;
-
-	new_tmo->timer_id = id;
-	new_tmo->tick     = (int)tick;
-	new_tmo->tmo_tick = tmo_tick;
-	new_tmo->queue    = queue;
-	new_tmo->tmo_buf  = tmo_buf;
-
-	if (buf != ODP_BUFFER_INVALID)
-		new_tmo->buf = buf;
-	else
-		new_tmo->buf = tmo_buf;
-
-	add_tmo(&timer->tick[tick], new_tmo);
-
-	return tmo_buf;
+void odp_timer_free(odp_timer_t tim)
+{
+	odp_timer_pool *tp = (odp_timer_pool *)get_pq(&tim->pqelem);
+	timer_free(tp, tim);
 }
 
-uint64_t odp_timer_tick_to_ns(odp_timer_t timer_hdl, uint64_t ticks)
+odp_timer_set_t odp_timer_set_abs_w_buf(odp_timer_t tim,
+					uint64_t abs_tck,
+					odp_buffer_t user_buf)
 {
-	uint32_t id;
+	odp_timer_pool *tp = (odp_timer_pool *)get_pq(&tim->pqelem);
+	odp_timer_set_t rc = timer_reset_w_buf(tp, tim, abs_tck, user_buf);
+	return rc;
+}
 
-	id = timer_hdl - 1;
-	return ticks * odp_timer.timer[id].resolution_ns;
+odp_timer_set_t odp_timer_set_abs(odp_timer_t tim, uint64_t abs_tck)
+{
+	odp_timer_pool *tp = (odp_timer_pool *)get_pq(&tim->pqelem);
+	odp_timer_set_t rc = timer_reset(tp, tim, abs_tck);
+	return rc;
 }
 
-uint64_t odp_timer_ns_to_tick(odp_timer_t timer_hdl, uint64_t ns)
+odp_timer_set_t odp_timer_set_rel_w_buf(odp_timer_t tim,
+					uint64_t rel_tck,
+					odp_buffer_t user_buf)
 {
-	uint32_t id;
+	odp_timer_pool *tp = (odp_timer_pool *)get_pq(&tim->pqelem);
+	odp_timer_set_t rc = timer_reset_w_buf(tp, tim, tp->cur_tick + rel_tck,
+					       user_buf);
+	return rc;
+}
 
-	id = timer_hdl - 1;
-	return ns / odp_timer.timer[id].resolution_ns;
+odp_timer_set_t odp_timer_set_rel(odp_timer_t tim, uint64_t rel_tck)
+{
+	odp_timer_pool *tp = (odp_timer_pool *)get_pq(&tim->pqelem);
+	odp_timer_set_t rc = timer_reset(tp, tim, tp->cur_tick + rel_tck);
+	return rc;
 }
 
-uint64_t odp_timer_resolution(odp_timer_t timer_hdl)
+void odp_timer_cancel(odp_timer_t tim)
 {
-	uint32_t id;
+	odp_timer_pool *tp = (odp_timer_pool *)get_pq(&tim->pqelem);
+	timer_cancel(tp, tim);
+}
 
-	id = timer_hdl - 1;
-	return odp_timer.timer[id].resolution_ns;
+void odp_timer_return_tmo(odp_timer_tmo_t tmo)
+{
+	const odp_timeout_hdr_t *tmo_hdr = odp_tmo_to_hdr(tmo);
+	odp_timer *parent_tim = tmo_hdr->timer;
+	odp_timer_pool *tp = (odp_timer_pool *)get_pq(&parent_tim->pqelem);
+	timer_return(tp, parent_tim, tmo, tmo_hdr);
 }
 
-uint64_t odp_timer_maximum_tmo(odp_timer_t timer_hdl)
+odp_timer_tmo_status_t odp_timer_tmo_status(odp_timer_tmo_t tmo)
 {
-	uint32_t id;
+	const odp_timeout_hdr_t *tmo_hdr = odp_tmo_to_hdr(tmo);
+	odp_timer *parent_tim = tmo_hdr->timer;
 
-	id = timer_hdl - 1;
-	return odp_timer.timer[id].max_ticks;
+	if (odp_unlikely(tmo_hdr->gencnt != parent_tim->gencnt)) {
+		/* Generation counters differ => timer has been freed */
+		return ODP_TMO_ORPHAN;
+	}
+	/* Else generation counters match => parent timer exists */
+
+	if (odp_likely(parent_tim->tag == tmo_hdr->tag))
+		return ODP_TMO_FRESH;
+	else
+		return ODP_TMO_STALE;
 }
 
-uint64_t odp_timer_current_tick(odp_timer_t timer_hdl)
+odp_timer_t odp_timer_handle(odp_timer_tmo_t tmo)
 {
-	uint32_t id;
+	odp_timeout_hdr_t *tmo_hdr = odp_tmo_to_hdr(tmo);
+	odp_timer_t parent_tim = tmo_hdr->timer;
+	if (odp_likely(tmo_hdr->gencnt == parent_tim->gencnt))
+		return parent_tim;
+	else
+		return ODP_TIMER_INVALID;
+}
 
-	id = timer_hdl - 1;
-	return odp_timer.timer[id].cur_tick;
+uint64_t odp_timer_expiration(odp_timer_tmo_t tmo)
+{
+	odp_timeout_hdr_t *tmo_hdr = odp_tmo_to_hdr(tmo);
+	return tmo_hdr->expiration;
 }
 
-odp_timeout_t odp_timeout_from_buffer(odp_buffer_t buf)
+void *odp_timer_userptr(odp_timer_tmo_t tmo)
 {
-	return (odp_timeout_t) buf;
+	odp_timeout_hdr_t *tmo_hdr = odp_tmo_to_hdr(tmo);
+	return tmo_hdr->user_ptr;
 }
 
-uint64_t odp_timeout_tick(odp_timeout_t tmo)
+int odp_timer_init_global(void)
 {
-	odp_timeout_hdr_t *tmo_hdr = odp_timeout_hdr(tmo);
-	return tmo_hdr->meta.tmo_tick;
+	return 0;
 }
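
For reference, a minimal end-to-end sketch of the new public timer API as exercised
by the ping test below (illustrative only; the pool and tick parameters are made up
and error handling is omitted):

#include <odp.h>

/* 'tmo_pool' must be a buffer pool of type ODP_BUFFER_TYPE_TIMEOUT and
 * 'tmoq' a queue that the application polls for timeouts */
static void timer_api_example(odp_buffer_pool_t tmo_pool, odp_queue_t tmoq)
{
	odp_timer_pool_t tp;
	odp_timer_t tim;
	odp_buffer_t buf;
	odp_timer_tmo_t tmo;

	tp = odp_timer_pool_create("example", tmo_pool,
				   1000000,	/* 1 ms resolution */
				   1000000,	/* min timeout 1 ms */
				   10000000000ULL, /* max timeout 10 s */
				   10, false, ODP_CLOCK_CPU);
	odp_timer_pool_start();

	tim = odp_timer_alloc(tp, tmoq, NULL);
	odp_timer_set_rel(tim, odp_timer_ns_to_tick(tp, 5000000));

	/* Consume the timeout when it is delivered on the queue */
	while ((buf = odp_queue_deq(tmoq)) == ODP_BUFFER_INVALID)
		;
	tmo = odp_timeout_from_buffer(buf);
	if (odp_timer_tmo_status(tmo) == ODP_TMO_FRESH) {
		/* handle the expiration, e.g. read odp_timer_expiration(tmo) */
	}
	odp_timer_return_tmo(tmo);	/* always hand the timeout back */

	odp_timer_free(tim);
	odp_timer_pool_destroy(tp);
}
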
diff --git a/test/api_test/odp_timer_ping.c b/test/api_test/odp_timer_ping.c
index 7406a45..2617b5c 100644
--- a/test/api_test/odp_timer_ping.c
+++ b/test/api_test/odp_timer_ping.c
@@ -20,6 +20,8 @@ 
  *    Otherwise timeout may happen bcz of slow nw speed
  */
 
+#include <assert.h>
+#include <stdlib.h>
 #include <unistd.h>
 #include <fcntl.h>
 #include <errno.h>
@@ -41,14 +43,15 @@ 
 #define MSG_POOL_SIZE         (4*1024*1024)
 #define BUF_SIZE		8
 #define PING_CNT	10
-#define PING_THRD	2	/* Send and Rx Ping thread */
+#define PING_THRD	2	/* send_ping and rx_ping threads */
 
 /* Nanoseconds */
 #define RESUS	10000
 #define MINUS	10000
 #define MAXUS	10000000
 
-static odp_timer_t test_timer_ping;
+static odp_timer_pool_t tp;
+static odp_timer_t test_timer_ping = ODP_TIMER_INVALID;
 static odp_timer_tmo_t test_ping_tmo;
 
 #define PKTSIZE      64
@@ -128,15 +131,7 @@  static int listen_to_pingack(void)
 					 (socklen_t *)&len);
 			if (bytes > 0) {
 				/* pkt rxvd therefore cancel the timeout */
-				if (odp_timer_cancel_tmo(test_timer_ping,
-							 test_ping_tmo) != 0) {
-					ODP_ERR("cancel_tmo failed ..exiting listner thread\n");
-					/* avoid exiting from here even if tmo
-					 * failed for current ping,
-					 * allow subsequent ping_rx request */
-					err = -1;
-
-				}
+				odp_timer_cancel(test_timer_ping);
 				/* cruel bad hack used for sender, listner ipc..
 				 * euwww.. FIXME ..
 				 */
@@ -160,7 +155,6 @@  static int send_ping_request(struct sockaddr_in *addr)
 
 	uint64_t tick;
 	odp_queue_t queue;
-	odp_buffer_t buf;
 
 	int err = 0;
 
@@ -184,8 +178,16 @@  static int send_ping_request(struct sockaddr_in *addr)
 
 	/* get the ping queue */
 	queue = odp_queue_lookup("ping_timer_queue");
+	test_timer_ping = odp_timer_alloc(tp, queue, NULL);
+	if (test_timer_ping == ODP_TIMER_INVALID) {
+		ODP_ERR("Failed to allocate timer.\n");
+		err = -1;
+		goto err;
+	}
 
 	for (i = 0; i < PING_CNT; i++) {
+		odp_buffer_t buf;
+		odp_timer_tmo_t tmo;
 		/* prepare icmp pkt */
 		bzero(&pckt, sizeof(pckt));
 		pckt.hdr.type = ICMP_ECHO;
@@ -209,12 +211,10 @@  static int send_ping_request(struct sockaddr_in *addr)
 		printf(" icmp_sent msg_cnt %d\n", i);
 
 		/* arm the timer */
-		tick = odp_timer_current_tick(test_timer_ping);
+		tick = odp_timer_current_tick(tp);
 
 		tick += 1000;
-		test_ping_tmo = odp_timer_absolute_tmo(test_timer_ping, tick,
-						       queue,
-						       ODP_BUFFER_INVALID);
+		odp_timer_set_abs(test_timer_ping, tick);
 		/* wait for timeout event */
 		while ((buf = odp_queue_deq(queue)) == ODP_BUFFER_INVALID) {
 			/* flag true means ack rxvd.. a cruel hack as I
@@ -229,17 +229,28 @@  static int send_ping_request(struct sockaddr_in *addr)
 				break;
 			}
 		}
+		if (buf == ODP_BUFFER_INVALID)
+			continue; /* ack received, timer cancelled, no timeout */
+		assert(odp_buffer_type(buf) == ODP_BUFFER_TYPE_TIMEOUT);
+		tmo = odp_timeout_from_buffer(buf);
 
-		/* free tmo_buf for timeout case */
-		if (buf != ODP_BUFFER_INVALID) {
-			ODP_DBG(" timeout msg_cnt [%i] \n", i);
+		switch (odp_timer_tmo_status(tmo)) {
+		case ODP_TMO_FRESH:
+			ODP_DBG(" timeout msg_cnt [%i]\n", i);
 			/* so to avoid seg fault commented */
-			odp_buffer_free(buf);
 			err = -1;
+			break;
+		case ODP_TMO_STALE:
+			/* Ignore stale timeouts */
+			break;
+		case ODP_TMO_ORPHAN:
+			ODP_ERR("Received orphaned timeout!\n");
+			abort();
 		}
+		odp_timer_return_tmo(tmo);
 	}
 
 err:
+	if (test_timer_ping != ODP_TIMER_INVALID)
+		odp_timer_free(test_timer_ping);
 	return err;
 }
 
@@ -340,9 +351,9 @@  int main(int argc ODP_UNUSED, char *argv[] ODP_UNUSED)
 	pool = odp_buffer_pool_create("msg_pool", pool_base, MSG_POOL_SIZE,
 				      BUF_SIZE,
 				      ODP_CACHE_LINE_SIZE,
-				      ODP_BUFFER_TYPE_RAW);
+				      ODP_BUFFER_TYPE_TIMEOUT);
 	if (pool == ODP_BUFFER_POOL_INVALID) {
-		ODP_ERR("Pool create failed.\n");
+		ODP_ERR("Buffer pool create failed.\n");
 		return -1;
 	}
 
@@ -357,15 +368,19 @@  int main(int argc ODP_UNUSED, char *argv[] ODP_UNUSED)
 		return -1;
 	}
 
-	test_timer_ping = odp_timer_create("ping_timer", pool,
-					   RESUS*ODP_TIME_USEC,
-					   MINUS*ODP_TIME_USEC,
-					   MAXUS*ODP_TIME_USEC);
-
-	if (test_timer_ping == ODP_TIMER_INVALID) {
-		ODP_ERR("Timer create failed.\n");
+	/*
+	 * Create timer pool
+	 */
+	tp = odp_timer_pool_create("timer_pool", pool,
+				   RESUS*ODP_TIME_USEC,
+				   MINUS*ODP_TIME_USEC,
+				   MAXUS*ODP_TIME_USEC,
+				   1, false, ODP_CLOCK_CPU);
+	if (tp == ODP_TIMER_POOL_INVALID) {
+		ODP_ERR("Timer pool create failed.\n");
 		return -1;
 	}
+	odp_timer_pool_start();
 
 	odp_shm_print_all();