
[PATCHv2,2/3] api: odp_timer.h: updated API, lock-less implementation

Message ID 1418769980-8244-3-git-send-email-ola.liljedahl@linaro.org
State New

Commit Message

Ola Liljedahl Dec. 16, 2014, 10:46 p.m. UTC
The timer API is updated. A major change is that timers are now allocated and
freed separately from timeouts being set and cancelled. The lifetime of a timer
normally corresponds to the lifetime of the associated stateful flow, while the
lifetime of a timeout corresponds to individual packets being transmitted and
received.
The reference timer implementation is lock-less on platforms with support
for 128-bit (16-byte) atomic exchange and CAS operations. Otherwise a
lock-based implementation (using as many locks as desired, no global lock) is
used, but some operations (e.g. a reset that reuses the existing timeout
buffer) may still be lock-less on some architectures.
The example example/timer/odp_timer_test.c has been updated accordingly.

Signed-off-by: Ola Liljedahl <ola.liljedahl@linaro.org>
---
(This document/code contribution attached is provided under the terms of
agreement LES-LTM-21309)
 example/timer/odp_timer_test.c                     |  173 +--
 platform/linux-generic/include/api/odp_timer.h     |  474 +++++++--
 .../linux-generic/include/odp_timer_internal.h     |   60 +-
 platform/linux-generic/odp_timer.c                 | 1098 ++++++++++++++------
 4 files changed, 1324 insertions(+), 481 deletions(-)
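
For orientation, the new life cycle in a nutshell - a minimal sketch based on
the API introduced below (tmo_pool, queue and flow_ctx are placeholders, error
handling omitted):

//Once per application: create and start a timer pool
odp_timer_pool_t tp =
    odp_timer_pool_create("example", tmo_pool,
                          1000000,              //resolution 1ms
                          0,                    //min tmo
                          7200 * 1000000000ULL, //max tmo 2h
                          1000,                 //num_timers
                          1,                    //shared
                          ODP_CLOCK_CPU);
odp_timer_pool_start();

//Once per flow: allocate a timer and a timeout buffer
odp_timer_t tim = odp_timer_alloc(tp, queue, flow_ctx);
odp_buffer_t tmo_buf = odp_buffer_alloc(tmo_pool);

//Arm the timer, handing it the timeout buffer; save the returned tick so a
//received timeout can later be checked for freshness
uint64_t expected = odp_timer_set_rel(tim,
                                      odp_timer_ns_to_tick(tp, 3000000000ULL),
                                      &tmo_buf);
//Subsequent re-arms may pass NULL to reuse the buffer the timer already holds

//On flow teardown: free the timer and reclaim any returned timeout buffer
(void)odp_timer_free(tim, &tmo_buf);
if (tmo_buf != ODP_BUFFER_INVALID)
        odp_buffer_free(tmo_buf);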

Comments

Maxim Uvarov Dec. 17, 2014, 10:53 p.m. UTC | #1
Mike, please suggest how to move this example to a different file and link it
with the documentation.
I think it needs to be in the ./example/timer/ directory and be built each
time. Otherwise the documentation and the code will very soon get out of sync.

Maxim.

On 12/17/2014 01:46 AM, Ola Liljedahl wrote:
> --- a/platform/linux-generic/include/api/odp_timer.h
> +++ b/platform/linux-generic/include/api/odp_timer.h
> @@ -8,9 +8,211 @@
>   /**
>    * @file
>    *
> - * ODP timer
> + * ODP timer service
>    */
>   
> +/** Example #1 Retransmission timer (e.g. for reliable connections)
> + @code
> +
> +//Create timer pool for reliable connections
> +#define SEC 1000000000ULL //1s expressed in nanoseconds
> +odp_timer_pool_t tcp_tpid =
> +    odp_timer_pool_create("TCP",
> +			  buffer_pool,
> +			  1000000,//resolution 1ms
> +			  0,//min tmo
> +			  7200 * SEC,//max tmo length 2hours
> +			  40000,//num_timers
> +			  true,//shared
> +			  ODP_CLOCK_CPU
> +			 );
> +if (tcp_tpid == ODP_TIMER_POOL_INVALID)
> +{
> +	//Failed to create timer pool => fatal error
> +}
> +
> +
> +//Setting up a new connection
> +//Allocate retransmission timeout (identical for supervision timeout)
> +//The user pointer points back to the connection context
> +conn->ret_tim = odp_timer_alloc(tcp_tpid, queue, conn);
> +//Check if all resources were successfully allocated
> +if (conn->ret_tim == ODP_TIMER_INVALID)
> +{
> +	//Failed to allocate all resources for connection => tear down
> +	//Tear down connection
> +	...
> +	return false;
> +}
> +//All necessary resources successfully allocated
> +//Compute initi
Ola Liljedahl Dec. 18, 2014, 1:13 p.m. UTC | #2
On 17 December 2014 at 23:53, Maxim Uvarov <maxim.uvarov@linaro.org> wrote:
> Mike, please suggest how to move this example to a different file and link it
> with the documentation.
What example are you thinking of? The code snippets in odp_timer.h?
Those are not real working examples; turning them into ones would take a lot
more boilerplate code (e.g. creating buffer pools and queues) that is not
relevant here.

> I think it needs to be in the ./example/timer/ directory and be built each
> time. Otherwise the documentation and the code will very soon get out of
> sync.
Well, those code snippets are in the same header file that defines the API in
question.

I wouldn't mind morphing the current example/timer/odp_timer_test.c (which
should be renamed to odp_timer_example.c) into a working example that
exemplifies some of the cases in odp_timer.h. If this turns out well,
I can consider removing the code snippets from odp_timer.h.

>
> Maxim.
>
>
> On 12/17/2014 01:46 AM, Ola Liljedahl wrote:
>>
>> --- a/platform/linux-generic/include/api/odp_timer.h
>> +++ b/platform/linux-generic/include/api/odp_timer.h
>> @@ -8,9 +8,211 @@
>>   /**
>>    * @file
>>    *
>> - * ODP timer
>> + * ODP timer service
>>    */
>>   +/** Example #1 Retransmission timer (e.g. for reliable connections)
>> + @code
>> +
>> +//Create timer pool for reliable connections
>> +#define SEC 1000000000ULL //1s expressed in nanoseconds
>> +odp_timer_pool_t tcp_tpid =
>> +    odp_timer_pool_create("TCP",
>> +                         buffer_pool,
>> +                         1000000,//resolution 1ms
>> +                         0,//min tmo
>> +                         7200 * SEC,//max tmo length 2hours
>> +                         40000,//num_timers
>> +                         true,//shared
>> +                         ODP_CLOCK_CPU
>> +                        );
>> +if (tcp_tpid == ODP_TIMER_POOL_INVALID)
>> +{
>> +       //Failed to create timer pool => fatal error
>> +}
>> +
>> +
>> +//Setting up a new connection
>> +//Allocate retransmission timeout (identical for supervision timeout)
>> +//The user pointer points back to the connection context
>> +conn->ret_tim = odp_timer_alloc(tcp_tpid, queue, conn);
>> +//Check if all resources were successfully allocated
>> +if (conn->ret_tim == ODP_TIMER_INVALID)
>> +{
>> +       //Failed to allocate all resources for connection => tear down
>> +       //Tear down connection
>> +       ...
>> +       return false;
>> +}
>> +//All necessary resources successfully allocated
>> +//Compute initi
>
>
>
> _______________________________________________
> lng-odp mailing list
> lng-odp@lists.linaro.org
> http://lists.linaro.org/mailman/listinfo/lng-odp
Ola Liljedahl Dec. 18, 2014, 7:25 p.m. UTC | #3
On 18 December 2014 at 16:25, Savolainen, Petri (NSN - FI/Espoo)
<petri.savolainen@nsn.com> wrote:
> After a quick look into it...
>
>> -----Original Message-----
>> From: lng-odp-bounces@lists.linaro.org [mailto:lng-odp-
>> bounces@lists.linaro.org] On Behalf Of ext Ola Liljedahl
>> Sent: Wednesday, December 17, 2014 12:46 AM
>> To: lng-odp@lists.linaro.org
>> Subject: [lng-odp] [PATCHv2 2/3] api: odp_timer.h: updated API, lock-less
>> implementation
>>
>> The timer API is updated. A major change is that timers are allocated and
>> freed
>> separately from timeouts being set and cancelled. The life-length of a
>> timer
>> normally corresponds to the life-length of the associated stateful flow
>> while
>> the life-length of a timeout corresponds to individual packets being
>> transmitted and received.
>> The reference timer implementation is lock-less for platforms with support
>> for 128-bit (16-byte) atomic exchange and CAS operations. Otherwise a
>> lock-based
>> implementation (using as many locks as desired, no global lock) is used
>> but
>> some operations (e.g. reset re-using existing timeout buffer) may still be
>> lock-less on some architectures.
>> Updated the example example/timer/odp_timer_test.c.
>>
>> Signed-off-by: Ola Liljedahl <ola.liljedahl@linaro.org>
>> ---
>> (This document/code contribution attached is provided under the terms of
>> agreement LES-LTM-21309)
>>  example/timer/odp_timer_test.c                     |  173 +--
>>  platform/linux-generic/include/api/odp_timer.h     |  474 +++++++--
>>  .../linux-generic/include/odp_timer_internal.h     |   60 +-
>>  platform/linux-generic/odp_timer.c                 | 1098 ++++++++++++++-
>> -----
>>  4 files changed, 1324 insertions(+), 481 deletions(-)
>>
>> diff --git a/example/timer/odp_timer_test.c
>> b/example/timer/odp_timer_test.c
>> index 972bc96..33ef219 100644
>> --- a/example/timer/odp_timer_test.c
>> +++ b/example/timer/odp_timer_test.c
>> @@ -26,7 +26,7 @@
>>
>>
>>  #define MAX_WORKERS           32            /**< Max worker threads */
>> -#define MSG_POOL_SIZE         (4*1024*1024) /**< Message pool size */
>> +#define MSG_POOL_SIZE         (4*1024) /**< Message pool size */
>>
>>
>>  /** Test arguments */
>> @@ -43,67 +43,116 @@ typedef struct {
>>  /** @private Barrier for test synchronisation */
>>  static odp_barrier_t test_barrier;
>>
>> -/** @private Timer handle*/
>> -static odp_timer_t test_timer;
>> +/** @private Buffer pool handle */
>> +static odp_buffer_pool_t pool;
>>
>> +/** @private Timer pool handle */
>> +static odp_timer_pool_t tp;
>> +
>> +/** @private Timeout status ASCII strings */
>> +static const char *const status2str[] = {
>> +     "fresh", "stale", "orphaned"
>> +};
>> +
>> +/** @private Timer set status ASCII strings */
>> +static const char *timerset2str(uint64_t val)
>> +{
>> +     switch (val) {
>> +     case ODP_TICK_TOOEARLY:
>> +             return "tooearly";
>> +     case ODP_TICK_TOOLATE:
>> +             return "toolate";
>> +     case ODP_TICK_INVALID:
>> +             return "error";
>> +     default:
>> +             return "success";
>> +     }
>> +};
>> +
>> +/** @private Helper struct for timers */
>> +struct test_timer {
>> +     odp_timer_t tim;
>> +     odp_buffer_t buf;
>> +     uint64_t tick;
>> +};
>> +
>> +/** @private Array of all timer helper structs */
>> +static struct test_timer tt[256];
>>
>>  /** @private test timeout */
>>  static void test_abs_timeouts(int thr, test_args_t *args)
>>  {
>> -     uint64_t tick;
>>       uint64_t period;
>>       uint64_t period_ns;
>>       odp_queue_t queue;
>> -     odp_buffer_t buf;
>> -     int num;
>> +     int remain = args->tmo_count;
>> +     uint64_t tick;
>> +     struct test_timer *ttp;
>>
>>       EXAMPLE_DBG("  [%i] test_timeouts\n", thr);
>>
>>       queue = odp_queue_lookup("timer_queue");
>>
>>       period_ns = args->period_us*ODP_TIME_USEC;
>> -     period    = odp_timer_ns_to_tick(test_timer, period_ns);
>> +     period    = odp_timer_ns_to_tick(tp, period_ns);
>>
>>       EXAMPLE_DBG("  [%i] period %"PRIu64" ticks,  %"PRIu64" ns\n", thr,
>>                   period, period_ns);
>>
>> -     tick = odp_timer_current_tick(test_timer);
>> +     EXAMPLE_DBG("  [%i] current tick %"PRIu64"\n", thr,
>> +                 odp_timer_current_tick(tp));
>>
>> -     EXAMPLE_DBG("  [%i] current tick %"PRIu64"\n", thr, tick);
>> -
>> -     tick += period;
>> -
>> -     if (odp_timer_absolute_tmo(test_timer, tick, queue,
>> ODP_BUFFER_INVALID)
>> -         == ODP_TIMER_TMO_INVALID){
>> -             EXAMPLE_DBG("Timeout request failed\n");
>> +     ttp = &tt[thr - 1]; /* Thread starts at 1 */
>> +     ttp->tim = odp_timer_alloc(tp, queue, ttp);
>> +     if (ttp->tim == ODP_TIMER_INVALID) {
>> +             EXAMPLE_ERR("Failed to allocate timer\n");
>>               return;
>>       }
>> +     ttp->buf = odp_buffer_alloc(pool);
>> +     if (ttp->buf == ODP_BUFFER_INVALID) {
>> +             EXAMPLE_ERR("Failed to allocate buffer\n");
>> +             return;
>> +     }
>> +     tick = odp_timer_current_tick(tp);
>>
>> -     num = args->tmo_count;
>> -
>> -     while (1) {
>> -             odp_timeout_t tmo;
>> -
>> -             buf = odp_schedule_one(&queue, ODP_SCHED_WAIT);
>> -
>> -             tmo  = odp_timeout_from_buffer(buf);
>> -             tick = odp_timeout_tick(tmo);
>> -
>> -             EXAMPLE_DBG("  [%i] timeout, tick %"PRIu64"\n", thr, tick);
>> -
>> -             odp_buffer_free(buf);
>> -
>> -             num--;
>> -
>> -             if (num == 0)
>> -                     break;
>> +     while (remain != 0) {
>> +             odp_buffer_t buf;
>>
>>               tick += period;
>> +             ttp->tick = odp_timer_set_abs(ttp->tim, tick, &ttp->buf);
>> +             if (odp_unlikely(ttp->tick == ODP_TICK_TOOEARLY ||
>> +                              ttp->tick == ODP_TICK_TOOLATE ||
>> +                              ttp->tick == ODP_TICK_INVALID)) {
>> +                     /* Too early or too late timeout requested */
>> +                     EXAMPLE_ABORT("odp_timer_set_abs() failed: %s\n",
>> +                                   timerset2str(ttp->tick));
>> +             }
>>
>> -             odp_timer_absolute_tmo(test_timer, tick,
>> -                                    queue, ODP_BUFFER_INVALID);
>> +             /* Get the next expired timeout */
>> +             buf = odp_schedule_one(&queue, ODP_SCHED_WAIT);
>> +             if (odp_timer_tmo_metadata(buf, NULL, &tick, (void **)&ttp)) {
>> +                     ttp->buf = buf;
>> +                     if (odp_likely(tick != ttp->tick)) {
>
> Isn't this a potential race condition introduced by the API? I.e. if the same operations run on multiple threads and the returned tick value is used for checking tmo validity (STALE vs. FRESH).
Yes. But as described in a private email, earlier you did not approve
of the alternative design where the timeout is checked against the
timer for freshness/staleness. You have to choose one or the other;
there is no free lunch.

The API could handle such a race condition (by keeping all mutable
state in the timer and ensuring atomicity) but having two threads
concurrently access the same resource(s) (what about other resources
in the application?) without synchronization seems like a bad idea
anyway.

>
> Thread A:
> // HW timer is set, timer expires,
> // tmo buf is scheduled to thread B, before this function returns (due to an interrupt, etc)
> ... = odp_timer_set_abs(ttp->tim, tick, &ttp->buf);
>
>
> Thread B:
> // receives and handles tmo
> buf = odp_schedule_one(&queue, ODP_SCHED_WAIT);
>                 if (odp_timer_tmo_metadata(buf, NULL, &tick, (void **)&ttp)) {
>                         ttp->buf = buf;
>
> // ttp->tick is not set yet
>                         if (odp_likely(tick != ttp->tick)) { ...
>
>
> Thread A:
>
> // odp_timer_set_abs() returns and sets ttp->tick
> ttp->tick = ...
>
>
>
>
>
>
>
>> --- a/platform/linux-generic/include/api/odp_timer.h
>> +++ b/platform/linux-generic/include/api/odp_timer.h
>> @@ -8,9 +8,211 @@
>>  /**
>>   * @file
>>   *
>> - * ODP timer
>> + * ODP timer service
>>   */
>>
>> +/** Example #1 Retransmission timer (e.g. for reliable connections)
>> + @code
>> +
>> +//Create timer pool for reliable connections
>> +#define SEC 1000000000ULL //1s expressed in nanoseconds
>> +odp_timer_pool_t tcp_tpid =
>> +    odp_timer_pool_create("TCP",
>> +                       buffer_pool,
>> +                       1000000,//resolution 1ms
>> +                       0,//min tmo
>> +                       7200 * SEC,//max tmo length 2hours
>> +                       40000,//num_timers
>> +                       true,//shared
>> +                       ODP_CLOCK_CPU
>> +                      );
>> +if (tcp_tpid == ODP_TIMER_POOL_INVALID)
>> +{
>> +     //Failed to create timer pool => fatal error
>> +}
>> +
>
> It's better to remove all this pseudo example code from the API file and
> add one or two real examples that can be built, run and verified to
> actually work and be in sync with the current API version.
Except those real examples will have to include a lot of boilerplate
code (creating buffer pools and queues etc.) which is not important in
order to showcase how to use the timer API. But I don't see why one
precludes the other.

>
>>
>> +/**
>> + * Return values of timer set calls.
>> + */
>> +/**
>> + * Timer set operation failed, expiration too early.
>> + * Either retry with a later expiration time or process the timeout
>> + * immediately. */
>> +#define ODP_TICK_TOOEARLY 0xFFFFFFFFFFFFFFFDULL
>> +/**
>> + * Timer set operation failed, expiration too late.
>> + * Truncate the expiration time against the maximum timeout for the
>> + * timer pool. */
>> +#define ODP_TICK_TOOLATE  0xFFFFFFFFFFFFFFFEULL
>> +/**
>> + * Timer set operation failed because not timeout buffer present or
>> specified.
>> + * This value is also return from odp_timer_cancel() and
>> odp_timer_free().
>> + */
>> +#define ODP_TICK_INVALID  0xFFFFFFFFFFFFFFFFULL
>
>
> I think it's not a good idea to overload tick variables with special tick values. Yes, the type is uint64_t and it's very unlikely to run into these values when the tick starts from 0 - but (on the API level) it opens the door for strange behavior if the system tick is uninitialized or initialized with a large value.
Overloading the return value is one thing (and it is not beautiful API
design, I agree). Having problems with overflow of uint64_t values is
another. If the system tick overflows because of missing initialization
(and the three reserved values at the end do not matter in reality -
three ticks later you're dead anyway), then you have a serious bug
which I don't think the application should try to recover from. Testing
of the implementation will have to make sure that the timer tick starts
from some suitable low value (e.g. 0) and does not overflow during the
lifetime of the system (a 64-bit counter @ 1 GHz takes >580 years to
overflow).
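
For reference, the arithmetic behind that figure: 2^64 ns is about 1.8 * 10^10
seconds, and 1.8 * 10^10 / (3600 * 24 * 365) is roughly 585 years at one tick
per nanosecond; a coarser resolution only pushes the wrap-around further out.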

>
>
>>
>>  /**
>> - * Create a timer
>> + * Create a timer pool
>>   *
>> - * Creates a new timer with requested properties.
>> + * Create a new timer pool.
>>   *
>>   * @param name       Name
>> - * @param pool       Buffer pool for allocating timeout notifications
>> + * @param buf_pool   Buffer pool for allocating timeouts (and only
>> timeouts)
>>   * @param resolution Timeout resolution in nanoseconds
>> - * @param min_tmo    Minimum timeout duration in nanoseconds
>> - * @param max_tmo    Maximum timeout duration in nanoseconds
>> + * @param min_tmo    Minimum relative timeout in nanoseconds
>> + * @param max_tmo    Maximum relative timeout in nanoseconds
>> + * @param num_timers Number of supported timers (minimum)
>> + * @param shared     Shared or private timer pool.
>> + *              Operations on shared timers will include the necessary
>> + *              mutual exclusion, operations on private timers may not
>> + *              (mutual exclusion is the responsibility of the caller).
>> + * @param clk_src    Clock source to use
>> + *
>> + * @return Timer pool handle if successful, otherwise
>> ODP_TIMER_POOL_INVALID
>> + * and errno set
>> + */
>> +odp_timer_pool_t
>> +odp_timer_pool_create(const char *name,
>> +                   odp_buffer_pool_t buf_pool,
>> +                   uint64_t resolution,
>> +                   uint64_t min_tmo,
>> +                   uint64_t max_tmo,
>> +                   uint32_t num_timers,
>> +                   int shared,
>> +                   odp_timer_clk_src_t clk_src);
>
> Not a must-change at this point, but the API would be cleaner if all these params were packed into an odp_timer_pool_param_t struct (all except name, just like odp_buffer_pool_param_t).
I could do that. This is modeled on the old timer API, but as the
number of parameters increases, it of course starts to become unwieldy.
My biggest problem with this function is that (as Leo once wrote) you
can specify two of resolution, min timeout and max timeout and the
third will be returned - but not all three.
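
To illustrate the packed-struct idea, a purely hypothetical sketch (neither
this struct nor the two-argument create call exists in the patch):

typedef struct odp_timer_pool_param_s {
        odp_buffer_pool_t buf_pool;  /**< Pool for timeout buffers */
        uint64_t resolution;         /**< Timeout resolution in ns */
        uint64_t min_tmo;            /**< Min relative timeout in ns */
        uint64_t max_tmo;            /**< Max relative timeout in ns */
        uint32_t num_timers;         /**< Number of timers (minimum) */
        int shared;                  /**< Shared or private timer pool */
        odp_timer_clk_src_t clk_src; /**< Clock source */
} odp_timer_pool_param_t;

odp_timer_pool_t odp_timer_pool_create(const char *name,
                                       const odp_timer_pool_param_t *params);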

>
>
> Also I'd swap "int shared" to "int private". So that the default value (private == 0) would select the shared timer mode (== multicore, which is the default for everything else in ODP).
Good suggestion. I do have problems with the non-shared (private) mode
anyway as the timer API requires some other agent to expire the timers
and all data structure accesses have to be synchronized between the only
client and the timer manager itself. So I would like to drop this
parameter (I think it was Bill Mills or David Lide who originally
suggested it a long time ago).

>
>
>>  /**
>> - * Maximum timeout in timer ticks
>> + * ODP timer pool information and configuration
>> + */
>> +
>> +typedef struct odp_timer_pool_info_s {
>> +     uint64_t resolution;/**< Timer resolution (in ns) */
>> +     uint64_t min_tmo;   /**< Min supported relative timeout (in ticks)*/
>> +     uint64_t max_tmo;   /**< Max supported relative timeout (in ticks)*/
>> +     uint32_t num_timers;/**< Number of supported timers */
>> +     uint32_t cur_timers;/**< Number of currently allocated timers */
>> +     uint32_t hwm_timers;/**< Number of used timers high watermark */
>> +     int shared;         /**< Shared flag */
>> +     char name[80];      /**< Name of timer pool */
>
> Name can be a const pointer to the string.
I don't like returning pointers to internal data. Who knows how long
the application wants to keep this pointer and then unexpectedly use
it, long after some other thread might have deleted the timer pool.

>
> Params from pool_create could be packed into the odp_timer_pool_param_t (just like odp_buffer_pool_info_t).
Yes, this follows from the earlier suggestion. Expect another patch.

>
>
>> +} odp_timer_pool_info_t;
>> +
>> +/**
>> + * Query timer pool information and configuration
>> + *  Timer resolution in nanoseconds
>> + *  Minimum and maximum (relative) timeouts in timer ticks
>> + *  Number of supported timers
>> + *  Nunber of timers in use
>> + *  Nunber of timers in use - high watermark
>> + *  Shared or private timer pool
>> + *  Name of timer pool.
>>   *
>> - * @param timer Timer
>> + * @param tpid Timer pool identifier
>> + * @param buf Pointer to information buffer
>> + * @param buf_size Size of information buffer
>
> Could be dropped since sizeof(odp_timer_pool_info_t) defines the size (just like odp_buffer_pool_info() outputs info and returns success/failure).
I expected the timer_pool_info struct to potentially grow. But perhaps
we would simply require a recompilation of the application in such a case.
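
For what it is worth, the updated odp_timer_test.c in this patch already uses
the sizeof pattern:

odp_timer_pool_info_t tpinfo;
(void)odp_timer_pool_info(tp, &tpinfo, sizeof(tpinfo));
printf("  resolution: %"PRIu64" ns\n", tpinfo.resolution);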

>
>>   *
>> - * @return Maximum timeout in timer ticks
>> + * @return Actual size written
>>   */
>> -uint64_t odp_timer_maximum_tmo(odp_timer_t timer);
>> +size_t odp_timer_pool_info(odp_timer_pool_t tpid,
>> +                        odp_timer_pool_info_t *buf,
>> +                        size_t buf_size);
>>
>
>
>
>> +
>> +/**
>> + * Free a timer
>> + *
>> + * Free (destroy) a timer, freeing associated resources.
>> + * The timeout buffer for an active timer will be returned.
>> + * An expired and enqueued timeout buffer will not be freed.
>> + * It is the responsibility of the application to free this timeout when
>> it
>> + * is received.
>> + *
>> + * @param tim      Timer handle
>> + * @param tmo_buf Reference to a buffer variable which will be written
>> with
>> + * the buffer handle of any present timeout buffer (e.g. for an active
>> timer).
>> + * @return       A tick value which will not match any valid expiration
>> tick.
>>   */
>> -uint64_t odp_timer_current_tick(odp_timer_t timer);
>> +uint64_t odp_timer_free(odp_timer_t tim, odp_buffer_t *tmo_buf);
>
> Why does this return a tick? And what value does it actually return?
This is just to follow the pattern of odp_timer_set_abs/rel. All those
calls return a tick value which the application is expected to save so
that it can be compared with the tick value of any received timeout.
This is then used to check whether a timeout is fresh or stale. It is
strictly not necessary to return a value here, but I want to keep the
API simple to use.
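
Concretely, the intended pattern (mirroring Example #1 in odp_timer.h; conn is
an application-owned context, buf and exp_tck are declared by the caller):

//When arming: save the expiration tick that the set call returns
conn->expected = odp_timer_set_rel(conn->ret_tim, conn->ret_len,
                                   &conn->tmo_buf);

//When a timeout buffer is received: compare it against the saved tick
if (odp_timer_tmo_metadata(buf, NULL, &exp_tck, (void **)&conn)) {
        conn->tmo_buf = buf;
        if (exp_tck == conn->expected) {
                //Fresh timeout => act on it (e.g. retransmit)
        } else {
                //Stale timeout => the timer was reset, cancelled or freed in
                //the meantime; ignore it or re-arm
        }
}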

>
> The documentation of the tmo_buf output is misleading. It should say that the operation outputs the handle if an active timer was successfully cancelled, and outputs BUFFER_INVALID if that failed. I think that's the intention, so that the user does not get a handle to a buffer that may be on its way to the queue and potentially double-free it.
I think the description is correct. The tmo_buf is written with the
buffer handle if the timer was active (and thus associated with a
timeout buffer) and the cancel operation (which is implied by the free
operation) "succeeds". If the cancel does not succeed (because the
timer had already expired and there is no timer buffer), it does not
write any value to the tmo_buf variable.

The application might already have the tmo_buf, either because it was
received or the timer was cancelled before it had expired (and thus
the timeout buffer could be returned in the cancel call). In such a
case, odp_timer_cancel or odp_timer_free should not overwrite tmo_buf
with ODP_BUFFER_INVALID because then we would leak the buffer that was
stored here.

I was hoping that the example code snippets would show the proper
behavior here.
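
In code, the intended handling (as in Example #3 of the header; ctx->tmo_buf is
kept at ODP_BUFFER_INVALID whenever the application does not hold the buffer
itself):

//Free the timer; tmo_buf is only written if a timeout buffer was present
ctx->expected = odp_timer_free(ctx->timer, &ctx->tmo_buf);
ctx->timer = ODP_TIMER_INVALID;
if (ctx->tmo_buf != ODP_BUFFER_INVALID) {
        //We got the timeout buffer back (or already held it) => free it and
        //finish the teardown now
        odp_buffer_free(ctx->tmo_buf);
} else {
        //The timeout has already expired and been enqueued => finish the
        //teardown when it is received
        ctx->teardown_in_progress = true;
}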



>
>>
>>  /**
>> - * Request timeout with an absolute timer tick
>> + * Set a timer (absolute time) with a user-provided timeout buffer
>> + *
>> + * Set (arm) the timer to expire at specific time. The timeout
>> + * buffer will be enqueued when the timer expires.
>>   *
>> - * When tick reaches tmo_tick, the timer enqueues the timeout
>> notification into
>> - * the destination queue.
>> + * Note: any invalid parameters will be treated as programming errors and
>> will
>> + * cause the application to abort.
>>   *
>> - * @param timer    Timer
>> - * @param tmo_tick Absolute timer tick value which triggers the timeout
>> - * @param queue    Destination queue for the timeout notification
>> - * @param buf      User defined timeout notification buffer. When
>> - *                 ODP_BUFFER_INVALID, default timeout notification is
>> used.
>> + * @param tim      Timer
>> + * @param abs_tck  Expiration time in absolute timer ticks
>> + * @param tmo_buf  Reference to a buffer variable that points to timeout
>> buffer
>> + * or NULL to reuse the existing timeout buffer
>>   *
>> - * @return Timeout handle if successful, otherwise ODP_TIMER_TMO_INVALID
>> + * @return The expiration tick or ODP_TICK_TOOEARLY, ODP_TICK_TOOLATE or
>> + * ODP_TICK_INVALID (timer not active and no timeout buffer to reuse).
>>   */
>> -odp_timer_tmo_t odp_timer_absolute_tmo(odp_timer_t timer, uint64_t
>> tmo_tick,
>> -                                    odp_queue_t queue, odp_buffer_t buf);
>> +uint64_t odp_timer_set_abs(odp_timer_t tim,
>> +                        uint64_t abs_tck,
>> +                        odp_buffer_t *tmo_buf);
>
> Why does this return a tick and not just a status? It promotes the race condition highlighted earlier (my->tick vs. tmo->tick).
>
> Can this also accept user-defined buffers (not of type timeout)?
Yes. The user is, however, responsible for the proper initialization of
such a buffer; the timer manager won't touch it (it just enqueues it
when the timer expires).
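
A sketch of that usage (the raw buffer pool and the buffer contents are
application-defined assumptions, not something the API prescribes):

//Application-owned buffer used in place of a system timeout buffer
odp_buffer_t my_buf = odp_buffer_alloc(raw_pool);
//... initialize the buffer so the receiver can recognize it ...
(void)odp_timer_set_abs(tim, abs_tck, &my_buf);
//On expiration the buffer is enqueued as-is; odp_timer_tmo_metadata() returns
//0 for it, so the receiver must identify it by its own content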

>
>
>>
>>  /**
>> - * Convert buffer handle to timeout handle
>> + * Cancel a timer
>> + *
>> + * Cancel a timer, preventing future expiration and delivery. Return any
>> + * present timeout buffer.
>> + *
>> + * A timer that has already expired may be impossible to cancel and the
>> timeout
>> + * will instead be delivered to the destination queue.
>>   *
>> - * @param buf  Buffer handle
>> + * Note: any invalid parameters will be treated as programming errors and
>> will
>> + * cause the application to abort.
>>   *
>> - * @return Timeout buffer handle
>> + * @param tim     Timer
>> + * @param tmo_buf Reference to a buffer variable which will be written
>> with
>> + * the buffer handle of any present timeout buffer (e.g. for an active
>> timer).
>> + * @return       A tick value which will not match any valid expiration
>> tick.
>>   */
>> -odp_timeout_t odp_timeout_from_buffer(odp_buffer_t buf);
>> +uint64_t odp_timer_cancel(odp_timer_t tim, odp_buffer_t *tmo_buf);
>
> Same comments with the free call.
The free call is described that way because of the behavior of the cancel call.

>
>
>>
>>  /**
>> - * Return absolute timeout tick
>> + * Get metadata from system timeout buffer
>>   *
>> - * @param tmo Timeout buffer handle
>> + * @param buf A timeout buffer
>> + * @param hdl NULL or a pointer where the timer handle will be written.
>> + * @param exp_tck NULL or a pointer where the expiration tick will be
>> written.
>> + * @param user_ptr NULL or a pointer where the user pointer will be
>> written.
>>   *
>> - * @return Absolute timeout tick
>> + * @return True (1) if timeout buffer is of type ODP_BUFFER_TYPE_TIMEOUT
>> and
>> + *         metadata variables have been updated from the timeout.
>> + *         False (0) if timeout buffer is not of type
>> ODP_BUFFER_TYPE_TIMEOUT.
>>   */
>> -uint64_t odp_timeout_tick(odp_timeout_t tmo);
>> +int odp_timer_tmo_metadata(odp_buffer_t buf,
>> +             odp_timer_t *hdl,
>> +             uint64_t *exp_tck,
>> +             void **user_ptr);
>
> The normal odp_buffer_type() call should be used to check the buffer type first. This generic metadata call should be broken into separate calls, just like the packet API does.
odp_timer_tmo_metadata() checks the buffer type anyway because it does
not trust the user. Better to just check the buffer type once. Are you
suggesting that we should not have any buffer type checks in these
calls and always require the user to check the buffer type first?

Having multiple metadata accessors will potentially cause a lot of
overhead. Converting from a generic buffer to a timeout buffer multiple
times and then accessing the internal buffer data is not free (e.g. on
linux-generic). Why is it better to have multiple accessor functions? I
need an objective argument here.

>
> odp_timer_t odp_timeout_timer(tmo)
> uint64_t odp_timeout_tick(tmo)
> void* odp_timeout_user_ptr(tmo)
>
>
> -Petri
>
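
For comparison, usage of the single call as defined in this patch; the split
accessors quoted above would replace it with an explicit type check plus three
per-field calls (those accessors are only a proposal at this point):

odp_timer_t hdl;
uint64_t exp_tck;
void *user_ptr;

if (odp_timer_tmo_metadata(buf, &hdl, &exp_tck, &user_ptr)) {
        //buf is a timeout buffer; all three outputs are valid
} else {
        //Not a timeout buffer, i.e. not of type ODP_BUFFER_TYPE_TIMEOUT
}

//Proposed alternative (hypothetical):
//	if (odp_buffer_type(buf) == ODP_BUFFER_TYPE_TIMEOUT) {
//		hdl      = odp_timeout_timer(tmo);
//		exp_tck  = odp_timeout_tick(tmo);
//		user_ptr = odp_timeout_user_ptr(tmo);
//	}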

Patch

diff --git a/example/timer/odp_timer_test.c b/example/timer/odp_timer_test.c
index 972bc96..33ef219 100644
--- a/example/timer/odp_timer_test.c
+++ b/example/timer/odp_timer_test.c
@@ -26,7 +26,7 @@ 
 
 
 #define MAX_WORKERS           32            /**< Max worker threads */
-#define MSG_POOL_SIZE         (4*1024*1024) /**< Message pool size */
+#define MSG_POOL_SIZE         (4*1024) /**< Message pool size */
 
 
 /** Test arguments */
@@ -43,67 +43,116 @@  typedef struct {
 /** @private Barrier for test synchronisation */
 static odp_barrier_t test_barrier;
 
-/** @private Timer handle*/
-static odp_timer_t test_timer;
+/** @private Buffer pool handle */
+static odp_buffer_pool_t pool;
 
+/** @private Timer pool handle */
+static odp_timer_pool_t tp;
+
+/** @private Timeout status ASCII strings */
+static const char *const status2str[] = {
+	"fresh", "stale", "orphaned"
+};
+
+/** @private Timer set status ASCII strings */
+static const char *timerset2str(uint64_t val)
+{
+	switch (val) {
+	case ODP_TICK_TOOEARLY:
+		return "tooearly";
+	case ODP_TICK_TOOLATE:
+		return "toolate";
+	case ODP_TICK_INVALID:
+		return "error";
+	default:
+		return "success";
+	}
+};
+
+/** @private Helper struct for timers */
+struct test_timer {
+	odp_timer_t tim;
+	odp_buffer_t buf;
+	uint64_t tick;
+};
+
+/** @private Array of all timer helper structs */
+static struct test_timer tt[256];
 
 /** @private test timeout */
 static void test_abs_timeouts(int thr, test_args_t *args)
 {
-	uint64_t tick;
 	uint64_t period;
 	uint64_t period_ns;
 	odp_queue_t queue;
-	odp_buffer_t buf;
-	int num;
+	int remain = args->tmo_count;
+	uint64_t tick;
+	struct test_timer *ttp;
 
 	EXAMPLE_DBG("  [%i] test_timeouts\n", thr);
 
 	queue = odp_queue_lookup("timer_queue");
 
 	period_ns = args->period_us*ODP_TIME_USEC;
-	period    = odp_timer_ns_to_tick(test_timer, period_ns);
+	period    = odp_timer_ns_to_tick(tp, period_ns);
 
 	EXAMPLE_DBG("  [%i] period %"PRIu64" ticks,  %"PRIu64" ns\n", thr,
 		    period, period_ns);
 
-	tick = odp_timer_current_tick(test_timer);
+	EXAMPLE_DBG("  [%i] current tick %"PRIu64"\n", thr,
+		    odp_timer_current_tick(tp));
 
-	EXAMPLE_DBG("  [%i] current tick %"PRIu64"\n", thr, tick);
-
-	tick += period;
-
-	if (odp_timer_absolute_tmo(test_timer, tick, queue, ODP_BUFFER_INVALID)
-	    == ODP_TIMER_TMO_INVALID){
-		EXAMPLE_DBG("Timeout request failed\n");
+	ttp = &tt[thr - 1]; /* Thread starts at 1 */
+	ttp->tim = odp_timer_alloc(tp, queue, ttp);
+	if (ttp->tim == ODP_TIMER_INVALID) {
+		EXAMPLE_ERR("Failed to allocate timer\n");
 		return;
 	}
+	ttp->buf = odp_buffer_alloc(pool);
+	if (ttp->buf == ODP_BUFFER_INVALID) {
+		EXAMPLE_ERR("Failed to allocate buffer\n");
+		return;
+	}
+	tick = odp_timer_current_tick(tp);
 
-	num = args->tmo_count;
-
-	while (1) {
-		odp_timeout_t tmo;
-
-		buf = odp_schedule_one(&queue, ODP_SCHED_WAIT);
-
-		tmo  = odp_timeout_from_buffer(buf);
-		tick = odp_timeout_tick(tmo);
-
-		EXAMPLE_DBG("  [%i] timeout, tick %"PRIu64"\n", thr, tick);
-
-		odp_buffer_free(buf);
-
-		num--;
-
-		if (num == 0)
-			break;
+	while (remain != 0) {
+		odp_buffer_t buf;
 
 		tick += period;
+		ttp->tick = odp_timer_set_abs(ttp->tim, tick, &ttp->buf);
+		if (odp_unlikely(ttp->tick == ODP_TICK_TOOEARLY ||
+				 ttp->tick == ODP_TICK_TOOLATE ||
+				 ttp->tick == ODP_TICK_INVALID)) {
+			/* Too early or too late timeout requested */
+			EXAMPLE_ABORT("odp_timer_set_abs() failed: %s\n",
+				      timerset2str(ttp->tick));
+		}
 
-		odp_timer_absolute_tmo(test_timer, tick,
-				       queue, ODP_BUFFER_INVALID);
+		/* Get the next expired timeout */
+		buf = odp_schedule_one(&queue, ODP_SCHED_WAIT);
+		if (odp_timer_tmo_metadata(buf, NULL, &tick, (void **)&ttp)) {
+			ttp->buf = buf;
+			if (odp_likely(tick != ttp->tick)) {
+				/* Not the expected expiration tick, timer has
+				 * been reset or cancelled or freed */
+				EXAMPLE_ABORT("Unexpected timeout received (timer %x, tick %"PRIu64", expected %"PRIu64")\n",
+					      ttp->tim, tick, ttp->tick);
+			}
+			EXAMPLE_DBG("  [%i] timeout, tick %"PRIu64"\n",
+				    thr, tick);
+			remain--;
+		} else {
+			/* Not a default timeout buffer */
+			EXAMPLE_ABORT("Unexpected buffer type (%u) received\n",
+				      odp_buffer_type(buf));
+		}
 	}
 
+	/* Free last timer used */
+	ttp->tick = odp_timer_free(ttp->tim, &ttp->buf);
+	if (ttp->buf != ODP_BUFFER_INVALID)
+		odp_buffer_free(ttp->buf);
+
 	if (odp_queue_sched_type(queue) == ODP_SCHED_SYNC_ATOMIC)
 		odp_schedule_release_atomic();
 }
@@ -192,14 +241,14 @@  static void parse_args(int argc, char *argv[], test_args_t *args)
 	/* defaults */
 	args->cpu_count     = 0; /* all CPU's */
 	args->resolution_us = 10000;
-	args->min_us        = args->resolution_us;
+	args->min_us        = 0;
 	args->max_us        = 10000000;
 	args->period_us     = 1000000;
 	args->tmo_count     = 30;
 
 	while (1) {
 		opt = getopt_long(argc, argv, "+c:r:m:x:p:t:h",
-				 longopts, &long_index);
+				  longopts, &long_index);
 
 		if (opt == -1)
 			break;	/* No more options */
@@ -243,13 +292,12 @@  int main(int argc, char *argv[])
 	odph_linux_pthread_t thread_tbl[MAX_WORKERS];
 	test_args_t args;
 	int num_workers;
-	odp_buffer_pool_t pool;
 	odp_queue_t queue;
 	int first_cpu;
 	uint64_t cycles, ns;
 	odp_queue_param_t param;
-	odp_shm_t shm;
 	odp_buffer_pool_param_t params;
+	odp_timer_pool_info_t tpinfo;
 
 	printf("\nODP timer example starts\n");
 
@@ -309,22 +357,42 @@  int main(int argc, char *argv[])
 	printf("timeouts:           %i\n", args.tmo_count);
 
 	/*
-	 * Create message pool
+	 * Create buffer pool for timeouts
 	 */
-	shm = odp_shm_reserve("msg_pool",
-			      MSG_POOL_SIZE, ODP_CACHE_LINE_SIZE, 0);
-
 	params.buf_size  = 0;
 	params.buf_align = 0;
 	params.num_bufs  = MSG_POOL_SIZE;
 	params.buf_type  = ODP_BUFFER_TYPE_TIMEOUT;
 
-	pool = odp_buffer_pool_create("msg_pool", shm, &params);
+	pool = odp_buffer_pool_create("msg_pool", ODP_SHM_NULL, &params);
 
 	if (pool == ODP_BUFFER_POOL_INVALID) {
-		EXAMPLE_ERR("Pool create failed.\n");
+		EXAMPLE_ERR("Buffer pool create failed.\n");
+		return -1;
+	}
+
+	tp = odp_timer_pool_create("timer_pool", pool,
+				   args.resolution_us*ODP_TIME_USEC,
+				   args.min_us*ODP_TIME_USEC,
+				   args.max_us*ODP_TIME_USEC,
+				   num_workers, /* One timer per worker */
+				   true,
+				   ODP_CLOCK_CPU);
+	if (tp == ODP_TIMER_POOL_INVALID) {
+		EXAMPLE_ERR("Timer pool create failed.\n");
 		return -1;
 	}
+	odp_timer_pool_start();
+
+	odp_shm_print_all();
+	(void)odp_timer_pool_info(tp, &tpinfo, sizeof(tpinfo));
+	printf("Timer pool\n");
+	printf("----------\n");
+	printf("  name: %s\n", tpinfo.name);
+	printf("  resolution: %"PRIu64" ns\n", tpinfo.resolution);
+	printf("  min tmo: %"PRIu64" ticks\n", tpinfo.min_tmo);
+	printf("  max tmo: %"PRIu64" ticks\n", tpinfo.max_tmo);
+	printf("\n");
 
 	/*
 	 * Create a queue for timer test
@@ -341,20 +409,7 @@  int main(int argc, char *argv[])
 		return -1;
 	}
 
-	test_timer = odp_timer_create("test_timer", pool,
-				      args.resolution_us*ODP_TIME_USEC,
-				      args.min_us*ODP_TIME_USEC,
-				      args.max_us*ODP_TIME_USEC);
-
-	if (test_timer == ODP_TIMER_INVALID) {
-		EXAMPLE_ERR("Timer create failed.\n");
-		return -1;
-	}
-
-
-	odp_shm_print_all();
-
-	printf("CPU freq %"PRIu64" hz\n", odp_sys_cpu_hz());
+	printf("CPU freq %"PRIu64" Hz\n", odp_sys_cpu_hz());
 	printf("Cycles vs nanoseconds:\n");
 	ns = 0;
 	cycles = odp_time_ns_to_cycles(ns);
diff --git a/platform/linux-generic/include/api/odp_timer.h b/platform/linux-generic/include/api/odp_timer.h
index 6cca27c..f8ecf64 100644
--- a/platform/linux-generic/include/api/odp_timer.h
+++ b/platform/linux-generic/include/api/odp_timer.h
@@ -8,9 +8,211 @@ 
 /**
  * @file
  *
- * ODP timer
+ * ODP timer service
  */
 
+/** Example #1 Retransmission timer (e.g. for reliable connections)
+ @code
+
+//Create timer pool for reliable connections
+#define SEC 1000000000ULL //1s expressed in nanoseconds
+odp_timer_pool_t tcp_tpid =
+    odp_timer_pool_create("TCP",
+			  buffer_pool,
+			  1000000,//resolution 1ms
+			  0,//min tmo
+			  7200 * SEC,//max tmo length 2hours
+			  40000,//num_timers
+			  true,//shared
+			  ODP_CLOCK_CPU
+			 );
+if (tcp_tpid == ODP_TIMER_POOL_INVALID)
+{
+	//Failed to create timer pool => fatal error
+}
+
+
+//Setting up a new connection
+//Allocate retransmission timeout (identical for supervision timeout)
+//The user pointer points back to the connection context
+conn->ret_tim = odp_timer_alloc(tcp_tpid, queue, conn);
+//Check if all resources were successfully allocated
+if (conn->ret_tim == ODP_TIMER_INVALID)
+{
+	//Failed to allocate all resources for connection => tear down
+	//Tear down connection
+	...
+	return false;
+}
+//All necessary resources successfully allocated
+//Compute initial retransmission length in timer ticks
+conn->ret_len = odp_timer_ns_to_tick(tcp_tpid, 3 * SEC);//Per RFC1122
+//Allocate a timeout buffer
+conn->tmo_buf = odp_buffer_alloc(buffer_pool);
+if (conn->tmo_buf == ODP_BUFFER_INVALID)
+	ODP_ABORT("Failed to allocate timeout buffer\n");
+//Arm the timer with our timeout
+conn->expected = odp_timer_set_rel(conn->ret_tim, conn->ret_len,
+				   &conn->tmo_buf);
+//Check return value for too early or too late expiration tick
+return true;
+
+
+//A packet for the connection has just been transmitted
+//Reset the retransmission timer, reusing the previously specified timeout
+conn->expected = odp_timer_set_rel(conn->ret_tim, conn->ret_len, NULL);
+if (conn->expected == ODP_TICK_INVALID) {
+	//Timer has expired
+	//We could allocate another timeout buffer and reset the timer with it
+	...
+}
+//Else check return value for too early or too late expiration tick
+
+
+//Check if the buffer is a system timeout buffer
+if (!odp_timer_tmo_metadata(&buf, NULL, &exp_tick, (void**)&conn)) {
+	//Not a system timeout buffer
+	...
+}
+//A retransmission timeout buffer for the connection has been received
+conn->tmo_buf = buf;
+//Check if timeout is fresh or stale, for stale timeouts we need to reset the
+//timer
+if (conn->expected == exp_tick) {
+	//Fresh timeout, last transmitted packet not acked in time =>
+	  retransmit
+	//Retransmit last packet (e.g. TCP segment)
+	...
+	//Re-arm timer using original delta value
+	odp_timer_set_rel(conn->ret_tim, conn->ret_len, &conn->tmo_buf);
+} else {
+	//Timeout does not have the expected expiration tick, timer has been
+	//reset or cancelled
+	//We can ignore the timeout or reset the timer to the expected tick
+	if (conn->expected != ODP_TICK_INVALID)
+		(void)odp_timer_set_abs(conn->ret_tim, conn->expected,
+					&conn->tmo_buf);
+}
+
+ @endcode
+*/
+
+/** Example #2 Periodic tick
+ @code
+
+//Create timer pool for periodic ticks
+odp_timer_pool_t per_tpid =
+    odp_timer_pool_create("periodic-tick",
+			  buffer_pool,
+			  1,//resolution 1ns
+			  1,//minimum timeout length 1ns
+			  1000000000,//maximum timeout length 1s
+			  10,//num_timers
+			  false,//not shared
+			  ODP_CLOCK_CPU
+			 );
+if (per_tpid == ODP_TIMER_POOL_INVALID)
+{
+	//Failed to create timer pool => fatal error
+}
+
+
+//Allocate periodic timer
+tim_1733 = odp_timer_alloc(per_tpid, queue, NULL);
+//Check if all resources were successfully allocated
+if (tim_1733 == ODP_TIMER_INVALID)
+{
+	//Failed to allocate all resources => tear down
+	//Tear down other state
+	...
+	return false;
+}
+//All necessary resources successfully allocated
+//Compute tick period in timer ticks
+period_1733 = odp_timer_ns_to_tick(per_tpid, 1000000000U / 1733U);//1733Hz
+//Compute when next tick should expire
+next_1733 = odp_timer_current_tick(per_tpid) + period_1733;
+//Allocate a timeout buffer
+buf_1733 = odp_buffer_alloc(buffer_pool);
+if (buf_1733 == ODP_BUFFER_INVALID)
+	ODP_ABORT("Failed to allocate timeout buffer\n");
+//Arm the periodic timer
+(void)odp_timer_set_abs(tim_1733, next_1733, &buf_1733);
+return true;
+
+
+
+//Check if the buffer is a system timeout buffer
+if (!odp_timer_tmo_metadata(&buf, &timer, NULL, NULL)) {
+	//Not a system timeout buffer
+	...
+}
+if (timer != tim_1733) {
+	//Not our periodic timer
+	...
+}
+//A periodic timer timeout has been received
+//Do processing driven by timeout *before*
+...
+do {
+	//Compute when the timer should expire next
+	next_1733 += period_1733;
+	//Check that this is in the future
+	if (likely(next_1733 > odp_timer_current_tick(per_tpid))
+		break;//Yes, done
+	//Else we missed a timeout
+	//Optionally attempt some recovery and/or logging of the problem
+	...
+} while (1);
+//Re-arm periodic timer
+(void)odp_timer_set_abs(tim_1733, next_1733, &buf);
+//Or do processing driven by timeout *after*
+...
+return;
+
+ @endcode
+*/
+
+/** Example #3 Tear down of flow
+ @code
+//ctx points to flow context data structure owned by application
+//Free the timer, cancelling any timeout
+ctx->expected = odp_timer_free(ctx->timer, &ctx->tmo_buf);
+//Invalidate our save timer handle
+ctx->timer = ODP_TIMER_INVALID;
+if (ctx->tmo_buf != ODP_BUFFER_INVALID) {
+	//Tear down the flow
+	...
+	odp_buffer_free(ctx->tmo_buf);
+	//Free flow context
+	..
+	//Teardown complete
+} else {
+	//Timer had already expired and timeout enqueued
+	//Continue teardown when receiving timeout
+	ctx->teardown_in_progress = true;
+}
+return;
+
+//A buffer has been received, check type
+if (!odp_timer_tmo_metadata(buf, &timer, NULL, (void**)&ctx)) {
+	//Not a system timeout buffer
+	...
+}
+ctx->tmo_buf = buf;
+//Check if we are tearing down flow
+if (ctx->teardown_in_progress) {
+	//Continue tearing down the flow
+	//Free the timeout buffer
+	odp_buffer_free(ctx->tmo_buf));
+	//Free the context
+	...
+	//Teardown complete
+}
+
+ @endcode
+*/
+
 #ifndef ODP_TIMER_H_
 #define ODP_TIMER_H_
 
@@ -18,6 +220,7 @@ 
 extern "C" {
 #endif
 
+#include <stdlib.h>
 #include <odp_std_types.h>
 #include <odp_buffer.h>
 #include <odp_buffer_pool.h>
@@ -27,138 +230,283 @@  extern "C" {
  *  @{
  */
 
-/**
- * ODP timer handle
- */
-typedef uint32_t odp_timer_t;
-
-/** Invalid timer */
-#define ODP_TIMER_INVALID 0
+struct odp_timer_pool_s; /**< Forward declaration */
 
+/**
+* ODP timer pool handle (platform dependent)
+*/
+typedef struct odp_timer_pool_s *odp_timer_pool_t;
 
 /**
- * ODP timeout handle
+ * Invalid timer pool handle (platform dependent).
  */
-typedef odp_buffer_t odp_timer_tmo_t;
+#define ODP_TIMER_POOL_INVALID NULL
 
-/** Invalid timeout */
-#define ODP_TIMER_TMO_INVALID 0
+/**
+ * Clock sources for timers in timer pool.
+ */
+typedef enum odp_timer_clk_src_e {
+	/** Use CPU clock as clock source for timers */
+	ODP_CLOCK_CPU,
+	/** Use external clock as clock source for timers */
+	ODP_CLOCK_EXT
+	/* Platform dependent which other clock sources exist */
+} odp_timer_clk_src_t;
 
+/**
+* ODP timer handle (platform dependent).
+*/
+typedef uint32_t odp_timer_t;
 
 /**
- * Timeout notification
+ * Invalid timer handle (platform dependent).
  */
-typedef odp_buffer_t odp_timeout_t;
+#define ODP_TIMER_INVALID ((uint32_t)~0U)
 
+/**
+ * Return values of timer set calls.
+ */
+/**
+ * Timer set operation failed, expiration too early.
+ * Either retry with a later expiration time or process the timeout
+ * immediately. */
+#define ODP_TICK_TOOEARLY 0xFFFFFFFFFFFFFFFDULL
+/**
+ * Timer set operation failed, expiration too late.
+ * Truncate the expiration time against the maximum timeout for the
+ * timer pool. */
+#define ODP_TICK_TOOLATE  0xFFFFFFFFFFFFFFFEULL
+/**
+ * Timer set operation failed because not timeout buffer present or specified.
+ * This value is also return from odp_timer_cancel() and odp_timer_free().
+ */
+#define ODP_TICK_INVALID  0xFFFFFFFFFFFFFFFFULL
 
 /**
- * Create a timer
+ * Create a timer pool
  *
- * Creates a new timer with requested properties.
+ * Create a new timer pool.
  *
  * @param name       Name
- * @param pool       Buffer pool for allocating timeout notifications
+ * @param buf_pool   Buffer pool for allocating timeouts (and only timeouts)
  * @param resolution Timeout resolution in nanoseconds
- * @param min_tmo    Minimum timeout duration in nanoseconds
- * @param max_tmo    Maximum timeout duration in nanoseconds
+ * @param min_tmo    Minimum relative timeout in nanoseconds
+ * @param max_tmo    Maximum relative timeout in nanoseconds
+ * @param num_timers Number of supported timers (minimum)
+ * @param shared     Shared or private timer pool.
+ *		   Operations on shared timers will include the necessary
+ *		   mutual exclusion, operations on private timers may not
+ *		   (mutual exclusion is the responsibility of the caller).
+ * @param clk_src    Clock source to use
+ *
+ * @return Timer pool handle if successful, otherwise ODP_TIMER_POOL_INVALID
+ * and errno set
+ */
+odp_timer_pool_t
+odp_timer_pool_create(const char *name,
+		      odp_buffer_pool_t buf_pool,
+		      uint64_t resolution,
+		      uint64_t min_tmo,
+		      uint64_t max_tmo,
+		      uint32_t num_timers,
+		      int shared,
+		      odp_timer_clk_src_t clk_src);
+
+/**
+ * Start a timer pool
+ *
+ * Start all created timer pools, enabling the allocation of timers.
+ * The purpose of this call is to coordinate the creation of multiple timer
+ * pools that may use the same underlying HW resources.
+ * This function may be called multiple times.
+ */
+void odp_timer_pool_start(void);
+
+/**
+ * Destroy a timer pool
  *
- * @return Timer handle if successful, otherwise ODP_TIMER_INVALID
+ * Destroy a timer pool, freeing all resources.
+ * All timers must have been freed.
+ *
+ * @param tpid  Timer pool identifier
  */
-odp_timer_t odp_timer_create(const char *name, odp_buffer_pool_t pool,
-			     uint64_t resolution, uint64_t min_tmo,
-			     uint64_t max_tmo);
+void odp_timer_pool_destroy(odp_timer_pool_t tpid);
 
 /**
  * Convert timer ticks to nanoseconds
  *
- * @param timer Timer
+ * @param tpid  Timer pool identifier
  * @param ticks Timer ticks
  *
  * @return Nanoseconds
  */
-uint64_t odp_timer_tick_to_ns(odp_timer_t timer, uint64_t ticks);
+uint64_t odp_timer_tick_to_ns(odp_timer_pool_t tpid, uint64_t ticks);
 
 /**
  * Convert nanoseconds to timer ticks
  *
- * @param timer Timer
+ * @param tpid  Timer pool identifier
  * @param ns    Nanoseconds
  *
  * @return Timer ticks
  */
-uint64_t odp_timer_ns_to_tick(odp_timer_t timer, uint64_t ns);
+uint64_t odp_timer_ns_to_tick(odp_timer_pool_t tpid, uint64_t ns);
 
 /**
- * Timer resolution in nanoseconds
+ * Current tick value
  *
- * @param timer Timer
+ * @param tpid Timer pool identifier
  *
- * @return Resolution in nanoseconds
+ * @return Current time in timer ticks
  */
-uint64_t odp_timer_resolution(odp_timer_t timer);
+uint64_t odp_timer_current_tick(odp_timer_pool_t tpid);
 
 /**
- * Maximum timeout in timer ticks
+ * ODP timer pool information and configuration
+ */
+
+typedef struct odp_timer_pool_info_s {
+	uint64_t resolution;/**< Timer resolution (in ns) */
+	uint64_t min_tmo;   /**< Min supported relative timeout (in ticks)*/
+	uint64_t max_tmo;   /**< Max supported relative timeout (in ticks)*/
+	uint32_t num_timers;/**< Number of supported timers */
+	uint32_t cur_timers;/**< Number of currently allocated timers */
+	uint32_t hwm_timers;/**< Number of used timers high watermark */
+	int shared;         /**< Shared flag */
+	char name[80];      /**< Name of timer pool */
+} odp_timer_pool_info_t;
+
+/**
+ * Query timer pool information and configuration
+ *  Timer resolution in nanoseconds
+ *  Minimum and maximum (relative) timeouts in timer ticks
+ *  Number of supported timers
+ *  Nunber of timers in use
+ *  Nunber of timers in use - high watermark
+ *  Shared or private timer pool
+ *  Name of timer pool.
  *
- * @param timer Timer
+ * @param tpid Timer pool identifier
+ * @param buf Pointer to information buffer
+ * @param buf_size Size of information buffer
  *
- * @return Maximum timeout in timer ticks
+ * @return Actual size written
  */
-uint64_t odp_timer_maximum_tmo(odp_timer_t timer);
+size_t odp_timer_pool_info(odp_timer_pool_t tpid,
+			   odp_timer_pool_info_t *buf,
+			   size_t buf_size);
 
 /**
- * Current timer tick
+ * Allocate a timer
  *
- * @param timer Timer
+ * Create a timer (allocating all necessary resources e.g. timeout event) from
+ * the timer pool. The user_ptr is copied to timeouts and can be retrieved
+ * using the odp_timer_userptr() call.
  *
- * @return Current time in timer ticks
+ * @param tpid     Timer pool identifier
+ * @param queue    Destination queue for timeout notifications
+ * @param user_ptr User defined pointer or NULL to be copied to timeouts
+ *
+ * @return Timer handle if successful, otherwise ODP_TIMER_INVALID and
+ *	   errno set.
+ */
+odp_timer_t odp_timer_alloc(odp_timer_pool_t tpid,
+			    odp_queue_t queue,
+			    void *user_ptr);
+
+/**
+ * Free a timer
+ *
+ * Free (destroy) a timer, freeing associated resources.
+ * The timeout buffer for an active timer will be returned.
+ * An expired and enqueued timeout buffer will not be freed.
+ * It is the responsibility of the application to free this timeout when it
+ * is received.
+ *
+ * @param tim      Timer handle
+ * @param tmo_buf Reference to a buffer variable which will be written with
+ * the buffer handle of any present timeout buffer (e.g. for an active timer).
+ * @return       A tick value which will not match any valid expiration tick.
  */
-uint64_t odp_timer_current_tick(odp_timer_t timer);
+uint64_t odp_timer_free(odp_timer_t tim, odp_buffer_t *tmo_buf);
 
 /**
- * Request timeout with an absolute timer tick
+ * Set a timer (absolute time) with a user-provided timeout buffer
+ *
+ * Set (arm) the timer to expire at specific time. The timeout
+ * buffer will be enqueued when the timer expires.
  *
- * When tick reaches tmo_tick, the timer enqueues the timeout notification into
- * the destination queue.
+ * Note: any invalid parameters will be treated as programming errors and will
+ * cause the application to abort.
  *
- * @param timer    Timer
- * @param tmo_tick Absolute timer tick value which triggers the timeout
- * @param queue    Destination queue for the timeout notification
- * @param buf      User defined timeout notification buffer. When
- *                 ODP_BUFFER_INVALID, default timeout notification is used.
+ * @param tim      Timer
+ * @param abs_tck  Expiration time in absolute timer ticks
+ * @param tmo_buf  Reference to a buffer variable that points to timeout buffer
+ * or NULL to reuse the existing timeout buffer
  *
- * @return Timeout handle if successful, otherwise ODP_TIMER_TMO_INVALID
+ * @return The expiration tick or ODP_TICK_TOOEARLY, ODP_TICK_TOOLATE or
+ * ODP_TICK_INVALID (timer not active and no timeout buffer to reuse).
  */
-odp_timer_tmo_t odp_timer_absolute_tmo(odp_timer_t timer, uint64_t tmo_tick,
-				       odp_queue_t queue, odp_buffer_t buf);
+uint64_t odp_timer_set_abs(odp_timer_t tim,
+			   uint64_t abs_tck,
+			   odp_buffer_t *tmo_buf);
 
 /**
- * Cancel a timeout
+ * Set a timer with a relative expiration time and user-provided buffer.
  *
- * @param timer Timer
- * @param tmo   Timeout to cancel
+ * Set (arm) the timer to expire at a relative future time.
  *
- * @return 0 if successful
+ * Note: any invalid parameters will be treated as programming errors and will
+ * cause the application to abort.
+ *
+ * @param tim      Timer
+ * @param rel_tck  Expiration time in timer ticks relative to current time of
+ *		   the timer pool the timer belongs to
+ * @param tmo_buf  Reference to a buffer variable that points to timeout buffer
+ * or NULL to reuse the existing timeout buffer
+ *
+ * @return The expiration tick or ODP_TICK_TOOEARLY, ODP_TICK_TOOLATE or
+ * ODP_TICK_INVALID (timer not active and no timeout buffer to reuse).
  */
-int odp_timer_cancel_tmo(odp_timer_t timer, odp_timer_tmo_t tmo);
+uint64_t odp_timer_set_rel(odp_timer_t tim,
+			   uint64_t rel_tck,
+			   odp_buffer_t *tmo_buf);
 
 /**
- * Convert buffer handle to timeout handle
+ * Cancel a timer
+ *
+ * Cancel a timer, preventing future expiration and delivery. Return any
+ * present timeout buffer.
+ *
+ * A timer that has already expired may be impossible to cancel and the timeout
+ * will instead be delivered to the destination queue.
  *
- * @param buf  Buffer handle
+ * Note: any invalid parameters will be treated as programming errors and will
+ * cause the application to abort.
  *
- * @return Timeout buffer handle
+ * @param tim     Timer
+ * @param tmo_buf Reference to a buffer variable which will be written with
+ * the buffer handle of any present timeout buffer (e.g. for an active timer).
+ * @return       A tick value which will not match any valid expiration tick.
  */
-odp_timeout_t odp_timeout_from_buffer(odp_buffer_t buf);
+uint64_t odp_timer_cancel(odp_timer_t tim, odp_buffer_t *tmo_buf);
 
 /**
- * Return absolute timeout tick
+ * Get metadata from system timeout buffer
  *
- * @param tmo Timeout buffer handle
+ * @param buf A timeout buffer
+ * @param hdl NULL or a pointer where the timer handle will be written.
+ * @param exp_tck NULL or a pointer where the expiration tick will be written.
+ * @param user_ptr NULL or a pointer where the user pointer will be written.
  *
- * @return Absolute timeout tick
+ * @return True (1) if timeout buffer is of type ODP_BUFFER_TYPE_TIMEOUT and
+ *         metadata variables have been updated from the timeout.
+ *         False (0) if timeout buffer is not of type ODP_BUFFER_TYPE_TIMEOUT.
  */
-uint64_t odp_timeout_tick(odp_timeout_t tmo);
+int odp_timer_tmo_metadata(odp_buffer_t buf,
+		odp_timer_t *hdl,
+		uint64_t *exp_tck,
+		void **user_ptr);
 
 /**
  * @}
diff --git a/platform/linux-generic/include/odp_timer_internal.h b/platform/linux-generic/include/odp_timer_internal.h
index 2ff36ce..526b8e8 100644
--- a/platform/linux-generic/include/odp_timer_internal.h
+++ b/platform/linux-generic/include/odp_timer_internal.h
@@ -1,4 +1,4 @@ 
-/* Copyright (c) 2013, Linaro Limited
+/* Copyright (c) 2014, Linaro Limited
  * All rights reserved.
  *
  * SPDX-License-Identifier:     BSD-3-Clause
@@ -8,67 +8,47 @@ 
 /**
  * @file
  *
- * ODP timer timeout descriptor - implementation internal
+ * ODP timeout descriptor - implementation internal
  */
 
 #ifndef ODP_TIMER_INTERNAL_H_
 #define ODP_TIMER_INTERNAL_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#include <odp_std_types.h>
-#include <odp_queue.h>
-#include <odp_buffer.h>
+#include <odp_align.h>
+#include <odp_debug.h>
 #include <odp_buffer_internal.h>
 #include <odp_buffer_pool_internal.h>
 #include <odp_timer.h>
 
-struct timeout_t;
-
-typedef struct timeout_t {
-	struct timeout_t *next;
-	int               timer_id;
-	int               tick;
-	uint64_t          tmo_tick;
-	odp_queue_t       queue;
-	odp_buffer_t      buf;
-	odp_buffer_t      tmo_buf;
-} timeout_t;
-
-
-struct odp_timeout_hdr_t;
-
 /**
- * Timeout notification header
+ * Internal Timeout header
  */
-typedef struct odp_timeout_hdr_t {
+typedef struct {
+	/* common buffer header */
 	odp_buffer_hdr_t buf_hdr;
 
-	timeout_t meta;
-
-	uint8_t buf_data[];
+	/* Requested expiration time */
+	uint64_t expiration;
+	/* User ptr inherited from parent timer */
+	void *user_ptr;
+	/* Parent timer */
+	odp_timer_t timer;
+#if __SIZEOF_POINTER__ != 4
+	uint32_t pad32;
+#endif
+	uint8_t buf_data[0];
 } odp_timeout_hdr_t;
 
 typedef struct odp_timeout_hdr_stride {
 	uint8_t pad[ODP_CACHE_LINE_SIZE_ROUNDUP(sizeof(odp_timeout_hdr_t))];
 } odp_timeout_hdr_stride;
 
-
 /**
- * Return timeout header
+ * Return the timeout header
  */
-static inline odp_timeout_hdr_t *odp_timeout_hdr(odp_timeout_t tmo)
+static inline odp_timeout_hdr_t *odp_timeout_hdr(odp_buffer_t buf)
 {
-	odp_buffer_hdr_t *buf_hdr = odp_buf_to_hdr((odp_buffer_t)tmo);
-	return (odp_timeout_hdr_t *)(uintptr_t)buf_hdr;
+	return (odp_timeout_hdr_t *)odp_buf_to_hdr(buf);
 }
 
-
-
-#ifdef __cplusplus
-}
-#endif
-
 #endif
diff --git a/platform/linux-generic/odp_timer.c b/platform/linux-generic/odp_timer.c
index 7bd6874..491598c 100644
--- a/platform/linux-generic/odp_timer.c
+++ b/platform/linux-generic/odp_timer.c
@@ -4,429 +4,889 @@ 
  * SPDX-License-Identifier:     BSD-3-Clause
  */
 
-#include <odp_timer.h>
-#include <odp_time.h>
-#include <odp_buffer_pool_internal.h>
+/**
+ * @file
+ *
+ * ODP timer service
+ *
+ */
+
+/* Check if compiler supports 16-byte atomics. GCC needs -mcx16 flag */
+/* Using spin lock actually seems faster on Core2 */
+#ifdef ODP_ATOMIC_U128
+/* TB_NEEDS_PAD defined if sizeof(odp_buffer_t) != 8 */
+#define TB_NEEDS_PAD
+#define TB_SET_PAD(x) ((x).pad = 0)
+#else
+#define TB_SET_PAD(x) (void)(x)
+#endif
+
+/* For strdup() */
+#define _BSD_SOURCE
+/* For POSIX timers and sigevent */
+#define _POSIX_C_SOURCE 199309L
+#include <assert.h>
+#include <errno.h>
+#include <string.h>
+#include <stdlib.h>
+#include <time.h>
+#include <signal.h>
+#include <odp_align.h>
+#include <odp_align_internal.h>
+#include <odp_atomic.h>
+#include <odp_atomic_internal.h>
+#include <odp_buffer.h>
 #include <odp_buffer_inlines.h>
-#include <odp_timer_internal.h>
+#include <odp_buffer_pool.h>
+#include <odp_buffer_pool_internal.h>
+#include <odp_debug.h>
+#include <odp_debug_internal.h>
+#include <odp_hints.h>
 #include <odp_internal.h>
-#include <odp_atomic.h>
+#include <odp_queue.h>
+#include <odp_shared_memory.h>
+#include <odp_spin_internal.h>
 #include <odp_spinlock.h>
+#include <odp_std_types.h>
 #include <odp_sync.h>
-#include <odp_debug_internal.h>
-
-#include <signal.h>
-#include <time.h>
+#include <odp_time.h>
+#include <odp_timer.h>
+#include <odp_timer_internal.h>
 
-#include <string.h>
+#define TMO_UNUSED   ((uint64_t)0xFFFFFFFFFFFFFFFF)
+#define TMO_INACTIVE ((uint64_t)0xFFFFFFFFFFFFFFFE)
 
-#define NUM_TIMERS    1
-#define MAX_TICKS     1024
-#define MAX_RES       ODP_TIME_SEC
-#define MIN_RES       (100*ODP_TIME_USEC)
+#ifdef __ARM_ARCH
+#define PREFETCH(ptr) __builtin_prefetch((ptr), 0, 0)
+#else
+#define PREFETCH(ptr) (void)(ptr)
+#endif
 
+/******************************************************************************
+ * Mutual exclusion in the absence of CAS16
+ *****************************************************************************/
 
-typedef struct {
-	odp_spinlock_t lock;
-	timeout_t      *list;
-} tick_t;
-
-typedef struct {
-	int               allocated;
-	volatile int      active;
-	volatile uint64_t cur_tick;
-	timer_t           timerid;
-	odp_timer_t       timer_hdl;
-	odp_buffer_pool_t pool;
-	uint64_t          resolution_ns;
-	uint64_t          max_ticks;
-	tick_t            tick[MAX_TICKS];
-
-} timer_ring_t;
-
-typedef struct {
-	odp_spinlock_t lock;
-	int            num_timers;
-	timer_ring_t   timer[NUM_TIMERS];
+#ifndef ODP_ATOMIC_U128
+#define NUM_LOCKS 1024
+static _odp_atomic_flag_t locks[NUM_LOCKS]; /* Multiple locks per cache line! */
+#define IDX2LOCK(idx) (&locks[(idx) % NUM_LOCKS])
+#endif
 
-} timer_global_t;
+/******************************************************************************
+ * Translation between timeout and timeout header
+ *****************************************************************************/
 
-/* Global */
-static timer_global_t odp_timer;
+/**
+ * System timeout buffer (ODP_BUFFER_TYPE_TIMEOUT)
+ */
+typedef odp_buffer_t odp_timer_tmo_t;
 
-static void add_tmo(tick_t *tick, timeout_t *tmo)
+/** Translate from generic buffer to timeout */
+static inline odp_timer_tmo_t odp_tmo_from_buffer(odp_buffer_t buf)
 {
-	odp_spinlock_lock(&tick->lock);
-
-	tmo->next  = tick->list;
-	tick->list = tmo;
-
-	odp_spinlock_unlock(&tick->lock);
+	/* In this implementation, timeout == buffer */
+	return (odp_timer_tmo_t)buf;
 }
 
-static timeout_t *rem_tmo(tick_t *tick)
+/** Translate from timeout to generic buffer */
+static inline odp_buffer_t odp_buffer_from_tmo(odp_timer_tmo_t tmo)
 {
-	timeout_t *tmo;
+	/* In this implementation, buffer == timeout */
+	return (odp_buffer_t)tmo;
+}
 
-	odp_spinlock_lock(&tick->lock);
+static inline odp_timeout_hdr_t *odp_tmo_to_hdr(odp_timer_tmo_t tmo)
+{
+	odp_buffer_t buf = odp_buffer_from_tmo(tmo);
+	odp_timeout_hdr_t *tmo_hdr = (odp_timeout_hdr_t *)odp_buf_to_hdr(buf);
+	return tmo_hdr;
+}
 
-	tmo = tick->list;
+/******************************************************************************
+ * odp_timer abstract datatype
+ *****************************************************************************/
+
+typedef struct tick_buf_s {
+	odp_atomic_u64_t exp_tck;/* Expiration tick or TMO_xxx */
+	odp_buffer_t tmo_buf;/* ODP_BUFFER_INVALID if timer not active */
+#ifdef TB_NEEDS_PAD
+	uint32_t pad;/* Need to be able to access padding for successful CAS */
+#endif
+} tick_buf_t
+#ifdef ODP_ATOMIC_U128
+ODP_ALIGNED(16) /* 16-byte atomic operations need properly aligned addresses */
+#endif
+;
+
+ODP_STATIC_ASSERT(sizeof(tick_buf_t) == 16, "sizeof(tick_buf_t) == 16");
+
+typedef struct odp_timer_s {
+	void *user_ptr; /* User-defined pointer for odp_timer_tmo_t buffers */
+	odp_queue_t queue;/* Used for free list when timer is free */
+} odp_timer;
+
+static void timer_init(odp_timer *tim,
+		tick_buf_t *tb,
+		odp_queue_t _q,
+		void *_up)
+{
+	tim->queue = _q;
+	tim->user_ptr = _up;
+	tb->tmo_buf = ODP_BUFFER_INVALID;
+	/* All pad fields need a defined and constant value */
+	TB_SET_PAD(*tb);
+	/* Release the timer by setting timer state to inactive */
+	_odp_atomic_u64_store_mm(&tb->exp_tck, TMO_INACTIVE, _ODP_MEMMODEL_RLS);
+}
 
-	if (tmo)
-		tick->list = tmo->next;
+/* Teardown when timer is freed */
+static void timer_fini(odp_timer *tim, tick_buf_t *tb)
+{
+	assert(tb->exp_tck.v == TMO_UNUSED);
+	assert(tb->tmo_buf == ODP_BUFFER_INVALID);
+	tim->queue = ODP_QUEUE_INVALID;
+	tim->user_ptr = NULL;
+}
 
-	odp_spinlock_unlock(&tick->lock);
+static inline uint32_t get_next_free(odp_timer *tim)
+{
+	/* Reusing 'queue' for next free index */
+	return tim->queue;
+}
 
-	if (tmo)
-		tmo->next = NULL;
+static inline void set_next_free(odp_timer *tim, uint32_t nf)
+{
+	assert(tim->queue == ODP_QUEUE_INVALID);
+	/* Reusing 'queue' for next free index */
+	tim->queue = nf;
+}
 
-	return tmo;
+/******************************************************************************
+ * odp_timer_pool abstract datatype
+ * Includes timer alloc and free
+ *****************************************************************************/
+
+typedef struct odp_timer_pool_s {
+/* Put frequently accessed fields in the first cache line */
+	odp_atomic_u64_t cur_tick;/* Current tick value */
+	uint64_t min_rel_tck;
+	uint64_t max_rel_tck;
+	uint64_t resolution_ns;
+	tick_buf_t *tick_buf; /* Expiration tick and timeout buffer */
+	odp_timer *timers; /* User pointer and queue handle (and lock) */
+	odp_atomic_u32_t high_wm;/* High watermark of allocated timers */
+	odp_spinlock_t itimer_running;
+	odp_spinlock_t lock;
+	bool shared;
+	uint32_t num_alloc;/* Current number of allocated timers */
+	uint32_t max_timers;/* Max number of timers */
+	uint32_t first_free;/* 0..max_timers-1 => free timer */
+	uint32_t tp_idx;/* Index into timer_pool array */
+	const char *name;
+	odp_buffer_pool_t buf_pool;
+	odp_shm_t shm;
+	timer_t timerid;
+	odp_timer_clk_src_t clk_src;
+} odp_timer_pool;
+
+#define MAX_TIMER_POOLS 255 /* Leave one for ODP_TIMER_INVALID */
+#define INDEX_BITS 24
+static odp_atomic_u32_t num_timer_pools;
+static odp_timer_pool *timer_pool[MAX_TIMER_POOLS];
+
+static inline odp_timer_pool *handle_to_tp(odp_timer_t hdl)
+{
+	uint32_t tp_idx = hdl >> INDEX_BITS;
+	if (odp_likely(tp_idx < MAX_TIMER_POOLS)) {
+		odp_timer_pool *tp = timer_pool[tp_idx];
+		if (odp_likely(tp != NULL))
+			return timer_pool[tp_idx];
+	}
+	ODP_ABORT("Invalid timer handle %#x\n", hdl);
 }
 
-/**
- * Search and delete tmo entry from timeout list
- * return -1 : on error.. handle not in list
- *		0 : success
- */
-static int find_and_del_tmo(timeout_t **tmo, odp_timer_tmo_t handle)
+static inline uint32_t handle_to_idx(odp_timer_t hdl,
+		struct odp_timer_pool_s *tp)
 {
-	timeout_t *cur, *prev;
-	prev = NULL;
+	uint32_t idx = hdl & ((1U << INDEX_BITS) - 1U);
+	PREFETCH(&tp->tick_buf[idx]);
+	if (odp_likely(idx < odp_atomic_load_u32(&tp->high_wm)))
+		return idx;
+	ODP_ABORT("Invalid timer handle %#x\n", hdl);
+}
 
-	for (cur = *tmo; cur != NULL; prev = cur, cur = cur->next) {
-		if (cur->tmo_buf == handle) {
-			if (prev == NULL)
-				*tmo = cur->next;
-			else
-				prev->next = cur->next;
+static inline odp_timer_t tp_idx_to_handle(struct odp_timer_pool_s *tp,
+		uint32_t idx)
+{
+	assert(idx < (1U << INDEX_BITS));
+	return (tp->tp_idx << INDEX_BITS) | idx;
+}
 
-			break;
-		}
+/* Forward declarations */
+static void itimer_init(odp_timer_pool *tp);
+static void itimer_fini(odp_timer_pool *tp);
+
+static odp_timer_pool *odp_timer_pool_new(
+	const char *_name,
+	odp_buffer_pool_t _bp,
+	uint64_t _res,
+	uint64_t _mintmo,
+	uint64_t _maxtmo,
+	uint32_t _maxtim,
+	bool _sh,
+	odp_timer_clk_src_t _cs)
+{
+	uint32_t tp_idx = odp_atomic_fetch_add_u32(&num_timer_pools, 1);
+	if (odp_unlikely(tp_idx >= MAX_TIMER_POOLS)) {
+		/* Restore the previous value */
+		odp_atomic_sub_u32(&num_timer_pools, 1);
+		errno = ENFILE; /* Table overflow */
+		return NULL;
 	}
+	size_t sz0 = ODP_ALIGN_ROUNDUP(sizeof(odp_timer_pool),
+			ODP_CACHE_LINE_SIZE);
+	size_t sz1 = ODP_ALIGN_ROUNDUP(sizeof(tick_buf_t) * _maxtim,
+			ODP_CACHE_LINE_SIZE);
+	size_t sz2 = ODP_ALIGN_ROUNDUP(sizeof(odp_timer) * _maxtim,
+			ODP_CACHE_LINE_SIZE);
+	odp_shm_t shm = odp_shm_reserve(_name, sz0 + sz1 + sz2,
+			ODP_CACHE_LINE_SIZE, ODP_SHM_SW_ONLY);
+	if (odp_unlikely(shm == ODP_SHM_INVALID))
+		ODP_ABORT("%s: timer pool shm-alloc(%zuKB) failed\n",
+			  _name, (sz0 + sz1 + sz2) / 1024);
+	odp_timer_pool *tp = (odp_timer_pool *)odp_shm_addr(shm);
+	odp_atomic_init_u64(&tp->cur_tick, 0);
+	tp->name = strdup(_name);
+	tp->shm = shm;
+	tp->buf_pool = _bp;
+	tp->resolution_ns = _res;
+	tp->min_rel_tck = odp_timer_ns_to_tick(tp, _mintmo);
+	tp->max_rel_tck = odp_timer_ns_to_tick(tp, _maxtmo);
+	tp->num_alloc = 0;
+	odp_atomic_init_u32(&tp->high_wm, 0);
+	tp->max_timers = _maxtim;
+	tp->first_free = 0;
+	tp->shared = _sh;
+	tp->clk_src = _cs;
+	tp->tick_buf = (void *)((char *)odp_shm_addr(shm) + sz0);
+	tp->timers = (void *)((char *)odp_shm_addr(shm) + sz0 + sz1);
+	/* Initialize all odp_timer entries */
+	uint32_t i;
+	for (i = 0; i < tp->max_timers; i++) {
+		set_next_free(&tp->timers[i], i + 1);
+		tp->timers[i].user_ptr = NULL;
+	}
+	tp->tp_idx = tp_idx;
+	odp_spinlock_init(&tp->lock);
+	odp_spinlock_init(&tp->itimer_running);
+	timer_pool[tp_idx] = tp;
+	if (tp->clk_src == ODP_CLOCK_CPU)
+		itimer_init(tp);
+	return tp;
+}
 
-	if (!cur)
-		/* couldn't find tmo in list */
-		return -1;
-
-	/* application to free tmo_buf provided by absolute_tmo call */
-	return 0;
+static void odp_timer_pool_del(odp_timer_pool *tp)
+{
+	odp_spinlock_lock(&tp->lock);
+	timer_pool[tp->tp_idx] = NULL;
+	/* Wait for itimer thread to stop running */
+	odp_spinlock_lock(&tp->itimer_running);
+	if (tp->num_alloc != 0) {
+		/* It's a programming error to attempt to destroy a */
+		/* timer pool which is still in use */
+		ODP_ABORT("%s: timers in use\n", tp->name);
+	}
+	if (tp->clk_src == ODP_CLOCK_CPU)
+		itimer_fini(tp);
+	int rc = odp_shm_free(tp->shm);
+	if (rc != 0)
+		ODP_ABORT("Failed to free shared memory (%d)\n", rc);
 }
 
-int odp_timer_cancel_tmo(odp_timer_t timer_hdl, odp_timer_tmo_t tmo)
+static inline odp_timer_t timer_alloc(odp_timer_pool *tp,
+				      odp_queue_t queue,
+				      void *user_ptr)
 {
-	int id;
-	int tick_idx;
-	timeout_t *cancel_tmo;
-	odp_timeout_hdr_t *tmo_hdr;
-	tick_t *tick;
+	odp_timer_t hdl;
+	odp_spinlock_lock(&tp->lock);
+	if (odp_likely(tp->num_alloc < tp->max_timers)) {
+		tp->num_alloc++;
+		/* Remove first unused timer from free list */
+		assert(tp->first_free != tp->max_timers);
+		uint32_t idx = tp->first_free;
+		odp_timer *tim = &tp->timers[idx];
+		tp->first_free = get_next_free(tim);
+		/* Initialize timer */
+		timer_init(tim, &tp->tick_buf[idx], queue, user_ptr);
+		if (odp_unlikely(tp->num_alloc >
+				 odp_atomic_load_u32(&tp->high_wm)))
+			/* Update high_wm last with release model to
+			 * ensure timer initialization is visible */
+			_odp_atomic_u32_store_mm(&tp->high_wm,
+						 tp->num_alloc,
+						 _ODP_MEMMODEL_RLS);
+		hdl = tp_idx_to_handle(tp, idx);
+	} else {
+		errno = ENFILE; /* Reusing file table overflow */
+		hdl = ODP_TIMER_INVALID;
+	}
+	odp_spinlock_unlock(&tp->lock);
+	return hdl;
+}
 
-	/* get id */
-	id = (int)timer_hdl - 1;
+static odp_buffer_t timer_cancel(odp_timer_pool *tp,
+		uint32_t idx,
+		uint64_t new_state);
 
-	tmo_hdr = odp_timeout_hdr((odp_timeout_t) tmo);
-	/* get tmo_buf to cancel */
-	cancel_tmo = &tmo_hdr->meta;
+static inline odp_buffer_t timer_free(odp_timer_pool *tp, uint32_t idx)
+{
+	odp_timer *tim = &tp->timers[idx];
 
-	tick_idx = cancel_tmo->tick;
-	tick = &odp_timer.timer[id].tick[tick_idx];
+	/* Free the timer by setting timer state to unused and
+	 * grab any timeout buffer */
+	odp_buffer_t old_buf = timer_cancel(tp, idx, TMO_UNUSED);
 
-	odp_spinlock_lock(&tick->lock);
-	/* search and delete tmo from tick list */
-	if (find_and_del_tmo(&tick->list, tmo) != 0) {
-		odp_spinlock_unlock(&tick->lock);
-		ODP_DBG("Couldn't find the tmo (%d) in tick list\n", (int)tmo);
-		return -1;
-	}
-	odp_spinlock_unlock(&tick->lock);
+	/* Destroy timer */
+	timer_fini(tim, &tp->tick_buf[idx]);
 
-	return 0;
+	/* Insert timer into free list */
+	odp_spinlock_lock(&tp->lock);
+	set_next_free(tim, tp->first_free);
+	tp->first_free = idx;
+	assert(tp->num_alloc != 0);
+	tp->num_alloc--;
+	odp_spinlock_unlock(&tp->lock);
+
+	return old_buf;
 }
 
-static void notify_function(union sigval sigval)
-{
-	uint64_t cur_tick;
-	timeout_t *tmo;
-	tick_t *tick;
-	timer_ring_t *timer;
+/******************************************************************************
+ * Operations on timers
+ * expire/reset/cancel timer
+ *****************************************************************************/
 
-	timer = sigval.sival_ptr;
+static bool timer_reset(uint32_t idx,
+		uint64_t abs_tck,
+		odp_buffer_t *tmo_buf,
+		odp_timer_pool *tp)
+{
+	bool success = true;
+	tick_buf_t *tb = &tp->tick_buf[idx];
+
+	if (tmo_buf == NULL || *tmo_buf == ODP_BUFFER_INVALID) {
+#ifdef ODP_ATOMIC_U128
+		tick_buf_t new, old;
+		do {
+			/* Relaxed and non-atomic read of current values */
+			old.exp_tck.v = tb->exp_tck.v;
+			old.tmo_buf = tb->tmo_buf;
+			TB_SET_PAD(old);
+			/* Check if there actually is a timeout buffer
+			 * present */
+			if (old.tmo_buf == ODP_BUFFER_INVALID) {
+				/* Cannot reset a timer with neither old nor
+				 * new timeout buffer */
+				success = false;
+				break;
+			}
+			/* Set up new values */
+			new.exp_tck.v = abs_tck;
+			new.tmo_buf = old.tmo_buf;
+			TB_SET_PAD(new);
+			/* Atomic CAS will fail if we experienced torn reads,
+			 * retry update sequence until CAS succeeds */
+		} while (!_odp_atomic_u128_cmp_xchg_mm(
+					(_odp_atomic_u128_t *)tb,
+					(_uint128_t *)&old,
+					(_uint128_t *)&new,
+					_ODP_MEMMODEL_RLS,
+					_ODP_MEMMODEL_RLX));
+#else
+#ifdef __ARM_ARCH
+		/* Since barriers are expensive on C-A15, we take an
+		 * alternative approach using the relaxed memory model */
+		uint64_t old;
+		/* Swap in new expiration tick, get back old tick which
+		 * will indicate active/inactive timer state */
+		old = _odp_atomic_u64_xchg_mm(&tb->exp_tck, abs_tck,
+			_ODP_MEMMODEL_RLX);
+		if (old == TMO_INACTIVE) {
+			/* Timer was inactive (cancelled or expired),
+			 * we can't reset a timer without a timeout buffer.
+			 * Attempt to restore inactive state, we don't
+			 * want this timer to continue as active without
+			 * timeout as this will trigger unnecessary and
+			 * aborted expiration attempts.
+			 * We don't care if we fail, then some other thread
+			 * reset or cancelled the timer. Without any
+			 * synchronization between the threads, we have a
+			 * data race and the behavior is undefined */
+			(void)_odp_atomic_u64_cmp_xchg_strong_mm(
+					&tb->exp_tck,
+					&abs_tck,
+					TMO_INACTIVE,
+					_ODP_MEMMODEL_RLX,
+					_ODP_MEMMODEL_RLX);
+			success = false;
+		}
+#else
+		/* Take a related lock */
+		while (_odp_atomic_flag_tas(IDX2LOCK(idx)))
+			/* While lock is taken, spin using relaxed loads */
+			while (_odp_atomic_flag_load(IDX2LOCK(idx)))
+				odp_spin();
+
+		/* Only if there is a timeout buffer can the timer be reset */
+		if (odp_likely(tb->tmo_buf != ODP_BUFFER_INVALID)) {
+			/* Write the new expiration tick */
+			tb->exp_tck.v = abs_tck;
+		} else {
+			/* Cannot reset a timer with neither old nor new
+			 * timeout buffer */
+			success = false;
+		}
 
-	if (timer->active == 0) {
-		ODP_DBG("Timer (%u) not active\n", timer->timer_hdl);
-		return;
+		/* Release the lock */
+		_odp_atomic_flag_clear(IDX2LOCK(idx));
+#endif
+#endif
+	} else {
+		/* We have a new timeout buffer which replaces any old one */
+		odp_buffer_t old_buf = ODP_BUFFER_INVALID;
+#ifdef ODP_ATOMIC_U128
+		tick_buf_t new, old;
+		new.exp_tck.v = abs_tck;
+		new.tmo_buf = *tmo_buf;
+		TB_SET_PAD(new);
+		/* We are releasing the new timeout buffer to some other
+		 * thread */
+		_odp_atomic_u128_xchg_mm((_odp_atomic_u128_t *)tb,
+					 (_uint128_t *)&new,
+					 (_uint128_t *)&old,
+					 _ODP_MEMMODEL_ACQ_RLS);
+		old_buf = old.tmo_buf;
+#else
+		/* Take a related lock */
+		while (_odp_atomic_flag_tas(IDX2LOCK(idx)))
+			/* While lock is taken, spin using relaxed loads */
+			while (_odp_atomic_flag_load(IDX2LOCK(idx)))
+				odp_spin();
+
+		/* Swap in new buffer, save any old buffer */
+		old_buf = tb->tmo_buf;
+		tb->tmo_buf = *tmo_buf;
+
+		/* Write the new expiration tick */
+		tb->exp_tck.v = abs_tck;
+
+		/* Release the lock */
+		_odp_atomic_flag_clear(IDX2LOCK(idx));
+#endif
+		/* Return old timeout buffer */
+		*tmo_buf = old_buf;
 	}
+	return success;
+}
 
-	/* ODP_DBG("Tick\n"); */
-
-	cur_tick = timer->cur_tick++;
-
-	odp_sync_stores();
-
-	tick = &timer->tick[cur_tick % MAX_TICKS];
+static odp_buffer_t timer_cancel(odp_timer_pool *tp,
+		uint32_t idx,
+		uint64_t new_state)
+{
+	tick_buf_t *tb = &tp->tick_buf[idx];
+	odp_buffer_t old_buf;
+
+#ifdef ODP_ATOMIC_U128
+	tick_buf_t new, old;
+	/* Update the timer state (e.g. cancel the current timeout) */
+	new.exp_tck.v = new_state;
+	/* Swap out the old buffer */
+	new.tmo_buf = ODP_BUFFER_INVALID;
+	TB_SET_PAD(new);
+	_odp_atomic_u128_xchg_mm((_odp_atomic_u128_t *)tb,
+				 (_uint128_t *)&new, (_uint128_t *)&old,
+				 _ODP_MEMMODEL_RLX);
+	old_buf = old.tmo_buf;
+#else
+	/* Take a related lock */
+	while (_odp_atomic_flag_tas(IDX2LOCK(idx)))
+		/* While lock is taken, spin using relaxed loads */
+		while (_odp_atomic_flag_load(IDX2LOCK(idx)))
+			odp_spin();
+
+	/* Update the timer state (e.g. cancel the current timeout) */
+	tb->exp_tck.v = new_state;
+
+	/* Swap out the old buffer */
+	old_buf = tb->tmo_buf;
+	tb->tmo_buf = ODP_BUFFER_INVALID;
+
+	/* Release the lock */
+	_odp_atomic_flag_clear(IDX2LOCK(idx));
+#endif
+	/* Return the old buffer */
+	return old_buf;
+}
 
-	while ((tmo = rem_tmo(tick)) != NULL) {
-		odp_queue_t  queue;
-		odp_buffer_t buf;
+static unsigned timer_expire(odp_timer_pool *tp, uint32_t idx, uint64_t tick)
+{
+	odp_timer *tim = &tp->timers[idx];
+	tick_buf_t *tb = &tp->tick_buf[idx];
+	odp_buffer_t tmo_buf = ODP_BUFFER_INVALID;
+	uint64_t exp_tck;
+#ifdef ODP_ATOMIC_U128
+	/* Atomic re-read for correctness */
+	exp_tck = _odp_atomic_u64_load_mm(&tb->exp_tck, _ODP_MEMMODEL_RLX);
+	/* Re-check exp_tck */
+	if (odp_likely(exp_tck <= tick)) {
+		/* Attempt to grab timeout buffer, replace with inactive timer
+		 * and invalid buffer */
+		tick_buf_t new, old;
+		old.exp_tck.v = exp_tck;
+		old.tmo_buf = tb->tmo_buf;
+		TB_SET_PAD(old);
+		new.exp_tck.v = TMO_INACTIVE;
+		new.tmo_buf = ODP_BUFFER_INVALID;
+		TB_SET_PAD(new);
+		int succ = _odp_atomic_u128_cmp_xchg_mm(
+				(_odp_atomic_u128_t *)tb,
+				(_uint128_t *)&old, (_uint128_t *)&new,
+				_ODP_MEMMODEL_RLS, _ODP_MEMMODEL_RLX);
+		if (succ)
+			tmo_buf = old.tmo_buf;
+		/* Else CAS failed, something changed => skip timer
+		 * this tick, it will be checked again next tick */
+	}
+	/* Else false positive, ignore */
+#else
+	/* Take a related lock */
+	while (_odp_atomic_flag_tas(IDX2LOCK(idx)))
+		/* While lock is taken, spin using relaxed loads */
+		while (_odp_atomic_flag_load(IDX2LOCK(idx)))
+			odp_spin();
+	/* Proper check for timer expired */
+	exp_tck = tb->exp_tck.v;
+	if (odp_likely(exp_tck <= tick)) {
+		/* Verify that there is a timeout buffer */
+		if (odp_likely(tb->tmo_buf != ODP_BUFFER_INVALID)) {
+			/* Grab timeout buffer, replace with inactive timer
+			 * and invalid buffer */
+			tmo_buf = tb->tmo_buf;
+			tb->tmo_buf = ODP_BUFFER_INVALID;
+			tb->exp_tck.v = TMO_INACTIVE;
+		}
+		/* Else somehow active timer without user buffer */
+	}
+	/* Else false positive, ignore */
+	/* Release the lock */
+	_odp_atomic_flag_clear(IDX2LOCK(idx));
+#endif
+	if (odp_likely(tmo_buf != ODP_BUFFER_INVALID)) {
+		/* Fill in metadata fields in system timeout buffer */
+		if (odp_buffer_type(tmo_buf) == ODP_BUFFER_TYPE_TIMEOUT) {
+			/* Convert from buffer to timeout hdr */
+			odp_timeout_hdr_t *tmo_hdr =
+				odp_tmo_to_hdr(odp_tmo_from_buffer(tmo_buf));
+			tmo_hdr->timer = tp_idx_to_handle(tp, idx);
+			tmo_hdr->expiration = exp_tck;
+			tmo_hdr->user_ptr = tim->user_ptr;
+		}
+		/* Else ignore buffers of other types */
+		/* Post the timeout to the destination queue */
+		int rc = odp_queue_enq(tim->queue, tmo_buf);
+		if (odp_unlikely(rc != 0))
+			ODP_ABORT("Failed to enqueue timeout buffer (%d)\n",
+				  rc);
+		return 1;
+	} else {
+		/* Else false positive, ignore */
+		return 0;
+	}
+}
 
-		queue = tmo->queue;
-		buf   = tmo->buf;
+static unsigned odp_timer_pool_expire(odp_timer_pool_t tpid, uint64_t tick)
+{
+	tick_buf_t *array = &tpid->tick_buf[0];
+	uint32_t high_wm = _odp_atomic_u32_load_mm(&tpid->high_wm,
+			_ODP_MEMMODEL_ACQ);
+	unsigned nexp = 0;
+	uint32_t i;
+
+	for (i = 0; i < high_wm;) {
+#ifdef __ARM_ARCH
+		/* As a rare occurrence, we can outsmart the HW prefetcher
+		 * and the compiler (GCC -fprefetch-loop-arrays) with some
+		 * tuned manual prefetching (32x16=512B ahead), seems to
+		 * give 30% better performance on ARM C-A15 */
+		PREFETCH(&array[i + 32]);
+#endif
+		/* Non-atomic read for speed */
+		uint64_t exp_tck = array[i++].exp_tck.v;
+		if (odp_unlikely(exp_tck <= tick)) {
+			/* Attempt to expire timer */
+			nexp += timer_expire(tpid, i - 1, tick);
+		}
+	}
+	return nexp;
+}
 
-		if (buf != tmo->tmo_buf)
-			odp_buffer_free(tmo->tmo_buf);
+/******************************************************************************
+ * POSIX timer support
+ * Functions that use Linux/POSIX per-process timers and related facilities
+ *****************************************************************************/
 
-		odp_queue_enq(queue, buf);
+static void timer_notify(sigval_t sigval)
+{
+	odp_timer_pool *tp = (odp_timer_pool *)sigval.sival_ptr;
+#ifdef __ARM_ARCH
+	odp_timer *array = &tp->timers[0];
+	uint32_t i;
+	/* Prefetch initial cache lines (match 32 above) */
+	for (i = 0; i < 32; i += ODP_CACHE_LINE_SIZE / sizeof(array[0]))
+		PREFETCH(&array[i]);
+#endif
+	uint64_t prev_tick = odp_atomic_fetch_inc_u64(&tp->cur_tick);
+	/* Attempt to acquire the lock, check if the old value was clear */
+	if (odp_spinlock_trylock(&tp->itimer_running)) {
+		/* Scan timer array, looking for timers to expire */
+		(void)odp_timer_pool_expire(tp, prev_tick);
+		odp_spinlock_unlock(&tp->itimer_running);
 	}
+	/* Else skip scan of timers. cur_tick was updated and next itimer
+	 * invocation will process older expiration ticks as well */
 }
 
-static void timer_start(timer_ring_t *timer)
+static void itimer_init(odp_timer_pool *tp)
 {
 	struct sigevent   sigev;
 	struct itimerspec ispec;
 	uint64_t res, sec, nsec;
 
-	ODP_DBG("\nTimer (%u) starts\n", timer->timer_hdl);
+	ODP_DBG("Creating POSIX timer for timer pool %s, period %"
+		PRIu64" ns\n", tp->name, tp->resolution_ns);
 
 	memset(&sigev, 0, sizeof(sigev));
 	memset(&ispec, 0, sizeof(ispec));
 
 	sigev.sigev_notify          = SIGEV_THREAD;
-	sigev.sigev_notify_function = notify_function;
-	sigev.sigev_value.sival_ptr = timer;
+	sigev.sigev_notify_function = timer_notify;
+	sigev.sigev_value.sival_ptr = tp;
 
-	if (timer_create(CLOCK_MONOTONIC, &sigev, &timer->timerid)) {
-		ODP_DBG("Timer create failed\n");
-		return;
-	}
+	if (timer_create(CLOCK_MONOTONIC, &sigev, &tp->timerid))
+		ODP_ABORT("timer_create() returned error %s\n",
+			  strerror(errno));
 
-	res  = timer->resolution_ns;
+	res  = tp->resolution_ns;
 	sec  = res / ODP_TIME_SEC;
-	nsec = res - sec*ODP_TIME_SEC;
+	nsec = res - sec * ODP_TIME_SEC;
 
 	ispec.it_interval.tv_sec  = (time_t)sec;
 	ispec.it_interval.tv_nsec = (long)nsec;
 	ispec.it_value.tv_sec     = (time_t)sec;
 	ispec.it_value.tv_nsec    = (long)nsec;
 
-	if (timer_settime(timer->timerid, 0, &ispec, NULL)) {
-		ODP_DBG("Timer set failed\n");
-		return;
-	}
-
-	return;
+	if (timer_settime(tp->timerid, 0, &ispec, NULL))
+		ODP_ABORT("timer_settime() returned error %s\n",
+			  strerror(errno));
 }
 
-int odp_timer_init_global(void)
+static void itimer_fini(odp_timer_pool *tp)
 {
-	ODP_DBG("Timer init ...");
-
-	memset(&odp_timer, 0, sizeof(timer_global_t));
-
-	odp_spinlock_init(&odp_timer.lock);
-
-	ODP_DBG("done\n");
-
-	return 0;
+	if (timer_delete(tp->timerid) != 0)
+		ODP_ABORT("timer_delete() returned error %s\n",
+			  strerror(errno));
 }
 
-int odp_timer_disarm_all(void)
+/******************************************************************************
+ * Public API functions
+ * Some parameter checks and error messages
+ * No modifications of internal state
+ *****************************************************************************/
+odp_timer_pool_t
+odp_timer_pool_create(const char *name,
+		      odp_buffer_pool_t buf_pool,
+		      uint64_t resolution_ns,
+		      uint64_t min_timeout,
+		      uint64_t max_timeout,
+		      uint32_t num_timers,
+		      int shared,
+		      odp_timer_clk_src_t clk_src)
 {
-	int timers;
-	struct itimerspec ispec;
-
-	odp_spinlock_lock(&odp_timer.lock);
-
-	timers = odp_timer.num_timers;
-
-	ispec.it_interval.tv_sec  = 0;
-	ispec.it_interval.tv_nsec = 0;
-	ispec.it_value.tv_sec     = 0;
-	ispec.it_value.tv_nsec    = 0;
-
-	for (; timers >= 0; timers--) {
-		if (timer_settime(odp_timer.timer[timers].timerid,
-				  0, &ispec, NULL)) {
-			ODP_DBG("Timer reset failed\n");
-			odp_spinlock_unlock(&odp_timer.lock);
-			return -1;
-		}
-		odp_timer.num_timers--;
-	}
-
-	odp_spinlock_unlock(&odp_timer.lock);
-
-	return 0;
+	/* Verify that buffer pool can be used for timeouts */
+	odp_buffer_t buf = odp_buffer_alloc(buf_pool);
+	if (buf == ODP_BUFFER_INVALID)
+		ODP_ABORT("%s: Failed to allocate buffer\n", name);
+	if (odp_buffer_type(buf) != ODP_BUFFER_TYPE_TIMEOUT)
+		ODP_ABORT("%s: Buffer pool wrong type\n", name);
+	odp_buffer_free(buf);
+	odp_timer_pool_t tp = odp_timer_pool_new(name, buf_pool, resolution_ns,
+			      min_timeout, max_timeout, num_timers, shared,
+			      clk_src);
+	return tp;
 }
 
-odp_timer_t odp_timer_create(const char *name, odp_buffer_pool_t pool,
-			     uint64_t resolution_ns, uint64_t min_ns,
-			     uint64_t max_ns)
+void odp_timer_pool_start(void)
 {
-	uint32_t id;
-	timer_ring_t *timer;
-	odp_timer_t timer_hdl;
-	int i;
-	uint64_t max_ticks;
-	(void) name;
-
-	if (resolution_ns < MIN_RES)
-		resolution_ns = MIN_RES;
-
-	if (resolution_ns > MAX_RES)
-		resolution_ns = MAX_RES;
-
-	max_ticks = max_ns / resolution_ns;
-
-	if (max_ticks > MAX_TICKS) {
-		ODP_DBG("Maximum timeout too long: %"PRIu64" ticks\n",
-			max_ticks);
-		return ODP_TIMER_INVALID;
-	}
-
-	if (min_ns < resolution_ns) {
-		ODP_DBG("Min timeout %"PRIu64" ns < resolution %"PRIu64" ns\n",
-			min_ns, resolution_ns);
-		return ODP_TIMER_INVALID;
-	}
-
-	odp_spinlock_lock(&odp_timer.lock);
-
-	if (odp_timer.num_timers >= NUM_TIMERS) {
-		odp_spinlock_unlock(&odp_timer.lock);
-		ODP_DBG("All timers allocated\n");
-		return ODP_TIMER_INVALID;
-	}
-
-	for (id = 0; id < NUM_TIMERS; id++) {
-		if (odp_timer.timer[id].allocated == 0)
-			break;
-	}
-
-	timer = &odp_timer.timer[id];
-	timer->allocated = 1;
-	odp_timer.num_timers++;
-
-	odp_spinlock_unlock(&odp_timer.lock);
-
-	timer_hdl = id + 1;
-
-	timer->timer_hdl     = timer_hdl;
-	timer->pool          = pool;
-	timer->resolution_ns = resolution_ns;
-	timer->max_ticks     = MAX_TICKS;
-
-	for (i = 0; i < MAX_TICKS; i++) {
-		odp_spinlock_init(&timer->tick[i].lock);
-		timer->tick[i].list = NULL;
-	}
-
-	timer->active = 1;
-	odp_sync_stores();
-
-	timer_start(timer);
-
-	return timer_hdl;
+	/* Nothing to do here, timer pools are started by the create call */
 }
 
-odp_timer_tmo_t odp_timer_absolute_tmo(odp_timer_t timer_hdl, uint64_t tmo_tick,
-				       odp_queue_t queue, odp_buffer_t buf)
+void odp_timer_pool_destroy(odp_timer_pool_t tpid)
 {
-	int id;
-	uint64_t tick;
-	uint64_t cur_tick;
-	timeout_t *new_tmo;
-	odp_buffer_t tmo_buf;
-	odp_timeout_hdr_t *tmo_hdr;
-	timer_ring_t *timer;
-
-	id = (int)timer_hdl - 1;
-	timer = &odp_timer.timer[id];
+	odp_timer_pool_del(tpid);
+}
 
-	cur_tick = timer->cur_tick;
-	if (tmo_tick <= cur_tick) {
-		ODP_DBG("timeout too close\n");
-		return ODP_TIMER_TMO_INVALID;
-	}
+uint64_t odp_timer_tick_to_ns(odp_timer_pool_t tpid, uint64_t ticks)
+{
+	return ticks * tpid->resolution_ns;
+}
 
-	if ((tmo_tick - cur_tick) > MAX_TICKS) {
-		ODP_DBG("timeout too far: cur %"PRIu64" tmo %"PRIu64"\n",
-			cur_tick, tmo_tick);
-		return ODP_TIMER_TMO_INVALID;
-	}
+uint64_t odp_timer_ns_to_tick(odp_timer_pool_t tpid, uint64_t ns)
+{
+	return (uint64_t)(ns / tpid->resolution_ns);
+}
 
-	tick = tmo_tick % MAX_TICKS;
+uint64_t odp_timer_current_tick(odp_timer_pool_t tpid)
+{
+	/* Relaxed atomic read for lowest overhead */
+	return odp_atomic_load_u64(&tpid->cur_tick);
+}
 
-	tmo_buf = odp_buffer_alloc(timer->pool);
-	if (tmo_buf == ODP_BUFFER_INVALID) {
-		ODP_DBG("tmo buffer alloc failed\n");
-		return ODP_TIMER_TMO_INVALID;
+size_t odp_timer_pool_info(odp_timer_pool_t tpid,
+			   odp_timer_pool_info_t *buf,
+			   size_t buf_size)
+{
+	size_t needs;
+	buf->resolution = tpid->resolution_ns;
+	buf->min_tmo = tpid->min_rel_tck;
+	buf->max_tmo = tpid->max_rel_tck;
+	buf->num_timers = tpid->max_timers;
+	buf->cur_timers = tpid->num_alloc;
+	buf->hwm_timers = odp_atomic_load_u32(&tpid->high_wm);
+	buf->shared = tpid->shared;
+	/* Compute how large buffer is needed */
+	needs = offsetof(struct odp_timer_pool_info_s, name) +
+		strlen(tpid->name) + 1;
+	if (buf_size >= needs) {
+		/* Provided buffer is large enough */
+		strcpy(buf->name, tpid->name);
+		return needs;
+	} else {
+		/* Provided buffer is too small */
+		size_t siz = sizeof(buf->name);
+		/* Truncate name when copying into buffer */
+		strncpy(buf->name, tpid->name, siz - 1);
+		buf->name[siz - 1] = 0;
+		return buf_size;
 	}
-
-	tmo_hdr = odp_timeout_hdr((odp_timeout_t) tmo_buf);
-	new_tmo = &tmo_hdr->meta;
-
-	new_tmo->timer_id = id;
-	new_tmo->tick     = (int)tick;
-	new_tmo->tmo_tick = tmo_tick;
-	new_tmo->queue    = queue;
-	new_tmo->tmo_buf  = tmo_buf;
-
-	if (buf != ODP_BUFFER_INVALID)
-		new_tmo->buf = buf;
-	else
-		new_tmo->buf = tmo_buf;
-
-	add_tmo(&timer->tick[tick], new_tmo);
-
-	return tmo_buf;
 }
 
-uint64_t odp_timer_tick_to_ns(odp_timer_t timer_hdl, uint64_t ticks)
+odp_timer_t odp_timer_alloc(odp_timer_pool_t tpid,
+			    odp_queue_t queue,
+			    void *user_ptr)
 {
-	uint32_t id;
-
-	id = timer_hdl - 1;
-	return ticks * odp_timer.timer[id].resolution_ns;
+	if (odp_unlikely(queue == ODP_QUEUE_INVALID))
+		ODP_ABORT("%s: Invalid queue handle\n", tpid->name);
+	/* We don't care about the validity of user_ptr because we will not
+	 * attempt to dereference it */
+	odp_timer_t hdl = timer_alloc(tpid, queue, user_ptr);
+	if (odp_likely(hdl != ODP_TIMER_INVALID)) {
+		/* Success */
+		return hdl;
+	}
+	/* errno set by timer_alloc() */
+	return ODP_TIMER_INVALID;
 }
 
-uint64_t odp_timer_ns_to_tick(odp_timer_t timer_hdl, uint64_t ns)
+uint64_t odp_timer_free(odp_timer_t hdl, odp_buffer_t *tmo_buf)
 {
-	uint32_t id;
-
-	id = timer_hdl - 1;
-	return ns / odp_timer.timer[id].resolution_ns;
+	odp_timer_pool *tp = handle_to_tp(hdl);
+	uint32_t idx = handle_to_idx(hdl, tp);
+	odp_buffer_t old_buf = timer_free(tp, idx);
+	/* Only overwrite *tmo_buf if we are actually returning a timeout
+	 * buffer */
+	if (old_buf != ODP_BUFFER_INVALID)
+		*tmo_buf = old_buf;
+	/* Return a special value that will not match any timeout expiration
+	 * tick and thus help identify stale timeouts */
+	return ODP_TICK_INVALID;
 }
 
-uint64_t odp_timer_resolution(odp_timer_t timer_hdl)
+uint64_t odp_timer_set_abs(odp_timer_t hdl,
+			   uint64_t abs_tck,
+			   odp_buffer_t *tmo_buf)
 {
-	uint32_t id;
-
-	id = timer_hdl - 1;
-	return odp_timer.timer[id].resolution_ns;
+	odp_timer_pool *tp = handle_to_tp(hdl);
+	uint32_t idx = handle_to_idx(hdl, tp);
+	uint64_t cur_tick = odp_atomic_load_u64(&tp->cur_tick);
+	if (odp_unlikely(abs_tck < cur_tick + tp->min_rel_tck))
+		return ODP_TICK_TOOEARLY;
+	if (odp_unlikely(abs_tck > cur_tick + tp->max_rel_tck))
+		return ODP_TICK_TOOLATE;
+	/* Return either the requested tick value when successful or a special
+	 * value that will not match any timeout for errors */
+	return timer_reset(idx, abs_tck, tmo_buf, tp) ?
+		abs_tck : ODP_TICK_INVALID;
 }
 
-uint64_t odp_timer_maximum_tmo(odp_timer_t timer_hdl)
+uint64_t odp_timer_set_rel(odp_timer_t hdl,
+			   uint64_t rel_tck,
+			   odp_buffer_t *tmo_buf)
 {
-	uint32_t id;
-
-	id = timer_hdl - 1;
-	return odp_timer.timer[id].max_ticks;
+	odp_timer_pool *tp = handle_to_tp(hdl);
+	uint32_t idx = handle_to_idx(hdl, tp);
+	if (odp_unlikely(rel_tck < tp->min_rel_tck))
+		return ODP_TICK_TOOEARLY;
+	if (odp_unlikely(rel_tck > tp->max_rel_tck))
+		return ODP_TICK_TOOLATE;
+	uint64_t abs_tck = odp_atomic_load_u64(&tp->cur_tick) + rel_tck;
+	/* Return either the requested tick value when successful or a special
+	 * value that will not match any timeout for errors */
+	return timer_reset(idx, abs_tck, tmo_buf, tp) ?
+		abs_tck : ODP_TICK_INVALID;
 }
 
-uint64_t odp_timer_current_tick(odp_timer_t timer_hdl)
+uint64_t odp_timer_cancel(odp_timer_t hdl, odp_buffer_t *tmo_buf)
 {
-	uint32_t id;
-
-	id = timer_hdl - 1;
-	return odp_timer.timer[id].cur_tick;
+	odp_timer_pool *tp = handle_to_tp(hdl);
+	uint32_t idx = handle_to_idx(hdl, tp);
+	*tmo_buf = timer_cancel(tp, idx, TMO_INACTIVE);
+	/* Return a special value that will not match any timeout expiration
+	 * tick and thus help identify stale timeouts */
+	return ODP_TICK_INVALID;
 }
 
-odp_timeout_t odp_timeout_from_buffer(odp_buffer_t buf)
+int odp_timer_tmo_metadata(odp_buffer_t buf,
+		odp_timer_t *hdl_p,
+		uint64_t *exp_tck_p,
+		void **user_ptr_p)
 {
-	return (odp_timeout_t) buf;
+	if (odp_likely(odp_buffer_type(buf) == ODP_BUFFER_TYPE_TIMEOUT)) {
+		odp_timeout_hdr_t *tmo_hdr =
+			odp_tmo_to_hdr(odp_tmo_from_buffer(buf));
+		odp_timer_t timer = tmo_hdr->timer;
+		uint64_t expiration = tmo_hdr->expiration;
+		void *user_ptr = tmo_hdr->user_ptr;
+		if (odp_likely(hdl_p != NULL))
+			*hdl_p = timer;
+		if (odp_likely(exp_tck_p != NULL))
+			*exp_tck_p = expiration;
+		if (odp_likely(user_ptr_p != NULL))
+			*user_ptr_p = user_ptr;
+		return 1;
+	} else {
+		/* Not a buffer of the appropriate type */
+		return 0;
+	}
 }
 
-uint64_t odp_timeout_tick(odp_timeout_t tmo)
+int odp_timer_init_global(void)
 {
-	odp_timeout_hdr_t *tmo_hdr = odp_timeout_hdr(tmo);
-	return tmo_hdr->meta.tmo_tick;
+#ifndef ODP_ATOMIC_U128
+	uint32_t i;
+	for (i = 0; i < NUM_LOCKS; i++)
+		_odp_atomic_flag_clear(&locks[i]);
+#else
+	ODP_DBG("Using lock-less timer implementation\n");
+#endif
+	odp_atomic_init_u32(&num_timer_pools, 0);
+	return 0;
 }