[PATCHv2] linux-generic: timer: remove dead 128 bit optimizations

Message ID 1463426107-24816-1-git-send-email-maxim.uvarov@linaro.org
State New
Headers show

Commit Message

Maxim Uvarov May 16, 2016, 7:15 p.m.
Remove totally untested branch with 128 bit optimizations
for timer code.
This also fixes static assert bug:
https://bugs.linaro.org/show_bug.cgi?id=2211

Signed-off-by: Maxim Uvarov <maxim.uvarov@linaro.org>
---
 platform/linux-generic/odp_timer.c | 107 -------------------------------------
 1 file changed, 107 deletions(-)

Comments

Bill Fischofer May 16, 2016, 8:43 p.m. | #1
On Mon, May 16, 2016 at 2:15 PM, Maxim Uvarov <maxim.uvarov@linaro.org>
wrote:

> Remove totally untested branch with 128 bit optimizations
> for timer code.
> This also fixes static assert bug:
> https://bugs.linaro.org/show_bug.cgi?id=2211
>
> Signed-off-by: Maxim Uvarov <maxim.uvarov@linaro.org>
>

Reviewed-and-tested-by: Bill Fischofer <bill.fischofer@linaro.org>


> ---
>  platform/linux-generic/odp_timer.c | 107
> -------------------------------------
>  1 file changed, 107 deletions(-)
>
> diff --git a/platform/linux-generic/odp_timer.c
> b/platform/linux-generic/odp_timer.c
> index 6b84309..be28da3 100644
> --- a/platform/linux-generic/odp_timer.c
> +++ b/platform/linux-generic/odp_timer.c
> @@ -11,15 +11,7 @@
>   *
>   */
>
> -/* Check if compiler supports 16-byte atomics. GCC needs -mcx16 flag on
> x86 */
> -/* Using spin lock actually seems faster on Core2 */
> -#ifdef ODP_ATOMIC_U128
> -/* TB_NEEDS_PAD defined if sizeof(odp_buffer_t) != 8 */
> -#define TB_NEEDS_PAD
> -#define TB_SET_PAD(x) ((x).pad = 0)
> -#else
>  #define TB_SET_PAD(x) (void)(x)
> -#endif
>
>  #include <odp_posix_extensions.h>
>
> @@ -70,11 +62,9 @@
>   * Mutual exclusion in the absence of CAS16
>
> *****************************************************************************/
>
> -#ifndef ODP_ATOMIC_U128
>  #define NUM_LOCKS 1024
>  static _odp_atomic_flag_t locks[NUM_LOCKS]; /* Multiple locks per cache
> line! */
>  #define IDX2LOCK(idx) (&locks[(idx) % NUM_LOCKS])
> -#endif
>
>
>  /******************************************************************************
>   * Translation between timeout buffer and timeout header
> @@ -98,17 +88,9 @@ static odp_timeout_hdr_t *timeout_hdr(odp_timeout_t tmo)
>  typedef struct tick_buf_s {
>         odp_atomic_u64_t exp_tck;/* Expiration tick or TMO_xxx */
>         odp_buffer_t tmo_buf;/* ODP_BUFFER_INVALID if timer not active */
> -#ifdef TB_NEEDS_PAD
> -       uint32_t pad;/* Need to be able to access padding for successful
> CAS */
> -#endif
>  } tick_buf_t
> -#ifdef ODP_ATOMIC_U128
> -ODP_ALIGNED(16) /* 16-byte atomic operations need properly aligned
> addresses */
> -#endif
>  ;
>
> -ODP_STATIC_ASSERT(sizeof(tick_buf_t) == 16, "sizeof(tick_buf_t) == 16");
> -
>  typedef struct odp_timer_s {
>         void *user_ptr;
>         odp_queue_t queue;/* Used for free list when timer is free */
> @@ -378,34 +360,6 @@ static bool timer_reset(uint32_t idx,
>         tick_buf_t *tb = &tp->tick_buf[idx];
>
>         if (tmo_buf == NULL || *tmo_buf == ODP_BUFFER_INVALID) {
> -#ifdef ODP_ATOMIC_U128
> -               tick_buf_t new, old;
> -               do {
> -                       /* Relaxed and non-atomic read of current values */
> -                       old.exp_tck.v = tb->exp_tck.v;
> -                       old.tmo_buf = tb->tmo_buf;
> -                       TB_SET_PAD(old);
> -                       /* Check if there actually is a timeout buffer
> -                        * present */
> -                       if (old.tmo_buf == ODP_BUFFER_INVALID) {
> -                               /* Cannot reset a timer with neither old
> nor
> -                                * new timeout buffer */
> -                               success = false;
> -                               break;
> -                       }
> -                       /* Set up new values */
> -                       new.exp_tck.v = abs_tck;
> -                       new.tmo_buf = old.tmo_buf;
> -                       TB_SET_PAD(new);
> -                       /* Atomic CAS will fail if we experienced torn
> reads,
> -                        * retry update sequence until CAS succeeds */
> -               } while (!_odp_atomic_u128_cmp_xchg_mm(
> -                                       (_odp_atomic_u128_t *)tb,
> -                                       (_uint128_t *)&old,
> -                                       (_uint128_t *)&new,
> -                                       _ODP_MEMMODEL_RLS,
> -                                       _ODP_MEMMODEL_RLX));
> -#else
>  #ifdef __ARM_ARCH
>                 /* Since barriers are not good for C-A15, we take an
>                  * alternative approach using relaxed memory model */
> @@ -453,7 +407,6 @@ static bool timer_reset(uint32_t idx,
>                 /* Release the lock */
>                 _odp_atomic_flag_clear(IDX2LOCK(idx));
>  #endif
> -#endif
>         } else {
>                 /* We have a new timeout buffer which replaces any old one
> */
>                 /* Fill in some (constant) header fields for timeout
> events */
> @@ -468,19 +421,6 @@ static bool timer_reset(uint32_t idx,
>                 }
>                 /* Else ignore buffers of other types */
>                 odp_buffer_t old_buf = ODP_BUFFER_INVALID;
> -#ifdef ODP_ATOMIC_U128
> -               tick_buf_t new, old;
> -               new.exp_tck.v = abs_tck;
> -               new.tmo_buf = *tmo_buf;
> -               TB_SET_PAD(new);
> -               /* We are releasing the new timeout buffer to some other
> -                * thread */
> -               _odp_atomic_u128_xchg_mm((_odp_atomic_u128_t *)tb,
> -                                        (_uint128_t *)&new,
> -                                        (_uint128_t *)&old,
> -                                        _ODP_MEMMODEL_ACQ_RLS);
> -               old_buf = old.tmo_buf;
> -#else
>                 /* Take a related lock */
>                 while (_odp_atomic_flag_tas(IDX2LOCK(idx)))
>                         /* While lock is taken, spin using relaxed loads */
> @@ -496,7 +436,6 @@ static bool timer_reset(uint32_t idx,
>
>                 /* Release the lock */
>                 _odp_atomic_flag_clear(IDX2LOCK(idx));
> -#endif
>                 /* Return old timeout buffer */
>                 *tmo_buf = old_buf;
>         }
> @@ -510,18 +449,6 @@ static odp_buffer_t timer_cancel(odp_timer_pool *tp,
>         tick_buf_t *tb = &tp->tick_buf[idx];
>         odp_buffer_t old_buf;
>
> -#ifdef ODP_ATOMIC_U128
> -       tick_buf_t new, old;
> -       /* Update the timer state (e.g. cancel the current timeout) */
> -       new.exp_tck.v = new_state;
> -       /* Swap out the old buffer */
> -       new.tmo_buf = ODP_BUFFER_INVALID;
> -       TB_SET_PAD(new);
> -       _odp_atomic_u128_xchg_mm((_odp_atomic_u128_t *)tb,
> -                                (_uint128_t *)&new, (_uint128_t *)&old,
> -                                _ODP_MEMMODEL_RLX);
> -       old_buf = old.tmo_buf;
> -#else
>         /* Take a related lock */
>         while (_odp_atomic_flag_tas(IDX2LOCK(idx)))
>                 /* While lock is taken, spin using relaxed loads */
> @@ -537,7 +464,6 @@ static odp_buffer_t timer_cancel(odp_timer_pool *tp,
>
>         /* Release the lock */
>         _odp_atomic_flag_clear(IDX2LOCK(idx));
> -#endif
>         /* Return the old buffer */
>         return old_buf;
>  }
> @@ -548,34 +474,6 @@ static unsigned timer_expire(odp_timer_pool *tp,
> uint32_t idx, uint64_t tick)
>         tick_buf_t *tb = &tp->tick_buf[idx];
>         odp_buffer_t tmo_buf = ODP_BUFFER_INVALID;
>         uint64_t exp_tck;
> -#ifdef ODP_ATOMIC_U128
> -       /* Atomic re-read for correctness */
> -       exp_tck = _odp_atomic_u64_load_mm(&tb->exp_tck, _ODP_MEMMODEL_RLX);
> -       /* Re-check exp_tck */
> -       if (odp_likely(exp_tck <= tick)) {
> -               /* Attempt to grab timeout buffer, replace with inactive
> timer
> -                * and invalid buffer */
> -               tick_buf_t new, old;
> -               old.exp_tck.v = exp_tck;
> -               old.tmo_buf = tb->tmo_buf;
> -               TB_SET_PAD(old);
> -               /* Set the inactive/expired bit keeping the expiration
> tick so
> -                * that we can check against the expiration tick of the
> timeout
> -                * when it is received */
> -               new.exp_tck.v = exp_tck | TMO_INACTIVE;
> -               new.tmo_buf = ODP_BUFFER_INVALID;
> -               TB_SET_PAD(new);
> -               int succ = _odp_atomic_u128_cmp_xchg_mm(
> -                               (_odp_atomic_u128_t *)tb,
> -                               (_uint128_t *)&old, (_uint128_t *)&new,
> -                               _ODP_MEMMODEL_RLS, _ODP_MEMMODEL_RLX);
> -               if (succ)
> -                       tmo_buf = old.tmo_buf;
> -               /* Else CAS failed, something changed => skip timer
> -                * this tick, it will be checked again next tick */
> -       }
> -       /* Else false positive, ignore */
> -#else
>         /* Take a related lock */
>         while (_odp_atomic_flag_tas(IDX2LOCK(idx)))
>                 /* While lock is taken, spin using relaxed loads */
> @@ -600,7 +498,6 @@ static unsigned timer_expire(odp_timer_pool *tp,
> uint32_t idx, uint64_t tick)
>         /* Else false positive, ignore */
>         /* Release the lock */
>         _odp_atomic_flag_clear(IDX2LOCK(idx));
> -#endif
>         if (odp_likely(tmo_buf != ODP_BUFFER_INVALID)) {
>                 /* Fill in expiration tick for timeout events */
>                 if (odp_event_type(odp_buffer_to_event(tmo_buf)) ==
> @@ -974,13 +871,9 @@ void odp_timeout_free(odp_timeout_t tmo)
>
>  int odp_timer_init_global(void)
>  {
> -#ifndef ODP_ATOMIC_U128
>         uint32_t i;
>         for (i = 0; i < NUM_LOCKS; i++)
>                 _odp_atomic_flag_clear(&locks[i]);
> -#else
> -       ODP_DBG("Using lock-less timer implementation\n");
> -#endif
>         odp_atomic_init_u32(&num_timer_pools, 0);
>
>         block_sigalarm();
> --
> 2.7.1.250.gff4ea60
>
> _______________________________________________
> lng-odp mailing list
> lng-odp@lists.linaro.org
> https://lists.linaro.org/mailman/listinfo/lng-odp
>
Ola Liljedahl May 16, 2016, 9:21 p.m. | #2
I disagree. The 128-bit support is important because that's the lock-free
timer implementation which was the whole idea. The lock-based code is just
a work-around. We should rather add support in configure to detect compiler
support for the -mcx16 flag which enables 128-bit CAS on x86-64. Now when I
have an ARM64 target, I can actually write and test 128-bit CAS support on
ARM (A64 ISA in AArch64 mode) and contribute that in another patch. I am
not sure whether common compilers (e.g. GCC) properly supports 128-bit CAS
on ARM64, might have to use LDXP/STXP (load/store exclusive pair) directly.

-- Ola

On 16 May 2016 at 22:43, Bill Fischofer <bill.fischofer@linaro.org> wrote:

> On Mon, May 16, 2016 at 2:15 PM, Maxim Uvarov <maxim.uvarov@linaro.org>
> wrote:
>
> > Remove totally untested branch with 128 bit optimizations
> > for timer code.
> > This also fixes static assert bug:
> > https://bugs.linaro.org/show_bug.cgi?id=2211
> >
> > Signed-off-by: Maxim Uvarov <maxim.uvarov@linaro.org>
> >
>
> Reviewed-and-tested-by: Bill Fischofer <bill.fischofer@linaro.org>
>
>
> > ---
> >  platform/linux-generic/odp_timer.c | 107
> > -------------------------------------
> >  1 file changed, 107 deletions(-)
> >
> > diff --git a/platform/linux-generic/odp_timer.c
> > b/platform/linux-generic/odp_timer.c
> > index 6b84309..be28da3 100644
> > --- a/platform/linux-generic/odp_timer.c
> > +++ b/platform/linux-generic/odp_timer.c
> > @@ -11,15 +11,7 @@
> >   *
> >   */
> >
> > -/* Check if compiler supports 16-byte atomics. GCC needs -mcx16 flag on
> > x86 */
> > -/* Using spin lock actually seems faster on Core2 */
> > -#ifdef ODP_ATOMIC_U128
> > -/* TB_NEEDS_PAD defined if sizeof(odp_buffer_t) != 8 */
> > -#define TB_NEEDS_PAD
> > -#define TB_SET_PAD(x) ((x).pad = 0)
> > -#else
> >  #define TB_SET_PAD(x) (void)(x)
> > -#endif
> >
> >  #include <odp_posix_extensions.h>
> >
> > @@ -70,11 +62,9 @@
> >   * Mutual exclusion in the absence of CAS16
> >
> >
> *****************************************************************************/
> >
> > -#ifndef ODP_ATOMIC_U128
> >  #define NUM_LOCKS 1024
> >  static _odp_atomic_flag_t locks[NUM_LOCKS]; /* Multiple locks per cache
> > line! */
> >  #define IDX2LOCK(idx) (&locks[(idx) % NUM_LOCKS])
> > -#endif
> >
> >
> >
> /******************************************************************************
> >   * Translation between timeout buffer and timeout header
> > @@ -98,17 +88,9 @@ static odp_timeout_hdr_t *timeout_hdr(odp_timeout_t
> tmo)
> >  typedef struct tick_buf_s {
> >         odp_atomic_u64_t exp_tck;/* Expiration tick or TMO_xxx */
> >         odp_buffer_t tmo_buf;/* ODP_BUFFER_INVALID if timer not active */
> > -#ifdef TB_NEEDS_PAD
> > -       uint32_t pad;/* Need to be able to access padding for successful
> > CAS */
> > -#endif
> >  } tick_buf_t
> > -#ifdef ODP_ATOMIC_U128
> > -ODP_ALIGNED(16) /* 16-byte atomic operations need properly aligned
> > addresses */
> > -#endif
> >  ;
> >
> > -ODP_STATIC_ASSERT(sizeof(tick_buf_t) == 16, "sizeof(tick_buf_t) == 16");
> > -
> >  typedef struct odp_timer_s {
> >         void *user_ptr;
> >         odp_queue_t queue;/* Used for free list when timer is free */
> > @@ -378,34 +360,6 @@ static bool timer_reset(uint32_t idx,
> >         tick_buf_t *tb = &tp->tick_buf[idx];
> >
> >         if (tmo_buf == NULL || *tmo_buf == ODP_BUFFER_INVALID) {
> > -#ifdef ODP_ATOMIC_U128
> > -               tick_buf_t new, old;
> > -               do {
> > -                       /* Relaxed and non-atomic read of current values
> */
> > -                       old.exp_tck.v = tb->exp_tck.v;
> > -                       old.tmo_buf = tb->tmo_buf;
> > -                       TB_SET_PAD(old);
> > -                       /* Check if there actually is a timeout buffer
> > -                        * present */
> > -                       if (old.tmo_buf == ODP_BUFFER_INVALID) {
> > -                               /* Cannot reset a timer with neither old
> > nor
> > -                                * new timeout buffer */
> > -                               success = false;
> > -                               break;
> > -                       }
> > -                       /* Set up new values */
> > -                       new.exp_tck.v = abs_tck;
> > -                       new.tmo_buf = old.tmo_buf;
> > -                       TB_SET_PAD(new);
> > -                       /* Atomic CAS will fail if we experienced torn
> > reads,
> > -                        * retry update sequence until CAS succeeds */
> > -               } while (!_odp_atomic_u128_cmp_xchg_mm(
> > -                                       (_odp_atomic_u128_t *)tb,
> > -                                       (_uint128_t *)&old,
> > -                                       (_uint128_t *)&new,
> > -                                       _ODP_MEMMODEL_RLS,
> > -                                       _ODP_MEMMODEL_RLX));
> > -#else
> >  #ifdef __ARM_ARCH
> >                 /* Since barriers are not good for C-A15, we take an
> >                  * alternative approach using relaxed memory model */
> > @@ -453,7 +407,6 @@ static bool timer_reset(uint32_t idx,
> >                 /* Release the lock */
> >                 _odp_atomic_flag_clear(IDX2LOCK(idx));
> >  #endif
> > -#endif
> >         } else {
> >                 /* We have a new timeout buffer which replaces any old
> one
> > */
> >                 /* Fill in some (constant) header fields for timeout
> > events */
> > @@ -468,19 +421,6 @@ static bool timer_reset(uint32_t idx,
> >                 }
> >                 /* Else ignore buffers of other types */
> >                 odp_buffer_t old_buf = ODP_BUFFER_INVALID;
> > -#ifdef ODP_ATOMIC_U128
> > -               tick_buf_t new, old;
> > -               new.exp_tck.v = abs_tck;
> > -               new.tmo_buf = *tmo_buf;
> > -               TB_SET_PAD(new);
> > -               /* We are releasing the new timeout buffer to some other
> > -                * thread */
> > -               _odp_atomic_u128_xchg_mm((_odp_atomic_u128_t *)tb,
> > -                                        (_uint128_t *)&new,
> > -                                        (_uint128_t *)&old,
> > -                                        _ODP_MEMMODEL_ACQ_RLS);
> > -               old_buf = old.tmo_buf;
> > -#else
> >                 /* Take a related lock */
> >                 while (_odp_atomic_flag_tas(IDX2LOCK(idx)))
> >                         /* While lock is taken, spin using relaxed loads
> */
> > @@ -496,7 +436,6 @@ static bool timer_reset(uint32_t idx,
> >
> >                 /* Release the lock */
> >                 _odp_atomic_flag_clear(IDX2LOCK(idx));
> > -#endif
> >                 /* Return old timeout buffer */
> >                 *tmo_buf = old_buf;
> >         }
> > @@ -510,18 +449,6 @@ static odp_buffer_t timer_cancel(odp_timer_pool *tp,
> >         tick_buf_t *tb = &tp->tick_buf[idx];
> >         odp_buffer_t old_buf;
> >
> > -#ifdef ODP_ATOMIC_U128
> > -       tick_buf_t new, old;
> > -       /* Update the timer state (e.g. cancel the current timeout) */
> > -       new.exp_tck.v = new_state;
> > -       /* Swap out the old buffer */
> > -       new.tmo_buf = ODP_BUFFER_INVALID;
> > -       TB_SET_PAD(new);
> > -       _odp_atomic_u128_xchg_mm((_odp_atomic_u128_t *)tb,
> > -                                (_uint128_t *)&new, (_uint128_t *)&old,
> > -                                _ODP_MEMMODEL_RLX);
> > -       old_buf = old.tmo_buf;
> > -#else
> >         /* Take a related lock */
> >         while (_odp_atomic_flag_tas(IDX2LOCK(idx)))
> >                 /* While lock is taken, spin using relaxed loads */
> > @@ -537,7 +464,6 @@ static odp_buffer_t timer_cancel(odp_timer_pool *tp,
> >
> >         /* Release the lock */
> >         _odp_atomic_flag_clear(IDX2LOCK(idx));
> > -#endif
> >         /* Return the old buffer */
> >         return old_buf;
> >  }
> > @@ -548,34 +474,6 @@ static unsigned timer_expire(odp_timer_pool *tp,
> > uint32_t idx, uint64_t tick)
> >         tick_buf_t *tb = &tp->tick_buf[idx];
> >         odp_buffer_t tmo_buf = ODP_BUFFER_INVALID;
> >         uint64_t exp_tck;
> > -#ifdef ODP_ATOMIC_U128
> > -       /* Atomic re-read for correctness */
> > -       exp_tck = _odp_atomic_u64_load_mm(&tb->exp_tck,
> _ODP_MEMMODEL_RLX);
> > -       /* Re-check exp_tck */
> > -       if (odp_likely(exp_tck <= tick)) {
> > -               /* Attempt to grab timeout buffer, replace with inactive
> > timer
> > -                * and invalid buffer */
> > -               tick_buf_t new, old;
> > -               old.exp_tck.v = exp_tck;
> > -               old.tmo_buf = tb->tmo_buf;
> > -               TB_SET_PAD(old);
> > -               /* Set the inactive/expired bit keeping the expiration
> > tick so
> > -                * that we can check against the expiration tick of the
> > timeout
> > -                * when it is received */
> > -               new.exp_tck.v = exp_tck | TMO_INACTIVE;
> > -               new.tmo_buf = ODP_BUFFER_INVALID;
> > -               TB_SET_PAD(new);
> > -               int succ = _odp_atomic_u128_cmp_xchg_mm(
> > -                               (_odp_atomic_u128_t *)tb,
> > -                               (_uint128_t *)&old, (_uint128_t *)&new,
> > -                               _ODP_MEMMODEL_RLS, _ODP_MEMMODEL_RLX);
> > -               if (succ)
> > -                       tmo_buf = old.tmo_buf;
> > -               /* Else CAS failed, something changed => skip timer
> > -                * this tick, it will be checked again next tick */
> > -       }
> > -       /* Else false positive, ignore */
> > -#else
> >         /* Take a related lock */
> >         while (_odp_atomic_flag_tas(IDX2LOCK(idx)))
> >                 /* While lock is taken, spin using relaxed loads */
> > @@ -600,7 +498,6 @@ static unsigned timer_expire(odp_timer_pool *tp,
> > uint32_t idx, uint64_t tick)
> >         /* Else false positive, ignore */
> >         /* Release the lock */
> >         _odp_atomic_flag_clear(IDX2LOCK(idx));
> > -#endif
> >         if (odp_likely(tmo_buf != ODP_BUFFER_INVALID)) {
> >                 /* Fill in expiration tick for timeout events */
> >                 if (odp_event_type(odp_buffer_to_event(tmo_buf)) ==
> > @@ -974,13 +871,9 @@ void odp_timeout_free(odp_timeout_t tmo)
> >
> >  int odp_timer_init_global(void)
> >  {
> > -#ifndef ODP_ATOMIC_U128
> >         uint32_t i;
> >         for (i = 0; i < NUM_LOCKS; i++)
> >                 _odp_atomic_flag_clear(&locks[i]);
> > -#else
> > -       ODP_DBG("Using lock-less timer implementation\n");
> > -#endif
> >         odp_atomic_init_u32(&num_timer_pools, 0);
> >
> >         block_sigalarm();
> > --
> > 2.7.1.250.gff4ea60
> >
> > _______________________________________________
> > lng-odp mailing list
> > lng-odp@lists.linaro.org
> > https://lists.linaro.org/mailman/listinfo/lng-odp
> >
> _______________________________________________
> lng-odp mailing list
> lng-odp@lists.linaro.org
> https://lists.linaro.org/mailman/listinfo/lng-odp
>
Bill Fischofer May 16, 2016, 9:31 p.m. | #3
Thanks Ola.  The original bug is that this fails compiling with clang on
32-bit systems and the reason is that the struct in that environment winds
up being 24 bytes rather than 16 bytes long, so the ODP_STATIC_ASSERT()
fails.

My original (simple) patch is at
http://patches.opendataplane.org/patch/5932/ however Maxim didn't like it.
Perhaps you can offer a compromise?

On Mon, May 16, 2016 at 4:21 PM, Ola Liljedahl <ola.liljedahl@linaro.org>
wrote:

> I disagree. The 128-bit support is important because that's the lock-free
> timer implementation which was the whole idea. The lock-based code is just
> a work-around. We should rather add support in configure to detect compiler
> support for the -mcx16 flag which enables 128-bit CAS on x86-64. Now when I
> have an ARM64 target, I can actually write and test 128-bit CAS support on
> ARM (A64 ISA in AArch64 mode) and contribute that in another patch. I am
> not sure whether common compilers (e.g. GCC) properly supports 128-bit CAS
> on ARM64, might have to use LDXP/STXP (load/store exclusive pair) directly.
>
> -- Ola
>
> On 16 May 2016 at 22:43, Bill Fischofer <bill.fischofer@linaro.org> wrote:
>
>> On Mon, May 16, 2016 at 2:15 PM, Maxim Uvarov <maxim.uvarov@linaro.org>
>> wrote:
>>
>> > Remove totally untested branch with 128 bit optimizations
>> > for timer code.
>> > This also fixes static assert bug:
>> > https://bugs.linaro.org/show_bug.cgi?id=2211
>> >
>> > Signed-off-by: Maxim Uvarov <maxim.uvarov@linaro.org>
>> >
>>
>> Reviewed-and-tested-by: Bill Fischofer <bill.fischofer@linaro.org>
>>
>>
>> > ---
>> >  platform/linux-generic/odp_timer.c | 107
>> > -------------------------------------
>> >  1 file changed, 107 deletions(-)
>> >
>> > diff --git a/platform/linux-generic/odp_timer.c
>> > b/platform/linux-generic/odp_timer.c
>> > index 6b84309..be28da3 100644
>> > --- a/platform/linux-generic/odp_timer.c
>> > +++ b/platform/linux-generic/odp_timer.c
>> > @@ -11,15 +11,7 @@
>> >   *
>> >   */
>> >
>> > -/* Check if compiler supports 16-byte atomics. GCC needs -mcx16 flag on
>> > x86 */
>> > -/* Using spin lock actually seems faster on Core2 */
>> > -#ifdef ODP_ATOMIC_U128
>> > -/* TB_NEEDS_PAD defined if sizeof(odp_buffer_t) != 8 */
>> > -#define TB_NEEDS_PAD
>> > -#define TB_SET_PAD(x) ((x).pad = 0)
>> > -#else
>> >  #define TB_SET_PAD(x) (void)(x)
>> > -#endif
>> >
>> >  #include <odp_posix_extensions.h>
>> >
>> > @@ -70,11 +62,9 @@
>> >   * Mutual exclusion in the absence of CAS16
>> >
>> >
>> *****************************************************************************/
>> >
>> > -#ifndef ODP_ATOMIC_U128
>> >  #define NUM_LOCKS 1024
>> >  static _odp_atomic_flag_t locks[NUM_LOCKS]; /* Multiple locks per cache
>> > line! */
>> >  #define IDX2LOCK(idx) (&locks[(idx) % NUM_LOCKS])
>> > -#endif
>> >
>> >
>> >
>> /******************************************************************************
>> >   * Translation between timeout buffer and timeout header
>> > @@ -98,17 +88,9 @@ static odp_timeout_hdr_t *timeout_hdr(odp_timeout_t
>> tmo)
>> >  typedef struct tick_buf_s {
>> >         odp_atomic_u64_t exp_tck;/* Expiration tick or TMO_xxx */
>> >         odp_buffer_t tmo_buf;/* ODP_BUFFER_INVALID if timer not active
>> */
>> > -#ifdef TB_NEEDS_PAD
>> > -       uint32_t pad;/* Need to be able to access padding for successful
>> > CAS */
>> > -#endif
>> >  } tick_buf_t
>> > -#ifdef ODP_ATOMIC_U128
>> > -ODP_ALIGNED(16) /* 16-byte atomic operations need properly aligned
>> > addresses */
>> > -#endif
>> >  ;
>> >
>> > -ODP_STATIC_ASSERT(sizeof(tick_buf_t) == 16, "sizeof(tick_buf_t) ==
>> 16");
>> > -
>> >  typedef struct odp_timer_s {
>> >         void *user_ptr;
>> >         odp_queue_t queue;/* Used for free list when timer is free */
>> > @@ -378,34 +360,6 @@ static bool timer_reset(uint32_t idx,
>> >         tick_buf_t *tb = &tp->tick_buf[idx];
>> >
>> >         if (tmo_buf == NULL || *tmo_buf == ODP_BUFFER_INVALID) {
>> > -#ifdef ODP_ATOMIC_U128
>> > -               tick_buf_t new, old;
>> > -               do {
>> > -                       /* Relaxed and non-atomic read of current
>> values */
>> > -                       old.exp_tck.v = tb->exp_tck.v;
>> > -                       old.tmo_buf = tb->tmo_buf;
>> > -                       TB_SET_PAD(old);
>> > -                       /* Check if there actually is a timeout buffer
>> > -                        * present */
>> > -                       if (old.tmo_buf == ODP_BUFFER_INVALID) {
>> > -                               /* Cannot reset a timer with neither old
>> > nor
>> > -                                * new timeout buffer */
>> > -                               success = false;
>> > -                               break;
>> > -                       }
>> > -                       /* Set up new values */
>> > -                       new.exp_tck.v = abs_tck;
>> > -                       new.tmo_buf = old.tmo_buf;
>> > -                       TB_SET_PAD(new);
>> > -                       /* Atomic CAS will fail if we experienced torn
>> > reads,
>> > -                        * retry update sequence until CAS succeeds */
>> > -               } while (!_odp_atomic_u128_cmp_xchg_mm(
>> > -                                       (_odp_atomic_u128_t *)tb,
>> > -                                       (_uint128_t *)&old,
>> > -                                       (_uint128_t *)&new,
>> > -                                       _ODP_MEMMODEL_RLS,
>> > -                                       _ODP_MEMMODEL_RLX));
>> > -#else
>> >  #ifdef __ARM_ARCH
>> >                 /* Since barriers are not good for C-A15, we take an
>> >                  * alternative approach using relaxed memory model */
>> > @@ -453,7 +407,6 @@ static bool timer_reset(uint32_t idx,
>> >                 /* Release the lock */
>> >                 _odp_atomic_flag_clear(IDX2LOCK(idx));
>> >  #endif
>> > -#endif
>> >         } else {
>> >                 /* We have a new timeout buffer which replaces any old
>> one
>> > */
>> >                 /* Fill in some (constant) header fields for timeout
>> > events */
>> > @@ -468,19 +421,6 @@ static bool timer_reset(uint32_t idx,
>> >                 }
>> >                 /* Else ignore buffers of other types */
>> >                 odp_buffer_t old_buf = ODP_BUFFER_INVALID;
>> > -#ifdef ODP_ATOMIC_U128
>> > -               tick_buf_t new, old;
>> > -               new.exp_tck.v = abs_tck;
>> > -               new.tmo_buf = *tmo_buf;
>> > -               TB_SET_PAD(new);
>> > -               /* We are releasing the new timeout buffer to some other
>> > -                * thread */
>> > -               _odp_atomic_u128_xchg_mm((_odp_atomic_u128_t *)tb,
>> > -                                        (_uint128_t *)&new,
>> > -                                        (_uint128_t *)&old,
>> > -                                        _ODP_MEMMODEL_ACQ_RLS);
>> > -               old_buf = old.tmo_buf;
>> > -#else
>> >                 /* Take a related lock */
>> >                 while (_odp_atomic_flag_tas(IDX2LOCK(idx)))
>> >                         /* While lock is taken, spin using relaxed
>> loads */
>> > @@ -496,7 +436,6 @@ static bool timer_reset(uint32_t idx,
>> >
>> >                 /* Release the lock */
>> >                 _odp_atomic_flag_clear(IDX2LOCK(idx));
>> > -#endif
>> >                 /* Return old timeout buffer */
>> >                 *tmo_buf = old_buf;
>> >         }
>> > @@ -510,18 +449,6 @@ static odp_buffer_t timer_cancel(odp_timer_pool
>> *tp,
>> >         tick_buf_t *tb = &tp->tick_buf[idx];
>> >         odp_buffer_t old_buf;
>> >
>> > -#ifdef ODP_ATOMIC_U128
>> > -       tick_buf_t new, old;
>> > -       /* Update the timer state (e.g. cancel the current timeout) */
>> > -       new.exp_tck.v = new_state;
>> > -       /* Swap out the old buffer */
>> > -       new.tmo_buf = ODP_BUFFER_INVALID;
>> > -       TB_SET_PAD(new);
>> > -       _odp_atomic_u128_xchg_mm((_odp_atomic_u128_t *)tb,
>> > -                                (_uint128_t *)&new, (_uint128_t *)&old,
>> > -                                _ODP_MEMMODEL_RLX);
>> > -       old_buf = old.tmo_buf;
>> > -#else
>> >         /* Take a related lock */
>> >         while (_odp_atomic_flag_tas(IDX2LOCK(idx)))
>> >                 /* While lock is taken, spin using relaxed loads */
>> > @@ -537,7 +464,6 @@ static odp_buffer_t timer_cancel(odp_timer_pool *tp,
>> >
>> >         /* Release the lock */
>> >         _odp_atomic_flag_clear(IDX2LOCK(idx));
>> > -#endif
>> >         /* Return the old buffer */
>> >         return old_buf;
>> >  }
>> > @@ -548,34 +474,6 @@ static unsigned timer_expire(odp_timer_pool *tp,
>> > uint32_t idx, uint64_t tick)
>> >         tick_buf_t *tb = &tp->tick_buf[idx];
>> >         odp_buffer_t tmo_buf = ODP_BUFFER_INVALID;
>> >         uint64_t exp_tck;
>> > -#ifdef ODP_ATOMIC_U128
>> > -       /* Atomic re-read for correctness */
>> > -       exp_tck = _odp_atomic_u64_load_mm(&tb->exp_tck,
>> _ODP_MEMMODEL_RLX);
>> > -       /* Re-check exp_tck */
>> > -       if (odp_likely(exp_tck <= tick)) {
>> > -               /* Attempt to grab timeout buffer, replace with inactive
>> > timer
>> > -                * and invalid buffer */
>> > -               tick_buf_t new, old;
>> > -               old.exp_tck.v = exp_tck;
>> > -               old.tmo_buf = tb->tmo_buf;
>> > -               TB_SET_PAD(old);
>> > -               /* Set the inactive/expired bit keeping the expiration
>> > tick so
>> > -                * that we can check against the expiration tick of the
>> > timeout
>> > -                * when it is received */
>> > -               new.exp_tck.v = exp_tck | TMO_INACTIVE;
>> > -               new.tmo_buf = ODP_BUFFER_INVALID;
>> > -               TB_SET_PAD(new);
>> > -               int succ = _odp_atomic_u128_cmp_xchg_mm(
>> > -                               (_odp_atomic_u128_t *)tb,
>> > -                               (_uint128_t *)&old, (_uint128_t *)&new,
>> > -                               _ODP_MEMMODEL_RLS, _ODP_MEMMODEL_RLX);
>> > -               if (succ)
>> > -                       tmo_buf = old.tmo_buf;
>> > -               /* Else CAS failed, something changed => skip timer
>> > -                * this tick, it will be checked again next tick */
>> > -       }
>> > -       /* Else false positive, ignore */
>> > -#else
>> >         /* Take a related lock */
>> >         while (_odp_atomic_flag_tas(IDX2LOCK(idx)))
>> >                 /* While lock is taken, spin using relaxed loads */
>> > @@ -600,7 +498,6 @@ static unsigned timer_expire(odp_timer_pool *tp,
>> > uint32_t idx, uint64_t tick)
>> >         /* Else false positive, ignore */
>> >         /* Release the lock */
>> >         _odp_atomic_flag_clear(IDX2LOCK(idx));
>> > -#endif
>> >         if (odp_likely(tmo_buf != ODP_BUFFER_INVALID)) {
>> >                 /* Fill in expiration tick for timeout events */
>> >                 if (odp_event_type(odp_buffer_to_event(tmo_buf)) ==
>> > @@ -974,13 +871,9 @@ void odp_timeout_free(odp_timeout_t tmo)
>> >
>> >  int odp_timer_init_global(void)
>> >  {
>> > -#ifndef ODP_ATOMIC_U128
>> >         uint32_t i;
>> >         for (i = 0; i < NUM_LOCKS; i++)
>> >                 _odp_atomic_flag_clear(&locks[i]);
>> > -#else
>> > -       ODP_DBG("Using lock-less timer implementation\n");
>> > -#endif
>> >         odp_atomic_init_u32(&num_timer_pools, 0);
>> >
>> >         block_sigalarm();
>> > --
>> > 2.7.1.250.gff4ea60
>> >
>> > _______________________________________________
>> > lng-odp mailing list
>> > lng-odp@lists.linaro.org
>> > https://lists.linaro.org/mailman/listinfo/lng-odp
>> >
>> _______________________________________________
>> lng-odp mailing list
>> lng-odp@lists.linaro.org
>> https://lists.linaro.org/mailman/listinfo/lng-odp
>>
>
>
Ola Liljedahl May 16, 2016, 9:55 p.m. | #4
On 16 May 2016 at 23:31, Bill Fischofer <bill.fischofer@linaro.org> wrote:

> Thanks Ola.  The original bug is that this fails compiling with clang on
> 32-bit systems and the reason is that the struct in that environment winds
> up being 24 bytes rather than 16 bytes long, so the ODP_STATIC_ASSERT()
> fails.
>
The timer code should stop using odp_atomic_u64_t (which will include the
spinlock for systems that do not support atomic operations on 64-bit
locations, ARMv7A is one of the few 32-bit platforms which do support
64-bit atomics natively). Instead use a plain 64-bit datatype (uint64_t) on
such systems. For systems which do not support 128-bit (or 64-bit?) atomic
operations (which is the ideal but I think I managed to get something
working with only 64-bit atomics), we should use locks instead. I think the
code does use a separate array of locks which is indexed through a hash of
the timer handle or something similar. This locks makes the additional lock
in odp_atomic_u64_t unused.

Which platforms use the 64-bit atomic code in the timer?
ARMv7A (32-bit but with 64-bit atomics support - ldrexd/strexd)
OCTEON/MIPS64 (with 64-bit atomics support - lld/scd)
64-bit POWER? (with 64--bit atomics support - ldarx/stdcx.)

Se we need a couple of things:
1) 128-bit CAS support on e.g. x86 (requires that configure detects and
enables -mcx16 flag).
2) 128-bit ldxp/stxp support on ARM64 (A64 ISA in AArch64 mode). I don't
think common compilers support 128-bit CAS on ARM64 (it's a bit tricky) so
need to use inline assembly.
3) Some cleanup in the timer code not to use odp_atomic_u64_t on systems
which do not support 64-bit atomic operations (and instead uses the spin
lock array). This will avoid the failed static assert.


> My original (simple) patch is at
> http://patches.opendataplane.org/patch/5932/ however Maxim didn't like
> it. Perhaps you can offer a compromise?
>
> On Mon, May 16, 2016 at 4:21 PM, Ola Liljedahl <ola.liljedahl@linaro.org>
> wrote:
>
>> I disagree. The 128-bit support is important because that's the lock-free
>> timer implementation which was the whole idea. The lock-based code is just
>> a work-around. We should rather add support in configure to detect compiler
>> support for the -mcx16 flag which enables 128-bit CAS on x86-64. Now when I
>> have an ARM64 target, I can actually write and test 128-bit CAS support on
>> ARM (A64 ISA in AArch64 mode) and contribute that in another patch. I am
>> not sure whether common compilers (e.g. GCC) properly supports 128-bit CAS
>> on ARM64, might have to use LDXP/STXP (load/store exclusive pair) directly.
>>
>> -- Ola
>>
>> On 16 May 2016 at 22:43, Bill Fischofer <bill.fischofer@linaro.org>
>> wrote:
>>
>>> On Mon, May 16, 2016 at 2:15 PM, Maxim Uvarov <maxim.uvarov@linaro.org>
>>> wrote:
>>>
>>> > Remove totally untested branch with 128 bit optimizations
>>> > for timer code.
>>> > This also fixes static assert bug:
>>> > https://bugs.linaro.org/show_bug.cgi?id=2211
>>> >
>>> > Signed-off-by: Maxim Uvarov <maxim.uvarov@linaro.org>
>>> >
>>>
>>> Reviewed-and-tested-by: Bill Fischofer <bill.fischofer@linaro.org>
>>>
>>>
>>> > ---
>>> >  platform/linux-generic/odp_timer.c | 107
>>> > -------------------------------------
>>> >  1 file changed, 107 deletions(-)
>>> >
>>> > diff --git a/platform/linux-generic/odp_timer.c
>>> > b/platform/linux-generic/odp_timer.c
>>> > index 6b84309..be28da3 100644
>>> > --- a/platform/linux-generic/odp_timer.c
>>> > +++ b/platform/linux-generic/odp_timer.c
>>> > @@ -11,15 +11,7 @@
>>> >   *
>>> >   */
>>> >
>>> > -/* Check if compiler supports 16-byte atomics. GCC needs -mcx16 flag
>>> on
>>> > x86 */
>>> > -/* Using spin lock actually seems faster on Core2 */
>>> > -#ifdef ODP_ATOMIC_U128
>>> > -/* TB_NEEDS_PAD defined if sizeof(odp_buffer_t) != 8 */
>>> > -#define TB_NEEDS_PAD
>>> > -#define TB_SET_PAD(x) ((x).pad = 0)
>>> > -#else
>>> >  #define TB_SET_PAD(x) (void)(x)
>>> > -#endif
>>> >
>>> >  #include <odp_posix_extensions.h>
>>> >
>>> > @@ -70,11 +62,9 @@
>>> >   * Mutual exclusion in the absence of CAS16
>>> >
>>> >
>>> *****************************************************************************/
>>> >
>>> > -#ifndef ODP_ATOMIC_U128
>>> >  #define NUM_LOCKS 1024
>>> >  static _odp_atomic_flag_t locks[NUM_LOCKS]; /* Multiple locks per
>>> cache
>>> > line! */
>>> >  #define IDX2LOCK(idx) (&locks[(idx) % NUM_LOCKS])
>>> > -#endif
>>> >
>>> >
>>> >
>>> /******************************************************************************
>>> >   * Translation between timeout buffer and timeout header
>>> > @@ -98,17 +88,9 @@ static odp_timeout_hdr_t *timeout_hdr(odp_timeout_t
>>> tmo)
>>> >  typedef struct tick_buf_s {
>>> >         odp_atomic_u64_t exp_tck;/* Expiration tick or TMO_xxx */
>>> >         odp_buffer_t tmo_buf;/* ODP_BUFFER_INVALID if timer not active
>>> */
>>> > -#ifdef TB_NEEDS_PAD
>>> > -       uint32_t pad;/* Need to be able to access padding for
>>> successful
>>> > CAS */
>>> > -#endif
>>> >  } tick_buf_t
>>> > -#ifdef ODP_ATOMIC_U128
>>> > -ODP_ALIGNED(16) /* 16-byte atomic operations need properly aligned
>>> > addresses */
>>> > -#endif
>>> >  ;
>>> >
>>> > -ODP_STATIC_ASSERT(sizeof(tick_buf_t) == 16, "sizeof(tick_buf_t) ==
>>> 16");
>>> > -
>>> >  typedef struct odp_timer_s {
>>> >         void *user_ptr;
>>> >         odp_queue_t queue;/* Used for free list when timer is free */
>>> > @@ -378,34 +360,6 @@ static bool timer_reset(uint32_t idx,
>>> >         tick_buf_t *tb = &tp->tick_buf[idx];
>>> >
>>> >         if (tmo_buf == NULL || *tmo_buf == ODP_BUFFER_INVALID) {
>>> > -#ifdef ODP_ATOMIC_U128
>>> > -               tick_buf_t new, old;
>>> > -               do {
>>> > -                       /* Relaxed and non-atomic read of current
>>> values */
>>> > -                       old.exp_tck.v = tb->exp_tck.v;
>>> > -                       old.tmo_buf = tb->tmo_buf;
>>> > -                       TB_SET_PAD(old);
>>> > -                       /* Check if there actually is a timeout buffer
>>> > -                        * present */
>>> > -                       if (old.tmo_buf == ODP_BUFFER_INVALID) {
>>> > -                               /* Cannot reset a timer with neither
>>> old
>>> > nor
>>> > -                                * new timeout buffer */
>>> > -                               success = false;
>>> > -                               break;
>>> > -                       }
>>> > -                       /* Set up new values */
>>> > -                       new.exp_tck.v = abs_tck;
>>> > -                       new.tmo_buf = old.tmo_buf;
>>> > -                       TB_SET_PAD(new);
>>> > -                       /* Atomic CAS will fail if we experienced torn
>>> > reads,
>>> > -                        * retry update sequence until CAS succeeds */
>>> > -               } while (!_odp_atomic_u128_cmp_xchg_mm(
>>> > -                                       (_odp_atomic_u128_t *)tb,
>>> > -                                       (_uint128_t *)&old,
>>> > -                                       (_uint128_t *)&new,
>>> > -                                       _ODP_MEMMODEL_RLS,
>>> > -                                       _ODP_MEMMODEL_RLX));
>>> > -#else
>>> >  #ifdef __ARM_ARCH
>>> >                 /* Since barriers are not good for C-A15, we take an
>>> >                  * alternative approach using relaxed memory model */
>>> > @@ -453,7 +407,6 @@ static bool timer_reset(uint32_t idx,
>>> >                 /* Release the lock */
>>> >                 _odp_atomic_flag_clear(IDX2LOCK(idx));
>>> >  #endif
>>> > -#endif
>>> >         } else {
>>> >                 /* We have a new timeout buffer which replaces any old
>>> one
>>> > */
>>> >                 /* Fill in some (constant) header fields for timeout
>>> > events */
>>> > @@ -468,19 +421,6 @@ static bool timer_reset(uint32_t idx,
>>> >                 }
>>> >                 /* Else ignore buffers of other types */
>>> >                 odp_buffer_t old_buf = ODP_BUFFER_INVALID;
>>> > -#ifdef ODP_ATOMIC_U128
>>> > -               tick_buf_t new, old;
>>> > -               new.exp_tck.v = abs_tck;
>>> > -               new.tmo_buf = *tmo_buf;
>>> > -               TB_SET_PAD(new);
>>> > -               /* We are releasing the new timeout buffer to some
>>> other
>>> > -                * thread */
>>> > -               _odp_atomic_u128_xchg_mm((_odp_atomic_u128_t *)tb,
>>> > -                                        (_uint128_t *)&new,
>>> > -                                        (_uint128_t *)&old,
>>> > -                                        _ODP_MEMMODEL_ACQ_RLS);
>>> > -               old_buf = old.tmo_buf;
>>> > -#else
>>> >                 /* Take a related lock */
>>> >                 while (_odp_atomic_flag_tas(IDX2LOCK(idx)))
>>> >                         /* While lock is taken, spin using relaxed
>>> loads */
>>> > @@ -496,7 +436,6 @@ static bool timer_reset(uint32_t idx,
>>> >
>>> >                 /* Release the lock */
>>> >                 _odp_atomic_flag_clear(IDX2LOCK(idx));
>>> > -#endif
>>> >                 /* Return old timeout buffer */
>>> >                 *tmo_buf = old_buf;
>>> >         }
>>> > @@ -510,18 +449,6 @@ static odp_buffer_t timer_cancel(odp_timer_pool
>>> *tp,
>>> >         tick_buf_t *tb = &tp->tick_buf[idx];
>>> >         odp_buffer_t old_buf;
>>> >
>>> > -#ifdef ODP_ATOMIC_U128
>>> > -       tick_buf_t new, old;
>>> > -       /* Update the timer state (e.g. cancel the current timeout) */
>>> > -       new.exp_tck.v = new_state;
>>> > -       /* Swap out the old buffer */
>>> > -       new.tmo_buf = ODP_BUFFER_INVALID;
>>> > -       TB_SET_PAD(new);
>>> > -       _odp_atomic_u128_xchg_mm((_odp_atomic_u128_t *)tb,
>>> > -                                (_uint128_t *)&new, (_uint128_t
>>> *)&old,
>>> > -                                _ODP_MEMMODEL_RLX);
>>> > -       old_buf = old.tmo_buf;
>>> > -#else
>>> >         /* Take a related lock */
>>> >         while (_odp_atomic_flag_tas(IDX2LOCK(idx)))
>>> >                 /* While lock is taken, spin using relaxed loads */
>>> > @@ -537,7 +464,6 @@ static odp_buffer_t timer_cancel(odp_timer_pool
>>> *tp,
>>> >
>>> >         /* Release the lock */
>>> >         _odp_atomic_flag_clear(IDX2LOCK(idx));
>>> > -#endif
>>> >         /* Return the old buffer */
>>> >         return old_buf;
>>> >  }
>>> > @@ -548,34 +474,6 @@ static unsigned timer_expire(odp_timer_pool *tp,
>>> > uint32_t idx, uint64_t tick)
>>> >         tick_buf_t *tb = &tp->tick_buf[idx];
>>> >         odp_buffer_t tmo_buf = ODP_BUFFER_INVALID;
>>> >         uint64_t exp_tck;
>>> > -#ifdef ODP_ATOMIC_U128
>>> > -       /* Atomic re-read for correctness */
>>> > -       exp_tck = _odp_atomic_u64_load_mm(&tb->exp_tck,
>>> _ODP_MEMMODEL_RLX);
>>> > -       /* Re-check exp_tck */
>>> > -       if (odp_likely(exp_tck <= tick)) {
>>> > -               /* Attempt to grab timeout buffer, replace with
>>> inactive
>>> > timer
>>> > -                * and invalid buffer */
>>> > -               tick_buf_t new, old;
>>> > -               old.exp_tck.v = exp_tck;
>>> > -               old.tmo_buf = tb->tmo_buf;
>>> > -               TB_SET_PAD(old);
>>> > -               /* Set the inactive/expired bit keeping the expiration
>>> > tick so
>>> > -                * that we can check against the expiration tick of the
>>> > timeout
>>> > -                * when it is received */
>>> > -               new.exp_tck.v = exp_tck | TMO_INACTIVE;
>>> > -               new.tmo_buf = ODP_BUFFER_INVALID;
>>> > -               TB_SET_PAD(new);
>>> > -               int succ = _odp_atomic_u128_cmp_xchg_mm(
>>> > -                               (_odp_atomic_u128_t *)tb,
>>> > -                               (_uint128_t *)&old, (_uint128_t *)&new,
>>> > -                               _ODP_MEMMODEL_RLS, _ODP_MEMMODEL_RLX);
>>> > -               if (succ)
>>> > -                       tmo_buf = old.tmo_buf;
>>> > -               /* Else CAS failed, something changed => skip timer
>>> > -                * this tick, it will be checked again next tick */
>>> > -       }
>>> > -       /* Else false positive, ignore */
>>> > -#else
>>> >         /* Take a related lock */
>>> >         while (_odp_atomic_flag_tas(IDX2LOCK(idx)))
>>> >                 /* While lock is taken, spin using relaxed loads */
>>> > @@ -600,7 +498,6 @@ static unsigned timer_expire(odp_timer_pool *tp,
>>> > uint32_t idx, uint64_t tick)
>>> >         /* Else false positive, ignore */
>>> >         /* Release the lock */
>>> >         _odp_atomic_flag_clear(IDX2LOCK(idx));
>>> > -#endif
>>> >         if (odp_likely(tmo_buf != ODP_BUFFER_INVALID)) {
>>> >                 /* Fill in expiration tick for timeout events */
>>> >                 if (odp_event_type(odp_buffer_to_event(tmo_buf)) ==
>>> > @@ -974,13 +871,9 @@ void odp_timeout_free(odp_timeout_t tmo)
>>> >
>>> >  int odp_timer_init_global(void)
>>> >  {
>>> > -#ifndef ODP_ATOMIC_U128
>>> >         uint32_t i;
>>> >         for (i = 0; i < NUM_LOCKS; i++)
>>> >                 _odp_atomic_flag_clear(&locks[i]);
>>> > -#else
>>> > -       ODP_DBG("Using lock-less timer implementation\n");
>>> > -#endif
>>> >         odp_atomic_init_u32(&num_timer_pools, 0);
>>> >
>>> >         block_sigalarm();
>>> > --
>>> > 2.7.1.250.gff4ea60
>>> >
>>> > _______________________________________________
>>> > lng-odp mailing list
>>> > lng-odp@lists.linaro.org
>>> > https://lists.linaro.org/mailman/listinfo/lng-odp
>>> >
>>> _______________________________________________
>>> lng-odp mailing list
>>> lng-odp@lists.linaro.org
>>> https://lists.linaro.org/mailman/listinfo/lng-odp
>>>
>>
>>
>
Bill Fischofer May 16, 2016, 10:45 p.m. | #5
On Mon, May 16, 2016 at 4:55 PM, Ola Liljedahl <ola.liljedahl@linaro.org>
wrote:

> On 16 May 2016 at 23:31, Bill Fischofer <bill.fischofer@linaro.org> wrote:
>
>> Thanks Ola.  The original bug is that this fails compiling with clang on
>> 32-bit systems and the reason is that the struct in that environment winds
>> up being 24 bytes rather than 16 bytes long, so the ODP_STATIC_ASSERT()
>> fails.
>>
> The timer code should stop using odp_atomic_u64_t (which will include the
> spinlock for systems that do not support atomic operations on 64-bit
> locations, ARMv7A is one of the few 32-bit platforms which do support
> 64-bit atomics natively). Instead use a plain 64-bit datatype (uint64_t) on
> such systems. For systems which do not support 128-bit (or 64-bit?) atomic
> operations (which is the ideal but I think I managed to get something
> working with only 64-bit atomics), we should use locks instead. I think the
> code does use a separate array of locks which is indexed through a hash of
> the timer handle or something similar. This locks makes the additional lock
> in odp_atomic_u64_t unused.
>
> Which platforms use the 64-bit atomic code in the timer?
> ARMv7A (32-bit but with 64-bit atomics support - ldrexd/strexd)
> OCTEON/MIPS64 (with 64-bit atomics support - lld/scd)
> 64-bit POWER? (with 64--bit atomics support - ldarx/stdcx.)
>
> Se we need a couple of things:
> 1) 128-bit CAS support on e.g. x86 (requires that configure detects and
> enables -mcx16 flag).
> 2) 128-bit ldxp/stxp support on ARM64 (A64 ISA in AArch64 mode). I don't
> think common compilers support 128-bit CAS on ARM64 (it's a bit tricky) so
> need to use inline assembly.
> 3) Some cleanup in the timer code not to use odp_atomic_u64_t on systems
> which do not support 64-bit atomic operations (and instead uses the spin
> lock array). This will avoid the failed static assert.
>

Unless you'd like to submit that patch this week, we need to do something
on at least a temporary basis to close out Monarch RC3. Sounds like this is
an area we should revisit for restructure/cleanup for Tiger Moth. Given the
divergence between near-term need and the (better) longer-term solution you
outline, what do you recommend for now?


>
>
>> My original (simple) patch is at
>> http://patches.opendataplane.org/patch/5932/ however Maxim didn't like
>> it. Perhaps you can offer a compromise?
>>
>> On Mon, May 16, 2016 at 4:21 PM, Ola Liljedahl <ola.liljedahl@linaro.org>
>> wrote:
>>
>>> I disagree. The 128-bit support is important because that's the
>>> lock-free timer implementation which was the whole idea. The lock-based
>>> code is just a work-around. We should rather add support in configure to
>>> detect compiler support for the -mcx16 flag which enables 128-bit CAS on
>>> x86-64. Now when I have an ARM64 target, I can actually write and test
>>> 128-bit CAS support on ARM (A64 ISA in AArch64 mode) and contribute that in
>>> another patch. I am not sure whether common compilers (e.g. GCC) properly
>>> supports 128-bit CAS on ARM64, might have to use LDXP/STXP (load/store
>>> exclusive pair) directly.
>>>
>>> -- Ola
>>>
>>> On 16 May 2016 at 22:43, Bill Fischofer <bill.fischofer@linaro.org>
>>> wrote:
>>>
>>>> On Mon, May 16, 2016 at 2:15 PM, Maxim Uvarov <maxim.uvarov@linaro.org>
>>>> wrote:
>>>>
>>>> > Remove totally untested branch with 128 bit optimizations
>>>> > for timer code.
>>>> > This also fixes static assert bug:
>>>> > https://bugs.linaro.org/show_bug.cgi?id=2211
>>>> >
>>>> > Signed-off-by: Maxim Uvarov <maxim.uvarov@linaro.org>
>>>> >
>>>>
>>>> Reviewed-and-tested-by: Bill Fischofer <bill.fischofer@linaro.org>
>>>>
>>>>
>>>> > ---
>>>> >  platform/linux-generic/odp_timer.c | 107
>>>> > -------------------------------------
>>>> >  1 file changed, 107 deletions(-)
>>>> >
>>>> > diff --git a/platform/linux-generic/odp_timer.c
>>>> > b/platform/linux-generic/odp_timer.c
>>>> > index 6b84309..be28da3 100644
>>>> > --- a/platform/linux-generic/odp_timer.c
>>>> > +++ b/platform/linux-generic/odp_timer.c
>>>> > @@ -11,15 +11,7 @@
>>>> >   *
>>>> >   */
>>>> >
>>>> > -/* Check if compiler supports 16-byte atomics. GCC needs -mcx16 flag
>>>> on
>>>> > x86 */
>>>> > -/* Using spin lock actually seems faster on Core2 */
>>>> > -#ifdef ODP_ATOMIC_U128
>>>> > -/* TB_NEEDS_PAD defined if sizeof(odp_buffer_t) != 8 */
>>>> > -#define TB_NEEDS_PAD
>>>> > -#define TB_SET_PAD(x) ((x).pad = 0)
>>>> > -#else
>>>> >  #define TB_SET_PAD(x) (void)(x)
>>>> > -#endif
>>>> >
>>>> >  #include <odp_posix_extensions.h>
>>>> >
>>>> > @@ -70,11 +62,9 @@
>>>> >   * Mutual exclusion in the absence of CAS16
>>>> >
>>>> >
>>>> *****************************************************************************/
>>>> >
>>>> > -#ifndef ODP_ATOMIC_U128
>>>> >  #define NUM_LOCKS 1024
>>>> >  static _odp_atomic_flag_t locks[NUM_LOCKS]; /* Multiple locks per
>>>> cache
>>>> > line! */
>>>> >  #define IDX2LOCK(idx) (&locks[(idx) % NUM_LOCKS])
>>>> > -#endif
>>>> >
>>>> >
>>>> >
>>>> /******************************************************************************
>>>> >   * Translation between timeout buffer and timeout header
>>>> > @@ -98,17 +88,9 @@ static odp_timeout_hdr_t
>>>> *timeout_hdr(odp_timeout_t tmo)
>>>> >  typedef struct tick_buf_s {
>>>> >         odp_atomic_u64_t exp_tck;/* Expiration tick or TMO_xxx */
>>>> >         odp_buffer_t tmo_buf;/* ODP_BUFFER_INVALID if timer not
>>>> active */
>>>> > -#ifdef TB_NEEDS_PAD
>>>> > -       uint32_t pad;/* Need to be able to access padding for
>>>> successful
>>>> > CAS */
>>>> > -#endif
>>>> >  } tick_buf_t
>>>> > -#ifdef ODP_ATOMIC_U128
>>>> > -ODP_ALIGNED(16) /* 16-byte atomic operations need properly aligned
>>>> > addresses */
>>>> > -#endif
>>>> >  ;
>>>> >
>>>> > -ODP_STATIC_ASSERT(sizeof(tick_buf_t) == 16, "sizeof(tick_buf_t) ==
>>>> 16");
>>>> > -
>>>> >  typedef struct odp_timer_s {
>>>> >         void *user_ptr;
>>>> >         odp_queue_t queue;/* Used for free list when timer is free */
>>>> > @@ -378,34 +360,6 @@ static bool timer_reset(uint32_t idx,
>>>> >         tick_buf_t *tb = &tp->tick_buf[idx];
>>>> >
>>>> >         if (tmo_buf == NULL || *tmo_buf == ODP_BUFFER_INVALID) {
>>>> > -#ifdef ODP_ATOMIC_U128
>>>> > -               tick_buf_t new, old;
>>>> > -               do {
>>>> > -                       /* Relaxed and non-atomic read of current
>>>> values */
>>>> > -                       old.exp_tck.v = tb->exp_tck.v;
>>>> > -                       old.tmo_buf = tb->tmo_buf;
>>>> > -                       TB_SET_PAD(old);
>>>> > -                       /* Check if there actually is a timeout buffer
>>>> > -                        * present */
>>>> > -                       if (old.tmo_buf == ODP_BUFFER_INVALID) {
>>>> > -                               /* Cannot reset a timer with neither
>>>> old
>>>> > nor
>>>> > -                                * new timeout buffer */
>>>> > -                               success = false;
>>>> > -                               break;
>>>> > -                       }
>>>> > -                       /* Set up new values */
>>>> > -                       new.exp_tck.v = abs_tck;
>>>> > -                       new.tmo_buf = old.tmo_buf;
>>>> > -                       TB_SET_PAD(new);
>>>> > -                       /* Atomic CAS will fail if we experienced torn
>>>> > reads,
>>>> > -                        * retry update sequence until CAS succeeds */
>>>> > -               } while (!_odp_atomic_u128_cmp_xchg_mm(
>>>> > -                                       (_odp_atomic_u128_t *)tb,
>>>> > -                                       (_uint128_t *)&old,
>>>> > -                                       (_uint128_t *)&new,
>>>> > -                                       _ODP_MEMMODEL_RLS,
>>>> > -                                       _ODP_MEMMODEL_RLX));
>>>> > -#else
>>>> >  #ifdef __ARM_ARCH
>>>> >                 /* Since barriers are not good for C-A15, we take an
>>>> >                  * alternative approach using relaxed memory model */
>>>> > @@ -453,7 +407,6 @@ static bool timer_reset(uint32_t idx,
>>>> >                 /* Release the lock */
>>>> >                 _odp_atomic_flag_clear(IDX2LOCK(idx));
>>>> >  #endif
>>>> > -#endif
>>>> >         } else {
>>>> >                 /* We have a new timeout buffer which replaces any
>>>> old one
>>>> > */
>>>> >                 /* Fill in some (constant) header fields for timeout
>>>> > events */
>>>> > @@ -468,19 +421,6 @@ static bool timer_reset(uint32_t idx,
>>>> >                 }
>>>> >                 /* Else ignore buffers of other types */
>>>> >                 odp_buffer_t old_buf = ODP_BUFFER_INVALID;
>>>> > -#ifdef ODP_ATOMIC_U128
>>>> > -               tick_buf_t new, old;
>>>> > -               new.exp_tck.v = abs_tck;
>>>> > -               new.tmo_buf = *tmo_buf;
>>>> > -               TB_SET_PAD(new);
>>>> > -               /* We are releasing the new timeout buffer to some
>>>> other
>>>> > -                * thread */
>>>> > -               _odp_atomic_u128_xchg_mm((_odp_atomic_u128_t *)tb,
>>>> > -                                        (_uint128_t *)&new,
>>>> > -                                        (_uint128_t *)&old,
>>>> > -                                        _ODP_MEMMODEL_ACQ_RLS);
>>>> > -               old_buf = old.tmo_buf;
>>>> > -#else
>>>> >                 /* Take a related lock */
>>>> >                 while (_odp_atomic_flag_tas(IDX2LOCK(idx)))
>>>> >                         /* While lock is taken, spin using relaxed
>>>> loads */
>>>> > @@ -496,7 +436,6 @@ static bool timer_reset(uint32_t idx,
>>>> >
>>>> >                 /* Release the lock */
>>>> >                 _odp_atomic_flag_clear(IDX2LOCK(idx));
>>>> > -#endif
>>>> >                 /* Return old timeout buffer */
>>>> >                 *tmo_buf = old_buf;
>>>> >         }
>>>> > @@ -510,18 +449,6 @@ static odp_buffer_t timer_cancel(odp_timer_pool
>>>> *tp,
>>>> >         tick_buf_t *tb = &tp->tick_buf[idx];
>>>> >         odp_buffer_t old_buf;
>>>> >
>>>> > -#ifdef ODP_ATOMIC_U128
>>>> > -       tick_buf_t new, old;
>>>> > -       /* Update the timer state (e.g. cancel the current timeout) */
>>>> > -       new.exp_tck.v = new_state;
>>>> > -       /* Swap out the old buffer */
>>>> > -       new.tmo_buf = ODP_BUFFER_INVALID;
>>>> > -       TB_SET_PAD(new);
>>>> > -       _odp_atomic_u128_xchg_mm((_odp_atomic_u128_t *)tb,
>>>> > -                                (_uint128_t *)&new, (_uint128_t
>>>> *)&old,
>>>> > -                                _ODP_MEMMODEL_RLX);
>>>> > -       old_buf = old.tmo_buf;
>>>> > -#else
>>>> >         /* Take a related lock */
>>>> >         while (_odp_atomic_flag_tas(IDX2LOCK(idx)))
>>>> >                 /* While lock is taken, spin using relaxed loads */
>>>> > @@ -537,7 +464,6 @@ static odp_buffer_t timer_cancel(odp_timer_pool
>>>> *tp,
>>>> >
>>>> >         /* Release the lock */
>>>> >         _odp_atomic_flag_clear(IDX2LOCK(idx));
>>>> > -#endif
>>>> >         /* Return the old buffer */
>>>> >         return old_buf;
>>>> >  }
>>>> > @@ -548,34 +474,6 @@ static unsigned timer_expire(odp_timer_pool *tp,
>>>> > uint32_t idx, uint64_t tick)
>>>> >         tick_buf_t *tb = &tp->tick_buf[idx];
>>>> >         odp_buffer_t tmo_buf = ODP_BUFFER_INVALID;
>>>> >         uint64_t exp_tck;
>>>> > -#ifdef ODP_ATOMIC_U128
>>>> > -       /* Atomic re-read for correctness */
>>>> > -       exp_tck = _odp_atomic_u64_load_mm(&tb->exp_tck,
>>>> _ODP_MEMMODEL_RLX);
>>>> > -       /* Re-check exp_tck */
>>>> > -       if (odp_likely(exp_tck <= tick)) {
>>>> > -               /* Attempt to grab timeout buffer, replace with
>>>> inactive
>>>> > timer
>>>> > -                * and invalid buffer */
>>>> > -               tick_buf_t new, old;
>>>> > -               old.exp_tck.v = exp_tck;
>>>> > -               old.tmo_buf = tb->tmo_buf;
>>>> > -               TB_SET_PAD(old);
>>>> > -               /* Set the inactive/expired bit keeping the expiration
>>>> > tick so
>>>> > -                * that we can check against the expiration tick of
>>>> the
>>>> > timeout
>>>> > -                * when it is received */
>>>> > -               new.exp_tck.v = exp_tck | TMO_INACTIVE;
>>>> > -               new.tmo_buf = ODP_BUFFER_INVALID;
>>>> > -               TB_SET_PAD(new);
>>>> > -               int succ = _odp_atomic_u128_cmp_xchg_mm(
>>>> > -                               (_odp_atomic_u128_t *)tb,
>>>> > -                               (_uint128_t *)&old, (_uint128_t
>>>> *)&new,
>>>> > -                               _ODP_MEMMODEL_RLS, _ODP_MEMMODEL_RLX);
>>>> > -               if (succ)
>>>> > -                       tmo_buf = old.tmo_buf;
>>>> > -               /* Else CAS failed, something changed => skip timer
>>>> > -                * this tick, it will be checked again next tick */
>>>> > -       }
>>>> > -       /* Else false positive, ignore */
>>>> > -#else
>>>> >         /* Take a related lock */
>>>> >         while (_odp_atomic_flag_tas(IDX2LOCK(idx)))
>>>> >                 /* While lock is taken, spin using relaxed loads */
>>>> > @@ -600,7 +498,6 @@ static unsigned timer_expire(odp_timer_pool *tp,
>>>> > uint32_t idx, uint64_t tick)
>>>> >         /* Else false positive, ignore */
>>>> >         /* Release the lock */
>>>> >         _odp_atomic_flag_clear(IDX2LOCK(idx));
>>>> > -#endif
>>>> >         if (odp_likely(tmo_buf != ODP_BUFFER_INVALID)) {
>>>> >                 /* Fill in expiration tick for timeout events */
>>>> >                 if (odp_event_type(odp_buffer_to_event(tmo_buf)) ==
>>>> > @@ -974,13 +871,9 @@ void odp_timeout_free(odp_timeout_t tmo)
>>>> >
>>>> >  int odp_timer_init_global(void)
>>>> >  {
>>>> > -#ifndef ODP_ATOMIC_U128
>>>> >         uint32_t i;
>>>> >         for (i = 0; i < NUM_LOCKS; i++)
>>>> >                 _odp_atomic_flag_clear(&locks[i]);
>>>> > -#else
>>>> > -       ODP_DBG("Using lock-less timer implementation\n");
>>>> > -#endif
>>>> >         odp_atomic_init_u32(&num_timer_pools, 0);
>>>> >
>>>> >         block_sigalarm();
>>>> > --
>>>> > 2.7.1.250.gff4ea60
>>>> >
>>>> > _______________________________________________
>>>> > lng-odp mailing list
>>>> > lng-odp@lists.linaro.org
>>>> > https://lists.linaro.org/mailman/listinfo/lng-odp
>>>> >
>>>> _______________________________________________
>>>> lng-odp mailing list
>>>> lng-odp@lists.linaro.org
>>>> https://lists.linaro.org/mailman/listinfo/lng-odp
>>>>
>>>
>>>
>>
>
Ola Liljedahl May 17, 2016, 7 a.m. | #6
On 17 May 2016 at 00:45, Bill Fischofer <bill.fischofer@linaro.org> wrote:

>
>
> On Mon, May 16, 2016 at 4:55 PM, Ola Liljedahl <ola.liljedahl@linaro.org>
> wrote:
>
>> On 16 May 2016 at 23:31, Bill Fischofer <bill.fischofer@linaro.org>
>> wrote:
>>
>>> Thanks Ola.  The original bug is that this fails compiling with clang on
>>> 32-bit systems and the reason is that the struct in that environment winds
>>> up being 24 bytes rather than 16 bytes long, so the ODP_STATIC_ASSERT()
>>> fails.
>>>
>> The timer code should stop using odp_atomic_u64_t (which will include the
>> spinlock for systems that do not support atomic operations on 64-bit
>> locations, ARMv7A is one of the few 32-bit platforms which do support
>> 64-bit atomics natively). Instead use a plain 64-bit datatype (uint64_t) on
>> such systems. For systems which do not support 128-bit (or 64-bit?) atomic
>> operations (which is the ideal but I think I managed to get something
>> working with only 64-bit atomics), we should use locks instead. I think the
>> code does use a separate array of locks which is indexed through a hash of
>> the timer handle or something similar. This locks makes the additional lock
>> in odp_atomic_u64_t unused.
>>
>> Which platforms use the 64-bit atomic code in the timer?
>> ARMv7A (32-bit but with 64-bit atomics support - ldrexd/strexd)
>> OCTEON/MIPS64 (with 64-bit atomics support - lld/scd)
>> 64-bit POWER? (with 64--bit atomics support - ldarx/stdcx.)
>>
>> Se we need a couple of things:
>> 1) 128-bit CAS support on e.g. x86 (requires that configure detects and
>> enables -mcx16 flag).
>> 2) 128-bit ldxp/stxp support on ARM64 (A64 ISA in AArch64 mode). I don't
>> think common compilers support 128-bit CAS on ARM64 (it's a bit tricky) so
>> need to use inline assembly.
>> 3) Some cleanup in the timer code not to use odp_atomic_u64_t on systems
>> which do not support 64-bit atomic operations (and instead uses the spin
>> lock array). This will avoid the failed static assert.
>>
>
> Unless you'd like to submit that patch this week, we need to do something
> on at least a temporary basis to close out Monarch RC3. Sounds like this is
> an area we should revisit for restructure/cleanup for Tiger Moth. Given the
> divergence between near-term need and the (better) longer-term solution you
> outline, what do you recommend for now?
>
Fix the static assert problem on certain 32-bit targets by changing
odp_atomic_u64_t to uint64_t on those targets.


>
>
>>
>>
>>> My original (simple) patch is at
>>> http://patches.opendataplane.org/patch/5932/ however Maxim didn't like
>>> it. Perhaps you can offer a compromise?
>>>
>>> On Mon, May 16, 2016 at 4:21 PM, Ola Liljedahl <ola.liljedahl@linaro.org
>>> > wrote:
>>>
>>>> I disagree. The 128-bit support is important because that's the
>>>> lock-free timer implementation which was the whole idea. The lock-based
>>>> code is just a work-around. We should rather add support in configure to
>>>> detect compiler support for the -mcx16 flag which enables 128-bit CAS on
>>>> x86-64. Now when I have an ARM64 target, I can actually write and test
>>>> 128-bit CAS support on ARM (A64 ISA in AArch64 mode) and contribute that in
>>>> another patch. I am not sure whether common compilers (e.g. GCC) properly
>>>> supports 128-bit CAS on ARM64, might have to use LDXP/STXP (load/store
>>>> exclusive pair) directly.
>>>>
>>>> -- Ola
>>>>
>>>> On 16 May 2016 at 22:43, Bill Fischofer <bill.fischofer@linaro.org>
>>>> wrote:
>>>>
>>>>> On Mon, May 16, 2016 at 2:15 PM, Maxim Uvarov <maxim.uvarov@linaro.org
>>>>> >
>>>>> wrote:
>>>>>
>>>>> > Remove totally untested branch with 128 bit optimizations
>>>>> > for timer code.
>>>>> > This also fixes static assert bug:
>>>>> > https://bugs.linaro.org/show_bug.cgi?id=2211
>>>>> >
>>>>> > Signed-off-by: Maxim Uvarov <maxim.uvarov@linaro.org>
>>>>> >
>>>>>
>>>>> Reviewed-and-tested-by: Bill Fischofer <bill.fischofer@linaro.org>
>>>>>
>>>>>
>>>>> > ---
>>>>> >  platform/linux-generic/odp_timer.c | 107
>>>>> > -------------------------------------
>>>>> >  1 file changed, 107 deletions(-)
>>>>> >
>>>>> > diff --git a/platform/linux-generic/odp_timer.c
>>>>> > b/platform/linux-generic/odp_timer.c
>>>>> > index 6b84309..be28da3 100644
>>>>> > --- a/platform/linux-generic/odp_timer.c
>>>>> > +++ b/platform/linux-generic/odp_timer.c
>>>>> > @@ -11,15 +11,7 @@
>>>>> >   *
>>>>> >   */
>>>>> >
>>>>> > -/* Check if compiler supports 16-byte atomics. GCC needs -mcx16
>>>>> flag on
>>>>> > x86 */
>>>>> > -/* Using spin lock actually seems faster on Core2 */
>>>>> > -#ifdef ODP_ATOMIC_U128
>>>>> > -/* TB_NEEDS_PAD defined if sizeof(odp_buffer_t) != 8 */
>>>>> > -#define TB_NEEDS_PAD
>>>>> > -#define TB_SET_PAD(x) ((x).pad = 0)
>>>>> > -#else
>>>>> >  #define TB_SET_PAD(x) (void)(x)
>>>>> > -#endif
>>>>> >
>>>>> >  #include <odp_posix_extensions.h>
>>>>> >
>>>>> > @@ -70,11 +62,9 @@
>>>>> >   * Mutual exclusion in the absence of CAS16
>>>>> >
>>>>> >
>>>>> *****************************************************************************/
>>>>> >
>>>>> > -#ifndef ODP_ATOMIC_U128
>>>>> >  #define NUM_LOCKS 1024
>>>>> >  static _odp_atomic_flag_t locks[NUM_LOCKS]; /* Multiple locks per
>>>>> cache
>>>>> > line! */
>>>>> >  #define IDX2LOCK(idx) (&locks[(idx) % NUM_LOCKS])
>>>>> > -#endif
>>>>> >
>>>>> >
>>>>> >
>>>>> /******************************************************************************
>>>>> >   * Translation between timeout buffer and timeout header
>>>>> > @@ -98,17 +88,9 @@ static odp_timeout_hdr_t
>>>>> *timeout_hdr(odp_timeout_t tmo)
>>>>> >  typedef struct tick_buf_s {
>>>>> >         odp_atomic_u64_t exp_tck;/* Expiration tick or TMO_xxx */
>>>>> >         odp_buffer_t tmo_buf;/* ODP_BUFFER_INVALID if timer not
>>>>> active */
>>>>> > -#ifdef TB_NEEDS_PAD
>>>>> > -       uint32_t pad;/* Need to be able to access padding for
>>>>> successful
>>>>> > CAS */
>>>>> > -#endif
>>>>> >  } tick_buf_t
>>>>> > -#ifdef ODP_ATOMIC_U128
>>>>> > -ODP_ALIGNED(16) /* 16-byte atomic operations need properly aligned
>>>>> > addresses */
>>>>> > -#endif
>>>>> >  ;
>>>>> >
>>>>> > -ODP_STATIC_ASSERT(sizeof(tick_buf_t) == 16, "sizeof(tick_buf_t) ==
>>>>> 16");
>>>>> > -
>>>>> >  typedef struct odp_timer_s {
>>>>> >         void *user_ptr;
>>>>> >         odp_queue_t queue;/* Used for free list when timer is free */
>>>>> > @@ -378,34 +360,6 @@ static bool timer_reset(uint32_t idx,
>>>>> >         tick_buf_t *tb = &tp->tick_buf[idx];
>>>>> >
>>>>> >         if (tmo_buf == NULL || *tmo_buf == ODP_BUFFER_INVALID) {
>>>>> > -#ifdef ODP_ATOMIC_U128
>>>>> > -               tick_buf_t new, old;
>>>>> > -               do {
>>>>> > -                       /* Relaxed and non-atomic read of current
>>>>> values */
>>>>> > -                       old.exp_tck.v = tb->exp_tck.v;
>>>>> > -                       old.tmo_buf = tb->tmo_buf;
>>>>> > -                       TB_SET_PAD(old);
>>>>> > -                       /* Check if there actually is a timeout
>>>>> buffer
>>>>> > -                        * present */
>>>>> > -                       if (old.tmo_buf == ODP_BUFFER_INVALID) {
>>>>> > -                               /* Cannot reset a timer with neither
>>>>> old
>>>>> > nor
>>>>> > -                                * new timeout buffer */
>>>>> > -                               success = false;
>>>>> > -                               break;
>>>>> > -                       }
>>>>> > -                       /* Set up new values */
>>>>> > -                       new.exp_tck.v = abs_tck;
>>>>> > -                       new.tmo_buf = old.tmo_buf;
>>>>> > -                       TB_SET_PAD(new);
>>>>> > -                       /* Atomic CAS will fail if we experienced
>>>>> torn
>>>>> > reads,
>>>>> > -                        * retry update sequence until CAS succeeds
>>>>> */
>>>>> > -               } while (!_odp_atomic_u128_cmp_xchg_mm(
>>>>> > -                                       (_odp_atomic_u128_t *)tb,
>>>>> > -                                       (_uint128_t *)&old,
>>>>> > -                                       (_uint128_t *)&new,
>>>>> > -                                       _ODP_MEMMODEL_RLS,
>>>>> > -                                       _ODP_MEMMODEL_RLX));
>>>>> > -#else
>>>>> >  #ifdef __ARM_ARCH
>>>>> >                 /* Since barriers are not good for C-A15, we take an
>>>>> >                  * alternative approach using relaxed memory model */
>>>>> > @@ -453,7 +407,6 @@ static bool timer_reset(uint32_t idx,
>>>>> >                 /* Release the lock */
>>>>> >                 _odp_atomic_flag_clear(IDX2LOCK(idx));
>>>>> >  #endif
>>>>> > -#endif
>>>>> >         } else {
>>>>> >                 /* We have a new timeout buffer which replaces any
>>>>> old one
>>>>> > */
>>>>> >                 /* Fill in some (constant) header fields for timeout
>>>>> > events */
>>>>> > @@ -468,19 +421,6 @@ static bool timer_reset(uint32_t idx,
>>>>> >                 }
>>>>> >                 /* Else ignore buffers of other types */
>>>>> >                 odp_buffer_t old_buf = ODP_BUFFER_INVALID;
>>>>> > -#ifdef ODP_ATOMIC_U128
>>>>> > -               tick_buf_t new, old;
>>>>> > -               new.exp_tck.v = abs_tck;
>>>>> > -               new.tmo_buf = *tmo_buf;
>>>>> > -               TB_SET_PAD(new);
>>>>> > -               /* We are releasing the new timeout buffer to some
>>>>> other
>>>>> > -                * thread */
>>>>> > -               _odp_atomic_u128_xchg_mm((_odp_atomic_u128_t *)tb,
>>>>> > -                                        (_uint128_t *)&new,
>>>>> > -                                        (_uint128_t *)&old,
>>>>> > -                                        _ODP_MEMMODEL_ACQ_RLS);
>>>>> > -               old_buf = old.tmo_buf;
>>>>> > -#else
>>>>> >                 /* Take a related lock */
>>>>> >                 while (_odp_atomic_flag_tas(IDX2LOCK(idx)))
>>>>> >                         /* While lock is taken, spin using relaxed
>>>>> loads */
>>>>> > @@ -496,7 +436,6 @@ static bool timer_reset(uint32_t idx,
>>>>> >
>>>>> >                 /* Release the lock */
>>>>> >                 _odp_atomic_flag_clear(IDX2LOCK(idx));
>>>>> > -#endif
>>>>> >                 /* Return old timeout buffer */
>>>>> >                 *tmo_buf = old_buf;
>>>>> >         }
>>>>> > @@ -510,18 +449,6 @@ static odp_buffer_t timer_cancel(odp_timer_pool
>>>>> *tp,
>>>>> >         tick_buf_t *tb = &tp->tick_buf[idx];
>>>>> >         odp_buffer_t old_buf;
>>>>> >
>>>>> > -#ifdef ODP_ATOMIC_U128
>>>>> > -       tick_buf_t new, old;
>>>>> > -       /* Update the timer state (e.g. cancel the current timeout)
>>>>> */
>>>>> > -       new.exp_tck.v = new_state;
>>>>> > -       /* Swap out the old buffer */
>>>>> > -       new.tmo_buf = ODP_BUFFER_INVALID;
>>>>> > -       TB_SET_PAD(new);
>>>>> > -       _odp_atomic_u128_xchg_mm((_odp_atomic_u128_t *)tb,
>>>>> > -                                (_uint128_t *)&new, (_uint128_t
>>>>> *)&old,
>>>>> > -                                _ODP_MEMMODEL_RLX);
>>>>> > -       old_buf = old.tmo_buf;
>>>>> > -#else
>>>>> >         /* Take a related lock */
>>>>> >         while (_odp_atomic_flag_tas(IDX2LOCK(idx)))
>>>>> >                 /* While lock is taken, spin using relaxed loads */
>>>>> > @@ -537,7 +464,6 @@ static odp_buffer_t timer_cancel(odp_timer_pool
>>>>> *tp,
>>>>> >
>>>>> >         /* Release the lock */
>>>>> >         _odp_atomic_flag_clear(IDX2LOCK(idx));
>>>>> > -#endif
>>>>> >         /* Return the old buffer */
>>>>> >         return old_buf;
>>>>> >  }
>>>>> > @@ -548,34 +474,6 @@ static unsigned timer_expire(odp_timer_pool *tp,
>>>>> > uint32_t idx, uint64_t tick)
>>>>> >         tick_buf_t *tb = &tp->tick_buf[idx];
>>>>> >         odp_buffer_t tmo_buf = ODP_BUFFER_INVALID;
>>>>> >         uint64_t exp_tck;
>>>>> > -#ifdef ODP_ATOMIC_U128
>>>>> > -       /* Atomic re-read for correctness */
>>>>> > -       exp_tck = _odp_atomic_u64_load_mm(&tb->exp_tck,
>>>>> _ODP_MEMMODEL_RLX);
>>>>> > -       /* Re-check exp_tck */
>>>>> > -       if (odp_likely(exp_tck <= tick)) {
>>>>> > -               /* Attempt to grab timeout buffer, replace with
>>>>> inactive
>>>>> > timer
>>>>> > -                * and invalid buffer */
>>>>> > -               tick_buf_t new, old;
>>>>> > -               old.exp_tck.v = exp_tck;
>>>>> > -               old.tmo_buf = tb->tmo_buf;
>>>>> > -               TB_SET_PAD(old);
>>>>> > -               /* Set the inactive/expired bit keeping the
>>>>> expiration
>>>>> > tick so
>>>>> > -                * that we can check against the expiration tick of
>>>>> the
>>>>> > timeout
>>>>> > -                * when it is received */
>>>>> > -               new.exp_tck.v = exp_tck | TMO_INACTIVE;
>>>>> > -               new.tmo_buf = ODP_BUFFER_INVALID;
>>>>> > -               TB_SET_PAD(new);
>>>>> > -               int succ = _odp_atomic_u128_cmp_xchg_mm(
>>>>> > -                               (_odp_atomic_u128_t *)tb,
>>>>> > -                               (_uint128_t *)&old, (_uint128_t
>>>>> *)&new,
>>>>> > -                               _ODP_MEMMODEL_RLS,
>>>>> _ODP_MEMMODEL_RLX);
>>>>> > -               if (succ)
>>>>> > -                       tmo_buf = old.tmo_buf;
>>>>> > -               /* Else CAS failed, something changed => skip timer
>>>>> > -                * this tick, it will be checked again next tick */
>>>>> > -       }
>>>>> > -       /* Else false positive, ignore */
>>>>> > -#else
>>>>> >         /* Take a related lock */
>>>>> >         while (_odp_atomic_flag_tas(IDX2LOCK(idx)))
>>>>> >                 /* While lock is taken, spin using relaxed loads */
>>>>> > @@ -600,7 +498,6 @@ static unsigned timer_expire(odp_timer_pool *tp,
>>>>> > uint32_t idx, uint64_t tick)
>>>>> >         /* Else false positive, ignore */
>>>>> >         /* Release the lock */
>>>>> >         _odp_atomic_flag_clear(IDX2LOCK(idx));
>>>>> > -#endif
>>>>> >         if (odp_likely(tmo_buf != ODP_BUFFER_INVALID)) {
>>>>> >                 /* Fill in expiration tick for timeout events */
>>>>> >                 if (odp_event_type(odp_buffer_to_event(tmo_buf)) ==
>>>>> > @@ -974,13 +871,9 @@ void odp_timeout_free(odp_timeout_t tmo)
>>>>> >
>>>>> >  int odp_timer_init_global(void)
>>>>> >  {
>>>>> > -#ifndef ODP_ATOMIC_U128
>>>>> >         uint32_t i;
>>>>> >         for (i = 0; i < NUM_LOCKS; i++)
>>>>> >                 _odp_atomic_flag_clear(&locks[i]);
>>>>> > -#else
>>>>> > -       ODP_DBG("Using lock-less timer implementation\n");
>>>>> > -#endif
>>>>> >         odp_atomic_init_u32(&num_timer_pools, 0);
>>>>> >
>>>>> >         block_sigalarm();
>>>>> > --
>>>>> > 2.7.1.250.gff4ea60
>>>>> >
>>>>> > _______________________________________________
>>>>> > lng-odp mailing list
>>>>> > lng-odp@lists.linaro.org
>>>>> > https://lists.linaro.org/mailman/listinfo/lng-odp
>>>>> >
>>>>> _______________________________________________
>>>>> lng-odp mailing list
>>>>> lng-odp@lists.linaro.org
>>>>> https://lists.linaro.org/mailman/listinfo/lng-odp
>>>>>
>>>>
>>>>
>>>
>>
>
Ola Liljedahl May 17, 2016, 11:12 a.m. | #7
Coding would be much simpler if one could use GCC __atomic builtins
directly instead of ODP public and internal atomic API's.
The problem with odp_atomic_u64_t sometimes (i.e. for most 32-bit
architectures, ARMv7a and x86-32 the exceptions) containing a lock would go
away.

-- Ola



On 17 May 2016 at 09:00, Ola Liljedahl <ola.liljedahl@linaro.org> wrote:

>
>
> On 17 May 2016 at 00:45, Bill Fischofer <bill.fischofer@linaro.org> wrote:
>
>>
>>
>> On Mon, May 16, 2016 at 4:55 PM, Ola Liljedahl <ola.liljedahl@linaro.org>
>> wrote:
>>
>>> On 16 May 2016 at 23:31, Bill Fischofer <bill.fischofer@linaro.org>
>>> wrote:
>>>
>>>> Thanks Ola.  The original bug is that this fails compiling with clang
>>>> on 32-bit systems and the reason is that the struct in that environment
>>>> winds up being 24 bytes rather than 16 bytes long, so the
>>>> ODP_STATIC_ASSERT() fails.
>>>>
>>> The timer code should stop using odp_atomic_u64_t (which will include
>>> the spinlock for systems that do not support atomic operations on 64-bit
>>> locations, ARMv7A is one of the few 32-bit platforms which do support
>>> 64-bit atomics natively). Instead use a plain 64-bit datatype (uint64_t) on
>>> such systems. For systems which do not support 128-bit (or 64-bit?) atomic
>>> operations (which is the ideal but I think I managed to get something
>>> working with only 64-bit atomics), we should use locks instead. I think the
>>> code does use a separate array of locks which is indexed through a hash of
>>> the timer handle or something similar. This locks makes the additional lock
>>> in odp_atomic_u64_t unused.
>>>
>>> Which platforms use the 64-bit atomic code in the timer?
>>> ARMv7A (32-bit but with 64-bit atomics support - ldrexd/strexd)
>>> OCTEON/MIPS64 (with 64-bit atomics support - lld/scd)
>>> 64-bit POWER? (with 64--bit atomics support - ldarx/stdcx.)
>>>
>>> Se we need a couple of things:
>>> 1) 128-bit CAS support on e.g. x86 (requires that configure detects and
>>> enables -mcx16 flag).
>>> 2) 128-bit ldxp/stxp support on ARM64 (A64 ISA in AArch64 mode). I don't
>>> think common compilers support 128-bit CAS on ARM64 (it's a bit tricky) so
>>> need to use inline assembly.
>>> 3) Some cleanup in the timer code not to use odp_atomic_u64_t on systems
>>> which do not support 64-bit atomic operations (and instead uses the spin
>>> lock array). This will avoid the failed static assert.
>>>
>>
>> Unless you'd like to submit that patch this week, we need to do something
>> on at least a temporary basis to close out Monarch RC3. Sounds like this is
>> an area we should revisit for restructure/cleanup for Tiger Moth. Given the
>> divergence between near-term need and the (better) longer-term solution you
>> outline, what do you recommend for now?
>>
> Fix the static assert problem on certain 32-bit targets by changing
> odp_atomic_u64_t to uint64_t on those targets.
>
>
>>
>>
>>>
>>>
>>>> My original (simple) patch is at
>>>> http://patches.opendataplane.org/patch/5932/ however Maxim didn't like
>>>> it. Perhaps you can offer a compromise?
>>>>
>>>> On Mon, May 16, 2016 at 4:21 PM, Ola Liljedahl <
>>>> ola.liljedahl@linaro.org> wrote:
>>>>
>>>>> I disagree. The 128-bit support is important because that's the
>>>>> lock-free timer implementation which was the whole idea. The lock-based
>>>>> code is just a work-around. We should rather add support in configure to
>>>>> detect compiler support for the -mcx16 flag which enables 128-bit CAS on
>>>>> x86-64. Now when I have an ARM64 target, I can actually write and test
>>>>> 128-bit CAS support on ARM (A64 ISA in AArch64 mode) and contribute that in
>>>>> another patch. I am not sure whether common compilers (e.g. GCC) properly
>>>>> supports 128-bit CAS on ARM64, might have to use LDXP/STXP (load/store
>>>>> exclusive pair) directly.
>>>>>
>>>>> -- Ola
>>>>>
>>>>> On 16 May 2016 at 22:43, Bill Fischofer <bill.fischofer@linaro.org>
>>>>> wrote:
>>>>>
>>>>>> On Mon, May 16, 2016 at 2:15 PM, Maxim Uvarov <
>>>>>> maxim.uvarov@linaro.org>
>>>>>> wrote:
>>>>>>
>>>>>> > Remove totally untested branch with 128 bit optimizations
>>>>>> > for timer code.
>>>>>> > This also fixes static assert bug:
>>>>>> > https://bugs.linaro.org/show_bug.cgi?id=2211
>>>>>> >
>>>>>> > Signed-off-by: Maxim Uvarov <maxim.uvarov@linaro.org>
>>>>>> >
>>>>>>
>>>>>> Reviewed-and-tested-by: Bill Fischofer <bill.fischofer@linaro.org>
>>>>>>
>>>>>>
>>>>>> > ---
>>>>>> >  platform/linux-generic/odp_timer.c | 107
>>>>>> > -------------------------------------
>>>>>> >  1 file changed, 107 deletions(-)
>>>>>> >
>>>>>> > diff --git a/platform/linux-generic/odp_timer.c
>>>>>> > b/platform/linux-generic/odp_timer.c
>>>>>> > index 6b84309..be28da3 100644
>>>>>> > --- a/platform/linux-generic/odp_timer.c
>>>>>> > +++ b/platform/linux-generic/odp_timer.c
>>>>>> > @@ -11,15 +11,7 @@
>>>>>> >   *
>>>>>> >   */
>>>>>> >
>>>>>> > -/* Check if compiler supports 16-byte atomics. GCC needs -mcx16
>>>>>> flag on
>>>>>> > x86 */
>>>>>> > -/* Using spin lock actually seems faster on Core2 */
>>>>>> > -#ifdef ODP_ATOMIC_U128
>>>>>> > -/* TB_NEEDS_PAD defined if sizeof(odp_buffer_t) != 8 */
>>>>>> > -#define TB_NEEDS_PAD
>>>>>> > -#define TB_SET_PAD(x) ((x).pad = 0)
>>>>>> > -#else
>>>>>> >  #define TB_SET_PAD(x) (void)(x)
>>>>>> > -#endif
>>>>>> >
>>>>>> >  #include <odp_posix_extensions.h>
>>>>>> >
>>>>>> > @@ -70,11 +62,9 @@
>>>>>> >   * Mutual exclusion in the absence of CAS16
>>>>>> >
>>>>>> >
>>>>>> *****************************************************************************/
>>>>>> >
>>>>>> > -#ifndef ODP_ATOMIC_U128
>>>>>> >  #define NUM_LOCKS 1024
>>>>>> >  static _odp_atomic_flag_t locks[NUM_LOCKS]; /* Multiple locks per
>>>>>> cache
>>>>>> > line! */
>>>>>> >  #define IDX2LOCK(idx) (&locks[(idx) % NUM_LOCKS])
>>>>>> > -#endif
>>>>>> >
>>>>>> >
>>>>>> >
>>>>>> /******************************************************************************
>>>>>> >   * Translation between timeout buffer and timeout header
>>>>>> > @@ -98,17 +88,9 @@ static odp_timeout_hdr_t
>>>>>> *timeout_hdr(odp_timeout_t tmo)
>>>>>> >  typedef struct tick_buf_s {
>>>>>> >         odp_atomic_u64_t exp_tck;/* Expiration tick or TMO_xxx */
>>>>>> >         odp_buffer_t tmo_buf;/* ODP_BUFFER_INVALID if timer not
>>>>>> active */
>>>>>> > -#ifdef TB_NEEDS_PAD
>>>>>> > -       uint32_t pad;/* Need to be able to access padding for
>>>>>> successful
>>>>>> > CAS */
>>>>>> > -#endif
>>>>>> >  } tick_buf_t
>>>>>> > -#ifdef ODP_ATOMIC_U128
>>>>>> > -ODP_ALIGNED(16) /* 16-byte atomic operations need properly aligned
>>>>>> > addresses */
>>>>>> > -#endif
>>>>>> >  ;
>>>>>> >
>>>>>> > -ODP_STATIC_ASSERT(sizeof(tick_buf_t) == 16, "sizeof(tick_buf_t) ==
>>>>>> 16");
>>>>>> > -
>>>>>> >  typedef struct odp_timer_s {
>>>>>> >         void *user_ptr;
>>>>>> >         odp_queue_t queue;/* Used for free list when timer is free
>>>>>> */
>>>>>> > @@ -378,34 +360,6 @@ static bool timer_reset(uint32_t idx,
>>>>>> >         tick_buf_t *tb = &tp->tick_buf[idx];
>>>>>> >
>>>>>> >         if (tmo_buf == NULL || *tmo_buf == ODP_BUFFER_INVALID) {
>>>>>> > -#ifdef ODP_ATOMIC_U128
>>>>>> > -               tick_buf_t new, old;
>>>>>> > -               do {
>>>>>> > -                       /* Relaxed and non-atomic read of current
>>>>>> values */
>>>>>> > -                       old.exp_tck.v = tb->exp_tck.v;
>>>>>> > -                       old.tmo_buf = tb->tmo_buf;
>>>>>> > -                       TB_SET_PAD(old);
>>>>>> > -                       /* Check if there actually is a timeout
>>>>>> buffer
>>>>>> > -                        * present */
>>>>>> > -                       if (old.tmo_buf == ODP_BUFFER_INVALID) {
>>>>>> > -                               /* Cannot reset a timer with
>>>>>> neither old
>>>>>> > nor
>>>>>> > -                                * new timeout buffer */
>>>>>> > -                               success = false;
>>>>>> > -                               break;
>>>>>> > -                       }
>>>>>> > -                       /* Set up new values */
>>>>>> > -                       new.exp_tck.v = abs_tck;
>>>>>> > -                       new.tmo_buf = old.tmo_buf;
>>>>>> > -                       TB_SET_PAD(new);
>>>>>> > -                       /* Atomic CAS will fail if we experienced
>>>>>> torn
>>>>>> > reads,
>>>>>> > -                        * retry update sequence until CAS succeeds
>>>>>> */
>>>>>> > -               } while (!_odp_atomic_u128_cmp_xchg_mm(
>>>>>> > -                                       (_odp_atomic_u128_t *)tb,
>>>>>> > -                                       (_uint128_t *)&old,
>>>>>> > -                                       (_uint128_t *)&new,
>>>>>> > -                                       _ODP_MEMMODEL_RLS,
>>>>>> > -                                       _ODP_MEMMODEL_RLX));
>>>>>> > -#else
>>>>>> >  #ifdef __ARM_ARCH
>>>>>> >                 /* Since barriers are not good for C-A15, we take an
>>>>>> >                  * alternative approach using relaxed memory model
>>>>>> */
>>>>>> > @@ -453,7 +407,6 @@ static bool timer_reset(uint32_t idx,
>>>>>> >                 /* Release the lock */
>>>>>> >                 _odp_atomic_flag_clear(IDX2LOCK(idx));
>>>>>> >  #endif
>>>>>> > -#endif
>>>>>> >         } else {
>>>>>> >                 /* We have a new timeout buffer which replaces any
>>>>>> old one
>>>>>> > */
>>>>>> >                 /* Fill in some (constant) header fields for timeout
>>>>>> > events */
>>>>>> > @@ -468,19 +421,6 @@ static bool timer_reset(uint32_t idx,
>>>>>> >                 }
>>>>>> >                 /* Else ignore buffers of other types */
>>>>>> >                 odp_buffer_t old_buf = ODP_BUFFER_INVALID;
>>>>>> > -#ifdef ODP_ATOMIC_U128
>>>>>> > -               tick_buf_t new, old;
>>>>>> > -               new.exp_tck.v = abs_tck;
>>>>>> > -               new.tmo_buf = *tmo_buf;
>>>>>> > -               TB_SET_PAD(new);
>>>>>> > -               /* We are releasing the new timeout buffer to some
>>>>>> other
>>>>>> > -                * thread */
>>>>>> > -               _odp_atomic_u128_xchg_mm((_odp_atomic_u128_t *)tb,
>>>>>> > -                                        (_uint128_t *)&new,
>>>>>> > -                                        (_uint128_t *)&old,
>>>>>> > -                                        _ODP_MEMMODEL_ACQ_RLS);
>>>>>> > -               old_buf = old.tmo_buf;
>>>>>> > -#else
>>>>>> >                 /* Take a related lock */
>>>>>> >                 while (_odp_atomic_flag_tas(IDX2LOCK(idx)))
>>>>>> >                         /* While lock is taken, spin using relaxed
>>>>>> loads */
>>>>>> > @@ -496,7 +436,6 @@ static bool timer_reset(uint32_t idx,
>>>>>> >
>>>>>> >                 /* Release the lock */
>>>>>> >                 _odp_atomic_flag_clear(IDX2LOCK(idx));
>>>>>> > -#endif
>>>>>> >                 /* Return old timeout buffer */
>>>>>> >                 *tmo_buf = old_buf;
>>>>>> >         }
>>>>>> > @@ -510,18 +449,6 @@ static odp_buffer_t
>>>>>> timer_cancel(odp_timer_pool *tp,
>>>>>> >         tick_buf_t *tb = &tp->tick_buf[idx];
>>>>>> >         odp_buffer_t old_buf;
>>>>>> >
>>>>>> > -#ifdef ODP_ATOMIC_U128
>>>>>> > -       tick_buf_t new, old;
>>>>>> > -       /* Update the timer state (e.g. cancel the current timeout)
>>>>>> */
>>>>>> > -       new.exp_tck.v = new_state;
>>>>>> > -       /* Swap out the old buffer */
>>>>>> > -       new.tmo_buf = ODP_BUFFER_INVALID;
>>>>>> > -       TB_SET_PAD(new);
>>>>>> > -       _odp_atomic_u128_xchg_mm((_odp_atomic_u128_t *)tb,
>>>>>> > -                                (_uint128_t *)&new, (_uint128_t
>>>>>> *)&old,
>>>>>> > -                                _ODP_MEMMODEL_RLX);
>>>>>> > -       old_buf = old.tmo_buf;
>>>>>> > -#else
>>>>>> >         /* Take a related lock */
>>>>>> >         while (_odp_atomic_flag_tas(IDX2LOCK(idx)))
>>>>>> >                 /* While lock is taken, spin using relaxed loads */
>>>>>> > @@ -537,7 +464,6 @@ static odp_buffer_t timer_cancel(odp_timer_pool
>>>>>> *tp,
>>>>>> >
>>>>>> >         /* Release the lock */
>>>>>> >         _odp_atomic_flag_clear(IDX2LOCK(idx));
>>>>>> > -#endif
>>>>>> >         /* Return the old buffer */
>>>>>> >         return old_buf;
>>>>>> >  }
>>>>>> > @@ -548,34 +474,6 @@ static unsigned timer_expire(odp_timer_pool
>>>>>> *tp,
>>>>>> > uint32_t idx, uint64_t tick)
>>>>>> >         tick_buf_t *tb = &tp->tick_buf[idx];
>>>>>> >         odp_buffer_t tmo_buf = ODP_BUFFER_INVALID;
>>>>>> >         uint64_t exp_tck;
>>>>>> > -#ifdef ODP_ATOMIC_U128
>>>>>> > -       /* Atomic re-read for correctness */
>>>>>> > -       exp_tck = _odp_atomic_u64_load_mm(&tb->exp_tck,
>>>>>> _ODP_MEMMODEL_RLX);
>>>>>> > -       /* Re-check exp_tck */
>>>>>> > -       if (odp_likely(exp_tck <= tick)) {
>>>>>> > -               /* Attempt to grab timeout buffer, replace with
>>>>>> inactive
>>>>>> > timer
>>>>>> > -                * and invalid buffer */
>>>>>> > -               tick_buf_t new, old;
>>>>>> > -               old.exp_tck.v = exp_tck;
>>>>>> > -               old.tmo_buf = tb->tmo_buf;
>>>>>> > -               TB_SET_PAD(old);
>>>>>> > -               /* Set the inactive/expired bit keeping the
>>>>>> expiration
>>>>>> > tick so
>>>>>> > -                * that we can check against the expiration tick of
>>>>>> the
>>>>>> > timeout
>>>>>> > -                * when it is received */
>>>>>> > -               new.exp_tck.v = exp_tck | TMO_INACTIVE;
>>>>>> > -               new.tmo_buf = ODP_BUFFER_INVALID;
>>>>>> > -               TB_SET_PAD(new);
>>>>>> > -               int succ = _odp_atomic_u128_cmp_xchg_mm(
>>>>>> > -                               (_odp_atomic_u128_t *)tb,
>>>>>> > -                               (_uint128_t *)&old, (_uint128_t
>>>>>> *)&new,
>>>>>> > -                               _ODP_MEMMODEL_RLS,
>>>>>> _ODP_MEMMODEL_RLX);
>>>>>> > -               if (succ)
>>>>>> > -                       tmo_buf = old.tmo_buf;
>>>>>> > -               /* Else CAS failed, something changed => skip timer
>>>>>> > -                * this tick, it will be checked again next tick */
>>>>>> > -       }
>>>>>> > -       /* Else false positive, ignore */
>>>>>> > -#else
>>>>>> >         /* Take a related lock */
>>>>>> >         while (_odp_atomic_flag_tas(IDX2LOCK(idx)))
>>>>>> >                 /* While lock is taken, spin using relaxed loads */
>>>>>> > @@ -600,7 +498,6 @@ static unsigned timer_expire(odp_timer_pool *tp,
>>>>>> > uint32_t idx, uint64_t tick)
>>>>>> >         /* Else false positive, ignore */
>>>>>> >         /* Release the lock */
>>>>>> >         _odp_atomic_flag_clear(IDX2LOCK(idx));
>>>>>> > -#endif
>>>>>> >         if (odp_likely(tmo_buf != ODP_BUFFER_INVALID)) {
>>>>>> >                 /* Fill in expiration tick for timeout events */
>>>>>> >                 if (odp_event_type(odp_buffer_to_event(tmo_buf)) ==
>>>>>> > @@ -974,13 +871,9 @@ void odp_timeout_free(odp_timeout_t tmo)
>>>>>> >
>>>>>> >  int odp_timer_init_global(void)
>>>>>> >  {
>>>>>> > -#ifndef ODP_ATOMIC_U128
>>>>>> >         uint32_t i;
>>>>>> >         for (i = 0; i < NUM_LOCKS; i++)
>>>>>> >                 _odp_atomic_flag_clear(&locks[i]);
>>>>>> > -#else
>>>>>> > -       ODP_DBG("Using lock-less timer implementation\n");
>>>>>> > -#endif
>>>>>> >         odp_atomic_init_u32(&num_timer_pools, 0);
>>>>>> >
>>>>>> >         block_sigalarm();
>>>>>> > --
>>>>>> > 2.7.1.250.gff4ea60
>>>>>> >
>>>>>> > _______________________________________________
>>>>>> > lng-odp mailing list
>>>>>> > lng-odp@lists.linaro.org
>>>>>> > https://lists.linaro.org/mailman/listinfo/lng-odp
>>>>>> >
>>>>>> _______________________________________________
>>>>>> lng-odp mailing list
>>>>>> lng-odp@lists.linaro.org
>>>>>> https://lists.linaro.org/mailman/listinfo/lng-odp
>>>>>>
>>>>>
>>>>>
>>>>
>>>
>>
>

Patch

diff --git a/platform/linux-generic/odp_timer.c b/platform/linux-generic/odp_timer.c
index 6b84309..be28da3 100644
--- a/platform/linux-generic/odp_timer.c
+++ b/platform/linux-generic/odp_timer.c
@@ -11,15 +11,7 @@ 
  *
  */
 
-/* Check if compiler supports 16-byte atomics. GCC needs -mcx16 flag on x86 */
-/* Using spin lock actually seems faster on Core2 */
-#ifdef ODP_ATOMIC_U128
-/* TB_NEEDS_PAD defined if sizeof(odp_buffer_t) != 8 */
-#define TB_NEEDS_PAD
-#define TB_SET_PAD(x) ((x).pad = 0)
-#else
 #define TB_SET_PAD(x) (void)(x)
-#endif
 
 #include <odp_posix_extensions.h>
 
@@ -70,11 +62,9 @@ 
  * Mutual exclusion in the absence of CAS16
  *****************************************************************************/
 
-#ifndef ODP_ATOMIC_U128
 #define NUM_LOCKS 1024
 static _odp_atomic_flag_t locks[NUM_LOCKS]; /* Multiple locks per cache line! */
 #define IDX2LOCK(idx) (&locks[(idx) % NUM_LOCKS])
-#endif
 
 /******************************************************************************
  * Translation between timeout buffer and timeout header
@@ -98,17 +88,9 @@  static odp_timeout_hdr_t *timeout_hdr(odp_timeout_t tmo)
 typedef struct tick_buf_s {
 	odp_atomic_u64_t exp_tck;/* Expiration tick or TMO_xxx */
 	odp_buffer_t tmo_buf;/* ODP_BUFFER_INVALID if timer not active */
-#ifdef TB_NEEDS_PAD
-	uint32_t pad;/* Need to be able to access padding for successful CAS */
-#endif
 } tick_buf_t
-#ifdef ODP_ATOMIC_U128
-ODP_ALIGNED(16) /* 16-byte atomic operations need properly aligned addresses */
-#endif
 ;
 
-ODP_STATIC_ASSERT(sizeof(tick_buf_t) == 16, "sizeof(tick_buf_t) == 16");
-
 typedef struct odp_timer_s {
 	void *user_ptr;
 	odp_queue_t queue;/* Used for free list when timer is free */
@@ -378,34 +360,6 @@  static bool timer_reset(uint32_t idx,
 	tick_buf_t *tb = &tp->tick_buf[idx];
 
 	if (tmo_buf == NULL || *tmo_buf == ODP_BUFFER_INVALID) {
-#ifdef ODP_ATOMIC_U128
-		tick_buf_t new, old;
-		do {
-			/* Relaxed and non-atomic read of current values */
-			old.exp_tck.v = tb->exp_tck.v;
-			old.tmo_buf = tb->tmo_buf;
-			TB_SET_PAD(old);
-			/* Check if there actually is a timeout buffer
-			 * present */
-			if (old.tmo_buf == ODP_BUFFER_INVALID) {
-				/* Cannot reset a timer with neither old nor
-				 * new timeout buffer */
-				success = false;
-				break;
-			}
-			/* Set up new values */
-			new.exp_tck.v = abs_tck;
-			new.tmo_buf = old.tmo_buf;
-			TB_SET_PAD(new);
-			/* Atomic CAS will fail if we experienced torn reads,
-			 * retry update sequence until CAS succeeds */
-		} while (!_odp_atomic_u128_cmp_xchg_mm(
-					(_odp_atomic_u128_t *)tb,
-					(_uint128_t *)&old,
-					(_uint128_t *)&new,
-					_ODP_MEMMODEL_RLS,
-					_ODP_MEMMODEL_RLX));
-#else
 #ifdef __ARM_ARCH
 		/* Since barriers are not good for C-A15, we take an
 		 * alternative approach using relaxed memory model */
@@ -453,7 +407,6 @@  static bool timer_reset(uint32_t idx,
 		/* Release the lock */
 		_odp_atomic_flag_clear(IDX2LOCK(idx));
 #endif
-#endif
 	} else {
 		/* We have a new timeout buffer which replaces any old one */
 		/* Fill in some (constant) header fields for timeout events */
@@ -468,19 +421,6 @@  static bool timer_reset(uint32_t idx,
 		}
 		/* Else ignore buffers of other types */
 		odp_buffer_t old_buf = ODP_BUFFER_INVALID;
-#ifdef ODP_ATOMIC_U128
-		tick_buf_t new, old;
-		new.exp_tck.v = abs_tck;
-		new.tmo_buf = *tmo_buf;
-		TB_SET_PAD(new);
-		/* We are releasing the new timeout buffer to some other
-		 * thread */
-		_odp_atomic_u128_xchg_mm((_odp_atomic_u128_t *)tb,
-					 (_uint128_t *)&new,
-					 (_uint128_t *)&old,
-					 _ODP_MEMMODEL_ACQ_RLS);
-		old_buf = old.tmo_buf;
-#else
 		/* Take a related lock */
 		while (_odp_atomic_flag_tas(IDX2LOCK(idx)))
 			/* While lock is taken, spin using relaxed loads */
@@ -496,7 +436,6 @@  static bool timer_reset(uint32_t idx,
 
 		/* Release the lock */
 		_odp_atomic_flag_clear(IDX2LOCK(idx));
-#endif
 		/* Return old timeout buffer */
 		*tmo_buf = old_buf;
 	}
@@ -510,18 +449,6 @@  static odp_buffer_t timer_cancel(odp_timer_pool *tp,
 	tick_buf_t *tb = &tp->tick_buf[idx];
 	odp_buffer_t old_buf;
 
-#ifdef ODP_ATOMIC_U128
-	tick_buf_t new, old;
-	/* Update the timer state (e.g. cancel the current timeout) */
-	new.exp_tck.v = new_state;
-	/* Swap out the old buffer */
-	new.tmo_buf = ODP_BUFFER_INVALID;
-	TB_SET_PAD(new);
-	_odp_atomic_u128_xchg_mm((_odp_atomic_u128_t *)tb,
-				 (_uint128_t *)&new, (_uint128_t *)&old,
-				 _ODP_MEMMODEL_RLX);
-	old_buf = old.tmo_buf;
-#else
 	/* Take a related lock */
 	while (_odp_atomic_flag_tas(IDX2LOCK(idx)))
 		/* While lock is taken, spin using relaxed loads */
@@ -537,7 +464,6 @@  static odp_buffer_t timer_cancel(odp_timer_pool *tp,
 
 	/* Release the lock */
 	_odp_atomic_flag_clear(IDX2LOCK(idx));
-#endif
 	/* Return the old buffer */
 	return old_buf;
 }
@@ -548,34 +474,6 @@  static unsigned timer_expire(odp_timer_pool *tp, uint32_t idx, uint64_t tick)
 	tick_buf_t *tb = &tp->tick_buf[idx];
 	odp_buffer_t tmo_buf = ODP_BUFFER_INVALID;
 	uint64_t exp_tck;
-#ifdef ODP_ATOMIC_U128
-	/* Atomic re-read for correctness */
-	exp_tck = _odp_atomic_u64_load_mm(&tb->exp_tck, _ODP_MEMMODEL_RLX);
-	/* Re-check exp_tck */
-	if (odp_likely(exp_tck <= tick)) {
-		/* Attempt to grab timeout buffer, replace with inactive timer
-		 * and invalid buffer */
-		tick_buf_t new, old;
-		old.exp_tck.v = exp_tck;
-		old.tmo_buf = tb->tmo_buf;
-		TB_SET_PAD(old);
-		/* Set the inactive/expired bit keeping the expiration tick so
-		 * that we can check against the expiration tick of the timeout
-		 * when it is received */
-		new.exp_tck.v = exp_tck | TMO_INACTIVE;
-		new.tmo_buf = ODP_BUFFER_INVALID;
-		TB_SET_PAD(new);
-		int succ = _odp_atomic_u128_cmp_xchg_mm(
-				(_odp_atomic_u128_t *)tb,
-				(_uint128_t *)&old, (_uint128_t *)&new,
-				_ODP_MEMMODEL_RLS, _ODP_MEMMODEL_RLX);
-		if (succ)
-			tmo_buf = old.tmo_buf;
-		/* Else CAS failed, something changed => skip timer
-		 * this tick, it will be checked again next tick */
-	}
-	/* Else false positive, ignore */
-#else
 	/* Take a related lock */
 	while (_odp_atomic_flag_tas(IDX2LOCK(idx)))
 		/* While lock is taken, spin using relaxed loads */
@@ -600,7 +498,6 @@  static unsigned timer_expire(odp_timer_pool *tp, uint32_t idx, uint64_t tick)
 	/* Else false positive, ignore */
 	/* Release the lock */
 	_odp_atomic_flag_clear(IDX2LOCK(idx));
-#endif
 	if (odp_likely(tmo_buf != ODP_BUFFER_INVALID)) {
 		/* Fill in expiration tick for timeout events */
 		if (odp_event_type(odp_buffer_to_event(tmo_buf)) ==
@@ -974,13 +871,9 @@  void odp_timeout_free(odp_timeout_t tmo)
 
 int odp_timer_init_global(void)
 {
-#ifndef ODP_ATOMIC_U128
 	uint32_t i;
 	for (i = 0; i < NUM_LOCKS; i++)
 		_odp_atomic_flag_clear(&locks[i]);
-#else
-	ODP_DBG("Using lock-less timer implementation\n");
-#endif
 	odp_atomic_init_u32(&num_timer_pools, 0);
 
 	block_sigalarm();