diff mbox

[PATCHv2,1/2] linux-generic: odp_atomic_internal.h: add 128-bit atomics

Message ID 1463511498-855-2-git-send-email-ola.liljedahl@linaro.org
State Superseded
Headers show

Commit Message

Ola Liljedahl May 17, 2016, 6:58 p.m. UTC
(This document/code contribution attached is provided under the terms of
agreement LES-LTM-21309)

Add detection of availability of the -mcx16 compiler flag to
the configure script. This flag is necessary on x86-64 to enable
the cmpxchg16b instruction.
Implement 128-bit atomics if natively supported by the platform.
128-bit atomics are used by linux-generic timer implementation
on certain targets (e.g. x86-64) for lock-free implementation.

Signed-off-by: Ola Liljedahl <ola.liljedahl@linaro.org>
---
 configure.ac                                       | 13 +++++
 .../linux-generic/include/odp_atomic_internal.h    | 62 ++++++++++++++++++++++
 platform/linux-generic/odp_timer.c                 |  4 +-
 3 files changed, 76 insertions(+), 3 deletions(-)

Comments

Mike Holmes May 17, 2016, 7 p.m. UTC | #1
On 17 May 2016 at 14:58, Ola Liljedahl <ola.liljedahl@linaro.org> wrote:

> (This document/code contribution attached is provided under the terms of
> agreement LES-LTM-21309)
>
>
This was sent from a linaro address; the contribution agreements are already
defined.


> Add detection of availability of the -mcx16 compiler flag to
> the configure script. This flag is necessary on x86-64 to enable
> the cmpxchg16b instruction.
> Implement 128-bit atomics if natively supported by the platform.
> 128-bit atomics are used by linux-generic timer implementation
> on certain targets (e.g. x86-64) for lock-free implementation.
>
> Signed-off-by: Ola Liljedahl <ola.liljedahl@linaro.org>
> ---
>  configure.ac                                       | 13 +++++
>  .../linux-generic/include/odp_atomic_internal.h    | 62
> ++++++++++++++++++++++
>  platform/linux-generic/odp_timer.c                 |  4 +-
>  3 files changed, 76 insertions(+), 3 deletions(-)
>
> diff --git a/configure.ac b/configure.ac
> index c59d2d1..7cd6670 100644
> --- a/configure.ac
> +++ b/configure.ac
> @@ -207,6 +207,19 @@ ODP_CFLAGS="$ODP_CFLAGS -std=c99"
>  # Extra flags for example to suppress certain warning types
>  ODP_CFLAGS="$ODP_CFLAGS $ODP_CFLAGS_EXTRA"
>
> +#########################################################################
> +# Check if compiler supports cmpxchg16b
> +##########################################################################
> +my_save_cflags="$CFLAGS"
> +CFLAGS=-mcx16
> +AC_MSG_CHECKING([whether CC supports -mcx16])
> +AC_COMPILE_IFELSE([AC_LANG_PROGRAM([])],
> +       [AC_MSG_RESULT([yes])]
> +       [ODP_CFLAGS="$ODP_CFLAGS -mcx16"],
> +       [AC_MSG_RESULT([no])]
> +)
> +CFLAGS="$my_save_cflags"
> +
>  ##########################################################################
>  # Default include setup
>  ##########################################################################
> diff --git a/platform/linux-generic/include/odp_atomic_internal.h
> b/platform/linux-generic/include/odp_atomic_internal.h
> index 093280f..3c5606c 100644
> --- a/platform/linux-generic/include/odp_atomic_internal.h
> +++ b/platform/linux-generic/include/odp_atomic_internal.h
> @@ -587,6 +587,68 @@ static inline void
> _odp_atomic_flag_clear(_odp_atomic_flag_t *flag)
>         __atomic_clear(flag, __ATOMIC_RELEASE);
>  }
>
> +/* Check if target and compiler supports 128-bit scalars and corresponding
> + * exchange and CAS operations */
> +/* GCC on x86-64 needs -mcx16 compiler option */
> +#if defined __SIZEOF_INT128__ && defined
> __GCC_HAVE_SYNC_COMPARE_AND_SWAP_16
> +
> +/** Preprocessor symbol that indicates support for 128-bit atomics */
> +#define ODP_ATOMIC_U128
> +
> +/** An unsigned 128-bit (16-byte) scalar type */
> +typedef __int128 _uint128_t;
> +
> +/** Atomic 128-bit type */
> +typedef struct {
> +       _uint128_t v; /**< Actual storage for the atomic variable */
> +} _odp_atomic_u128_t ODP_ALIGNED(16);
> +
> +/**
> + * 16-byte atomic exchange operation
> + *
> + * @param ptr   Pointer to a 16-byte atomic variable
> + * @param val   Pointer to new value to write
> + * @param old   Pointer to location for old value
> + * @param mm    Memory model associated with the exchange operation
> + */
> +static inline void _odp_atomic_u128_xchg_mm(_odp_atomic_u128_t *ptr,
> +                                           _uint128_t *val,
> +               _uint128_t *old,
> +               _odp_memmodel_t mm)
> +{
> +       __atomic_exchange(&ptr->v, val, old, mm);
> +}
> +
> +/**
> + * Atomic compare and exchange (swap) of 16-byte atomic variable
> + * "Strong" semantics, will not fail spuriously.
> + *
> + * @param ptr   Pointer to a 16-byte atomic variable
> + * @param exp   Pointer to expected value (updated on failure)
> + * @param val   Pointer to new value to write
> + * @param succ  Memory model associated with a successful compare-and-swap
> + * operation
> + * @param fail  Memory model associated with a failed compare-and-swap
> + * operation
> + *
> + * @retval 1 exchange successful
> + * @retval 0 exchange failed and '*exp' updated with current value
> + */
> +static inline int _odp_atomic_u128_cmp_xchg_mm(_odp_atomic_u128_t *ptr,
> +                                              _uint128_t *exp,
> +                                              _uint128_t *val,
> +                                              _odp_memmodel_t succ,
> +                                              _odp_memmodel_t fail)
> +{
> +       return __atomic_compare_exchange(&ptr->v, exp, val,
> +                       false/*strong*/, succ, fail);
> +}
> +#endif
> +
> +/**
> + * @}
> + */
> +
>  #ifdef __cplusplus
>  }
>  #endif
> diff --git a/platform/linux-generic/odp_timer.c
> b/platform/linux-generic/odp_timer.c
> index 6b84309..41e7195 100644
> --- a/platform/linux-generic/odp_timer.c
> +++ b/platform/linux-generic/odp_timer.c
> @@ -11,9 +11,7 @@
>   *
>   */
>
> -/* Check if compiler supports 16-byte atomics. GCC needs -mcx16 flag on
> x86 */
> -/* Using spin lock actually seems faster on Core2 */
> -#ifdef ODP_ATOMIC_U128
> +#if __SIZEOF_POINTER__ != 8
>  /* TB_NEEDS_PAD defined if sizeof(odp_buffer_t) != 8 */
>  #define TB_NEEDS_PAD
>  #define TB_SET_PAD(x) ((x).pad = 0)
> --
> 2.5.0
>
> _______________________________________________
> lng-odp mailing list
> lng-odp@lists.linaro.org
> https://lists.linaro.org/mailman/listinfo/lng-odp
>
diff mbox

Patch

diff --git a/configure.ac b/configure.ac
index c59d2d1..7cd6670 100644
--- a/configure.ac
+++ b/configure.ac
@@ -207,6 +207,19 @@  ODP_CFLAGS="$ODP_CFLAGS -std=c99"
 # Extra flags for example to suppress certain warning types
 ODP_CFLAGS="$ODP_CFLAGS $ODP_CFLAGS_EXTRA"
 
+#########################################################################
+# Check if compiler supports cmpxchg16b
+##########################################################################
+my_save_cflags="$CFLAGS"
+CFLAGS=-mcx16
+AC_MSG_CHECKING([whether CC supports -mcx16])
+AC_COMPILE_IFELSE([AC_LANG_PROGRAM([])],
+       [AC_MSG_RESULT([yes])]
+       [ODP_CFLAGS="$ODP_CFLAGS -mcx16"],
+       [AC_MSG_RESULT([no])]
+)
+CFLAGS="$my_save_cflags"
+
 ##########################################################################
 # Default include setup
 ##########################################################################
diff --git a/platform/linux-generic/include/odp_atomic_internal.h b/platform/linux-generic/include/odp_atomic_internal.h
index 093280f..3c5606c 100644
--- a/platform/linux-generic/include/odp_atomic_internal.h
+++ b/platform/linux-generic/include/odp_atomic_internal.h
@@ -587,6 +587,68 @@  static inline void _odp_atomic_flag_clear(_odp_atomic_flag_t *flag)
 	__atomic_clear(flag, __ATOMIC_RELEASE);
 }
 
+/* Check if target and compiler supports 128-bit scalars and corresponding
+ * exchange and CAS operations */
+/* GCC on x86-64 needs -mcx16 compiler option */
+#if defined __SIZEOF_INT128__ && defined __GCC_HAVE_SYNC_COMPARE_AND_SWAP_16
+
+/** Preprocessor symbol that indicates support for 128-bit atomics */
+#define ODP_ATOMIC_U128
+
+/** An unsigned 128-bit (16-byte) scalar type */
+typedef __int128 _uint128_t;
+
+/** Atomic 128-bit type */
+typedef struct {
+	_uint128_t v; /**< Actual storage for the atomic variable */
+} _odp_atomic_u128_t ODP_ALIGNED(16);
+
+/**
+ * 16-byte atomic exchange operation
+ *
+ * @param ptr   Pointer to a 16-byte atomic variable
+ * @param val   Pointer to new value to write
+ * @param old   Pointer to location for old value
+ * @param mm    Memory model associated with the exchange operation
+ */
+static inline void _odp_atomic_u128_xchg_mm(_odp_atomic_u128_t *ptr,
+					    _uint128_t *val,
+		_uint128_t *old,
+		_odp_memmodel_t mm)
+{
+	__atomic_exchange(&ptr->v, val, old, mm);
+}
+
+/**
+ * Atomic compare and exchange (swap) of 16-byte atomic variable
+ * "Strong" semantics, will not fail spuriously.
+ *
+ * @param ptr   Pointer to a 16-byte atomic variable
+ * @param exp   Pointer to expected value (updated on failure)
+ * @param val   Pointer to new value to write
+ * @param succ  Memory model associated with a successful compare-and-swap
+ * operation
+ * @param fail  Memory model associated with a failed compare-and-swap
+ * operation
+ *
+ * @retval 1 exchange successful
+ * @retval 0 exchange failed and '*exp' updated with current value
+ */
+static inline int _odp_atomic_u128_cmp_xchg_mm(_odp_atomic_u128_t *ptr,
+					       _uint128_t *exp,
+					       _uint128_t *val,
+					       _odp_memmodel_t succ,
+					       _odp_memmodel_t fail)
+{
+	return __atomic_compare_exchange(&ptr->v, exp, val,
+			false/*strong*/, succ, fail);
+}
+#endif
+
+/**
+ * @}
+ */
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/platform/linux-generic/odp_timer.c b/platform/linux-generic/odp_timer.c
index 6b84309..41e7195 100644
--- a/platform/linux-generic/odp_timer.c
+++ b/platform/linux-generic/odp_timer.c
@@ -11,9 +11,7 @@ 
  *
  */
 
-/* Check if compiler supports 16-byte atomics. GCC needs -mcx16 flag on x86 */
-/* Using spin lock actually seems faster on Core2 */
-#ifdef ODP_ATOMIC_U128
+#if __SIZEOF_POINTER__ != 8
 /* TB_NEEDS_PAD defined if sizeof(odp_buffer_t) != 8 */
 #define TB_NEEDS_PAD
 #define TB_SET_PAD(x) ((x).pad = 0)