[AArch64] Add more Poly64_t intrinsics to GCC

Message ID VI1PR0801MB2031F706F3D58C560E3CFDF0FF890@VI1PR0801MB2031.eurprd08.prod.outlook.com
State New

Commit Message

Tamar Christina Nov. 25, 2016, 10:46 a.m. UTC
Hi all,

This patch adds the following NEON intrinsics
to the AArch64 back-end of GCC:

* vsriq_n_p64
* vsri_n_p64
* vextq_p64
* vext_p64
* vceq_p64
* vbslq_p64
* vbsl_p64
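
For illustration only (not part of the patch; the values and test harness
are made up), a minimal usage sketch of the new shift-right-insert
intrinsic, assuming a compiler that already contains this patch:

#include <arm_neon.h>
#include <stdio.h>

int
main (void)
{
  /* vsri_n_p64 (a, b, n) shifts each element of b right by n and
     inserts the result into a, keeping the top n bits of a.  */
  poly64x1_t a = {0xaa00000000000000ULL};
  poly64x1_t b = {0xff00000000000000ULL};
  poly64x1_t r = vsri_n_p64 (a, b, 8);

  /* Top 8 bits come from a, the rest is b >> 8:
     prints aaff000000000000.  */
  printf ("%016llx\n", (unsigned long long) r[0]);
  return 0;
}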

Regression tested on aarch64-none-linux-gnu and on arm-none-linux-gnueabihf;
the new tests for these intrinsics are added in a separate patch series.

Ok for trunk?

Thanks,
Tamar

gcc/
2016-11-25  Tamar Christina  <tamar.christina@arm.com>

	* config/aarch64/aarch64-simd-builtins.def
	(BSL_P): Added di and v2di modes.
	* config/aarch64/arm_neon.h
	(vsriq_n_p64, vsri_n_p64): Added poly type.
	(vextq_p64, vext_p64): Likewise.
	(vceq_p64, vbslq_p64, vbsl_p64): Likewise.

Comments

James Greenhalgh Nov. 28, 2016, 12:28 p.m. UTC | #1
On Fri, Nov 25, 2016 at 10:46:49AM +0000, Tamar Christina wrote:
> Hi all,
> 
> This patch adds the following NEON intrinsics
> to the AArch64 back-end of GCC:
> 
> * vsriq_n_p64
> * vsri_n_p64
> * vextq_p64
> * vext_p64
> * vceq_p64
> * vbslq_p64
> * vbsl_p64
> 
> Regression tested on aarch64-none-linux-gnu and on arm-none-linux-gnueabihf;
> the new tests for these intrinsics are added in a separate patch series.
> 
> Ok for trunk?

OK.

Thanks,
James

> gcc/
> 2016-11-25  Tamar Christina  <tamar.christina@arm.com>
> 
> 	* config/aarch64/aarch64-simd-builtins.def
> 	(BSL_P): Added di and v2di modes.
> 	* config/aarch64/arm_neon.h
> 	(vsriq_n_p64, vsri_n_p64): Added poly type.
> 	(vextq_p64, vext_p64): Likewise.
> 	(vceq_p64, vbslq_p64, vbsl_p64): Likewise.

Patch

diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def b/gcc/config/aarch64/aarch64-simd-builtins.def
index e1154b4b27820c0075d9a9edb4f8b48ef4f06b07..49efeea6f90cf8535aec4b9287bc9b30b7b79e60 100644
--- a/gcc/config/aarch64/aarch64-simd-builtins.def
+++ b/gcc/config/aarch64/aarch64-simd-builtins.def
@@ -429,6 +429,7 @@ 
 
   /* Implemented by aarch64_simd_bsl<mode>.  */
   BUILTIN_VDQQH (BSL_P, simd_bsl, 0)
+  VAR2 (BSL_P, simd_bsl, 0, di, v2di)
   BUILTIN_VSDQ_I_DI (BSL_U, simd_bsl, 0)
   BUILTIN_VALLDIF (BSL_S, simd_bsl, 0)
 
diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h
index c463e3b698a47b9b5c5a04e0fb7fff1f71817af1..ddaaa4f8c5615b979df8f765760c41c8e158fba1 100644
--- a/gcc/config/aarch64/arm_neon.h
+++ b/gcc/config/aarch64/arm_neon.h
@@ -10164,6 +10164,19 @@  vrsqrteq_u32 (uint32x4_t a)
        result;                                                          \
      })
 
+#define vsri_n_p64(a, b, c)						\
+  __extension__								\
+    ({									\
+       poly64x1_t b_ = (b);						\
+       poly64x1_t a_ = (a);						\
+       poly64x1_t result;						\
+       __asm__ ("sri %d0,%d2,%3"					\
+		: "=w"(result)						\
+		: "0"(a_), "w"(b_), "i"(c)				\
+		: /* No clobbers.  */);					\
+       result;								\
+     })
+
 #define vsriq_n_p8(a, b, c)                                             \
   __extension__                                                         \
     ({                                                                  \
@@ -10190,6 +10203,19 @@  vrsqrteq_u32 (uint32x4_t a)
        result;                                                          \
      })
 
+#define vsriq_n_p64(a, b, c)						\
+  __extension__								\
+    ({									\
+       poly64x2_t b_ = (b);						\
+       poly64x2_t a_ = (a);						\
+       poly64x2_t result;						\
+       __asm__ ("sri %0.2d,%2.2d,%3"					\
+		: "=w"(result)						\
+		: "0"(a_), "w"(b_), "i"(c)				\
+		: /* No clobbers.  */);					\
+       result;								\
+     })
+
 __extension__ extern __inline uint8x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vtst_p8 (poly8x8_t a, poly8x8_t b)
@@ -11320,6 +11346,13 @@  vbsl_p16 (uint16x4_t __a, poly16x4_t __b, poly16x4_t __c)
 {
   return __builtin_aarch64_simd_bslv4hi_pupp (__a, __b, __c);
 }
+__extension__ extern __inline poly64x1_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vbsl_p64 (uint64x1_t __a, poly64x1_t __b, poly64x1_t __c)
+{
+  return (poly64x1_t)
+      {__builtin_aarch64_simd_bsldi_pupp (__a[0], __b[0], __c[0])};
+}
 
 __extension__ extern __inline int8x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
@@ -11428,6 +11461,13 @@  vbslq_s16 (uint16x8_t __a, int16x8_t __b, int16x8_t __c)
   return __builtin_aarch64_simd_bslv8hi_suss (__a, __b, __c);
 }
 
+__extension__ extern __inline poly64x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vbslq_p64 (uint64x2_t __a, poly64x2_t __b, poly64x2_t __c)
+{
+  return __builtin_aarch64_simd_bslv2di_pupp (__a, __b, __c);
+}
+
 __extension__ extern __inline int32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vbslq_s32 (uint32x4_t __a, int32x4_t __b, int32x4_t __c)
@@ -11959,6 +11999,13 @@  vceq_p8 (poly8x8_t __a, poly8x8_t __b)
   return (uint8x8_t) (__a == __b);
 }
 
+__extension__ extern __inline uint64x1_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vceq_p64 (poly64x1_t __a, poly64x1_t __b)
+{
+  return (uint64x1_t) (__a == __b);
+}
+
 __extension__ extern __inline uint8x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vceq_s8 (int8x8_t __a, int8x8_t __b)
@@ -15620,6 +15667,15 @@  vext_p16 (poly16x4_t __a, poly16x4_t __b, __const int __c)
 #endif
 }
 
+__extension__ extern __inline poly64x1_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vext_p64 (poly64x1_t __a, poly64x1_t __b, __const int __c)
+{
+  __AARCH64_LANE_CHECK (__a, __c);
+  /* The only possible index to the assembler instruction returns element 0.  */
+  return __a;
+}
+
 __extension__ extern __inline int8x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vext_s8 (int8x8_t __a, int8x8_t __b, __const int __c)
@@ -15788,6 +15844,18 @@  vextq_p16 (poly16x8_t __a, poly16x8_t __b, __const int __c)
 #endif
 }
 
+__extension__ extern __inline poly64x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vextq_p64 (poly64x2_t __a, poly64x2_t __b, __const int __c)
+{
+  __AARCH64_LANE_CHECK (__a, __c);
+#ifdef __AARCH64EB__
+  return __builtin_shuffle (__b, __a, (uint64x2_t) {2 - __c, 3 - __c});
+#else
+  return __builtin_shuffle (__a, __b, (uint64x2_t) {__c, __c + 1});
+#endif
+}
+
 __extension__ extern __inline int8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vextq_s8 (int8x16_t __a, int8x16_t __b, __const int __c)