[ARM/AArch64,10/11] Add missing tests for intrinsics operating on poly64 and poly128 types.

Message ID 1462973041-7911-11-git-send-email-christophe.lyon@linaro.org
State Accepted
Commit 0dcfe9ab48b775629e5a8f5894a017e2bacc1c58

Commit Message

Christophe Lyon May 11, 2016, 1:24 p.m. UTC
2016-05-02  Christophe Lyon  <christophe.lyon@linaro.org>

	* gcc.target/aarch64/advsimd-intrinsics/arm-neon-ref.h (result):
	Add poly64x1_t and poly64x2_t cases if supported.
	* gcc.target/aarch64/advsimd-intrinsics/compute-ref-data.h
	(buffer, buffer_pad, buffer_dup, buffer_dup_pad): Likewise.
	* gcc.target/aarch64/advsimd-intrinsics/p64_p128.c: New file.
	* gcc.target/aarch64/advsimd-intrinsics/vreinterpret_p128.c: New file.
	* gcc.target/aarch64/advsimd-intrinsics/vreinterpret_p64.c: New file.

Change-Id: Ie9bb0c4fd0b8f04fb37668cdb315eaafd06e55c4

-- 
1.9.1

Comments

James Greenhalgh May 13, 2016, 3:16 p.m. UTC | #1
On Wed, May 11, 2016 at 03:24:00PM +0200, Christophe Lyon wrote:
> 2016-05-02  Christophe Lyon  <christophe.lyon@linaro.org>
>
> 	* gcc.target/aarch64/advsimd-intrinsics/arm-neon-ref.h (result):
> 	Add poly64x1_t and poly64x2_t cases if supported.
> 	* gcc.target/aarch64/advsimd-intrinsics/compute-ref-data.h
> 	(buffer, buffer_pad, buffer_dup, buffer_dup_pad): Likewise.
> 	* gcc.target/aarch64/advsimd-intrinsics/p64_p128.c: New file.
> 	* gcc.target/aarch64/advsimd-intrinsics/vreinterpret_p128.c: New file.
> 	* gcc.target/aarch64/advsimd-intrinsics/vreinterpret_p64.c: New file.
>
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/p64_p128.c
> @@ -0,0 +1,665 @@
> +/* This file contains tests for all the *p64 intrinsics, except for
> +   the vreinterpret ones, which have their own testcases.  */
> +
> +/* { dg-require-effective-target arm_crypto_ok } */
> +/* { dg-add-options arm_crypto } */
> +
> +#include <arm_neon.h>
> +#include "arm-neon-ref.h"
> +#include "compute-ref-data.h"
> +
> +/* Expected results: vbsl.  */
> +VECT_VAR_DECL(vbsl_expected,poly,64,1) [] = { 0xfffffff1 };
> +VECT_VAR_DECL(vbsl_expected,poly,64,2) [] = { 0xfffffff1,
> +					      0xfffffff1 };
> +
> +/* Expected results: vceq.  */
> +VECT_VAR_DECL(vceq_expected,uint,64,1) [] = { 0x0 };

vceqq_p64
vceqz_p64
vceqzq_p64
vtst_p64
vtstq_p64

are missing, but will not be trivial to add. Could you raise a bug report
(or fix it if you like :-) )?

This is OK without a fix for those intrinsics with a suitable bug report
opened.

Thanks,
James
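
For reference, a minimal standalone sketch of what coverage for these
comparisons could look like, assuming an arm_neon.h that already provides
them (their absence is what makes this non-trivial); the real tests would
go through the arm-neon-ref.h macros rather than open-coded checks:

/* Hypothetical direct check for vceq_p64 semantics: an equal lane
   yields all-ones, an unequal lane all-zeros.  Assumes vceq_p64 is
   available, which is exactly what the bug report tracks.  */
#include <arm_neon.h>
#include <stdlib.h>

void check_vceq_p64_sketch (void)
{
  poly64x1_t a = vcreate_p64 (0xfffffffffffffff0ULL);
  poly64x1_t b = vcreate_p64 (0x88ULL);
  uint64x1_t eq = vceq_p64 (a, a);   /* expect 0xffffffffffffffff.  */
  uint64x1_t ne = vceq_p64 (a, b);   /* expect 0x0.  */
  if (vget_lane_u64 (eq, 0) != ~0ULL || vget_lane_u64 (ne, 0) != 0ULL)
    abort ();
}
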
Christophe Lyon May 23, 2016, 9:12 a.m. UTC | #2
On 13 May 2016 at 17:16, James Greenhalgh <james.greenhalgh@arm.com> wrote:
> On Wed, May 11, 2016 at 03:24:00PM +0200, Christophe Lyon wrote:
>> 2016-05-02  Christophe Lyon  <christophe.lyon@linaro.org>
>>
>>       * gcc.target/aarch64/advsimd-intrinsics/arm-neon-ref.h (result):
>>       Add poly64x1_t and poly64x2_t cases if supported.
>>       * gcc.target/aarch64/advsimd-intrinsics/compute-ref-data.h
>>       (buffer, buffer_pad, buffer_dup, buffer_dup_pad): Likewise.
>>       * gcc.target/aarch64/advsimd-intrinsics/p64_p128.c: New file.
>>       * gcc.target/aarch64/advsimd-intrinsics/vreinterpret_p128.c: New file.
>>       * gcc.target/aarch64/advsimd-intrinsics/vreinterpret_p64.c: New file.
>>
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/p64_p128.c
>> @@ -0,0 +1,665 @@
>> +/* This file contains tests for all the *p64 intrinsics, except for
>> +   the vreinterpret ones, which have their own testcases.  */
>> +
>> +/* { dg-require-effective-target arm_crypto_ok } */
>> +/* { dg-add-options arm_crypto } */
>> +
>> +#include <arm_neon.h>
>> +#include "arm-neon-ref.h"
>> +#include "compute-ref-data.h"
>> +
>> +/* Expected results: vbsl.  */
>> +VECT_VAR_DECL(vbsl_expected,poly,64,1) [] = { 0xfffffff1 };
>> +VECT_VAR_DECL(vbsl_expected,poly,64,2) [] = { 0xfffffff1,
>> +                                           0xfffffff1 };
>> +
>> +/* Expected results: vceq.  */
>> +VECT_VAR_DECL(vceq_expected,uint,64,1) [] = { 0x0 };
>
> vceqq_p64
> vceqz_p64
> vceqzq_p64
> vtst_p64
> vtstq_p64
>
> are missing, but will not be trivial to add. Could you raise a bug report
> (or fix it if you like :-) )?
>
> This is OK without a fix for those intrinsics with a suitable bug report
> opened.
>

OK, I've opened PR 71233 to track this.

Thanks,

Christophe.

> Thanks,
> James

Patch

diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/arm-neon-ref.h b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/arm-neon-ref.h
index a2c160c..8664dfc 100644
--- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/arm-neon-ref.h
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/arm-neon-ref.h
@@ -133,6 +133,9 @@  static ARRAY(result, uint, 32, 2);
 static ARRAY(result, uint, 64, 1);
 static ARRAY(result, poly, 8, 8);
 static ARRAY(result, poly, 16, 4);
+#if defined (__ARM_FEATURE_CRYPTO)
+static ARRAY(result, poly, 64, 1);
+#endif
 #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
 static ARRAY(result, float, 16, 4);
 #endif
@@ -147,6 +150,9 @@  static ARRAY(result, uint, 32, 4);
 static ARRAY(result, uint, 64, 2);
 static ARRAY(result, poly, 8, 16);
 static ARRAY(result, poly, 16, 8);
+#if defined (__ARM_FEATURE_CRYPTO)
+static ARRAY(result, poly, 64, 2);
+#endif
 #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
 static ARRAY(result, float, 16, 8);
 #endif
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/compute-ref-data.h b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/compute-ref-data.h
index c8d4336..f8c4aef 100644
--- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/compute-ref-data.h
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/compute-ref-data.h
@@ -118,6 +118,10 @@  VECT_VAR_DECL_INIT(buffer, uint, 32, 2);
 PAD(buffer_pad, uint, 32, 2);
 VECT_VAR_DECL_INIT(buffer, uint, 64, 1);
 PAD(buffer_pad, uint, 64, 1);
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL_INIT(buffer, poly, 64, 1);
+PAD(buffer_pad, poly, 64, 1);
+#endif
 #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
 VECT_VAR_DECL_INIT(buffer, float, 16, 4);
 PAD(buffer_pad, float, 16, 4);
@@ -144,6 +148,10 @@  VECT_VAR_DECL_INIT(buffer, poly, 8, 16);
 PAD(buffer_pad, poly, 8, 16);
 VECT_VAR_DECL_INIT(buffer, poly, 16, 8);
 PAD(buffer_pad, poly, 16, 8);
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL_INIT(buffer, poly, 64, 2);
+PAD(buffer_pad, poly, 64, 2);
+#endif
 #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
 VECT_VAR_DECL_INIT(buffer, float, 16, 8);
 PAD(buffer_pad, float, 16, 8);
@@ -178,6 +186,10 @@  VECT_VAR_DECL_INIT(buffer_dup, poly, 8, 8);
 VECT_VAR_DECL(buffer_dup_pad, poly, 8, 8);
 VECT_VAR_DECL_INIT(buffer_dup, poly, 16, 4);
 VECT_VAR_DECL(buffer_dup_pad, poly, 16, 4);
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL_INIT4(buffer_dup, poly, 64, 1);
+VECT_VAR_DECL(buffer_dup_pad, poly, 64, 1);
+#endif
 #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
 VECT_VAR_DECL_INIT4(buffer_dup, float, 16, 4);
 VECT_VAR_DECL(buffer_dup_pad, float, 16, 4);
@@ -205,6 +217,10 @@  VECT_VAR_DECL_INIT(buffer_dup, poly, 8, 16);
 VECT_VAR_DECL(buffer_dup_pad, poly, 8, 16);
 VECT_VAR_DECL_INIT(buffer_dup, poly, 16, 8);
 VECT_VAR_DECL(buffer_dup_pad, poly, 16, 8);
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL_INIT4(buffer_dup, poly, 64, 2);
+VECT_VAR_DECL(buffer_dup_pad, poly, 64, 2);
+#endif
 #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
 VECT_VAR_DECL_INIT(buffer_dup, float, 16, 8);
 VECT_VAR_DECL(buffer_dup_pad, float, 16, 8);
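
The guard pattern above is the crux of the header changes; here is a
self-contained sketch of it with hypothetical variable names, showing that
on targets where the poly64 vector types are only defined under the crypto
extension, every declaration mentioning them needs the same #if fence:

/* Sketch only: the names below are hypothetical, not part of the
   patch.  Without __ARM_FEATURE_CRYPTO these declarations must not be
   compiled, since poly64x1_t/poly64x2_t may not exist.  */
#include <arm_neon.h>

#if defined (__ARM_FEATURE_CRYPTO)
static poly64x1_t sketch_result_p64x1;   /* one 64-bit poly lane.  */
static poly64x2_t sketch_result_p64x2;   /* two 64-bit poly lanes.  */
#endif
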
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/p64_p128.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/p64_p128.c
new file mode 100644
index 0000000..ced3884
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/p64_p128.c
@@ -0,0 +1,665 @@ 
+/* This file contains tests for all the *p64 intrinsics, except for
+   the vreinterpret ones, which have their own testcases.  */
+
+/* { dg-require-effective-target arm_crypto_ok } */
+/* { dg-add-options arm_crypto } */
+
+#include <arm_neon.h>
+#include "arm-neon-ref.h"
+#include "compute-ref-data.h"
+
+/* Expected results: vbsl.  */
+VECT_VAR_DECL(vbsl_expected,poly,64,1) [] = { 0xfffffff1 };
+VECT_VAR_DECL(vbsl_expected,poly,64,2) [] = { 0xfffffff1,
+					      0xfffffff1 };
+
+/* Expected results: vceq.  */
+VECT_VAR_DECL(vceq_expected,uint,64,1) [] = { 0x0 };
+
+/* Expected results: vcombine.  */
+VECT_VAR_DECL(vcombine_expected,poly,64,2) [] = { 0xfffffffffffffff0, 0x88 };
+
+/* Expected results: vcreate.  */
+VECT_VAR_DECL(vcreate_expected,poly,64,1) [] = { 0x123456789abcdef0 };
+
+/* Expected results: vdup_lane.  */
+VECT_VAR_DECL(vdup_lane_expected,poly,64,1) [] = { 0xfffffffffffffff0 };
+VECT_VAR_DECL(vdup_lane_expected,poly,64,2) [] = { 0xfffffffffffffff0,
+						   0xfffffffffffffff0 };
+
+/* Expected results: vdup_n.  */
+VECT_VAR_DECL(vdup_n_expected0,poly,64,1) [] = { 0xfffffffffffffff0 };
+VECT_VAR_DECL(vdup_n_expected0,poly,64,2) [] = { 0xfffffffffffffff0,
+						 0xfffffffffffffff0 };
+VECT_VAR_DECL(vdup_n_expected1,poly,64,1) [] = { 0xfffffffffffffff1 };
+VECT_VAR_DECL(vdup_n_expected1,poly,64,2) [] = { 0xfffffffffffffff1,
+						 0xfffffffffffffff1 };
+VECT_VAR_DECL(vdup_n_expected2,poly,64,1) [] = { 0xfffffffffffffff2 };
+VECT_VAR_DECL(vdup_n_expected2,poly,64,2) [] = { 0xfffffffffffffff2,
+						 0xfffffffffffffff2 };
+
+/* Expected results: vext.  */
+VECT_VAR_DECL(vext_expected,poly,64,1) [] = { 0xfffffffffffffff0 };
+VECT_VAR_DECL(vext_expected,poly,64,2) [] = { 0xfffffffffffffff1, 0x88 };
+
+/* Expected results: vget_low.  */
+VECT_VAR_DECL(vget_low_expected,poly,64,1) [] = { 0xfffffffffffffff0 };
+
+/* Expected results: vld1.  */
+VECT_VAR_DECL(vld1_expected,poly,64,1) [] = { 0xfffffffffffffff0 };
+VECT_VAR_DECL(vld1_expected,poly,64,2) [] = { 0xfffffffffffffff0,
+					      0xfffffffffffffff1 };
+
+/* Expected results: vld1_dup.  */
+VECT_VAR_DECL(vld1_dup_expected0,poly,64,1) [] = { 0xfffffffffffffff0 };
+VECT_VAR_DECL(vld1_dup_expected0,poly,64,2) [] = { 0xfffffffffffffff0,
+						   0xfffffffffffffff0 };
+VECT_VAR_DECL(vld1_dup_expected1,poly,64,1) [] = { 0xfffffffffffffff1 };
+VECT_VAR_DECL(vld1_dup_expected1,poly,64,2) [] = { 0xfffffffffffffff1,
+						   0xfffffffffffffff1 };
+VECT_VAR_DECL(vld1_dup_expected2,poly,64,1) [] = { 0xfffffffffffffff2 };
+VECT_VAR_DECL(vld1_dup_expected2,poly,64,2) [] = { 0xfffffffffffffff2,
+						   0xfffffffffffffff2 };
+
+/* Expected results: vld1_lane.  */
+VECT_VAR_DECL(vld1_lane_expected,poly,64,1) [] = { 0xfffffffffffffff0 };
+VECT_VAR_DECL(vld1_lane_expected,poly,64,2) [] = { 0xfffffffffffffff0,
+						   0xaaaaaaaaaaaaaaaa };
+
+/* Expected results: vldX.  */
+VECT_VAR_DECL(vld2_expected_0,poly,64,1) [] = { 0xfffffffffffffff0 };
+VECT_VAR_DECL(vld2_expected_1,poly,64,1) [] = { 0xfffffffffffffff1 };
+VECT_VAR_DECL(vld3_expected_0,poly,64,1) [] = { 0xfffffffffffffff0 };
+VECT_VAR_DECL(vld3_expected_1,poly,64,1) [] = { 0xfffffffffffffff1 };
+VECT_VAR_DECL(vld3_expected_2,poly,64,1) [] = { 0xfffffffffffffff2 };
+VECT_VAR_DECL(vld4_expected_0,poly,64,1) [] = { 0xfffffffffffffff0 };
+VECT_VAR_DECL(vld4_expected_1,poly,64,1) [] = { 0xfffffffffffffff1 };
+VECT_VAR_DECL(vld4_expected_2,poly,64,1) [] = { 0xfffffffffffffff2 };
+VECT_VAR_DECL(vld4_expected_3,poly,64,1) [] = { 0xfffffffffffffff3 };
+
+/* Expected results: vldX_dup.  */
+VECT_VAR_DECL(vld2_dup_expected_0,poly,64,1) [] = { 0xfffffffffffffff0 };
+VECT_VAR_DECL(vld2_dup_expected_1,poly,64,1) [] = { 0xfffffffffffffff1 };
+VECT_VAR_DECL(vld3_dup_expected_0,poly,64,1) [] = { 0xfffffffffffffff0 };
+VECT_VAR_DECL(vld3_dup_expected_1,poly,64,1) [] = { 0xfffffffffffffff1 };
+VECT_VAR_DECL(vld3_dup_expected_2,poly,64,1) [] = { 0xfffffffffffffff2 };
+VECT_VAR_DECL(vld4_dup_expected_0,poly,64,1) [] = { 0xfffffffffffffff0 };
+VECT_VAR_DECL(vld4_dup_expected_1,poly,64,1) [] = { 0xfffffffffffffff1 };
+VECT_VAR_DECL(vld4_dup_expected_2,poly,64,1) [] = { 0xfffffffffffffff2 };
+VECT_VAR_DECL(vld4_dup_expected_3,poly,64,1) [] = { 0xfffffffffffffff3 };
+
+/* Expected results: vsli.  */
+VECT_VAR_DECL(vsli_expected,poly,64,1) [] = { 0x10 };
+VECT_VAR_DECL(vsli_expected,poly,64,2) [] = { 0x7ffffffffffff0,
+					      0x7ffffffffffff1 };
+VECT_VAR_DECL(vsli_expected_max_shift,poly,64,1) [] = { 0x7ffffffffffffff0 };
+VECT_VAR_DECL(vsli_expected_max_shift,poly,64,2) [] = { 0xfffffffffffffff0,
+							0xfffffffffffffff1 };
+
+/* Expected results: vsri.  */
+VECT_VAR_DECL(vsri_expected,poly,64,1) [] = { 0xe000000000000000 };
+VECT_VAR_DECL(vsri_expected,poly,64,2) [] = { 0xfffffffffffff800,
+					      0xfffffffffffff800 };
+VECT_VAR_DECL(vsri_expected_max_shift,poly,64,1) [] = { 0xfffffffffffffff0 };
+VECT_VAR_DECL(vsri_expected_max_shift,poly,64,2) [] = { 0xfffffffffffffff0,
+							0xfffffffffffffff1 };
+
+/* Expected results: vst1_lane.  */
+VECT_VAR_DECL(vst1_lane_expected,poly,64,1) [] = { 0xfffffffffffffff0 };
+VECT_VAR_DECL(vst1_lane_expected,poly,64,2) [] = { 0xfffffffffffffff0,
+						   0x3333333333333333 };
+
+int main (void)
+{
+  int i;
+
+  /* vbsl_p64 tests.  */
+#define TEST_MSG "VBSL/VBSLQ"
+
+#define TEST_VBSL(T3, Q, T1, T2, W, N)					\
+  VECT_VAR(vbsl_vector_res, T1, W, N) =					\
+    vbsl##Q##_##T2##W(VECT_VAR(vbsl_vector_first, T3, W, N),		\
+		      VECT_VAR(vbsl_vector, T1, W, N),			\
+		      VECT_VAR(vbsl_vector2, T1, W, N));		\
+  vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vbsl_vector_res, T1, W, N))
+
+  DECL_VARIABLE(vbsl_vector, poly, 64, 1);
+  DECL_VARIABLE(vbsl_vector, poly, 64, 2);
+  DECL_VARIABLE(vbsl_vector2, poly, 64, 1);
+  DECL_VARIABLE(vbsl_vector2, poly, 64, 2);
+  DECL_VARIABLE(vbsl_vector_res, poly, 64, 1);
+  DECL_VARIABLE(vbsl_vector_res, poly, 64, 2);
+
+  DECL_VARIABLE(vbsl_vector_first, uint, 64, 1);
+  DECL_VARIABLE(vbsl_vector_first, uint, 64, 2);
+
+  CLEAN(result, poly, 64, 1);
+  CLEAN(result, poly, 64, 2);
+
+  VLOAD(vbsl_vector, buffer, , poly, p, 64, 1);
+  VLOAD(vbsl_vector, buffer, q, poly, p, 64, 2);
+
+  VDUP(vbsl_vector2, , poly, p, 64, 1, 0xFFFFFFF3);
+  VDUP(vbsl_vector2, q, poly, p, 64, 2, 0xFFFFFFF3);
+
+  VDUP(vbsl_vector_first, , uint, u, 64, 1, 0xFFFFFFF2);
+  VDUP(vbsl_vector_first, q, uint, u, 64, 2, 0xFFFFFFF2);
+
+  TEST_VBSL(uint, , poly, p, 64, 1);
+  TEST_VBSL(uint, q, poly, p, 64, 2);
+
+  CHECK(TEST_MSG, poly, 64, 1, PRIx64, vbsl_expected, "");
+  CHECK(TEST_MSG, poly, 64, 2, PRIx64, vbsl_expected, "");
+
+  /* vceq_p64 tests. */
+#undef TEST_MSG
+#define TEST_MSG "VCEQ"
+
+#define TEST_VCOMP1(INSN, Q, T1, T2, T3, W, N)				\
+  VECT_VAR(vceq_vector_res, T3, W, N) =					\
+    INSN##Q##_##T2##W(VECT_VAR(vceq_vector, T1, W, N),			\
+		      VECT_VAR(vceq_vector2, T1, W, N));		\
+  vst1##Q##_u##W(VECT_VAR(result, T3, W, N), VECT_VAR(vceq_vector_res, T3, W, N))
+
+#define TEST_VCOMP(INSN, Q, T1, T2, T3, W, N)				\
+  TEST_VCOMP1(INSN, Q, T1, T2, T3, W, N)
+
+  DECL_VARIABLE(vceq_vector, poly, 64, 1);
+  DECL_VARIABLE(vceq_vector2, poly, 64, 1);
+  DECL_VARIABLE(vceq_vector_res, uint, 64, 1);
+
+  CLEAN(result, uint, 64, 1);
+
+  VLOAD(vceq_vector, buffer, , poly, p, 64, 1);
+
+  VDUP(vceq_vector2, , poly, p, 64, 1, 0x88);
+
+  TEST_VCOMP(vceq, , poly, p, uint, 64, 1);
+
+  CHECK(TEST_MSG, uint, 64, 1, PRIx64, vceq_expected, "");
+
+  /* vcombine_p64 tests.  */
+#undef TEST_MSG
+#define TEST_MSG "VCOMBINE"
+
+#define TEST_VCOMBINE(T1, T2, W, N, N2)					\
+  VECT_VAR(vcombine_vector128, T1, W, N2) =				\
+    vcombine_##T2##W(VECT_VAR(vcombine_vector64_a, T1, W, N),		\
+		     VECT_VAR(vcombine_vector64_b, T1, W, N));		\
+  vst1q_##T2##W(VECT_VAR(result, T1, W, N2), VECT_VAR(vcombine_vector128, T1, W, N2))
+
+  DECL_VARIABLE(vcombine_vector64_a, poly, 64, 1);
+  DECL_VARIABLE(vcombine_vector64_b, poly, 64, 1);
+  DECL_VARIABLE(vcombine_vector128, poly, 64, 2);
+
+  CLEAN(result, poly, 64, 2);
+
+  VLOAD(vcombine_vector64_a, buffer, , poly, p, 64, 1);
+
+  VDUP(vcombine_vector64_b, , poly, p, 64, 1, 0x88);
+
+  TEST_VCOMBINE(poly, p, 64, 1, 2);
+
+  CHECK(TEST_MSG, poly, 64, 2, PRIx64, vcombine_expected, "");
+
+  /* vcreate_p64 tests.  */
+#undef TEST_MSG
+#define TEST_MSG "VCREATE"
+
+#define TEST_VCREATE(T1, T2, W, N)					\
+  VECT_VAR(vcreate_vector_res, T1, W, N) =				\
+    vcreate_##T2##W(VECT_VAR(vcreate_val, T1, W, N));			\
+  vst1_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vcreate_vector_res, T1, W, N))
+
+#define DECL_VAL(VAR, T1, W, N)			\
+  uint64_t VECT_VAR(VAR, T1, W, N)
+
+  DECL_VAL(vcreate_val, poly, 64, 1);
+  DECL_VARIABLE(vcreate_vector_res, poly, 64, 1);
+
+  CLEAN(result, poly, 64, 1);
+
+  VECT_VAR(vcreate_val, poly, 64, 1) = 0x123456789abcdef0ULL;
+
+  TEST_VCREATE(poly, p, 64, 1);
+
+  CHECK(TEST_MSG, poly, 64, 1, PRIx64, vcreate_expected, "");
+
+  /* vdup_lane_p64 tests.  */
+#undef TEST_MSG
+#define TEST_MSG "VDUP_LANE/VDUP_LANEQ"
+
+#define TEST_VDUP_LANE(Q, T1, T2, W, N, N2, L)				\
+  VECT_VAR(vdup_lane_vector_res, T1, W, N) =				\
+    vdup##Q##_lane_##T2##W(VECT_VAR(vdup_lane_vector, T1, W, N2), L);	\
+  vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vdup_lane_vector_res, T1, W, N))
+
+  DECL_VARIABLE(vdup_lane_vector, poly, 64, 1);
+  DECL_VARIABLE(vdup_lane_vector, poly, 64, 2);
+  DECL_VARIABLE(vdup_lane_vector_res, poly, 64, 1);
+  DECL_VARIABLE(vdup_lane_vector_res, poly, 64, 2);
+
+  CLEAN(result, poly, 64, 1);
+  CLEAN(result, poly, 64, 2);
+
+  VLOAD(vdup_lane_vector, buffer, , poly, p, 64, 1);
+
+  TEST_VDUP_LANE(, poly, p, 64, 1, 1, 0);
+  TEST_VDUP_LANE(q, poly, p, 64, 2, 1, 0);
+
+  CHECK(TEST_MSG, poly, 64, 1, PRIx64, vdup_lane_expected, "");
+  CHECK(TEST_MSG, poly, 64, 2, PRIx64, vdup_lane_expected, "");
+
+  /* vdup_n_p64 tests.  */
+#undef TEST_MSG
+#define TEST_MSG "VDUP/VDUPQ"
+
+#define TEST_VDUP(Q, T1, T2, W, N)					\
+  VECT_VAR(vdup_n_vector, T1, W, N) =					\
+    vdup##Q##_n_##T2##W(VECT_VAR(buffer_dup, T1, W, N)[i]);		\
+  vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vdup_n_vector, T1, W, N))
+
+  DECL_VARIABLE(vdup_n_vector, poly, 64, 1);
+  DECL_VARIABLE(vdup_n_vector, poly, 64, 2);
+
+  /* Try to read different places from the input buffer.  */
+  for (i=0; i< 3; i++) {
+    CLEAN(result, poly, 64, 1);
+    CLEAN(result, poly, 64, 2);
+
+    TEST_VDUP(, poly, p, 64, 1);
+    TEST_VDUP(q, poly, p, 64, 2);
+
+    switch (i) {
+    case 0:
+      CHECK(TEST_MSG, poly, 64, 1, PRIx64, vdup_n_expected0, "");
+      CHECK(TEST_MSG, poly, 64, 2, PRIx64, vdup_n_expected0, "");
+      break;
+    case 1:
+      CHECK(TEST_MSG, poly, 64, 1, PRIx64, vdup_n_expected1, "");
+      CHECK(TEST_MSG, poly, 64, 2, PRIx64, vdup_n_expected1, "");
+      break;
+    case 2:
+      CHECK(TEST_MSG, poly, 64, 1, PRIx64, vdup_n_expected2, "");
+      CHECK(TEST_MSG, poly, 64, 2, PRIx64, vdup_n_expected2, "");
+      break;
+    default:
+      abort();
+    }
+  }
+
+  /* vext_p64 tests.  */
+#undef TEST_MSG
+#define TEST_MSG "VEXT/VEXTQ"
+
+#define TEST_VEXT(Q, T1, T2, W, N, V)					\
+  VECT_VAR(vext_vector_res, T1, W, N) =					\
+    vext##Q##_##T2##W(VECT_VAR(vext_vector1, T1, W, N),			\
+		      VECT_VAR(vext_vector2, T1, W, N),			\
+		      V);						\
+  vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vext_vector_res, T1, W, N))
+
+  DECL_VARIABLE(vext_vector1, poly, 64, 1);
+  DECL_VARIABLE(vext_vector1, poly, 64, 2);
+  DECL_VARIABLE(vext_vector2, poly, 64, 1);
+  DECL_VARIABLE(vext_vector2, poly, 64, 2);
+  DECL_VARIABLE(vext_vector_res, poly, 64, 1);
+  DECL_VARIABLE(vext_vector_res, poly, 64, 2);
+
+  CLEAN(result, poly, 64, 1);
+  CLEAN(result, poly, 64, 2);
+
+  VLOAD(vext_vector1, buffer, , poly, p, 64, 1);
+  VLOAD(vext_vector1, buffer, q, poly, p, 64, 2);
+
+  VDUP(vext_vector2, , poly, p, 64, 1, 0x88);
+  VDUP(vext_vector2, q, poly, p, 64, 2, 0x88);
+
+  TEST_VEXT(, poly, p, 64, 1, 0);
+  TEST_VEXT(q, poly, p, 64, 2, 1);
+
+  CHECK(TEST_MSG, poly, 64, 1, PRIx64, vext_expected, "");
+  CHECK(TEST_MSG, poly, 64, 2, PRIx64, vext_expected, "");
+
+  /* vget_low_p64 tests.  */
+#undef TEST_MSG
+#define TEST_MSG "VGET_LOW"
+
+#define TEST_VGET_LOW(T1, T2, W, N, N2)					\
+  VECT_VAR(vget_low_vector64, T1, W, N) =				\
+    vget_low_##T2##W(VECT_VAR(vget_low_vector128, T1, W, N2));		\
+  vst1_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vget_low_vector64, T1, W, N))
+
+  DECL_VARIABLE(vget_low_vector64, poly, 64, 1);
+  DECL_VARIABLE(vget_low_vector128, poly, 64, 2);
+
+  CLEAN(result, poly, 64, 1);
+
+  VLOAD(vget_low_vector128, buffer, q, poly, p, 64, 2);
+
+  TEST_VGET_LOW(poly, p, 64, 1, 2);
+
+  CHECK(TEST_MSG, poly, 64, 1, PRIx64, vget_low_expected, "");
+
+  /* vld1_p64 tests.  */
+#undef TEST_MSG
+#define TEST_MSG "VLD1/VLD1Q"
+
+#define TEST_VLD1(VAR, BUF, Q, T1, T2, W, N)				\
+  VECT_VAR(VAR, T1, W, N) = vld1##Q##_##T2##W(VECT_VAR(BUF, T1, W, N)); \
+  vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(VAR, T1, W, N))
+
+  DECL_VARIABLE(vld1_vector, poly, 64, 1);
+  DECL_VARIABLE(vld1_vector, poly, 64, 2);
+
+  CLEAN(result, poly, 64, 1);
+  CLEAN(result, poly, 64, 2);
+
+  VLOAD(vld1_vector, buffer, , poly, p, 64, 1);
+  VLOAD(vld1_vector, buffer, q, poly, p, 64, 2);
+
+  TEST_VLD1(vld1_vector, buffer, , poly, p, 64, 1);
+  TEST_VLD1(vld1_vector, buffer, q, poly, p, 64, 2);
+
+  CHECK(TEST_MSG, poly, 64, 1, PRIx64, vld1_expected, "");
+  CHECK(TEST_MSG, poly, 64, 2, PRIx64, vld1_expected, "");
+
+  /* vld1_dup_p64 tests.  */
+#undef TEST_MSG
+#define TEST_MSG "VLD1_DUP/VLD1_DUPQ"
+
+#define TEST_VLD1_DUP(VAR, BUF, Q, T1, T2, W, N)			\
+  VECT_VAR(VAR, T1, W, N) =						\
+    vld1##Q##_dup_##T2##W(&VECT_VAR(BUF, T1, W, N)[i]);			\
+  vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(VAR, T1, W, N))
+
+  DECL_VARIABLE(vld1_dup_vector, poly, 64, 1);
+  DECL_VARIABLE(vld1_dup_vector, poly, 64, 2);
+
+  /* Try to read different places from the input buffer.  */
+  for (i=0; i<3; i++) {
+    CLEAN(result, poly, 64, 1);
+    CLEAN(result, poly, 64, 2);
+
+    TEST_VLD1_DUP(vld1_dup_vector, buffer_dup, , poly, p, 64, 1);
+    TEST_VLD1_DUP(vld1_dup_vector, buffer_dup, q, poly, p, 64, 2);
+
+    switch (i) {
+    case 0:
+      CHECK(TEST_MSG, poly, 64, 1, PRIx64, vld1_dup_expected0, "");
+      CHECK(TEST_MSG, poly, 64, 2, PRIx64, vld1_dup_expected0, "");
+      break;
+    case 1:
+      CHECK(TEST_MSG, poly, 64, 1, PRIx64, vld1_dup_expected1, "");
+      CHECK(TEST_MSG, poly, 64, 2, PRIx64, vld1_dup_expected1, "");
+      break;
+    case 2:
+      CHECK(TEST_MSG, poly, 64, 1, PRIx64, vld1_dup_expected2, "");
+      CHECK(TEST_MSG, poly, 64, 2, PRIx64, vld1_dup_expected2, "");
+      break;
+    default:
+      abort();
+    }
+  }
+
+  /* vld1_lane_p64 tests.  */
+#undef TEST_MSG
+#define TEST_MSG "VLD1_LANE/VLD1_LANEQ"
+
+#define TEST_VLD1_LANE(Q, T1, T2, W, N, L)				\
+  memset (VECT_VAR(vld1_lane_buffer_src, T1, W, N), 0xAA, W/8*N);	\
+  VECT_VAR(vld1_lane_vector_src, T1, W, N) =				\
+    vld1##Q##_##T2##W(VECT_VAR(vld1_lane_buffer_src, T1, W, N));	\
+  VECT_VAR(vld1_lane_vector, T1, W, N) =				\
+    vld1##Q##_lane_##T2##W(VECT_VAR(buffer, T1, W, N),			\
+			   VECT_VAR(vld1_lane_vector_src, T1, W, N), L); \
+  vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vld1_lane_vector, T1, W, N))
+
+  DECL_VARIABLE(vld1_lane_vector, poly, 64, 1);
+  DECL_VARIABLE(vld1_lane_vector, poly, 64, 2);
+  DECL_VARIABLE(vld1_lane_vector_src, poly, 64, 1);
+  DECL_VARIABLE(vld1_lane_vector_src, poly, 64, 2);
+
+  ARRAY(vld1_lane_buffer_src, poly, 64, 1);
+  ARRAY(vld1_lane_buffer_src, poly, 64, 2);
+
+  CLEAN(result, poly, 64, 1);
+  CLEAN(result, poly, 64, 2);
+
+  TEST_VLD1_LANE(, poly, p, 64, 1, 0);
+  TEST_VLD1_LANE(q, poly, p, 64, 2, 0);
+
+  CHECK(TEST_MSG, poly, 64, 1, PRIx64, vld1_lane_expected, "");
+  CHECK(TEST_MSG, poly, 64, 2, PRIx64, vld1_lane_expected, "");
+
+  /* vldX_p64 tests.  */
+#define DECL_VLDX(T1, W, N, X)						\
+  VECT_ARRAY_TYPE(T1, W, N, X) VECT_ARRAY_VAR(vldX_vector, T1, W, N, X); \
+  VECT_VAR_DECL(vldX_result_bis_##X, T1, W, N)[X * N]
+
+#define TEST_VLDX(Q, T1, T2, W, N, X)					\
+  VECT_ARRAY_VAR(vldX_vector, T1, W, N, X) =				\
+    /* Use dedicated init buffer, of size X */				\
+    vld##X##Q##_##T2##W(VECT_ARRAY_VAR(buffer_vld##X, T1, W, N, X));	\
+  vst##X##Q##_##T2##W(VECT_VAR(vldX_result_bis_##X, T1, W, N),		\
+		      VECT_ARRAY_VAR(vldX_vector, T1, W, N, X));	\
+  memcpy(VECT_VAR(result, T1, W, N), VECT_VAR(vldX_result_bis_##X, T1, W, N), \
+	 sizeof(VECT_VAR(result, T1, W, N)));
+
+  /* Overwrite "result" with the contents of "result_bis"[Y].  */
+#define TEST_EXTRA_CHUNK(T1, W, N, X,Y)				\
+  memcpy(VECT_VAR(result, T1, W, N),				\
+	 &(VECT_VAR(vldX_result_bis_##X, T1, W, N)[Y*N]),	\
+	 sizeof(VECT_VAR(result, T1, W, N)));
+
+  DECL_VLDX(poly, 64, 1, 2);
+  DECL_VLDX(poly, 64, 1, 3);
+  DECL_VLDX(poly, 64, 1, 4);
+
+  VECT_ARRAY_INIT2(buffer_vld2, poly, 64, 1);
+  PAD(buffer_vld2_pad, poly, 64, 1);
+  VECT_ARRAY_INIT3(buffer_vld3, poly, 64, 1);
+  PAD(buffer_vld3_pad, poly, 64, 1);
+  VECT_ARRAY_INIT4(buffer_vld4, poly, 64, 1);
+  PAD(buffer_vld4_pad, poly, 64, 1);
+
+#undef TEST_MSG
+#define TEST_MSG "VLD2/VLD2Q"
+  CLEAN(result, poly, 64, 1);
+  TEST_VLDX(, poly, p, 64, 1, 2);
+  CHECK(TEST_MSG, poly, 64, 1, PRIx64, vld2_expected_0, "chunk 0");
+  CLEAN(result, poly, 64, 1);
+  TEST_EXTRA_CHUNK(poly, 64, 1, 2, 1);
+  CHECK(TEST_MSG, poly, 64, 1, PRIx64, vld2_expected_1, "chunk 1");
+
+#undef TEST_MSG
+#define TEST_MSG "VLD3/VLD3Q"
+  CLEAN(result, poly, 64, 1);
+  TEST_VLDX(, poly, p, 64, 1, 3);
+  CHECK(TEST_MSG, poly, 64, 1, PRIx64, vld3_expected_0, "chunk 0");
+  CLEAN(result, poly, 64, 1);
+  TEST_EXTRA_CHUNK(poly, 64, 1, 3, 1);
+  CHECK(TEST_MSG, poly, 64, 1, PRIx64, vld3_expected_1, "chunk 1");
+  CLEAN(result, poly, 64, 1);
+  TEST_EXTRA_CHUNK(poly, 64, 1, 3, 2);
+  CHECK(TEST_MSG, poly, 64, 1, PRIx64, vld3_expected_2, "chunk 2");
+
+#undef TEST_MSG
+#define TEST_MSG "VLD4/VLD4Q"
+  CLEAN(result, poly, 64, 1);
+  TEST_VLDX(, poly, p, 64, 1, 4);
+  CHECK(TEST_MSG, poly, 64, 1, PRIx64, vld4_expected_0, "chunk 0");
+  CLEAN(result, poly, 64, 1);
+  TEST_EXTRA_CHUNK(poly, 64, 1, 4, 1);
+  CHECK(TEST_MSG, poly, 64, 1, PRIx64, vld4_expected_1, "chunk 1");
+  CLEAN(result, poly, 64, 1);
+  TEST_EXTRA_CHUNK(poly, 64, 1, 4, 2);
+  CHECK(TEST_MSG, poly, 64, 1, PRIx64, vld4_expected_2, "chunk 2");
+  CLEAN(result, poly, 64, 1);
+  TEST_EXTRA_CHUNK(poly, 64, 1, 4, 3);
+  CHECK(TEST_MSG, poly, 64, 1, PRIx64, vld4_expected_3, "chunk 3");
+
+  /* vldX_dup_p64 tests.  */
+#define DECL_VLDX_DUP(T1, W, N, X)					\
+  VECT_ARRAY_TYPE(T1, W, N, X) VECT_ARRAY_VAR(vldX_dup_vector, T1, W, N, X); \
+  VECT_VAR_DECL(vldX_dup_result_bis_##X, T1, W, N)[X * N]
+
+#define TEST_VLDX_DUP(Q, T1, T2, W, N, X)				\
+  VECT_ARRAY_VAR(vldX_dup_vector, T1, W, N, X) =			\
+    vld##X##Q##_dup_##T2##W(&VECT_VAR(buffer_dup, T1, W, N)[0]);	\
+    									\
+  vst##X##Q##_##T2##W(VECT_VAR(vldX_dup_result_bis_##X, T1, W, N),	\
+		      VECT_ARRAY_VAR(vldX_dup_vector, T1, W, N, X));	\
+  memcpy(VECT_VAR(result, T1, W, N), VECT_VAR(vldX_dup_result_bis_##X, T1, W, N), \
+	 sizeof(VECT_VAR(result, T1, W, N)));
+
+  /* Overwrite "result" with the contents of "result_bis"[Y].  */
+#define TEST_VLDX_DUP_EXTRA_CHUNK(T1, W, N, X,Y)		\
+  memcpy(VECT_VAR(result, T1, W, N),				\
+	 &(VECT_VAR(vldX_dup_result_bis_##X, T1, W, N)[Y*N]),	\
+	 sizeof(VECT_VAR(result, T1, W, N)));
+
+  DECL_VLDX_DUP(poly, 64, 1, 2);
+  DECL_VLDX_DUP(poly, 64, 1, 3);
+  DECL_VLDX_DUP(poly, 64, 1, 4);
+
+
+#undef TEST_MSG
+#define TEST_MSG "VLD2_DUP/VLD2Q_DUP"
+  CLEAN(result, poly, 64, 1);
+  TEST_VLDX_DUP(, poly, p, 64, 1, 2);
+  CHECK(TEST_MSG, poly, 64, 1, PRIx64, vld2_dup_expected_0, "chunk 0");
+  CLEAN(result, poly, 64, 1);
+  TEST_VLDX_DUP_EXTRA_CHUNK(poly, 64, 1, 2, 1);
+  CHECK(TEST_MSG, poly, 64, 1, PRIx64, vld2_dup_expected_1, "chunk 1");
+
+#undef TEST_MSG
+#define TEST_MSG "VLD3_DUP/VLD3Q_DUP"
+  CLEAN(result, poly, 64, 1);
+  TEST_VLDX_DUP(, poly, p, 64, 1, 3);
+  CHECK(TEST_MSG, poly, 64, 1, PRIx64, vld3_dup_expected_0, "chunk 0");
+  CLEAN(result, poly, 64, 1);
+  TEST_VLDX_DUP_EXTRA_CHUNK(poly, 64, 1, 3, 1);
+  CHECK(TEST_MSG, poly, 64, 1, PRIx64, vld3_dup_expected_1, "chunk 1");
+  CLEAN(result, poly, 64, 1);
+  TEST_VLDX_DUP_EXTRA_CHUNK(poly, 64, 1, 3, 2);
+  CHECK(TEST_MSG, poly, 64, 1, PRIx64, vld3_dup_expected_2, "chunk 2");
+
+#undef TEST_MSG
+#define TEST_MSG "VLD4_DUP/VLD4Q_DUP"
+  CLEAN(result, poly, 64, 1);
+  TEST_VLDX_DUP(, poly, p, 64, 1, 4);
+  CHECK(TEST_MSG, poly, 64, 1, PRIx64, vld4_dup_expected_0, "chunk 0");
+  CLEAN(result, poly, 64, 1);
+  TEST_VLDX_DUP_EXTRA_CHUNK(poly, 64, 1, 4, 1);
+  CHECK(TEST_MSG, poly, 64, 1, PRIx64, vld4_dup_expected_1, "chunk 1");
+  CLEAN(result, poly, 64, 1);
+  TEST_VLDX_DUP_EXTRA_CHUNK(poly, 64, 1, 4, 2);
+  CHECK(TEST_MSG, poly, 64, 1, PRIx64, vld4_dup_expected_2, "chunk 2");
+  CLEAN(result, poly, 64, 1);
+  TEST_VLDX_DUP_EXTRA_CHUNK(poly, 64, 1, 4, 3);
+  CHECK(TEST_MSG, poly, 64, 1, PRIx64, vld4_dup_expected_3, "chunk 3");
+
+  /* vsli_p64 tests.  */
+#undef TEST_MSG
+#define TEST_MSG "VSLI"
+
+#define TEST_VSXI1(INSN, Q, T1, T2, W, N, V)				\
+  VECT_VAR(vsXi_vector_res, T1, W, N) =					\
+    INSN##Q##_n_##T2##W(VECT_VAR(vsXi_vector, T1, W, N),		\
+		      VECT_VAR(vsXi_vector2, T1, W, N),			\
+		      V);						\
+  vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vsXi_vector_res, T1, W, N))
+
+#define TEST_VSXI(INSN, Q, T1, T2, W, N, V)	\
+  TEST_VSXI1(INSN, Q, T1, T2, W, N, V)
+
+  DECL_VARIABLE(vsXi_vector, poly, 64, 1);
+  DECL_VARIABLE(vsXi_vector, poly, 64, 2);
+  DECL_VARIABLE(vsXi_vector2, poly, 64, 1);
+  DECL_VARIABLE(vsXi_vector2, poly, 64, 2);
+  DECL_VARIABLE(vsXi_vector_res, poly, 64, 1);
+  DECL_VARIABLE(vsXi_vector_res, poly, 64, 2);
+
+  CLEAN(result, poly, 64, 1);
+  CLEAN(result, poly, 64, 2);
+
+  VLOAD(vsXi_vector, buffer, , poly, p, 64, 1);
+  VLOAD(vsXi_vector, buffer, q, poly, p, 64, 2);
+
+  VDUP(vsXi_vector2, , poly, p, 64, 1, 2);
+  VDUP(vsXi_vector2, q, poly, p, 64, 2, 3);
+
+  TEST_VSXI(vsli, , poly, p, 64, 1, 3);
+  TEST_VSXI(vsli, q, poly, p, 64, 2, 53);
+
+  CHECK(TEST_MSG, poly, 64, 1, PRIx64, vsli_expected, "");
+  CHECK(TEST_MSG, poly, 64, 2, PRIx64, vsli_expected, "");
+
+  /* Test cases with maximum shift amount.  */
+  CLEAN(result, poly, 64, 1);
+  CLEAN(result, poly, 64, 2);
+
+  TEST_VSXI(vsli, , poly, p, 64, 1, 63);
+  TEST_VSXI(vsli, q, poly, p, 64, 2, 63);
+
+#define COMMENT "(max shift amount)"
+  CHECK(TEST_MSG, poly, 64, 1, PRIx64, vsli_expected_max_shift, COMMENT);
+  CHECK(TEST_MSG, poly, 64, 2, PRIx64, vsli_expected_max_shift, COMMENT);
+
+  /* vsri_p64 tests.  */
+#undef TEST_MSG
+#define TEST_MSG "VSRI"
+
+  CLEAN(result, poly, 64, 1);
+  CLEAN(result, poly, 64, 2);
+
+  VLOAD(vsXi_vector, buffer, , poly, p, 64, 1);
+  VLOAD(vsXi_vector, buffer, q, poly, p, 64, 2);
+
+  VDUP(vsXi_vector2, , poly, p, 64, 1, 2);
+  VDUP(vsXi_vector2, q, poly, p, 64, 2, 3);
+
+  TEST_VSXI(vsri, , poly, p, 64, 1, 3);
+  TEST_VSXI(vsri, q, poly, p, 64, 2, 53);
+
+  CHECK(TEST_MSG, poly, 64, 1, PRIx64, vsri_expected, "");
+  CHECK(TEST_MSG, poly, 64, 2, PRIx64, vsri_expected, "");
+
+  /* Test cases with maximum shift amount.  */
+  CLEAN(result, poly, 64, 1);
+  CLEAN(result, poly, 64, 2);
+
+  TEST_VSXI(vsri, , poly, p, 64, 1, 64);
+  TEST_VSXI(vsri, q, poly, p, 64, 2, 64);
+
+#define COMMENT "(max shift amount)"
+  CHECK(TEST_MSG, poly, 64, 1, PRIx64, vsri_expected_max_shift, COMMENT);
+  CHECK(TEST_MSG, poly, 64, 2, PRIx64, vsri_expected_max_shift, COMMENT);
+
+  /* vst1_lane_p64 tests.  */
+#undef TEST_MSG
+#define TEST_MSG "VST1_LANE/VST1_LANEQ"
+
+#define TEST_VST1_LANE(Q, T1, T2, W, N, L)				\
+  VECT_VAR(vst1_lane_vector, T1, W, N) =				\
+    vld1##Q##_##T2##W(VECT_VAR(buffer, T1, W, N));			\
+  vst1##Q##_lane_##T2##W(VECT_VAR(result, T1, W, N),			\
+			 VECT_VAR(vst1_lane_vector, T1, W, N), L)
+
+  DECL_VARIABLE(vst1_lane_vector, poly, 64, 1);
+  DECL_VARIABLE(vst1_lane_vector, poly, 64, 2);
+
+  CLEAN(result, poly, 64, 1);
+  CLEAN(result, poly, 64, 2);
+
+  TEST_VST1_LANE(, poly, p, 64, 1, 0);
+  TEST_VST1_LANE(q, poly, p, 64, 2, 0);
+
+  CHECK(TEST_MSG, poly, 64, 1, PRIx64, vst1_lane_expected, "");
+  CHECK(TEST_MSG, poly, 64, 2, PRIx64, vst1_lane_expected, "");
+
+  return 0;
+}
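
Most of the boilerplate in the file above is hidden behind token-pasting
macros from arm-neon-ref.h. As a rough illustration of the naming scheme
only (these are paraphrased stand-ins, not the exact harness definitions),
a VECT_VAR-style macro glues a variable name to a (type, width, lanes)
triple so each variant of a test gets a distinctly named variable:

/* Paraphrased sketch of the harness naming scheme; the SKETCH_* names
   are hypothetical.  */
#include <arm_neon.h>

#define SKETCH_NAME1(V, T) V##_##T
#define SKETCH_NAME(V, T) SKETCH_NAME1 (V, T)
#define SKETCH_VECT_NAME(T, W, N) T##W##x##N
#define SKETCH_VECT_VAR(V, T, W, N) \
  SKETCH_NAME (V, SKETCH_VECT_NAME (T, W, N))

#if defined (__ARM_FEATURE_CRYPTO)
/* SKETCH_VECT_VAR (vbsl_vector, poly, 64, 2) expands to the
   identifier vbsl_vector_poly64x2.  */
static poly64x2_t SKETCH_VECT_VAR (vbsl_vector, poly, 64, 2);
#endif
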
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vreinterpret_p128.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vreinterpret_p128.c
new file mode 100644
index 0000000..a049cb3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vreinterpret_p128.c
@@ -0,0 +1,151 @@ 
+/* This file contains tests for the vreinterpret *p128 intrinsics.  */
+
+/* { dg-require-effective-target arm_crypto_ok } */
+/* { dg-add-options arm_crypto } */
+
+#include <arm_neon.h>
+#include "arm-neon-ref.h"
+#include "compute-ref-data.h"
+
+/* Expected results: vreinterpretq_p128_*.  */
+VECT_VAR_DECL(vreint_expected_q_p128_s8,poly,64,2) [] = { 0xf7f6f5f4f3f2f1f0,
+							  0xfffefdfcfbfaf9f8 };
+VECT_VAR_DECL(vreint_expected_q_p128_s16,poly,64,2) [] = { 0xfff3fff2fff1fff0,
+							   0xfff7fff6fff5fff4 };
+VECT_VAR_DECL(vreint_expected_q_p128_s32,poly,64,2) [] = { 0xfffffff1fffffff0,
+							   0xfffffff3fffffff2 };
+VECT_VAR_DECL(vreint_expected_q_p128_s64,poly,64,2) [] = { 0xfffffffffffffff0,
+							   0xfffffffffffffff1 };
+VECT_VAR_DECL(vreint_expected_q_p128_u8,poly,64,2) [] = { 0xf7f6f5f4f3f2f1f0,
+							  0xfffefdfcfbfaf9f8 };
+VECT_VAR_DECL(vreint_expected_q_p128_u16,poly,64,2) [] = { 0xfff3fff2fff1fff0,
+							   0xfff7fff6fff5fff4 };
+VECT_VAR_DECL(vreint_expected_q_p128_u32,poly,64,2) [] = { 0xfffffff1fffffff0,
+							   0xfffffff3fffffff2 };
+VECT_VAR_DECL(vreint_expected_q_p128_u64,poly,64,2) [] = { 0xfffffffffffffff0,
+							   0xfffffffffffffff1 };
+VECT_VAR_DECL(vreint_expected_q_p128_p8,poly,64,2) [] = { 0xf7f6f5f4f3f2f1f0,
+							  0xfffefdfcfbfaf9f8 };
+VECT_VAR_DECL(vreint_expected_q_p128_p16,poly,64,2) [] = { 0xfff3fff2fff1fff0,
+							   0xfff7fff6fff5fff4 };
+VECT_VAR_DECL(vreint_expected_q_p128_f32,poly,64,2) [] = { 0xc1700000c1800000,
+							   0xc1500000c1600000 };
+
+/* Expected results: vreinterpretq_*_p128.  */
+VECT_VAR_DECL(vreint_expected_q_s8_p128,int,8,16) [] = { 0xf0, 0xff, 0xff, 0xff,
+							 0xff, 0xff, 0xff, 0xff,
+							 0xf1, 0xff, 0xff, 0xff,
+							 0xff, 0xff, 0xff, 0xff };
+VECT_VAR_DECL(vreint_expected_q_s16_p128,int,16,8) [] = { 0xfff0, 0xffff,
+							  0xffff, 0xffff,
+							  0xfff1, 0xffff,
+							  0xffff, 0xffff };
+VECT_VAR_DECL(vreint_expected_q_s32_p128,int,32,4) [] = { 0xfffffff0, 0xffffffff,
+							  0xfffffff1, 0xffffffff };
+VECT_VAR_DECL(vreint_expected_q_s64_p128,int,64,2) [] = { 0xfffffffffffffff0,
+							  0xfffffffffffffff1 };
+VECT_VAR_DECL(vreint_expected_q_u8_p128,uint,8,16) [] = { 0xf0, 0xff, 0xff, 0xff,
+							  0xff, 0xff, 0xff, 0xff,
+							  0xf1, 0xff, 0xff, 0xff,
+							  0xff, 0xff, 0xff, 0xff };
+VECT_VAR_DECL(vreint_expected_q_u16_p128,uint,16,8) [] = { 0xfff0, 0xffff,
+							   0xffff, 0xffff,
+							   0xfff1, 0xffff,
+							   0xffff, 0xffff };
+VECT_VAR_DECL(vreint_expected_q_u32_p128,uint,32,4) [] = { 0xfffffff0, 0xffffffff,
+							   0xfffffff1, 0xffffffff };
+VECT_VAR_DECL(vreint_expected_q_u64_p128,uint,64,2) [] = { 0xfffffffffffffff0,
+							   0xfffffffffffffff1 };
+VECT_VAR_DECL(vreint_expected_q_p8_p128,poly,8,16) [] = { 0xf0, 0xff, 0xff, 0xff,
+							  0xff, 0xff, 0xff, 0xff,
+							  0xf1, 0xff, 0xff, 0xff,
+							  0xff, 0xff, 0xff, 0xff };
+VECT_VAR_DECL(vreint_expected_q_p16_p128,poly,16,8) [] = { 0xfff0, 0xffff,
+							   0xffff, 0xffff,
+							   0xfff1, 0xffff,
+							   0xffff, 0xffff };
+VECT_VAR_DECL(vreint_expected_q_p64_p128,uint,64,2) [] = { 0xfffffffffffffff0,
+							   0xfffffffffffffff1 };
+VECT_VAR_DECL(vreint_expected_q_f32_p128,hfloat,32,4) [] = { 0xfffffff0, 0xffffffff,
+							     0xfffffff1, 0xffffffff };
+
+int main (void)
+{
+  DECL_VARIABLE_128BITS_VARIANTS(vreint_vector);
+  DECL_VARIABLE(vreint_vector, poly, 64, 2);
+  DECL_VARIABLE_128BITS_VARIANTS(vreint_vector_res);
+  DECL_VARIABLE(vreint_vector_res, poly, 64, 2);
+
+  clean_results ();
+
+  TEST_MACRO_128BITS_VARIANTS_2_5(VLOAD, vreint_vector, buffer);
+  VLOAD(vreint_vector, buffer, q, poly, p, 64, 2);
+  VLOAD(vreint_vector, buffer, q, float, f, 32, 4);
+
+  /* vreinterpretq_p128_* tests.  */
+#undef TEST_MSG
+#define TEST_MSG "VREINTERPRETQ_P128_*"
+
+  /* Since there is no way to store a poly128_t value, convert to
+     poly64x2_t before storing. This means that we are not able to
+     test vreinterpretq_p128* alone, and that errors in
+     vreinterpretq_p64_p128 could compensate for errors in
+     vreinterpretq_p128*.  */
+#define TEST_VREINTERPRET128(Q, T1, T2, W, N, TS1, TS2, WS, NS, EXPECTED) \
+  VECT_VAR(vreint_vector_res, poly, 64, 2) =  vreinterpretq_p64_p128(	\
+    vreinterpret##Q##_##T2##W##_##TS2##WS(VECT_VAR(vreint_vector, TS1, WS, NS))); \
+  vst1##Q##_##T2##64(VECT_VAR(result, poly, 64, 2),			\
+                     VECT_VAR(vreint_vector_res, poly, 64, 2));		\
+  CHECK(TEST_MSG, T1, 64, 2, PRIx##64, EXPECTED, "");
+
+  TEST_VREINTERPRET128(q, poly, p, 128, 1, int, s, 8, 16, vreint_expected_q_p128_s8);
+  TEST_VREINTERPRET128(q, poly, p, 128, 1, int, s, 16, 8, vreint_expected_q_p128_s16);
+  TEST_VREINTERPRET128(q, poly, p, 128, 1, int, s, 32, 4, vreint_expected_q_p128_s32);
+  TEST_VREINTERPRET128(q, poly, p, 128, 1, int, s, 64, 2, vreint_expected_q_p128_s64);
+  TEST_VREINTERPRET128(q, poly, p, 128, 1, uint, u, 8, 16, vreint_expected_q_p128_u8);
+  TEST_VREINTERPRET128(q, poly, p, 128, 1, uint, u, 16, 8, vreint_expected_q_p128_u16);
+  TEST_VREINTERPRET128(q, poly, p, 128, 1, uint, u, 32, 4, vreint_expected_q_p128_u32);
+  TEST_VREINTERPRET128(q, poly, p, 128, 1, uint, u, 64, 2, vreint_expected_q_p128_u64);
+  TEST_VREINTERPRET128(q, poly, p, 128, 1, poly, p, 8, 16, vreint_expected_q_p128_p8);
+  TEST_VREINTERPRET128(q, poly, p, 128, 1, poly, p, 16, 8, vreint_expected_q_p128_p16);
+  TEST_VREINTERPRET128(q, poly, p, 128, 1, float, f, 32, 4, vreint_expected_q_p128_f32);
+
+  /* vreinterpretq_*_p128 tests.  */
+#undef TEST_MSG
+#define TEST_MSG "VREINTERPRETQ_*_P128"
+
+  /* Since there is no way to load a poly128_t value, load a
+     poly64x2_t and convert it to poly128_t. This means that we are
+     not able to test vreinterpretq_*_p128 alone, and that errors in
+     vreinterpretq_p128_p64 could compensate for errors in
+     vreinterpretq_*_p128.  */
+#define TEST_VREINTERPRET_FROM_P128(Q, T1, T2, W, N, TS1, TS2, WS, NS, EXPECTED) \
+  VECT_VAR(vreint_vector_res, T1, W, N) =				\
+    vreinterpret##Q##_##T2##W##_##TS2##WS(				\
+  vreinterpretq_p128_p64(VECT_VAR(vreint_vector, TS1, 64, 2)));		\
+  vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N),				\
+		    VECT_VAR(vreint_vector_res, T1, W, N));		\
+  CHECK(TEST_MSG, T1, W, N, PRIx##W, EXPECTED, "");
+
+#define TEST_VREINTERPRET_FP_FROM_P128(Q, T1, T2, W, N, TS1, TS2, WS, NS, EXPECTED) \
+  VECT_VAR(vreint_vector_res, T1, W, N) =				\
+    vreinterpret##Q##_##T2##W##_##TS2##WS(				\
+  vreinterpretq_p128_p64(VECT_VAR(vreint_vector, TS1, 64, 2)));		\
+  vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N),				\
+		    VECT_VAR(vreint_vector_res, T1, W, N));		\
+  CHECK_FP(TEST_MSG, T1, W, N, PRIx##W, EXPECTED, "");
+
+  TEST_VREINTERPRET_FROM_P128(q, int, s, 8, 16, poly, p, 128, 1, vreint_expected_q_s8_p128);
+  TEST_VREINTERPRET_FROM_P128(q, int, s, 16, 8, poly, p, 128, 1, vreint_expected_q_s16_p128);
+  TEST_VREINTERPRET_FROM_P128(q, int, s, 32, 4, poly, p, 128, 1, vreint_expected_q_s32_p128);
+  TEST_VREINTERPRET_FROM_P128(q, int, s, 64, 2, poly, p, 128, 1, vreint_expected_q_s64_p128);
+  TEST_VREINTERPRET_FROM_P128(q, uint, u, 8, 16, poly, p, 128, 1, vreint_expected_q_u8_p128);
+  TEST_VREINTERPRET_FROM_P128(q, uint, u, 16, 8, poly, p, 128, 1, vreint_expected_q_u16_p128);
+  TEST_VREINTERPRET_FROM_P128(q, uint, u, 32, 4, poly, p, 128, 1, vreint_expected_q_u32_p128);
+  TEST_VREINTERPRET_FROM_P128(q, uint, u, 64, 2, poly, p, 128, 1, vreint_expected_q_u64_p128);
+  TEST_VREINTERPRET_FROM_P128(q, poly, p, 8, 16, poly, p, 128, 1, vreint_expected_q_p8_p128);
+  TEST_VREINTERPRET_FROM_P128(q, poly, p, 16, 8, poly, p, 128, 1, vreint_expected_q_p16_p128);
+  TEST_VREINTERPRET_FP_FROM_P128(q, float, f, 32, 4, poly, p, 128, 1, vreint_expected_q_f32_p128);
+
+  return 0;
+}
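
The comments in this file describe its central workaround: there is no
vst1q_p128 or vld1q_p128, so every poly128_t value is round-tripped
through poly64x2_t. A minimal sketch of that round trip with plain
intrinsics (the function name is hypothetical; assumes an
arm_crypto-enabled arm_neon.h):

/* Store a poly128_t result by converting it to poly64x2_t first,
   mirroring what TEST_VREINTERPRET128 does via macros.  */
#include <arm_neon.h>

void store_p128_sketch (poly64_t *out, int8x16_t in)
{
  poly128_t as_p128 = vreinterpretq_p128_s8 (in);       /* under test.  */
  poly64x2_t as_p64 = vreinterpretq_p64_p128 (as_p128); /* helper.  */
  vst1q_p64 (out, as_p64);                              /* storable.  */
}
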
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vreinterpret_p64.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vreinterpret_p64.c
new file mode 100644
index 0000000..79f3cd6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vreinterpret_p64.c
@@ -0,0 +1,188 @@ 
+/* This file contains tests for the vreinterpret *p64 intrinsics.  */
+
+/* { dg-require-effective-target arm_crypto_ok } */
+/* { dg-add-options arm_crypto } */
+
+#include <arm_neon.h>
+#include "arm-neon-ref.h"
+#include "compute-ref-data.h"
+
+/* Expected results: vreinterpret_p64_*.  */
+VECT_VAR_DECL(vreint_expected_p64_s8,poly,64,1) [] = { 0xf7f6f5f4f3f2f1f0 };
+VECT_VAR_DECL(vreint_expected_p64_s16,poly,64,1) [] = { 0xfff3fff2fff1fff0 };
+VECT_VAR_DECL(vreint_expected_p64_s32,poly,64,1) [] = { 0xfffffff1fffffff0 };
+VECT_VAR_DECL(vreint_expected_p64_s64,poly,64,1) [] = { 0xfffffffffffffff0 };
+VECT_VAR_DECL(vreint_expected_p64_u8,poly,64,1) [] = { 0xf7f6f5f4f3f2f1f0 };
+VECT_VAR_DECL(vreint_expected_p64_u16,poly,64,1) [] = { 0xfff3fff2fff1fff0 };
+VECT_VAR_DECL(vreint_expected_p64_u32,poly,64,1) [] = { 0xfffffff1fffffff0 };
+VECT_VAR_DECL(vreint_expected_p64_u64,poly,64,1) [] = { 0xfffffffffffffff0 };
+VECT_VAR_DECL(vreint_expected_p64_p8,poly,64,1) [] = { 0xf7f6f5f4f3f2f1f0 };
+VECT_VAR_DECL(vreint_expected_p64_p16,poly,64,1) [] = { 0xfff3fff2fff1fff0 };
+VECT_VAR_DECL(vreint_expected_p64_f32,poly,64,1) [] = { 0xc1700000c1800000 };
+
+/* Expected results: vreinterpretq_p64_*.  */
+VECT_VAR_DECL(vreint_expected_q_p64_s8,poly,64,2) [] = { 0xf7f6f5f4f3f2f1f0,
+							 0xfffefdfcfbfaf9f8 };
+VECT_VAR_DECL(vreint_expected_q_p64_s16,poly,64,2) [] = { 0xfff3fff2fff1fff0,
+							  0xfff7fff6fff5fff4 };
+VECT_VAR_DECL(vreint_expected_q_p64_s32,poly,64,2) [] = { 0xfffffff1fffffff0,
+							  0xfffffff3fffffff2 };
+VECT_VAR_DECL(vreint_expected_q_p64_s64,poly,64,2) [] = { 0xfffffffffffffff0,
+							  0xfffffffffffffff1 };
+VECT_VAR_DECL(vreint_expected_q_p64_u8,poly,64,2) [] = { 0xf7f6f5f4f3f2f1f0,
+							 0xfffefdfcfbfaf9f8 };
+VECT_VAR_DECL(vreint_expected_q_p64_u16,poly,64,2) [] = { 0xfff3fff2fff1fff0,
+							  0xfff7fff6fff5fff4 };
+VECT_VAR_DECL(vreint_expected_q_p64_u32,poly,64,2) [] = { 0xfffffff1fffffff0,
+							  0xfffffff3fffffff2 };
+VECT_VAR_DECL(vreint_expected_q_p64_u64,poly,64,2) [] = { 0xfffffffffffffff0,
+							  0xfffffffffffffff1 };
+VECT_VAR_DECL(vreint_expected_q_p64_p8,poly,64,2) [] = { 0xf7f6f5f4f3f2f1f0,
+							 0xfffefdfcfbfaf9f8 };
+VECT_VAR_DECL(vreint_expected_q_p64_p16,poly,64,2) [] = { 0xfff3fff2fff1fff0,
+							  0xfff7fff6fff5fff4 };
+VECT_VAR_DECL(vreint_expected_q_p64_f32,poly,64,2) [] = { 0xc1700000c1800000,
+							  0xc1500000c1600000 };
+
+/* Expected results: vreinterpret_*_p64.  */
+VECT_VAR_DECL(vreint_expected_s8_p64,int,8,8) [] = { 0xf0, 0xff, 0xff, 0xff,
+						     0xff, 0xff, 0xff, 0xff };
+VECT_VAR_DECL(vreint_expected_s16_p64,int,16,4) [] = { 0xfff0, 0xffff, 0xffff, 0xffff };
+VECT_VAR_DECL(vreint_expected_s32_p64,int,32,2) [] = { 0xfffffff0, 0xffffffff };
+VECT_VAR_DECL(vreint_expected_s64_p64,int,64,1) [] = { 0xfffffffffffffff0 };
+VECT_VAR_DECL(vreint_expected_u8_p64,uint,8,8) [] = { 0xf0, 0xff, 0xff, 0xff,
+						      0xff, 0xff, 0xff, 0xff };
+VECT_VAR_DECL(vreint_expected_u16_p64,uint,16,4) [] = { 0xfff0, 0xffff, 0xffff, 0xffff };
+VECT_VAR_DECL(vreint_expected_u32_p64,uint,32,2) [] = { 0xfffffff0, 0xffffffff };
+VECT_VAR_DECL(vreint_expected_u64_p64,uint,64,1) [] = { 0xfffffffffffffff0 };
+VECT_VAR_DECL(vreint_expected_p8_p64,poly,8,8) [] = { 0xf0, 0xff, 0xff, 0xff,
+						      0xff, 0xff, 0xff, 0xff };
+VECT_VAR_DECL(vreint_expected_p16_p64,poly,16,4) [] = { 0xfff0, 0xffff, 0xffff, 0xffff };
+VECT_VAR_DECL(vreint_expected_f32_p64,hfloat,32,2) [] = { 0xfffffff0, 0xffffffff };
+
+/* Expected results: vreinterpretq_*_p64.  */
+VECT_VAR_DECL(vreint_expected_q_s8_p64,int,8,16) [] = { 0xf0, 0xff, 0xff, 0xff,
+							0xff, 0xff, 0xff, 0xff,
+							0xf1, 0xff, 0xff, 0xff,
+							0xff, 0xff, 0xff, 0xff };
+VECT_VAR_DECL(vreint_expected_q_s16_p64,int,16,8) [] = { 0xfff0, 0xffff,
+							 0xffff, 0xffff,
+							 0xfff1, 0xffff,
+							 0xffff, 0xffff };
+VECT_VAR_DECL(vreint_expected_q_s32_p64,int,32,4) [] = { 0xfffffff0, 0xffffffff,
+							 0xfffffff1, 0xffffffff };
+VECT_VAR_DECL(vreint_expected_q_s64_p64,int,64,2) [] = { 0xfffffffffffffff0,
+							 0xfffffffffffffff1 };
+VECT_VAR_DECL(vreint_expected_q_u8_p64,uint,8,16) [] = { 0xf0, 0xff, 0xff, 0xff,
+							 0xff, 0xff, 0xff, 0xff,
+							 0xf1, 0xff, 0xff, 0xff,
+							 0xff, 0xff, 0xff, 0xff };
+VECT_VAR_DECL(vreint_expected_q_u16_p64,uint,16,8) [] = { 0xfff0, 0xffff,
+							  0xffff, 0xffff,
+							  0xfff1, 0xffff,
+							  0xffff, 0xffff };
+VECT_VAR_DECL(vreint_expected_q_u32_p64,uint,32,4) [] = { 0xfffffff0, 0xffffffff,
+							  0xfffffff1, 0xffffffff };
+VECT_VAR_DECL(vreint_expected_q_u64_p64,uint,64,2) [] = { 0xfffffffffffffff0,
+							  0xfffffffffffffff1 };
+VECT_VAR_DECL(vreint_expected_q_p8_p64,poly,8,16) [] = { 0xf0, 0xff, 0xff, 0xff,
+							 0xff, 0xff, 0xff, 0xff,
+							 0xf1, 0xff, 0xff, 0xff,
+							 0xff, 0xff, 0xff, 0xff };
+VECT_VAR_DECL(vreint_expected_q_p16_p64,poly,16,8) [] = { 0xfff0, 0xffff,
+							  0xffff, 0xffff,
+							  0xfff1, 0xffff,
+							  0xffff, 0xffff };
+VECT_VAR_DECL(vreint_expected_q_f32_p64,hfloat,32,4) [] = { 0xfffffff0, 0xffffffff,
+							    0xfffffff1, 0xffffffff };
+
+int main (void)
+{
+#define TEST_VREINTERPRET(Q, T1, T2, W, N, TS1, TS2, WS, NS, EXPECTED)	\
+  VECT_VAR(vreint_vector_res, T1, W, N) =				\
+    vreinterpret##Q##_##T2##W##_##TS2##WS(VECT_VAR(vreint_vector, TS1, WS, NS)); \
+  vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N),				\
+		    VECT_VAR(vreint_vector_res, T1, W, N));		\
+  CHECK(TEST_MSG, T1, W, N, PRIx##W, EXPECTED, "");
+
+#define TEST_VREINTERPRET_FP(Q, T1, T2, W, N, TS1, TS2, WS, NS, EXPECTED) \
+  VECT_VAR(vreint_vector_res, T1, W, N) =				\
+    vreinterpret##Q##_##T2##W##_##TS2##WS(VECT_VAR(vreint_vector, TS1, WS, NS)); \
+  vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N),				\
+		    VECT_VAR(vreint_vector_res, T1, W, N));		\
+  CHECK_FP(TEST_MSG, T1, W, N, PRIx##W, EXPECTED, "");
+
+  DECL_VARIABLE_ALL_VARIANTS(vreint_vector);
+  DECL_VARIABLE(vreint_vector, poly, 64, 1);
+  DECL_VARIABLE(vreint_vector, poly, 64, 2);
+  DECL_VARIABLE_ALL_VARIANTS(vreint_vector_res);
+  DECL_VARIABLE(vreint_vector_res, poly, 64, 1);
+  DECL_VARIABLE(vreint_vector_res, poly, 64, 2);
+
+  clean_results ();
+
+  TEST_MACRO_ALL_VARIANTS_2_5(VLOAD, vreint_vector, buffer);
+  VLOAD(vreint_vector, buffer, , poly, p, 64, 1);
+  VLOAD(vreint_vector, buffer, q, poly, p, 64, 2);
+  VLOAD(vreint_vector, buffer, , float, f, 32, 2);
+  VLOAD(vreint_vector, buffer, q, float, f, 32, 4);
+
+  /* vreinterpret_p64_* tests.  */
+#undef TEST_MSG
+#define TEST_MSG "VREINTERPRET_P64_*"
+  TEST_VREINTERPRET(, poly, p, 64, 1, int, s, 8, 8, vreint_expected_p64_s8);
+  TEST_VREINTERPRET(, poly, p, 64, 1, int, s, 16, 4, vreint_expected_p64_s16);
+  TEST_VREINTERPRET(, poly, p, 64, 1, int, s, 32, 2, vreint_expected_p64_s32);
+  TEST_VREINTERPRET(, poly, p, 64, 1, int, s, 64, 1, vreint_expected_p64_s64);
+  TEST_VREINTERPRET(, poly, p, 64, 1, uint, u, 8, 8, vreint_expected_p64_u8);
+  TEST_VREINTERPRET(, poly, p, 64, 1, uint, u, 16, 4, vreint_expected_p64_u16);
+  TEST_VREINTERPRET(, poly, p, 64, 1, uint, u, 32, 2, vreint_expected_p64_u32);
+  TEST_VREINTERPRET(, poly, p, 64, 1, uint, u, 64, 1, vreint_expected_p64_u64);
+  TEST_VREINTERPRET(, poly, p, 64, 1, poly, p, 8, 8, vreint_expected_p64_p8);
+  TEST_VREINTERPRET(, poly, p, 64, 1, poly, p, 16, 4, vreint_expected_p64_p16);
+  TEST_VREINTERPRET(, poly, p, 64, 1, float, f, 32, 2, vreint_expected_p64_f32);
+
+  /* vreinterpretq_p64_* tests.  */
+#undef TEST_MSG
+#define TEST_MSG "VREINTERPRETQ_P64_*"
+  TEST_VREINTERPRET(q, poly, p, 64, 2, int, s, 8, 16, vreint_expected_q_p64_s8);
+  TEST_VREINTERPRET(q, poly, p, 64, 2, int, s, 16, 8, vreint_expected_q_p64_s16);
+  TEST_VREINTERPRET(q, poly, p, 64, 2, int, s, 32, 4, vreint_expected_q_p64_s32);
+  TEST_VREINTERPRET(q, poly, p, 64, 2, int, s, 64, 2, vreint_expected_q_p64_s64);
+  TEST_VREINTERPRET(q, poly, p, 64, 2, uint, u, 8, 16, vreint_expected_q_p64_u8);
+  TEST_VREINTERPRET(q, poly, p, 64, 2, uint, u, 16, 8, vreint_expected_q_p64_u16);
+  TEST_VREINTERPRET(q, poly, p, 64, 2, uint, u, 32, 4, vreint_expected_q_p64_u32);
+  TEST_VREINTERPRET(q, poly, p, 64, 2, uint, u, 64, 2, vreint_expected_q_p64_u64);
+  TEST_VREINTERPRET(q, poly, p, 64, 2, poly, p, 8, 16, vreint_expected_q_p64_p8);
+  TEST_VREINTERPRET(q, poly, p, 64, 2, poly, p, 16, 8, vreint_expected_q_p64_p16);
+  TEST_VREINTERPRET(q, poly, p, 64, 2, float, f, 32, 4, vreint_expected_q_p64_f32);
+
+  /* vreinterpret_*_p64 tests.  */
+#undef TEST_MSG
+#define TEST_MSG "VREINTERPRET_*_P64"
+
+  TEST_VREINTERPRET(, int, s, 8, 8, poly, p, 64, 1, vreint_expected_s8_p64);
+  TEST_VREINTERPRET(, int, s, 16, 4, poly, p, 64, 1, vreint_expected_s16_p64);
+  TEST_VREINTERPRET(, int, s, 32, 2, poly, p, 64, 1, vreint_expected_s32_p64);
+  TEST_VREINTERPRET(, int, s, 64, 1, poly, p, 64, 1, vreint_expected_s64_p64);
+  TEST_VREINTERPRET(, uint, u, 8, 8, poly, p, 64, 1, vreint_expected_u8_p64);
+  TEST_VREINTERPRET(, uint, u, 16, 4, poly, p, 64, 1, vreint_expected_u16_p64);
+  TEST_VREINTERPRET(, uint, u, 32, 2, poly, p, 64, 1, vreint_expected_u32_p64);
+  TEST_VREINTERPRET(, uint, u, 64, 1, poly, p, 64, 1, vreint_expected_u64_p64);
+  TEST_VREINTERPRET(, poly, p, 8, 8, poly, p, 64, 1, vreint_expected_p8_p64);
+  TEST_VREINTERPRET(, poly, p, 16, 4, poly, p, 64, 1, vreint_expected_p16_p64);
+  TEST_VREINTERPRET_FP(, float, f, 32, 2, poly, p, 64, 1, vreint_expected_f32_p64);
+  TEST_VREINTERPRET(q, int, s, 8, 16, poly, p, 64, 2, vreint_expected_q_s8_p64);
+  TEST_VREINTERPRET(q, int, s, 16, 8, poly, p, 64, 2, vreint_expected_q_s16_p64);
+  TEST_VREINTERPRET(q, int, s, 32, 4, poly, p, 64, 2, vreint_expected_q_s32_p64);
+  TEST_VREINTERPRET(q, int, s, 64, 2, poly, p, 64, 2, vreint_expected_q_s64_p64);
+  TEST_VREINTERPRET(q, uint, u, 8, 16, poly, p, 64, 2, vreint_expected_q_u8_p64);
+  TEST_VREINTERPRET(q, uint, u, 16, 8, poly, p, 64, 2, vreint_expected_q_u16_p64);
+  TEST_VREINTERPRET(q, uint, u, 32, 4, poly, p, 64, 2, vreint_expected_q_u32_p64);
+  TEST_VREINTERPRET(q, uint, u, 64, 2, poly, p, 64, 2, vreint_expected_q_u64_p64);
+  TEST_VREINTERPRET(q, poly, p, 8, 16, poly, p, 64, 2, vreint_expected_q_p8_p64);
+  TEST_VREINTERPRET(q, poly, p, 16, 8, poly, p, 64, 2, vreint_expected_q_p16_p64);
+  TEST_VREINTERPRET_FP(q, float, f, 32, 4, poly, p, 64, 2, vreint_expected_q_f32_p64);
+
+  return 0;
+}