diff mbox

[RFC,Testsuite,ARM] Neon intrinsics executable tests

Message ID CAKdteObMf3xutjh1KyEpvAxYS89FT84XfjuKyYkWvmvcQCAXnQ@mail.gmail.com
State New
Headers show

Commit Message

Christophe Lyon April 14, 2014, 10:16 p.m. UTC
Hi Ramana,

Here is an updated version of my proposal to include tests for Neon intrinsics.

With respect to my previous post, I have made a few changes:
- renamed the test files, removing the "ref_" prefix.
- removed the TEST_ prefix on some initialization macros
- use the c-torture framework

I have run it successfully on the following configurations:
    aarch64-none-linux-gnu
    aarch64-none-elf
    aarch64_be-none-elf
    arm-none-linux-gnueabihf
    armeb-none-linux-gnueabihf
    arm-none-linux-gnueabi
    armeb-none-linux-gnueabi
    arm-none-eabi
using qemu for most of them and the Foundation Model for aarch64*elf

Any comments?

Thanks,

Christophe.


On 29 October 2013 19:09, Christophe Lyon <christophe.lyon@linaro.org> wrote:
> On 29 October 2013 03:24, Ramana Radhakrishnan <ramrad01@arm.com> wrote:
>> On 10/09/13 23:16, Christophe Lyon wrote:
>
>> Irrespective of our earlier conversations on this now I'm actually wondering
>> if instead of doing this and integrating this in the GCC source base it
>> may be easier to write a harness to test this cross on qemu or natively.
>> Additionally setting up an auto-tester to do this might be a more productive
>> use of time rather than manually dejagnuizing this which appears to be a
>> tedious and slow process.
>
> This would be easy to set up, since the Makefile on gitorious is
> already targeting qemu. I used it occasionally on boards with
> minimal changes.
> This just means we'd have to agree on how to set up such an
> auto-tester, where do we send the results to, etc...
>
>>> I'd like your feedback before continuing, as there are a lot more
>>> files to come.
>>>
>>> I have made some cleanup to help review, but the two .h files will
>>> need to grow as more intrinsics will be added (see the original ones).
>>
>> Which one should I compare this with in terms of the original file ?
>
> I have kept the same file names.
>
>
>>> I'd like to keep the modifications at a minimal level, to save my time
>>> when adapting each test (there are currently 145 test files, so 143
>>> left:-).
>>
>>
>> On to the patch itself.
>>
>> The prefix TEST_ seems a bit misleading in that it suggests this is testing
>> something when in reality this is initializing stuff.
> In fact, TEST_XXXX executes the XXXX intrinsic, and copies the
> results to memory when relevant. But I can easily change TEST_ to
> something else.
>
> So in the sample I posted:
>
> TEST_VABA: VAR=vaba(....); vst1(BUFFER,VAR)
> TEST_VLD1: VAR=vld1(....); vst1(BUFFER, VAR)
>
> VDUP is special in that it is a helper for other tests:
> TEST_VDUP: VAR1=vdup(VAR2,xxxx)
> and similarly for TEST_VLOAD and TEST_VSETLANE
>
>>> +# Exit immediately if this isn't an ARM target.
>>> +if ![istarget arm*-*-*] then {
>>> +  return
>>> +}
>>
>>
>> Also for aarch64*-*-* as all these intrinsics are compatible with the
>> aarch64 port. I would also prefer that this be tortured over multiple
>> optimization levels as many times we find issues with different optimization
>> levels.
>
> OK, this sounds easy to do, and I agree. I preferred to post a simple
> version first.
> And given you talked to me about your plans to factorize arm and aarch64
> tests, I thought it was better to start with a simple version I knew
> was working.
>
>> More later I need to get back to something else and I need to play more with
>> your original testsuite - but I'd like some discussion around some of these
>> points anyway.
>>
>> Ramana
>
> OK thanks for the feedback. If we decide to go with auto-testers
> instead, the discussion will probably be shorter.
>
> Christophe
>
>>
>>> +
>>> +# Load support procs.
>>> +load_lib gcc-dg.exp
>>> +
>>> +# Initialize `dg'.
>>> +dg-init
>>> +
>>> +# Main loop.
>>> +gcc-dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/*.\[cCS\]]] \
>>> +       ""
>>> +
>>> +# All done.
>>> +dg-finish
>>> diff -rNup '--exclude=.git'
>>> gcc-fsf/gcc/testsuite/gcc.target/arm/neon-intrinsics/arm-neon-ref.h
>>> gcc-fsf-neontests/gcc/testsuite/gcc.target/arm/neon-intrinsics/arm-neon-ref.h
>>> --- gcc-fsf/gcc/testsuite/gcc.target/arm/neon-intrinsics/arm-neon-ref.h
>>> 1970-01-01 01:00:00.000000000 +0100
>>> +++
>>> gcc-fsf-neontests/gcc/testsuite/gcc.target/arm/neon-intrinsics/arm-neon-ref.h
>>> 2013-05-09 00:48:59.395628726 +0200
>>> @@ -0,0 +1,349 @@
>>> +#ifndef _ARM_NEON_REF_H_
>>> +#define _ARM_NEON_REF_H_
>>> +
>>> +#include <stdio.h>
>>> +#include <inttypes.h>
>>> +#include <string.h>
>>> +#include <stdlib.h>
>>> +
>>> +#define xSTR(X) #X
>>> +#define STR(X) xSTR(X)
>>> +
>>> +#define xNAME1(V,T) V ## _ ##  T
>>> +#define xNAME(V,T) xNAME1(V,T)
>>> +
>>> +#define VAR(V,T,W) xNAME(V,T##W)
>>> +#define VAR_DECL(V, T, W) T##W##_t VAR(V,T,W)
>>> +
>>> +#define VECT_NAME(T, W, N) T##W##x##N
>>> +#define VECT_ARRAY_NAME(T, W, N, L) T##W##x##N##x##L
>>> +#define VECT_TYPE(T, W, N) xNAME(VECT_NAME(T,W,N),t)
>>> +#define VECT_ARRAY_TYPE(T, W, N, L) xNAME(VECT_ARRAY_NAME(T,W,N,L),t)
>>> +
>>> +#define VECT_VAR(V,T,W,N) xNAME(V,VECT_NAME(T,W,N))
>>> +#define VECT_VAR_DECL(V, T, W, N) T##W##_t VECT_VAR(V,T,W,N)
>>> +
>>> +/* Array declarations.  */
>>> +#define ARRAY(V, T, W, N) VECT_VAR_DECL(V,T,W,N)[N]
>>> +
>>> +/* Check results vs expected values.  */
>>> +#define CHECK(MSG,T,W,N,FMT)                                           \
>>> +  for(i=0; i<N ; i++)                                                  \
>>> +    {                                                                  \
>>> +      if (VECT_VAR(result, T, W, N)[i] != VECT_VAR(expected, T, W, N)[i]) { \
>>> +       fprintf(stderr,                                                 \
>>> +               "ERROR in %s at type %s index %d: 0x%" FMT " != 0x%"    \
>>> +               FMT " (expected)\n",                                    \
>>> +               MSG, STR(VECT_NAME(T, W, N)), i,                        \
>>> +               VECT_VAR(result, T, W, N)[i],                           \
>>> +               VECT_VAR(expected, T, W, N)[i]);                        \
>>> +       abort();                                                        \
>>> +      }                                                                \
>>> +    }
>>> +
>>> +/* Floating-point variant.  */
>>> +#define CHECK_FP(MSG,T,W,N,FMT)                                        \
>>> +  for(i=0; i<N ; i++)                                                  \
>>> +    {                                                                  \
>>> +      union fp_operand {                                               \
>>> +       uint##W##_t i;                                                  \
>>> +       float##W##_t f;                                                 \
>>> +      } tmp_res, tmp_exp;                                              \
>>> +      tmp_res.f = VECT_VAR(result, T, W, N)[i];                        \
>>> +      tmp_exp.f = VECT_VAR(expected, T, W, N)[i];                      \
>>> +      if (tmp_res.i != tmp_exp.i) {                                    \
>>> +       fprintf(stderr,                                                 \
>>> +               "ERROR in %s at type %s index %d: 0x%" FMT " != 0x%"    \
>>> +               FMT " (expected)\n",                                    \
>>> +               MSG, STR(VECT_NAME(T, W, N)), i,                        \
>>> +               tmp_res.i,                                              \
>>> +               tmp_exp.i);                                             \
>>> +      abort();                                                         \
>>> +      }                                                                \
>>> +    }
>>> +
>>> +/* Clean buffer with a non-zero pattern.  */
>>> +#define CLEAN_PATTERN_8  0x33
>>> +
>>> +#define CLEAN(VAR,T,W,N)                                               \
>>> +  memset(VECT_VAR(VAR, T, W, N),                                       \
>>> +        CLEAN_PATTERN_8,                                               \
>>> +        sizeof(VECT_VAR(VAR, T, W, N)));
>>> +
>>> +/* Input buffers, one of each size.  */
>>> +extern ARRAY(buffer, int, 8, 8);
>>> +extern ARRAY(buffer, int, 16, 4);
>>> +extern ARRAY(buffer, int, 32, 2);
>>> +extern ARRAY(buffer, int, 64, 1);
>>> +extern ARRAY(buffer, uint, 8, 8);
>>> +extern ARRAY(buffer, uint, 16, 4);
>>> +extern ARRAY(buffer, uint, 32, 2);
>>> +extern ARRAY(buffer, uint, 64, 1);
>>> +extern ARRAY(buffer, poly, 8, 8);
>>> +extern ARRAY(buffer, poly, 16, 4);
>>> +extern ARRAY(buffer, float, 32, 2);
>>> +extern ARRAY(buffer, int, 8, 16);
>>> +extern ARRAY(buffer, int, 16, 8);
>>> +extern ARRAY(buffer, int, 32, 4);
>>> +extern ARRAY(buffer, int, 64, 2);
>>> +extern ARRAY(buffer, uint, 8, 16);
>>> +extern ARRAY(buffer, uint, 16, 8);
>>> +extern ARRAY(buffer, uint, 32, 4);
>>> +extern ARRAY(buffer, uint, 64, 2);
>>> +extern ARRAY(buffer, poly, 8, 16);
>>> +extern ARRAY(buffer, poly, 16, 8);
>>> +extern ARRAY(buffer, float, 32, 4);
>>> +
>>> +/* Output buffers, one of each size.  */
>>> +static ARRAY(result, int, 8, 8);
>>> +static ARRAY(result, int, 16, 4);
>>> +static ARRAY(result, int, 32, 2);
>>> +static ARRAY(result, int, 64, 1);
>>> +static ARRAY(result, uint, 8, 8);
>>> +static ARRAY(result, uint, 16, 4);
>>> +static ARRAY(result, uint, 32, 2);
>>> +static ARRAY(result, uint, 64, 1);
>>> +static ARRAY(result, poly, 8, 8);
>>> +static ARRAY(result, poly, 16, 4);
>>> +static ARRAY(result, float, 32, 2);
>>> +static ARRAY(result, int, 8, 16);
>>> +static ARRAY(result, int, 16, 8);
>>> +static ARRAY(result, int, 32, 4);
>>> +static ARRAY(result, int, 64, 2);
>>> +static ARRAY(result, uint, 8, 16);
>>> +static ARRAY(result, uint, 16, 8);
>>> +static ARRAY(result, uint, 32, 4);
>>> +static ARRAY(result, uint, 64, 2);
>>> +static ARRAY(result, poly, 8, 16);
>>> +static ARRAY(result, poly, 16, 8);
>>> +static ARRAY(result, float, 32, 4);
>>> +
>>> +/* Expected results, one of each size.  */
>>> +extern ARRAY(expected, int, 8, 8);
>>> +extern ARRAY(expected, int, 16, 4);
>>> +extern ARRAY(expected, int, 32, 2);
>>> +extern ARRAY(expected, int, 64, 1);
>>> +extern ARRAY(expected, uint, 8, 8);
>>> +extern ARRAY(expected, uint, 16, 4);
>>> +extern ARRAY(expected, uint, 32, 2);
>>> +extern ARRAY(expected, uint, 64, 1);
>>> +extern ARRAY(expected, poly, 8, 8);
>>> +extern ARRAY(expected, poly, 16, 4);
>>> +extern ARRAY(expected, float, 32, 2);
>>> +extern ARRAY(expected, int, 8, 16);
>>> +extern ARRAY(expected, int, 16, 8);
>>> +extern ARRAY(expected, int, 32, 4);
>>> +extern ARRAY(expected, int, 64, 2);
>>> +extern ARRAY(expected, uint, 8, 16);
>>> +extern ARRAY(expected, uint, 16, 8);
>>> +extern ARRAY(expected, uint, 32, 4);
>>> +extern ARRAY(expected, uint, 64, 2);
>>> +extern ARRAY(expected, poly, 8, 16);
>>> +extern ARRAY(expected, poly, 16, 8);
>>> +extern ARRAY(expected, float, 32, 4);
>>> +
>>> +/* Check results.  */
>>> +static void check_results (const char*test_name, const char*  comment)
>>> +{
>>> +  int i;
>>> +
>>> +  CHECK(test_name, int, 8, 8, PRIx8);
>>> +  CHECK(test_name, int, 16, 4, PRIx16);
>>> +  CHECK(test_name, int, 32, 2, PRIx32);
>>> +  CHECK(test_name, int, 64, 1, PRIx64);
>>> +  CHECK(test_name, uint, 8, 8, PRIx8);
>>> +  CHECK(test_name, uint, 16, 4, PRIx16);
>>> +  CHECK(test_name, uint, 32, 2, PRIx32);
>>> +  CHECK(test_name, uint, 64, 1, PRIx64);
>>> +  CHECK(test_name, poly, 8, 8, PRIx8);
>>> +  CHECK(test_name, poly, 16, 4, PRIx16);
>>> +  CHECK_FP(test_name, float, 32, 2, PRIx32);
>>> +
>>> +  CHECK(test_name, int, 8, 16, PRIx8);
>>> +  CHECK(test_name, int, 16, 8, PRIx16);
>>> +  CHECK(test_name, int, 32, 4, PRIx32);
>>> +  CHECK(test_name, int, 64, 2, PRIx64);
>>> +  CHECK(test_name, uint, 8, 16, PRIx8);
>>> +  CHECK(test_name, uint, 16, 8, PRIx16);
>>> +  CHECK(test_name, uint, 32, 4, PRIx32);
>>> +  CHECK(test_name, uint, 64, 2, PRIx64);
>>> +  CHECK(test_name, poly, 8, 16, PRIx8);
>>> +  CHECK(test_name, poly, 16, 8, PRIx16);
>>> +  CHECK_FP(test_name, float, 32, 4, PRIx32);
>>> +}
>>> +
>>> +/* Clean output buffers before execution.  */
>>> +static void clean_results (void)
>>> +{
>>> +  CLEAN(result, int, 8, 8);
>>> +  CLEAN(result, int, 16, 4);
>>> +  CLEAN(result, int, 32, 2);
>>> +  CLEAN(result, int, 64, 1);
>>> +  CLEAN(result, uint, 8, 8);
>>> +  CLEAN(result, uint, 16, 4);
>>> +  CLEAN(result, uint, 32, 2);
>>> +  CLEAN(result, uint, 64, 1);
>>> +  CLEAN(result, poly, 8, 8);
>>> +  CLEAN(result, poly, 16, 4);
>>> +  CLEAN(result, float, 32, 2);
>>> +
>>> +  CLEAN(result, int, 8, 16);
>>> +  CLEAN(result, int, 16, 8);
>>> +  CLEAN(result, int, 32, 4);
>>> +  CLEAN(result, int, 64, 2);
>>> +  CLEAN(result, uint, 8, 16);
>>> +  CLEAN(result, uint, 16, 8);
>>> +  CLEAN(result, uint, 32, 4);
>>> +  CLEAN(result, uint, 64, 2);
>>> +  CLEAN(result, poly, 8, 16);
>>> +  CLEAN(result, poly, 16, 8);
>>> +  CLEAN(result, float, 32, 4);
>>> +}
>>> +
>>> +
>>> +/* Helpers to declare variables of various types.   */
>>> +#define DECL_VARIABLE(VAR, T1, W, N)           \
>>> +  VECT_TYPE(T1, W, N) VECT_VAR(VAR, T1, W, N)
>>> +
>>> +#define DECL_VARIABLE_64BITS_SIGNED_VARIANTS(VAR)      \
>>> +  DECL_VARIABLE(VAR, int, 8, 8);                       \
>>> +  DECL_VARIABLE(VAR, int, 16, 4);                      \
>>> +  DECL_VARIABLE(VAR, int, 32, 2);                      \
>>> +  DECL_VARIABLE(VAR, int, 64, 1)
>>> +
>>> +#define DECL_VARIABLE_64BITS_UNSIGNED_VARIANTS(VAR)    \
>>> +  DECL_VARIABLE(VAR, uint, 8, 8);                      \
>>> +  DECL_VARIABLE(VAR, uint, 16, 4);                     \
>>> +  DECL_VARIABLE(VAR, uint, 32, 2);                     \
>>> +  DECL_VARIABLE(VAR, uint, 64, 1)
>>> +
>>> +#define DECL_VARIABLE_128BITS_SIGNED_VARIANTS(VAR)     \
>>> +  DECL_VARIABLE(VAR, int, 8, 16);                      \
>>> +  DECL_VARIABLE(VAR, int, 16, 8);                      \
>>> +  DECL_VARIABLE(VAR, int, 32, 4);                      \
>>> +  DECL_VARIABLE(VAR, int, 64, 2)
>>> +
>>> +#define DECL_VARIABLE_128BITS_UNSIGNED_VARIANTS(VAR)   \
>>> +  DECL_VARIABLE(VAR, uint, 8, 16);                     \
>>> +  DECL_VARIABLE(VAR, uint, 16, 8);                     \
>>> +  DECL_VARIABLE(VAR, uint, 32, 4);                     \
>>> +  DECL_VARIABLE(VAR, uint, 64, 2)
>>> +
>>> +#define DECL_VARIABLE_64BITS_VARIANTS(VAR)     \
>>> +  DECL_VARIABLE_64BITS_SIGNED_VARIANTS(VAR);   \
>>> +  DECL_VARIABLE_64BITS_UNSIGNED_VARIANTS(VAR); \
>>> +  DECL_VARIABLE(VAR, poly, 8, 8);              \
>>> +  DECL_VARIABLE(VAR, poly, 16, 4);             \
>>> +  DECL_VARIABLE(VAR, float, 32, 2)
>>> +
>>> +#define DECL_VARIABLE_128BITS_VARIANTS(VAR)    \
>>> +  DECL_VARIABLE_128BITS_SIGNED_VARIANTS(VAR);  \
>>> +  DECL_VARIABLE_128BITS_UNSIGNED_VARIANTS(VAR);        \
>>> +  DECL_VARIABLE(VAR, poly, 8, 16);             \
>>> +  DECL_VARIABLE(VAR, poly, 16, 8);             \
>>> +  DECL_VARIABLE(VAR, float, 32, 4)
>>> +
>>> +#define DECL_VARIABLE_ALL_VARIANTS(VAR)                \
>>> +  DECL_VARIABLE_64BITS_VARIANTS(VAR);          \
>>> +  DECL_VARIABLE_128BITS_VARIANTS(VAR)
>>> +
>>> +/* Helpers to initialize vectors.  */
>>> +#define TEST_VDUP(VAR, Q, T1, T2, W, N, V)             \
>>> +  VECT_VAR(VAR, T1, W, N) = vdup##Q##_n_##T2##W(V)
>>> +
>>> +#define TEST_VSET_LANE(VAR, Q, T1, T2, W, N, L, V)                     \
>>> +  VECT_VAR(VAR, T1, W, N) = vset##Q##_lane_##T2##W(V,                  \
>>> +                                                  VECT_VAR(VAR, T1, W, N), \
>>> +                                                  L)
>>> +
>>> +/* We need to load initial values first, so rely on VLD1.  */
>>> +#define TEST_VLOAD(VAR, BUF, Q, T1, T2, W, N)                          \
>>> +  VECT_VAR(VAR, T1, W, N) = vld1##Q##_##T2##W(VECT_VAR(BUF, T1, W, N))
>>> +
>>> +/* Helpers for macros with 1 constant and 5 variable arguments.  */
>>> +#define TEST_MACRO_64BITS_SIGNED_VARIANTS_1_5(MACRO, VAR)      \
>>> +  MACRO(VAR, , int, s, 8, 8);                                  \
>>> +  MACRO(VAR, , int, s, 16, 4);                                 \
>>> +  MACRO(VAR, , int, s, 32, 2);                                 \
>>> +  MACRO(VAR, , int, s, 64, 1)
>>> +
>>> +#define TEST_MACRO_64BITS_UNSIGNED_VARIANTS_1_5(MACRO, VAR)    \
>>> +  MACRO(VAR, , uint, u, 8, 8);                                 \
>>> +  MACRO(VAR, , uint, u, 16, 4);                                        \
>>> +  MACRO(VAR, , uint, u, 32, 2);                                        \
>>> +  MACRO(VAR, , uint, u, 64, 1)
>>> +
>>> +#define TEST_MACRO_128BITS_SIGNED_VARIANTS_1_5(MACRO, VAR)     \
>>> +  MACRO(VAR, q, int, s, 8, 16);                                        \
>>> +  MACRO(VAR, q, int, s, 16, 8);                                        \
>>> +  MACRO(VAR, q, int, s, 32, 4);                                        \
>>> +  MACRO(VAR, q, int, s, 64, 2)
>>> +
>>> +#define TEST_MACRO_128BITS_UNSIGNED_VARIANTS_1_5(MACRO,VAR)    \
>>> +  MACRO(VAR, q, uint, u, 8, 16);                               \
>>> +  MACRO(VAR, q, uint, u, 16, 8);                               \
>>> +  MACRO(VAR, q, uint, u, 32, 4);                               \
>>> +  MACRO(VAR, q, uint, u, 64, 2)
>>> +
>>> +#define TEST_MACRO_64BITS_VARIANTS_1_5(MACRO, VAR)     \
>>> +  TEST_MACRO_64BITS_SIGNED_VARIANTS_1_5(MACRO, VAR);   \
>>> +  TEST_MACRO_64BITS_UNSIGNED_VARIANTS_1_5(MACRO, VAR)
>>> +
>>> +#define TEST_MACRO_128BITS_VARIANTS_1_5(MACRO, VAR)    \
>>> +  TEST_MACRO_128BITS_SIGNED_VARIANTS_1_5(MACRO, VAR);  \
>>> +  TEST_MACRO_128BITS_UNSIGNED_VARIANTS_1_5(MACRO, VAR)
>>> +
>>> +#define TEST_MACRO_ALL_VARIANTS_1_5(MACRO, VAR)        \
>>> +  TEST_MACRO_64BITS_VARIANTS_1_5(MACRO, VAR);  \
>>> +  TEST_MACRO_128BITS_VARIANTS_1_5(MACRO, VAR)
>>> +
>>> +#define TEST_MACRO_SIGNED_VARIANTS_1_5(MACRO, VAR)     \
>>> +  TEST_MACRO_64BITS_SIGNED_VARIANTS_1_5(MACRO, VAR);   \
>>> +  TEST_MACRO_128BITS_SIGNED_VARIANTS_1_5(MACRO, VAR)
>>> +
>>> +/* Helpers for macros with 2 constant and 5 variable arguments.  */
>>> +#define TEST_MACRO_64BITS_SIGNED_VARIANTS_2_5(MACRO, VAR1, VAR2)       \
>>> +  MACRO(VAR1, VAR2, , int, s, 8, 8);                                   \
>>> +  MACRO(VAR1, VAR2, , int, s, 16, 4);                                  \
>>> +  MACRO(VAR1, VAR2, , int, s, 32, 2);                                  \
>>> +  MACRO(VAR1, VAR2 , , int, s, 64, 1)
>>> +
>>> +#define TEST_MACRO_64BITS_UNSIGNED_VARIANTS_2_5(MACRO, VAR1, VAR2)     \
>>> +  MACRO(VAR1, VAR2, , uint, u, 8, 8);                                  \
>>> +  MACRO(VAR1, VAR2, , uint, u, 16, 4);                                 \
>>> +  MACRO(VAR1, VAR2, , uint, u, 32, 2);                                 \
>>> +  MACRO(VAR1, VAR2, , uint, u, 64, 1)
>>> +
>>> +#define TEST_MACRO_128BITS_SIGNED_VARIANTS_2_5(MACRO, VAR1, VAR2)      \
>>> +  MACRO(VAR1, VAR2, q, int, s, 8, 16);                                 \
>>> +  MACRO(VAR1, VAR2, q, int, s, 16, 8);                                 \
>>> +  MACRO(VAR1, VAR2, q, int, s, 32, 4);                                 \
>>> +  MACRO(VAR1, VAR2, q, int, s, 64, 2)
>>> +
>>> +#define TEST_MACRO_128BITS_UNSIGNED_VARIANTS_2_5(MACRO, VAR1, VAR2)    \
>>> +  MACRO(VAR1, VAR2, q, uint, u, 8, 16);                                \
>>> +  MACRO(VAR1, VAR2, q, uint, u, 16, 8);                                \
>>> +  MACRO(VAR1, VAR2, q, uint, u, 32, 4);                                \
>>> +  MACRO(VAR1, VAR2, q, uint, u, 64, 2)
>>> +
>>> +#define TEST_MACRO_64BITS_VARIANTS_2_5(MACRO, VAR1, VAR2)      \
>>> +  TEST_MACRO_64BITS_SIGNED_VARIANTS_2_5(MACRO, VAR1, VAR2);    \
>>> +  TEST_MACRO_64BITS_UNSIGNED_VARIANTS_2_5(MACRO, VAR1, VAR2);  \
>>> +  MACRO(VAR1, VAR2, , poly, p, 8, 8);                          \
>>> +  MACRO(VAR1, VAR2, , poly, p, 16, 4)
>>> +
>>> +#define TEST_MACRO_128BITS_VARIANTS_2_5(MACRO, VAR1, VAR2)     \
>>> +  TEST_MACRO_128BITS_SIGNED_VARIANTS_2_5(MACRO, VAR1, VAR2);   \
>>> +  TEST_MACRO_128BITS_UNSIGNED_VARIANTS_2_5(MACRO, VAR1, VAR2); \
>>> +  MACRO(VAR1, VAR2, q, poly, p, 8, 16);                                \
>>> +  MACRO(VAR1, VAR2, q, poly, p, 16, 8)
>>> +
>>> +#define TEST_MACRO_ALL_VARIANTS_2_5(MACRO, VAR1, VAR2) \
>>> +  TEST_MACRO_64BITS_VARIANTS_2_5(MACRO, VAR1, VAR2);   \
>>> +  TEST_MACRO_128BITS_VARIANTS_2_5(MACRO, VAR1, VAR2)
>>> +
>>> +#define TEST_MACRO_SIGNED_VARIANTS_2_5(MACRO, VAR1, VAR2)      \
>>> +  TEST_MACRO_64BITS_SIGNED_VARIANTS_2_5(MACRO, VAR1, VAR2);    \
>>> +  TEST_MACRO_128BITS_SIGNED_VARIANTS_2_5(MACRO, VAR1, VAR2)
>>> +
>>> +#endif /* _ARM_NEON_REF_H_  */
>>> diff -rNup '--exclude=.git'
>>> gcc-fsf/gcc/testsuite/gcc.target/arm/neon-intrinsics/compute_ref_data.h
>>> gcc-fsf-neontests/gcc/testsuite/gcc.target/arm/neon-intrinsics/compute_ref_data.h
>>> ---
>>> gcc-fsf/gcc/testsuite/gcc.target/arm/neon-intrinsics/compute_ref_data.h
>>> 1970-01-01 01:00:00.000000000 +0100
>>> +++
>>> gcc-fsf-neontests/gcc/testsuite/gcc.target/arm/neon-intrinsics/compute_ref_data.h
>>> 2013-05-09 00:31:03.563656926 +0200
>>> @@ -0,0 +1,80 @@
>>> +#include <arm_neon.h>
>>> +#include "arm-neon-ref.h"
>>> +
>>> +/* Initialization helpers; 4 slices are needed for vld2, vld3 and
>>> +   vld4.  */
>>> +#define MY_INIT_TAB(T,W,N) xNAME(INIT_TAB,N)(T##W##_t)
>>> +#define MY_INIT_TAB2(T,W,N) xNAME(INIT_TAB2,N)(T##W##_t)
>>> +#define MY_INIT_TAB3(T,W,N) xNAME(INIT_TAB3,N)(T##W##_t)
>>> +#define MY_INIT_TAB4(T,W,N) xNAME(INIT_TAB4,N)(T##W##_t)
>>> +
>>> +/* Initialized input buffers.  */
>>> +#define VECT_VAR_DECL_INIT(V, T, W, N)                 \
>>> +  VECT_VAR_DECL(V,T,W,N) [] = { MY_INIT_TAB(T,W,N) };
>>> +
>>> +/* Sample initialization vectors.  */
>>> +#define INIT_TAB_1(T)                          \
>>> +  (T)-16,
>>> +
>>> +#define INIT_TAB_2(T)                          \
>>> +  (T)-16, (T)-15,
>>> +
>>> +#define INIT_TAB_4(T)                          \
>>> +  (T)-16, (T)-15, (T)-14, (T)-13,
>>> +
>>> +#define INIT_TAB_8(T)                                                  \
>>> +  (T)-16, (T)-15, (T)-14, (T)-13, (T)-12, (T)-11, (T)-10, (T)-9,
>>> +
>>> +#define INIT_TAB_16(T)                                                 \
>>> +  (T)-16, (T)-15, (T)-14, (T)-13, (T)-12, (T)-11, (T)-10, (T)-9,       \
>>> +  (T)-8, (T)-7, (T)-6, (T)-5, (T)-4, (T)-3, (T)-2, (T)-1,
>>> +
>>> +/* This one is used for padding between input buffers.  */
>>> +#define PAD(V, T, W, N) char VECT_VAR(V,T,W,N)=42;
>>> +
>>> +/* Input buffers, one of each size.  */
>>> +/* Insert some padding to try to exhibit out of bounds accesses.  */
>>> +VECT_VAR_DECL_INIT(buffer, int, 8, 8);
>>> +PAD(buffer_pad, int, 8, 8);
>>> +VECT_VAR_DECL_INIT(buffer, int, 16, 4);
>>> +PAD(buffer_pad, int, 16, 4);
>>> +VECT_VAR_DECL_INIT(buffer, int, 32, 2);
>>> +PAD(buffer_pad, int, 32, 2);
>>> +VECT_VAR_DECL_INIT(buffer, int, 64, 1);
>>> +PAD(buffer_pad, int, 64, 1);
>>> +VECT_VAR_DECL_INIT(buffer, uint, 8, 8);
>>> +PAD(buffer_pad, uint, 8, 8);
>>> +VECT_VAR_DECL_INIT(buffer, poly, 8, 8);
>>> +PAD(buffer_pad, poly, 8, 8);
>>> +VECT_VAR_DECL_INIT(buffer, poly, 16, 4);
>>> +PAD(buffer_pad, poly, 16, 4);
>>> +VECT_VAR_DECL_INIT(buffer, uint, 16, 4);
>>> +PAD(buffer_pad, uint, 16, 4);
>>> +VECT_VAR_DECL_INIT(buffer, uint, 32, 2);
>>> +PAD(buffer_pad, uint, 32, 2);
>>> +VECT_VAR_DECL_INIT(buffer, uint, 64, 1);
>>> +PAD(buffer_pad, uint, 64, 1);
>>> +VECT_VAR_DECL_INIT(buffer, float, 32, 2);
>>> +PAD(buffer_pad, float, 32, 2);
>>> +VECT_VAR_DECL_INIT(buffer, int, 8, 16);
>>> +PAD(buffer_pad, int, 8, 16);
>>> +VECT_VAR_DECL_INIT(buffer, int, 16, 8);
>>> +PAD(buffer_pad, int, 16, 8);
>>> +VECT_VAR_DECL_INIT(buffer, int, 32, 4);
>>> +PAD(buffer_pad, int, 32, 4);
>>> +VECT_VAR_DECL_INIT(buffer, int, 64, 2);
>>> +PAD(buffer_pad, int, 64, 2);
>>> +VECT_VAR_DECL_INIT(buffer, uint, 8, 16);
>>> +PAD(buffer_pad, uint, 8, 16);
>>> +VECT_VAR_DECL_INIT(buffer, uint, 16, 8);
>>> +PAD(buffer_pad, uint, 16, 8);
>>> +VECT_VAR_DECL_INIT(buffer, uint, 32, 4);
>>> +PAD(buffer_pad, uint, 32, 4);
>>> +VECT_VAR_DECL_INIT(buffer, uint, 64, 2);
>>> +PAD(buffer_pad, uint, 64, 2);
>>> +VECT_VAR_DECL_INIT(buffer, poly, 8, 16);
>>> +PAD(buffer_pad, poly, 8, 16);
>>> +VECT_VAR_DECL_INIT(buffer, poly, 16, 8);
>>> +PAD(buffer_pad, poly, 16, 8);
>>> +VECT_VAR_DECL_INIT(buffer, float, 32, 4);
>>> +PAD(buffer_pad, float, 32, 4);
>>> diff -rNup '--exclude=.git'
>>> gcc-fsf/gcc/testsuite/gcc.target/arm/neon-intrinsics/ref_vaba.c
>>> gcc-fsf-neontests/gcc/testsuite/gcc.target/arm/neon-intrinsics/ref_vaba.c
>>> --- gcc-fsf/gcc/testsuite/gcc.target/arm/neon-intrinsics/ref_vaba.c
>>> 1970-01-01 01:00:00.000000000 +0100
>>> +++
>>> gcc-fsf-neontests/gcc/testsuite/gcc.target/arm/neon-intrinsics/ref_vaba.c
>>> 2013-05-09 00:40:27.611642141 +0200
>>> @@ -0,0 +1,145 @@
>>> +/* { dg-do run } */
>>> +/* { dg-require-effective-target arm_neon_hw } */
>>> +/* { dg-add-options arm_neon } */
>>> +
>>> +#include <arm_neon.h>
>>> +#include "arm-neon-ref.h"
>>> +#include "compute_ref_data.h"
>>> +
>>> +VECT_VAR_DECL(expected,int,8,8) [] = { 0xf6, 0xf7, 0xf8, 0xf9,
>>> +                                      0xfa, 0xfb, 0xfc, 0xfd };
>>> +VECT_VAR_DECL(expected,int,16,4) [] = { 0x16, 0x17, 0x18, 0x19 };
>>> +VECT_VAR_DECL(expected,int,32,2) [] = { 0x20, 0x21 };
>>> +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 };
>>> +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x53, 0x54, 0x55, 0x56,
>>> +                                       0x57, 0x58, 0x59, 0x5a };
>>> +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x907, 0x908, 0x909, 0x90a };
>>> +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xffffffe7, 0xffffffe8 };
>>> +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 };
>>> +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33,
>>> +                                       0x33, 0x33, 0x33, 0x33 };
>>> +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 };
>>> +VECT_VAR_DECL(expected,float,32,2) [] = { 4.172325e-08, 4.172325e-08 };
>>> +VECT_VAR_DECL(expected,int,8,16) [] = { 0x5e, 0x5f, 0x60, 0x61,
>>> +                                       0x62, 0x63, 0x64, 0x65,
>>> +                                       0x66, 0x67, 0x68, 0x69,
>>> +                                       0x6a, 0x6b, 0x6c, 0x6d };
>>> +VECT_VAR_DECL(expected,int,16,8) [] = { 0xb9c, 0xb9d, 0xb9e, 0xb9f,
>>> +                                       0xba0, 0xba1, 0xba2, 0xba3 };
>>> +VECT_VAR_DECL(expected,int,32,4) [] = { 0x26e0, 0x26e1, 0x26e2, 0x26e3 };
>>> +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333,
>>> +                                       0x3333333333333333 };
>>> +VECT_VAR_DECL(expected,uint,8,16) [] = { 0xf8, 0xf9, 0xfa, 0xfb,
>>> +                                        0xfc, 0xfd, 0xfe, 0xff,
>>> +                                        0x0, 0x1, 0x2, 0x3,
>>> +                                        0x4, 0x5, 0x6, 0x7 };
>>> +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xfff9, 0xfffa, 0xfffb, 0xfffc,
>>> +                                        0xfffd, 0xfffe, 0xffff, 0x0 };
>>> +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xc, 0xd, 0xe, 0xf };
>>> +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333,
>>> +                                        0x3333333333333333 };
>>> +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
>>> +                                        0x33, 0x33, 0x33, 0x33,
>>> +                                        0x33, 0x33, 0x33, 0x33,
>>> +                                        0x33, 0x33, 0x33, 0x33 };
>>> +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333,
>>> +                                        0x3333, 0x3333, 0x3333, 0x3333 };
>>> +VECT_VAR_DECL(expected,float,32,4) [] = { 4.172325e-08, 4.172325e-08,
>>> +                                         4.172325e-08, 4.172325e-08 };
>>> +
>>> +#define TEST_MSG "VABA/VABAQ"
>>> +void exec_vaba (void)
>>> +{
>>> +  /* Basic test: v4=vaba(v1,v2,v3), then store the result.  */
>>> +#define TEST_VABA(Q, T1, T2, W, N)                                     \
>>> +  VECT_VAR(vector_res, T1, W, N) =                                     \
>>> +    vaba##Q##_##T2##W(VECT_VAR(vector1, T1, W, N),                     \
>>> +                     VECT_VAR(vector2, T1, W, N),                      \
>>> +                     VECT_VAR(vector3, T1, W, N));                     \
>>> +  vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vector_res, T1, W, N))
>>> +
>>> +#define DECL_VABA_VAR(VAR)                     \
>>> +  DECL_VARIABLE(VAR, int, 8, 8);               \
>>> +  DECL_VARIABLE(VAR, int, 16, 4);              \
>>> +  DECL_VARIABLE(VAR, int, 32, 2);              \
>>> +  DECL_VARIABLE(VAR, uint, 8, 8);              \
>>> +  DECL_VARIABLE(VAR, uint, 16, 4);             \
>>> +  DECL_VARIABLE(VAR, uint, 32, 2);             \
>>> +  DECL_VARIABLE(VAR, int, 8, 16);              \
>>> +  DECL_VARIABLE(VAR, int, 16, 8);              \
>>> +  DECL_VARIABLE(VAR, int, 32, 4);              \
>>> +  DECL_VARIABLE(VAR, uint, 8, 16);             \
>>> +  DECL_VARIABLE(VAR, uint, 16, 8);             \
>>> +  DECL_VARIABLE(VAR, uint, 32, 4)
>>> +
>>> +  DECL_VABA_VAR(vector1);
>>> +  DECL_VABA_VAR(vector2);
>>> +  DECL_VABA_VAR(vector3);
>>> +  DECL_VABA_VAR(vector_res);
>>> +
>>> +  clean_results ();
>>> +
>>> +  /* Initialize input "vector" from "buffer".  */
>>> +  TEST_VLOAD(vector1, buffer, , int, s, 8, 8);
>>> +  TEST_VLOAD(vector1, buffer, , int, s, 16, 4);
>>> +  TEST_VLOAD(vector1, buffer, , int, s, 32, 2);
>>> +  TEST_VLOAD(vector1, buffer, , uint, u, 8, 8);
>>> +  TEST_VLOAD(vector1, buffer, , uint, u, 16, 4);
>>> +  TEST_VLOAD(vector1, buffer, , uint, u, 32, 2);
>>> +  TEST_VLOAD(vector1, buffer, q, int, s, 8, 16);
>>> +  TEST_VLOAD(vector1, buffer, q, int, s, 16, 8);
>>> +  TEST_VLOAD(vector1, buffer, q, int, s, 32, 4);
>>> +  TEST_VLOAD(vector1, buffer, q, uint, u, 8, 16);
>>> +  TEST_VLOAD(vector1, buffer, q, uint, u, 16, 8);
>>> +  TEST_VLOAD(vector1, buffer, q, uint, u, 32, 4);
>>> +
>>> +  /* Choose init value arbitrarily.  */
>>> +  TEST_VDUP(vector2, , int, s, 8, 8, 1);
>>> +  TEST_VDUP(vector2, , int, s, 16, 4, -13);
>>> +  TEST_VDUP(vector2, , int, s, 32, 2, 8);
>>> +  TEST_VDUP(vector2, , uint, u, 8, 8, 1);
>>> +  TEST_VDUP(vector2, , uint, u, 16, 4, 13);
>>> +  TEST_VDUP(vector2, , uint, u, 32, 2, 8);
>>> +  TEST_VDUP(vector2, q, int, s, 8, 16, 10);
>>> +  TEST_VDUP(vector2, q, int, s, 16, 8, -12);
>>> +  TEST_VDUP(vector2, q, int, s, 32, 4, 32);
>>> +  TEST_VDUP(vector2, q, uint, u, 8, 16, 10);
>>> +  TEST_VDUP(vector2, q, uint, u, 16, 8, 12);
>>> +  TEST_VDUP(vector2, q, uint, u, 32, 4, 32);
>>> +
>>> +  /* Choose init value arbitrarily.  */
>>> +  TEST_VDUP(vector3, , int, s, 8, 8, -5);
>>> +  TEST_VDUP(vector3, , int, s, 16, 4, 25);
>>> +  TEST_VDUP(vector3, , int, s, 32, 2, -40);
>>> +  TEST_VDUP(vector3, , uint, u, 8, 8, 100);
>>> +  TEST_VDUP(vector3, , uint, u, 16, 4, 2340);
>>> +  TEST_VDUP(vector3, , uint, u, 32, 2, 0xffffffff);
>>> +  TEST_VDUP(vector3, q, int, s, 8, 16, -100);
>>> +  TEST_VDUP(vector3, q, int, s, 16, 8, -3000);
>>> +  TEST_VDUP(vector3, q, int, s, 32, 4, 10000);
>>> +  TEST_VDUP(vector3, q, uint, u, 8, 16, 2);
>>> +  TEST_VDUP(vector3, q, uint, u, 16, 8, 3);
>>> +  TEST_VDUP(vector3, q, uint, u, 32, 4, 4);
>>> +
>>> +  /* Execute the tests.  */
>>> +  TEST_VABA(, int, s, 8, 8);
>>> +  TEST_VABA(, int, s, 16, 4);
>>> +  TEST_VABA(, int, s, 32, 2);
>>> +  TEST_VABA(, uint, u, 8, 8);
>>> +  TEST_VABA(, uint, u, 16, 4);
>>> +  TEST_VABA(, uint, u, 32, 2);
>>> +  TEST_VABA(q, int, s, 8, 16);
>>> +  TEST_VABA(q, int, s, 16, 8);
>>> +  TEST_VABA(q, int, s, 32, 4);
>>> +  TEST_VABA(q, uint, u, 8, 16);
>>> +  TEST_VABA(q, uint, u, 16, 8);
>>> +  TEST_VABA(q, uint, u, 32, 4);
>>> +
>>> +  check_results (TEST_MSG, "");
>>> +}
>>> +
>>> +int main (void)
>>> +{
>>> +  exec_vaba ();
>>> +  return 0;
>>> +}
>>> diff -rNup '--exclude=.git'
>>> gcc-fsf/gcc/testsuite/gcc.target/arm/neon-intrinsics/ref_vld1.c
>>> gcc-fsf-neontests/gcc/testsuite/gcc.target/arm/neon-intrinsics/ref_vld1.c
>>> --- gcc-fsf/gcc/testsuite/gcc.target/arm/neon-intrinsics/ref_vld1.c
>>> 1970-01-01 01:00:00.000000000 +0100
>>> +++
>>> gcc-fsf-neontests/gcc/testsuite/gcc.target/arm/neon-intrinsics/ref_vld1.c
>>> 2013-05-09 00:39:00.351644429 +0200
>>> @@ -0,0 +1,77 @@
>>> +/* { dg-do run } */
>>> +/* { dg-require-effective-target arm_neon_hw } */
>>> +/* { dg-add-options arm_neon } */
>>> +
>>> +#include <arm_neon.h>
>>> +#include "arm-neon-ref.h"
>>> +#include "compute_ref_data.h"
>>> +
>>> +VECT_VAR_DECL(expected,int,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
>>> +                                      0xf4, 0xf5, 0xf6, 0xf7 };
>>> +VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 };
>>> +VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffff0, 0xfffffff1 };
>>> +VECT_VAR_DECL(expected,int,64,1) [] = { 0xfffffffffffffff0 };
>>> +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
>>> +                                       0xf4, 0xf5, 0xf6, 0xf7 };
>>> +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3
>>> };
>>> +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffffff0, 0xfffffff1 };
>>> +VECT_VAR_DECL(expected,uint,64,1) [] = { 0xfffffffffffffff0 };
>>> +VECT_VAR_DECL(expected,poly,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
>>> +                                       0xf4, 0xf5, 0xf6, 0xf7 };
>>> +VECT_VAR_DECL(expected,poly,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3
>>> };
>>> +VECT_VAR_DECL(expected,float,32,2) [] = { -16, -15 };
>>> +VECT_VAR_DECL(expected,int,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
>>> +                                       0xf4, 0xf5, 0xf6, 0xf7,
>>> +                                       0xf8, 0xf9, 0xfa, 0xfb,
>>> +                                       0xfc, 0xfd, 0xfe, 0xff };
>>> +VECT_VAR_DECL(expected,int,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3,
>>> +                                       0xfff4, 0xfff5, 0xfff6, 0xfff7 };
>>> +VECT_VAR_DECL(expected,int,32,4) [] = { 0xfffffff0, 0xfffffff1,
>>> +                                       0xfffffff2, 0xfffffff3 };
>>> +VECT_VAR_DECL(expected,int,64,2) [] = { 0xfffffffffffffff0,
>>> +                                       0xfffffffffffffff1 };
>>> +VECT_VAR_DECL(expected,uint,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
>>> +                                        0xf4, 0xf5, 0xf6, 0xf7,
>>> +                                        0xf8, 0xf9, 0xfa, 0xfb,
>>> +                                        0xfc, 0xfd, 0xfe, 0xff };
>>> +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xfff0, 0xfff1, 0xfff2,
>>> +                                        0xfff3, 0xfff4, 0xfff5,
>>> +                                        0xfff6, 0xfff7 };
>>> +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xfffffff0, 0xfffffff1,
>>> +                                        0xfffffff2, 0xfffffff3 };
>>> +VECT_VAR_DECL(expected,uint,64,2) [] = { 0xfffffffffffffff0,
>>> +                                        0xfffffffffffffff1 };
>>> +VECT_VAR_DECL(expected,poly,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
>>> +                                        0xf4, 0xf5, 0xf6, 0xf7,
>>> +                                        0xf8, 0xf9, 0xfa, 0xfb,
>>> +                                        0xfc, 0xfd, 0xfe, 0xff };
>>> +VECT_VAR_DECL(expected,poly,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3,
>>> +                                        0xfff4, 0xfff5, 0xfff6, 0xfff7 };
>>> +VECT_VAR_DECL(expected,float,32,4) [] = { -16, -15, -14, -13 };
>>> +
>>> +#define TEST_MSG "VLD1/VLD1Q"
>>> +void exec_vld1 (void)
>>> +{
>>> +  /* Basic test vec=vld1(buffer); then store vec: vst1(result, vector).
>>> */
>>> +  /* This test actually tests vld1 and vst1 at the same time.  */
>>> +#define TEST_VLD1(VAR, BUF, Q, T1, T2, W, N)                           \
>>> +  VECT_VAR(VAR, T1, W, N) = vld1##Q##_##T2##W(VECT_VAR(BUF, T1, W, N)); \
>>> +  vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(VAR, T1, W, N))
>>> +
>>> +  DECL_VARIABLE_ALL_VARIANTS(vector);
>>> +
>>> +  clean_results ();
>>> +
>>> +  TEST_MACRO_ALL_VARIANTS_2_5(TEST_VLD1, vector, buffer);
>>> +
>>> +  TEST_VLD1(vector, buffer, , float, f, 32, 2);
>>> +  TEST_VLD1(vector, buffer, q, float, f, 32, 4);
>>> +
>>> +  check_results (TEST_MSG, "");
>>> +}
>>> +
>>> +int main (void)
>>> +{
>>> +  exec_vld1 ();
>>> +  return 0;
>>> +}
>>>
>>
>>

Comments

Ramana Radhakrishnan April 15, 2014, 2:18 p.m. UTC | #1
On 04/14/14 23:16, Christophe Lyon wrote:
> Hi Ramana,
>
> Here is an updated version of my proposal to include tests for Neon intrinsics.
>
> With respect to my previous post, I have made a few changes:
> - renamed the test files, removing the "ref_" prefix.
> - removed the TEST_ prefix on some initialization macros
> - use the c-torture framework
>
> I have run it successfully on the following configurations:
>      aarch64-none-linux-gnu
>      aarch64-none-elf
>      aarch64_be-none-elf
>      arm-none-linux-gnueabihf
>      armeb-none-linux-gnueabihf
>      arm-none-linux-gnueabi
>      armeb-none-linux-gnueabi
>      arm-none-eabi
> using qemu for most of them and the Foundation Model for aarch64*elf



I had a brief look at your patch: how does this run for AArch64 when
you have such options in the testsuite?


+++ b/gcc/testsuite/gcc.target/arm/neon-intrinsics/vaba.c
@@ -0,0 +1,145 @@
+/* { dg-do run } */
+/* { dg-require-effective-target arm_neon_hw { target { "arm*-*-*" } } } */
+/* { dg-add-options arm_neon } */
+


Additionally a README would help in terms of how one should add new tests.



>
> Any comments?
>
> Thanks,
>
> Christophe.
>
>
> On 29 October 2013 19:09, Christophe Lyon <christophe.lyon@linaro.org> wrote:
>> On 29 October 2013 03:24, Ramana Radhakrishnan <ramrad01@arm.com> wrote:
>>> On 10/09/13 23:16, Christophe Lyon wrote:
>>
>>> Irrespective of our earlier conversations on this now I'm actually wondering
>>> if instead of doing this and integrating this in the GCC source base it
>>> may be easier to write a harness to test this cross on qemu or natively.
>>> Additionally setting up an auto-tester to do this might be a more productive
>>> use of time rather than manually dejagnuizing this which appears to be a
>>> tedious and slow process.
>>
>> This would be easy to setup, since the Makefile on gitorious is
>> already targeting qemu. I used it occasionally on boards with
>> minimal changes.
>> This just means we'd have to agree on how to set up such an
>> auto-tester, where do we send the results to, etc...

If you are sufficiently motivated to do the transition, I'm not opposed
to putting it into the testsuite as a basic regression testing framework
for neon intrinsics.

I'll try and play with this in some more detail with a couple of patches
I'm doing in the area of neon intrinsics so it may be useful to cross check.

regards
Ramana

>>
>>>> I'd like your feedback before continuing, as there are a lot more
>>>> files to come.
>>>>
>>>> I have made some cleanup to help review, but the two .h files will
>>>> need to grow as more intrinsics will be added (see the original ones).
>>>
>>> Which one should I compare this with in terms of the original file ?
>>
>> I have kept the same file names.
>>
>>
>>>> I'd like to keep the modifications at a minimal level, to save my time
>>>> when adapting each test (there are currently 145 test files, so 143
>>>> left:-).
>>>
>>>
>>> On to the patch itself.
>>>
>>> The prefix TEST_ seems a bit misleading in that it suggests this is testing
>>> something when in reality this is initializing stuff.
>> In fact, TEST_XXXX executes the XXXX intrinsics, and copies the
>> results to memory when relevant. But I can easily change TEST_ to
>> something else.
>>
>> So in the sample I posted:
>>
>> TEST_VABA: VAR=vaba(....); vst1(BUFFER,VAR)
>> TEST_VLD1: VAR=vld1(....); vst1(BUFFER, VAR)
>>
>> VDUP is special in that it is a helper for other tests:
>> TEST_VDUP: VAR1=vdup(VAR2,xxxx)
>> and similarly for TEST_VLOAD and TEST_VSETLANE
>>
>>>> +# Exit immediately if this isn't an ARM target.
>>>> +if ![istarget arm*-*-*] then {
>>>> +  return
>>>> +}
>>>
>>>
>>> Also for aarch64*-*-* as all these intrinsics are compatible with the
>>> aarch64 port. I would also prefer that this be tortured over multiple
>>> optimization levels as many times we find issues with different optimization
>>> levels.
>>
>> OK, this sounds easy to do, and I agree. I preferred to post a simple
>> version first.
>> And given that you talked to me about your plans to factorize arm and aarch64
>> tests, I thought it was better to start with a simple version I knew
>> was working.
>>
>>> More later I need to get back to something else and I need to play more with
>>> your original testsuite - but I'd like some discussion around some of these
>>> points anyway.
>>>
>>> Ramana
>>
>> OK thanks for the feedback. If we decide to go with auto-testers
>> instead, the discussion will probably be shorter.
>>
>> Christophe
>>
>>>
>>>> +
>>>> +# Load support procs.
>>>> +load_lib gcc-dg.exp
>>>> +
>>>> +# Initialize `dg'.
>>>> +dg-init
>>>> +
>>>> +# Main loop.
>>>> +gcc-dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/*.\[cCS\]]] \
>>>> +       ""
>>>> +
>>>> +# All done.
>>>> +dg-finish
>>>> diff -rNup '--exclude=.git'
>>>> gcc-fsf/gcc/testsuite/gcc.target/arm/neon-intrinsics/arm-neon-ref.h
>>>> gcc-fsf-neontests/gcc/testsuite/gcc.target/arm/neon-intrinsics/arm-neon-ref.h
>>>> --- gcc-fsf/gcc/testsuite/gcc.target/arm/neon-intrinsics/arm-neon-ref.h
>>>> 1970-01-01 01:00:00.000000000 +0100
>>>> +++
>>>> gcc-fsf-neontests/gcc/testsuite/gcc.target/arm/neon-intrinsics/arm-neon-ref.h
>>>> 2013-05-09 00:48:59.395628726 +0200
>>>> @@ -0,0 +1,349 @@
>>>> +#ifndef _ARM_NEON_REF_H_
>>>> +#define _ARM_NEON_REF_H_
>>>> +
>>>> +#include <stdio.h>
>>>> +#include <inttypes.h>
>>>> +#include <string.h>
>>>> +#include <stdlib.h>
>>>> +
>>>> +#define xSTR(X) #X
>>>> +#define STR(X) xSTR(X)
>>>> +
>>>> +#define xNAME1(V,T) V ## _ ##  T
>>>> +#define xNAME(V,T) xNAME1(V,T)
>>>> +
>>>> +#define VAR(V,T,W) xNAME(V,T##W)
>>>> +#define VAR_DECL(V, T, W) T##W##_t VAR(V,T,W)
>>>> +
>>>> +#define VECT_NAME(T, W, N) T##W##x##N
>>>> +#define VECT_ARRAY_NAME(T, W, N, L) T##W##x##N##x##L
>>>> +#define VECT_TYPE(T, W, N) xNAME(VECT_NAME(T,W,N),t)
>>>> +#define VECT_ARRAY_TYPE(T, W, N, L) xNAME(VECT_ARRAY_NAME(T,W,N,L),t)
>>>> +
>>>> +#define VECT_VAR(V,T,W,N) xNAME(V,VECT_NAME(T,W,N))
>>>> +#define VECT_VAR_DECL(V, T, W, N) T##W##_t VECT_VAR(V,T,W,N)
>>>> +
>>>> +/* Array declarations.  */
>>>> +#define ARRAY(V, T, W, N) VECT_VAR_DECL(V,T,W,N)[N]
>>>> +
>>>> +/* Check results vs expected values.  */
>>>> +#define CHECK(MSG,T,W,N,FMT)                                           \
>>>> +  for(i=0; i<N ; i++)                                                  \
>>>> +    {                                                                  \
>>>> +      if (VECT_VAR(result, T, W, N)[i] != VECT_VAR(expected, T, W, N)[i])
>>>> { \
>>>> +       fprintf(stderr,                                                 \
>>>> +               "ERROR in %s at type %s index %d: 0x%" FMT " != 0x%"    \
>>>> +               FMT " (expected)\n",                                    \
>>>> +               MSG, STR(VECT_NAME(T, W, N)), i,                        \
>>>> +               VECT_VAR(result, T, W, N)[i],                           \
>>>> +               VECT_VAR(expected, T, W, N)[i]);                        \
>>>> +       abort();                                                        \
>>>> +      }
>>>> \
>>>> +    }
>>>> +
>>>> +/* Floating-point variant.  */
>>>> +#define CHECK_FP(MSG,T,W,N,FMT)
>>>> \
>>>> +  for(i=0; i<N ; i++)                                                  \
>>>> +    {                                                                  \
>>>> +      union fp_operand {                                               \
>>>> +       uint##W##_t i;                                                  \
>>>> +       float##W##_t f;                                                 \
>>>> +      } tmp_res, tmp_exp;                                              \
>>>> +      tmp_res.f = VECT_VAR(result, T, W, N)[i];
>>>> \
>>>> +      tmp_exp.f = VECT_VAR(expected, T, W, N)[i];                      \
>>>> +      if (tmp_res.i != tmp_exp.i) {                                    \
>>>> +       fprintf(stderr,                                                 \
>>>> +               "ERROR in %s at type %s index %d: 0x%" FMT " != 0x%"    \
>>>> +               FMT " (expected)\n",                                    \
>>>> +               MSG, STR(VECT_NAME(T, W, N)), i,                        \
>>>> +               tmp_res.i,                                              \
>>>> +               tmp_exp.i);                                             \
>>>> +      abort();                                                         \
>>>> +      }
>>>> \
>>>> +    }
>>>> +
>>>> +/* Clean buffer with a non-zero pattern.  */
>>>> +#define CLEAN_PATTERN_8  0x33
>>>> +
>>>> +#define CLEAN(VAR,T,W,N)                                               \
>>>> +  memset(VECT_VAR(VAR, T, W, N),                                       \
>>>> +        CLEAN_PATTERN_8,                                               \
>>>> +        sizeof(VECT_VAR(VAR, T, W, N)));
>>>> +
>>>> +/* Input buffers, one of each size.  */
>>>> +extern ARRAY(buffer, int, 8, 8);
>>>> +extern ARRAY(buffer, int, 16, 4);
>>>> +extern ARRAY(buffer, int, 32, 2);
>>>> +extern ARRAY(buffer, int, 64, 1);
>>>> +extern ARRAY(buffer, uint, 8, 8);
>>>> +extern ARRAY(buffer, uint, 16, 4);
>>>> +extern ARRAY(buffer, uint, 32, 2);
>>>> +extern ARRAY(buffer, uint, 64, 1);
>>>> +extern ARRAY(buffer, poly, 8, 8);
>>>> +extern ARRAY(buffer, poly, 16, 4);
>>>> +extern ARRAY(buffer, float, 32, 2);
>>>> +extern ARRAY(buffer, int, 8, 16);
>>>> +extern ARRAY(buffer, int, 16, 8);
>>>> +extern ARRAY(buffer, int, 32, 4);
>>>> +extern ARRAY(buffer, int, 64, 2);
>>>> +extern ARRAY(buffer, uint, 8, 16);
>>>> +extern ARRAY(buffer, uint, 16, 8);
>>>> +extern ARRAY(buffer, uint, 32, 4);
>>>> +extern ARRAY(buffer, uint, 64, 2);
>>>> +extern ARRAY(buffer, poly, 8, 16);
>>>> +extern ARRAY(buffer, poly, 16, 8);
>>>> +extern ARRAY(buffer, float, 32, 4);
>>>> +
>>>> +/* Output buffers, one of each size.  */
>>>> +static ARRAY(result, int, 8, 8);
>>>> +static ARRAY(result, int, 16, 4);
>>>> +static ARRAY(result, int, 32, 2);
>>>> +static ARRAY(result, int, 64, 1);
>>>> +static ARRAY(result, uint, 8, 8);
>>>> +static ARRAY(result, uint, 16, 4);
>>>> +static ARRAY(result, uint, 32, 2);
>>>> +static ARRAY(result, uint, 64, 1);
>>>> +static ARRAY(result, poly, 8, 8);
>>>> +static ARRAY(result, poly, 16, 4);
>>>> +static ARRAY(result, float, 32, 2);
>>>> +static ARRAY(result, int, 8, 16);
>>>> +static ARRAY(result, int, 16, 8);
>>>> +static ARRAY(result, int, 32, 4);
>>>> +static ARRAY(result, int, 64, 2);
>>>> +static ARRAY(result, uint, 8, 16);
>>>> +static ARRAY(result, uint, 16, 8);
>>>> +static ARRAY(result, uint, 32, 4);
>>>> +static ARRAY(result, uint, 64, 2);
>>>> +static ARRAY(result, poly, 8, 16);
>>>> +static ARRAY(result, poly, 16, 8);
>>>> +static ARRAY(result, float, 32, 4);
>>>> +
>>>> +/* Expected results, one of each size.  */
>>>> +extern ARRAY(expected, int, 8, 8);
>>>> +extern ARRAY(expected, int, 16, 4);
>>>> +extern ARRAY(expected, int, 32, 2);
>>>> +extern ARRAY(expected, int, 64, 1);
>>>> +extern ARRAY(expected, uint, 8, 8);
>>>> +extern ARRAY(expected, uint, 16, 4);
>>>> +extern ARRAY(expected, uint, 32, 2);
>>>> +extern ARRAY(expected, uint, 64, 1);
>>>> +extern ARRAY(expected, poly, 8, 8);
>>>> +extern ARRAY(expected, poly, 16, 4);
>>>> +extern ARRAY(expected, float, 32, 2);
>>>> +extern ARRAY(expected, int, 8, 16);
>>>> +extern ARRAY(expected, int, 16, 8);
>>>> +extern ARRAY(expected, int, 32, 4);
>>>> +extern ARRAY(expected, int, 64, 2);
>>>> +extern ARRAY(expected, uint, 8, 16);
>>>> +extern ARRAY(expected, uint, 16, 8);
>>>> +extern ARRAY(expected, uint, 32, 4);
>>>> +extern ARRAY(expected, uint, 64, 2);
>>>> +extern ARRAY(expected, poly, 8, 16);
>>>> +extern ARRAY(expected, poly, 16, 8);
>>>> +extern ARRAY(expected, float, 32, 4);
>>>> +
>>>> +/* Check results.  */
>>>> +static void check_results (const char*test_name, const char*  comment)
>>>> +{
>>>> +  int i;
>>>> +
>>>> +  CHECK(test_name, int, 8, 8, PRIx8);
>>>> +  CHECK(test_name, int, 16, 4, PRIx16);
>>>> +  CHECK(test_name, int, 32, 2, PRIx32);
>>>> +  CHECK(test_name, int, 64, 1, PRIx64);
>>>> +  CHECK(test_name, uint, 8, 8, PRIx8);
>>>> +  CHECK(test_name, uint, 16, 4, PRIx16);
>>>> +  CHECK(test_name, uint, 32, 2, PRIx32);
>>>> +  CHECK(test_name, uint, 64, 1, PRIx64);
>>>> +  CHECK(test_name, poly, 8, 8, PRIx8);
>>>> +  CHECK(test_name, poly, 16, 4, PRIx16);
>>>> +  CHECK_FP(test_name, float, 32, 2, PRIx32);
>>>> +
>>>> +  CHECK(test_name, int, 8, 16, PRIx8);
>>>> +  CHECK(test_name, int, 16, 8, PRIx16);
>>>> +  CHECK(test_name, int, 32, 4, PRIx32);
>>>> +  CHECK(test_name, int, 64, 2, PRIx64);
>>>> +  CHECK(test_name, uint, 8, 16, PRIx8);
>>>> +  CHECK(test_name, uint, 16, 8, PRIx16);
>>>> +  CHECK(test_name, uint, 32, 4, PRIx32);
>>>> +  CHECK(test_name, uint, 64, 2, PRIx64);
>>>> +  CHECK(test_name, poly, 8, 16, PRIx8);
>>>> +  CHECK(test_name, poly, 16, 8, PRIx16);
>>>> +  CHECK_FP(test_name, float, 32, 4, PRIx32);
>>>> +}
>>>> +
>>>> +/* Clean output buffers before execution.  */
>>>> +static void clean_results (void)
>>>> +{
>>>> +  CLEAN(result, int, 8, 8);
>>>> +  CLEAN(result, int, 16, 4);
>>>> +  CLEAN(result, int, 32, 2);
>>>> +  CLEAN(result, int, 64, 1);
>>>> +  CLEAN(result, uint, 8, 8);
>>>> +  CLEAN(result, uint, 16, 4);
>>>> +  CLEAN(result, uint, 32, 2);
>>>> +  CLEAN(result, uint, 64, 1);
>>>> +  CLEAN(result, poly, 8, 8);
>>>> +  CLEAN(result, poly, 16, 4);
>>>> +  CLEAN(result, float, 32, 2);
>>>> +
>>>> +  CLEAN(result, int, 8, 16);
>>>> +  CLEAN(result, int, 16, 8);
>>>> +  CLEAN(result, int, 32, 4);
>>>> +  CLEAN(result, int, 64, 2);
>>>> +  CLEAN(result, uint, 8, 16);
>>>> +  CLEAN(result, uint, 16, 8);
>>>> +  CLEAN(result, uint, 32, 4);
>>>> +  CLEAN(result, uint, 64, 2);
>>>> +  CLEAN(result, poly, 8, 16);
>>>> +  CLEAN(result, poly, 16, 8);
>>>> +  CLEAN(result, float, 32, 4);
>>>> +}
>>>> +
>>>> +
>>>> +/* Helpers to declare variables of various types.   */
>>>> +#define DECL_VARIABLE(VAR, T1, W, N)           \
>>>> +  VECT_TYPE(T1, W, N) VECT_VAR(VAR, T1, W, N)
>>>> +
>>>> +#define DECL_VARIABLE_64BITS_SIGNED_VARIANTS(VAR)      \
>>>> +  DECL_VARIABLE(VAR, int, 8, 8);                       \
>>>> +  DECL_VARIABLE(VAR, int, 16, 4);                      \
>>>> +  DECL_VARIABLE(VAR, int, 32, 2);                      \
>>>> +  DECL_VARIABLE(VAR, int, 64, 1)
>>>> +
>>>> +#define DECL_VARIABLE_64BITS_UNSIGNED_VARIANTS(VAR)    \
>>>> +  DECL_VARIABLE(VAR, uint, 8, 8);                      \
>>>> +  DECL_VARIABLE(VAR, uint, 16, 4);                     \
>>>> +  DECL_VARIABLE(VAR, uint, 32, 2);                     \
>>>> +  DECL_VARIABLE(VAR, uint, 64, 1)
>>>> +
>>>> +#define DECL_VARIABLE_128BITS_SIGNED_VARIANTS(VAR)     \
>>>> +  DECL_VARIABLE(VAR, int, 8, 16);                      \
>>>> +  DECL_VARIABLE(VAR, int, 16, 8);                      \
>>>> +  DECL_VARIABLE(VAR, int, 32, 4);                      \
>>>> +  DECL_VARIABLE(VAR, int, 64, 2)
>>>> +
>>>> +#define DECL_VARIABLE_128BITS_UNSIGNED_VARIANTS(VAR)   \
>>>> +  DECL_VARIABLE(VAR, uint, 8, 16);                     \
>>>> +  DECL_VARIABLE(VAR, uint, 16, 8);                     \
>>>> +  DECL_VARIABLE(VAR, uint, 32, 4);                     \
>>>> +  DECL_VARIABLE(VAR, uint, 64, 2)
>>>> +
>>>> +#define DECL_VARIABLE_64BITS_VARIANTS(VAR)     \
>>>> +  DECL_VARIABLE_64BITS_SIGNED_VARIANTS(VAR);   \
>>>> +  DECL_VARIABLE_64BITS_UNSIGNED_VARIANTS(VAR); \
>>>> +  DECL_VARIABLE(VAR, poly, 8, 8);              \
>>>> +  DECL_VARIABLE(VAR, poly, 16, 4);             \
>>>> +  DECL_VARIABLE(VAR, float, 32, 2)
>>>> +
>>>> +#define DECL_VARIABLE_128BITS_VARIANTS(VAR)    \
>>>> +  DECL_VARIABLE_128BITS_SIGNED_VARIANTS(VAR);  \
>>>> +  DECL_VARIABLE_128BITS_UNSIGNED_VARIANTS(VAR);        \
>>>> +  DECL_VARIABLE(VAR, poly, 8, 16);             \
>>>> +  DECL_VARIABLE(VAR, poly, 16, 8);             \
>>>> +  DECL_VARIABLE(VAR, float, 32, 4)
>>>> +
>>>> +#define DECL_VARIABLE_ALL_VARIANTS(VAR)                \
>>>> +  DECL_VARIABLE_64BITS_VARIANTS(VAR);          \
>>>> +  DECL_VARIABLE_128BITS_VARIANTS(VAR)
>>>> +
>>>> +/* Helpers to initialize vectors.  */
>>>> +#define TEST_VDUP(VAR, Q, T1, T2, W, N, V)             \
>>>> +  VECT_VAR(VAR, T1, W, N) = vdup##Q##_n_##T2##W(V)
>>>> +
>>>> +#define TEST_VSET_LANE(VAR, Q, T1, T2, W, N, L, V)                     \
>>>> +  VECT_VAR(VAR, T1, W, N) = vset##Q##_lane_##T2##W(V,                  \
>>>> +                                                  VECT_VAR(VAR, T1, W,
>>>> N), \
>>>> +                                                  L)
>>>> +
>>>> +/* We need to load initial values first, so rely on VLD1.  */
>>>> +#define TEST_VLOAD(VAR, BUF, Q, T1, T2, W, N)                          \
>>>> +  VECT_VAR(VAR, T1, W, N) = vld1##Q##_##T2##W(VECT_VAR(BUF, T1, W, N))
>>>> +
>>>> +/* Helpers for macros with 1 constant and 5 variable arguments.  */
>>>> +#define TEST_MACRO_64BITS_SIGNED_VARIANTS_1_5(MACRO, VAR)      \
>>>> +  MACRO(VAR, , int, s, 8, 8);                                  \
>>>> +  MACRO(VAR, , int, s, 16, 4);                                 \
>>>> +  MACRO(VAR, , int, s, 32, 2);                                 \
>>>> +  MACRO(VAR, , int, s, 64, 1)
>>>> +
>>>> +#define TEST_MACRO_64BITS_UNSIGNED_VARIANTS_1_5(MACRO, VAR)    \
>>>> +  MACRO(VAR, , uint, u, 8, 8);                                 \
>>>> +  MACRO(VAR, , uint, u, 16, 4);                                        \
>>>> +  MACRO(VAR, , uint, u, 32, 2);                                        \
>>>> +  MACRO(VAR, , uint, u, 64, 1)
>>>> +
>>>> +#define TEST_MACRO_128BITS_SIGNED_VARIANTS_1_5(MACRO, VAR)     \
>>>> +  MACRO(VAR, q, int, s, 8, 16);                                        \
>>>> +  MACRO(VAR, q, int, s, 16, 8);                                        \
>>>> +  MACRO(VAR, q, int, s, 32, 4);                                        \
>>>> +  MACRO(VAR, q, int, s, 64, 2)
>>>> +
>>>> +#define TEST_MACRO_128BITS_UNSIGNED_VARIANTS_1_5(MACRO,VAR)    \
>>>> +  MACRO(VAR, q, uint, u, 8, 16);                               \
>>>> +  MACRO(VAR, q, uint, u, 16, 8);                               \
>>>> +  MACRO(VAR, q, uint, u, 32, 4);                               \
>>>> +  MACRO(VAR, q, uint, u, 64, 2)
>>>> +
>>>> +#define TEST_MACRO_64BITS_VARIANTS_1_5(MACRO, VAR)     \
>>>> +  TEST_MACRO_64BITS_SIGNED_VARIANTS_1_5(MACRO, VAR);   \
>>>> +  TEST_MACRO_64BITS_UNSIGNED_VARIANTS_1_5(MACRO, VAR)
>>>> +
>>>> +#define TEST_MACRO_128BITS_VARIANTS_1_5(MACRO, VAR)    \
>>>> +  TEST_MACRO_128BITS_SIGNED_VARIANTS_1_5(MACRO, VAR);  \
>>>> +  TEST_MACRO_128BITS_UNSIGNED_VARIANTS_1_5(MACRO, VAR)
>>>> +
>>>> +#define TEST_MACRO_ALL_VARIANTS_1_5(MACRO, VAR)        \
>>>> +  TEST_MACRO_64BITS_VARIANTS_1_5(MACRO, VAR);  \
>>>> +  TEST_MACRO_128BITS_VARIANTS_1_5(MACRO, VAR)
>>>> +
>>>> +#define TEST_MACRO_SIGNED_VARIANTS_1_5(MACRO, VAR)     \
>>>> +  TEST_MACRO_64BITS_SIGNED_VARIANTS_1_5(MACRO, VAR);   \
>>>> +  TEST_MACRO_128BITS_SIGNED_VARIANTS_1_5(MACRO, VAR)
>>>> +
>>>> +/* Helpers for macros with 2 constant and 5 variable arguments.  */
>>>> +#define TEST_MACRO_64BITS_SIGNED_VARIANTS_2_5(MACRO, VAR1, VAR2)       \
>>>> +  MACRO(VAR1, VAR2, , int, s, 8, 8);                                   \
>>>> +  MACRO(VAR1, VAR2, , int, s, 16, 4);                                  \
>>>> +  MACRO(VAR1, VAR2, , int, s, 32, 2);                                  \
>>>> +  MACRO(VAR1, VAR2 , , int, s, 64, 1)
>>>> +
>>>> +#define TEST_MACRO_64BITS_UNSIGNED_VARIANTS_2_5(MACRO, VAR1, VAR2)     \
>>>> +  MACRO(VAR1, VAR2, , uint, u, 8, 8);                                  \
>>>> +  MACRO(VAR1, VAR2, , uint, u, 16, 4);                                 \
>>>> +  MACRO(VAR1, VAR2, , uint, u, 32, 2);                                 \
>>>> +  MACRO(VAR1, VAR2, , uint, u, 64, 1)
>>>> +
>>>> +#define TEST_MACRO_128BITS_SIGNED_VARIANTS_2_5(MACRO, VAR1, VAR2)      \
>>>> +  MACRO(VAR1, VAR2, q, int, s, 8, 16);                                 \
>>>> +  MACRO(VAR1, VAR2, q, int, s, 16, 8);                                 \
>>>> +  MACRO(VAR1, VAR2, q, int, s, 32, 4);                                 \
>>>> +  MACRO(VAR1, VAR2, q, int, s, 64, 2)
>>>> +
>>>> +#define TEST_MACRO_128BITS_UNSIGNED_VARIANTS_2_5(MACRO, VAR1, VAR2)    \
>>>> +  MACRO(VAR1, VAR2, q, uint, u, 8, 16);
>>>> \
>>>> +  MACRO(VAR1, VAR2, q, uint, u, 16, 8);
>>>> \
>>>> +  MACRO(VAR1, VAR2, q, uint, u, 32, 4);
>>>> \
>>>> +  MACRO(VAR1, VAR2, q, uint, u, 64, 2)
>>>> +
>>>> +#define TEST_MACRO_64BITS_VARIANTS_2_5(MACRO, VAR1, VAR2)      \
>>>> +  TEST_MACRO_64BITS_SIGNED_VARIANTS_2_5(MACRO, VAR1, VAR2);    \
>>>> +  TEST_MACRO_64BITS_UNSIGNED_VARIANTS_2_5(MACRO, VAR1, VAR2);  \
>>>> +  MACRO(VAR1, VAR2, , poly, p, 8, 8);                          \
>>>> +  MACRO(VAR1, VAR2, , poly, p, 16, 4)
>>>> +
>>>> +#define TEST_MACRO_128BITS_VARIANTS_2_5(MACRO, VAR1, VAR2)     \
>>>> +  TEST_MACRO_128BITS_SIGNED_VARIANTS_2_5(MACRO, VAR1, VAR2);   \
>>>> +  TEST_MACRO_128BITS_UNSIGNED_VARIANTS_2_5(MACRO, VAR1, VAR2); \
>>>> +  MACRO(VAR1, VAR2, q, poly, p, 8, 16);                                \
>>>> +  MACRO(VAR1, VAR2, q, poly, p, 16, 8)
>>>> +
>>>> +#define TEST_MACRO_ALL_VARIANTS_2_5(MACRO, VAR1, VAR2) \
>>>> +  TEST_MACRO_64BITS_VARIANTS_2_5(MACRO, VAR1, VAR2);   \
>>>> +  TEST_MACRO_128BITS_VARIANTS_2_5(MACRO, VAR1, VAR2)
>>>> +
>>>> +#define TEST_MACRO_SIGNED_VARIANTS_2_5(MACRO, VAR1, VAR2)      \
>>>> +  TEST_MACRO_64BITS_SIGNED_VARIANTS_2_5(MACRO, VAR1, VAR2);    \
>>>> +  TEST_MACRO_128BITS_SIGNED_VARIANTS_2_5(MACRO, VAR1, VAR2)
>>>> +
>>>> +#endif /*_ARM_NEON_REF_H_  */
>>>> diff -rNup '--exclude=.git'
>>>> gcc-fsf/gcc/testsuite/gcc.target/arm/neon-intrinsics/compute_ref_data.h
>>>> gcc-fsf-neontests/gcc/testsuite/gcc.target/arm/neon-intrinsics/compute_ref_data.h
>>>> ---
>>>> gcc-fsf/gcc/testsuite/gcc.target/arm/neon-intrinsics/compute_ref_data.h
>>>> 1970-01-01 01:00:00.000000000 +0100
>>>> +++
>>>> gcc-fsf-neontests/gcc/testsuite/gcc.target/arm/neon-intrinsics/compute_ref_data.h
>>>> 2013-05-09 00:31:03.563656926 +0200
>>>> @@ -0,0 +1,80 @@
>>>> +#include <arm_neon.h>
>>>> +#include "arm-neon-ref.h"
>>>> +
>>>> +/* Initialization helpers; 4 slices are needed for vld2, vld3 and
>>>> +   vld4.  */
>>>> +#define MY_INIT_TAB(T,W,N) xNAME(INIT_TAB,N)(T##W##_t)
>>>> +#define MY_INIT_TAB2(T,W,N) xNAME(INIT_TAB2,N)(T##W##_t)
>>>> +#define MY_INIT_TAB3(T,W,N) xNAME(INIT_TAB3,N)(T##W##_t)
>>>> +#define MY_INIT_TAB4(T,W,N) xNAME(INIT_TAB4,N)(T##W##_t)
>>>> +
>>>> +/* Initialized input buffers.  */
>>>> +#define VECT_VAR_DECL_INIT(V, T, W, N)                 \
>>>> +  VECT_VAR_DECL(V,T,W,N) [] = { MY_INIT_TAB(T,W,N) };
>>>> +
>>>> +/* Sample initialization vectors.  */
>>>> +#define INIT_TAB_1(T)                          \
>>>> +  (T)-16,
>>>> +
>>>> +#define INIT_TAB_2(T)                          \
>>>> +  (T)-16, (T)-15,
>>>> +
>>>> +#define INIT_TAB_4(T)                          \
>>>> +  (T)-16, (T)-15, (T)-14, (T)-13,
>>>> +
>>>> +#define INIT_TAB_8(T)                                                  \
>>>> +  (T)-16, (T)-15, (T)-14, (T)-13, (T)-12, (T)-11, (T)-10, (T)-9,
>>>> +
>>>> +#define INIT_TAB_16(T)                                                 \
>>>> +  (T)-16, (T)-15, (T)-14, (T)-13, (T)-12, (T)-11, (T)-10, (T)-9,       \
>>>> +  (T)-8, (T)-7, (T)-6, (T)-5, (T)-4, (T)-3, (T)-2, (T)-1,
>>>> +
>>>> +/* This one is used for padding between input buffers.  */
>>>> +#define PAD(V, T, W, N) char VECT_VAR(V,T,W,N)=42;
>>>> +
>>>> +/* Input buffers, one of each size.  */
>>>> +/* Insert some padding to try to exhibit out of bounds accesses.  */
>>>> +VECT_VAR_DECL_INIT(buffer, int, 8, 8);
>>>> +PAD(buffer_pad, int, 8, 8);
>>>> +VECT_VAR_DECL_INIT(buffer, int, 16, 4);
>>>> +PAD(buffer_pad, int, 16, 4);
>>>> +VECT_VAR_DECL_INIT(buffer, int, 32, 2);
>>>> +PAD(buffer_pad, int, 32, 2);
>>>> +VECT_VAR_DECL_INIT(buffer, int, 64, 1);
>>>> +PAD(buffer_pad, int, 64, 1);
>>>> +VECT_VAR_DECL_INIT(buffer, uint, 8, 8);
>>>> +PAD(buffer_pad, uint, 8, 8);
>>>> +VECT_VAR_DECL_INIT(buffer, poly, 8, 8);
>>>> +PAD(buffer_pad, poly, 8, 8);
>>>> +VECT_VAR_DECL_INIT(buffer, poly, 16, 4);
>>>> +PAD(buffer_pad, poly, 16, 4);
>>>> +VECT_VAR_DECL_INIT(buffer, uint, 16, 4);
>>>> +PAD(buffer_pad, uint, 16, 4);
>>>> +VECT_VAR_DECL_INIT(buffer, uint, 32, 2);
>>>> +PAD(buffer_pad, uint, 32, 2);
>>>> +VECT_VAR_DECL_INIT(buffer, uint, 64, 1);
>>>> +PAD(buffer_pad, uint, 64, 1);
>>>> +VECT_VAR_DECL_INIT(buffer, float, 32, 2);
>>>> +PAD(buffer_pad, float, 32, 2);
>>>> +VECT_VAR_DECL_INIT(buffer, int, 8, 16);
>>>> +PAD(buffer_pad, int, 8, 16);
>>>> +VECT_VAR_DECL_INIT(buffer, int, 16, 8);
>>>> +PAD(buffer_pad, int, 16, 8);
>>>> +VECT_VAR_DECL_INIT(buffer, int, 32, 4);
>>>> +PAD(buffer_pad, int, 32, 4);
>>>> +VECT_VAR_DECL_INIT(buffer, int, 64, 2);
>>>> +PAD(buffer_pad, int, 64, 2);
>>>> +VECT_VAR_DECL_INIT(buffer, uint, 8, 16);
>>>> +PAD(buffer_pad, uint, 8, 16);
>>>> +VECT_VAR_DECL_INIT(buffer, uint, 16, 8);
>>>> +PAD(buffer_pad, uint, 16, 8);
>>>> +VECT_VAR_DECL_INIT(buffer, uint, 32, 4);
>>>> +PAD(buffer_pad, uint, 32, 4);
>>>> +VECT_VAR_DECL_INIT(buffer, uint, 64, 2);
>>>> +PAD(buffer_pad, uint, 64, 2);
>>>> +VECT_VAR_DECL_INIT(buffer, poly, 8, 16);
>>>> +PAD(buffer_pad, poly, 8, 16);
>>>> +VECT_VAR_DECL_INIT(buffer, poly, 16, 8);
>>>> +PAD(buffer_pad, poly, 16, 8);
>>>> +VECT_VAR_DECL_INIT(buffer, float, 32, 4);
>>>> +PAD(buffer_pad, float, 32, 4);
>>>> diff -rNup '--exclude=.git'
>>>> gcc-fsf/gcc/testsuite/gcc.target/arm/neon-intrinsics/ref_vaba.c
>>>> gcc-fsf-neontests/gcc/testsuite/gcc.target/arm/neon-intrinsics/ref_vaba.c
>>>> --- gcc-fsf/gcc/testsuite/gcc.target/arm/neon-intrinsics/ref_vaba.c
>>>> 1970-01-01 01:00:00.000000000 +0100
>>>> +++
>>>> gcc-fsf-neontests/gcc/testsuite/gcc.target/arm/neon-intrinsics/ref_vaba.c
>>>> 2013-05-09 00:40:27.611642141 +0200
>>>> @@ -0,0 +1,145 @@
>>>> +/* { dg-do run } */
>>>> +/* { dg-require-effective-target arm_neon_hw } */
>>>> +/* { dg-add-options arm_neon } */
>>>> +
>>>> +#include <arm_neon.h>
>>>> +#include "arm-neon-ref.h"
>>>> +#include "compute_ref_data.h"
>>>> +
>>>> +VECT_VAR_DECL(expected,int,8,8) [] = { 0xf6, 0xf7, 0xf8, 0xf9,
>>>> +                                      0xfa, 0xfb, 0xfc, 0xfd };
>>>> +VECT_VAR_DECL(expected,int,16,4) [] = { 0x16, 0x17, 0x18, 0x19 };
>>>> +VECT_VAR_DECL(expected,int,32,2) [] = { 0x20, 0x21 };
>>>> +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 };
>>>> +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x53, 0x54, 0x55, 0x56,
>>>> +                                       0x57, 0x58, 0x59, 0x5a };
>>>> +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x907, 0x908, 0x909, 0x90a };
>>>> +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xffffffe7, 0xffffffe8 };
>>>> +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 };
>>>> +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33,
>>>> +                                       0x33, 0x33, 0x33, 0x33 };
>>>> +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333
>>>> };
>>>> +VECT_VAR_DECL(expected,float,32,2) [] = { 4.172325e-08, 4.172325e-08 };
>>>> +VECT_VAR_DECL(expected,int,8,16) [] = { 0x5e, 0x5f, 0x60, 0x61,
>>>> +                                       0x62, 0x63, 0x64, 0x65,
>>>> +                                       0x66, 0x67, 0x68, 0x69,
>>>> +                                       0x6a, 0x6b, 0x6c, 0x6d };
>>>> +VECT_VAR_DECL(expected,int,16,8) [] = { 0xb9c, 0xb9d, 0xb9e, 0xb9f,
>>>> +                                       0xba0, 0xba1, 0xba2, 0xba3 };
>>>> +VECT_VAR_DECL(expected,int,32,4) [] = { 0x26e0, 0x26e1, 0x26e2, 0x26e3 };
>>>> +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333,
>>>> +                                       0x3333333333333333 };
>>>> +VECT_VAR_DECL(expected,uint,8,16) [] = { 0xf8, 0xf9, 0xfa, 0xfb,
>>>> +                                        0xfc, 0xfd, 0xfe, 0xff,
>>>> +                                        0x0, 0x1, 0x2, 0x3,
>>>> +                                        0x4, 0x5, 0x6, 0x7 };
>>>> +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xfff9, 0xfffa, 0xfffb, 0xfffc,
>>>> +                                        0xfffd, 0xfffe, 0xffff, 0x0 };
>>>> +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xc, 0xd, 0xe, 0xf };
>>>> +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333,
>>>> +                                        0x3333333333333333 };
>>>> +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
>>>> +                                        0x33, 0x33, 0x33, 0x33,
>>>> +                                        0x33, 0x33, 0x33, 0x33,
>>>> +                                        0x33, 0x33, 0x33, 0x33 };
>>>> +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333,
>>>> +                                        0x3333, 0x3333, 0x3333, 0x3333 };
>>>> +VECT_VAR_DECL(expected,float,32,4) [] = { 4.172325e-08, 4.172325e-08,
>>>> +                                         4.172325e-08, 4.172325e-08 };
>>>> +
>>>> +#define TEST_MSG "VABA/VABAQ"
>>>> +void exec_vaba (void)
>>>> +{
>>>> +  /* Basic test: v4=vaba(v1,v2,v3), then store the result.  */
>>>> +#define TEST_VABA(Q, T1, T2, W, N)                                     \
>>>> +  VECT_VAR(vector_res, T1, W, N) =                                     \
>>>> +    vaba##Q##_##T2##W(VECT_VAR(vector1, T1, W, N),                     \
>>>> +                     VECT_VAR(vector2, T1, W, N),                      \
>>>> +                     VECT_VAR(vector3, T1, W, N));                     \
>>>> +  vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vector_res, T1,
>>>> W, N))
>>>> +
>>>> +#define DECL_VABA_VAR(VAR)                     \
>>>> +  DECL_VARIABLE(VAR, int, 8, 8);               \
>>>> +  DECL_VARIABLE(VAR, int, 16, 4);              \
>>>> +  DECL_VARIABLE(VAR, int, 32, 2);              \
>>>> +  DECL_VARIABLE(VAR, uint, 8, 8);              \
>>>> +  DECL_VARIABLE(VAR, uint, 16, 4);             \
>>>> +  DECL_VARIABLE(VAR, uint, 32, 2);             \
>>>> +  DECL_VARIABLE(VAR, int, 8, 16);              \
>>>> +  DECL_VARIABLE(VAR, int, 16, 8);              \
>>>> +  DECL_VARIABLE(VAR, int, 32, 4);              \
>>>> +  DECL_VARIABLE(VAR, uint, 8, 16);             \
>>>> +  DECL_VARIABLE(VAR, uint, 16, 8);             \
>>>> +  DECL_VARIABLE(VAR, uint, 32, 4)
>>>> +
>>>> +  DECL_VABA_VAR(vector1);
>>>> +  DECL_VABA_VAR(vector2);
>>>> +  DECL_VABA_VAR(vector3);
>>>> +  DECL_VABA_VAR(vector_res);
>>>> +
>>>> +  clean_results ();
>>>> +
>>>> +  /* Initialize input "vector" from "buffer".  */
>>>> +  TEST_VLOAD(vector1, buffer, , int, s, 8, 8);
>>>> +  TEST_VLOAD(vector1, buffer, , int, s, 16, 4);
>>>> +  TEST_VLOAD(vector1, buffer, , int, s, 32, 2);
>>>> +  TEST_VLOAD(vector1, buffer, , uint, u, 8, 8);
>>>> +  TEST_VLOAD(vector1, buffer, , uint, u, 16, 4);
>>>> +  TEST_VLOAD(vector1, buffer, , uint, u, 32, 2);
>>>> +  TEST_VLOAD(vector1, buffer, q, int, s, 8, 16);
>>>> +  TEST_VLOAD(vector1, buffer, q, int, s, 16, 8);
>>>> +  TEST_VLOAD(vector1, buffer, q, int, s, 32, 4);
>>>> +  TEST_VLOAD(vector1, buffer, q, uint, u, 8, 16);
>>>> +  TEST_VLOAD(vector1, buffer, q, uint, u, 16, 8);
>>>> +  TEST_VLOAD(vector1, buffer, q, uint, u, 32, 4);
>>>> +
>>>> +  /* Choose init value arbitrarily.  */
>>>> +  TEST_VDUP(vector2, , int, s, 8, 8, 1);
>>>> +  TEST_VDUP(vector2, , int, s, 16, 4, -13);
>>>> +  TEST_VDUP(vector2, , int, s, 32, 2, 8);
>>>> +  TEST_VDUP(vector2, , uint, u, 8, 8, 1);
>>>> +  TEST_VDUP(vector2, , uint, u, 16, 4, 13);
>>>> +  TEST_VDUP(vector2, , uint, u, 32, 2, 8);
>>>> +  TEST_VDUP(vector2, q, int, s, 8, 16, 10);
>>>> +  TEST_VDUP(vector2, q, int, s, 16, 8, -12);
>>>> +  TEST_VDUP(vector2, q, int, s, 32, 4, 32);
>>>> +  TEST_VDUP(vector2, q, uint, u, 8, 16, 10);
>>>> +  TEST_VDUP(vector2, q, uint, u, 16, 8, 12);
>>>> +  TEST_VDUP(vector2, q, uint, u, 32, 4, 32);
>>>> +
>>>> +  /* Choose init value arbitrarily.  */
>>>> +  TEST_VDUP(vector3, , int, s, 8, 8, -5);
>>>> +  TEST_VDUP(vector3, , int, s, 16, 4, 25);
>>>> +  TEST_VDUP(vector3, , int, s, 32, 2, -40);
>>>> +  TEST_VDUP(vector3, , uint, u, 8, 8, 100);
>>>> +  TEST_VDUP(vector3, , uint, u, 16, 4, 2340);
>>>> +  TEST_VDUP(vector3, , uint, u, 32, 2, 0xffffffff);
>>>> +  TEST_VDUP(vector3, q, int, s, 8, 16, -100);
>>>> +  TEST_VDUP(vector3, q, int, s, 16, 8, -3000);
>>>> +  TEST_VDUP(vector3, q, int, s, 32, 4, 10000);
>>>> +  TEST_VDUP(vector3, q, uint, u, 8, 16, 2);
>>>> +  TEST_VDUP(vector3, q, uint, u, 16, 8, 3);
>>>> +  TEST_VDUP(vector3, q, uint, u, 32, 4, 4);
>>>> +
>>>> +  /* Execute the tests.  */
>>>> +  TEST_VABA(, int, s, 8, 8);
>>>> +  TEST_VABA(, int, s, 16, 4);
>>>> +  TEST_VABA(, int, s, 32, 2);
>>>> +  TEST_VABA(, uint, u, 8, 8);
>>>> +  TEST_VABA(, uint, u, 16, 4);
>>>> +  TEST_VABA(, uint, u, 32, 2);
>>>> +  TEST_VABA(q, int, s, 8, 16);
>>>> +  TEST_VABA(q, int, s, 16, 8);
>>>> +  TEST_VABA(q, int, s, 32, 4);
>>>> +  TEST_VABA(q, uint, u, 8, 16);
>>>> +  TEST_VABA(q, uint, u, 16, 8);
>>>> +  TEST_VABA(q, uint, u, 32, 4);
>>>> +
>>>> +  check_results (TEST_MSG, "");
>>>> +}
>>>> +
>>>> +int main (void)
>>>> +{
>>>> +  exec_vaba ();
>>>> +  return 0;
>>>> +}
>>>> diff -rNup '--exclude=.git'
>>>> gcc-fsf/gcc/testsuite/gcc.target/arm/neon-intrinsics/ref_vld1.c
>>>> gcc-fsf-neontests/gcc/testsuite/gcc.target/arm/neon-intrinsics/ref_vld1.c
>>>> --- gcc-fsf/gcc/testsuite/gcc.target/arm/neon-intrinsics/ref_vld1.c
>>>> 1970-01-01 01:00:00.000000000 +0100
>>>> +++
>>>> gcc-fsf-neontests/gcc/testsuite/gcc.target/arm/neon-intrinsics/ref_vld1.c
>>>> 2013-05-09 00:39:00.351644429 +0200
>>>> @@ -0,0 +1,77 @@
>>>> +/* { dg-do run } */
>>>> +/* { dg-require-effective-target arm_neon_hw } */
>>>> +/* { dg-add-options arm_neon } */
>>>> +
>>>> +#include <arm_neon.h>
>>>> +#include "arm-neon-ref.h"
>>>> +#include "compute_ref_data.h"
>>>> +
>>>> +VECT_VAR_DECL(expected,int,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
>>>> +                                      0xf4, 0xf5, 0xf6, 0xf7 };
>>>> +VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 };
>>>> +VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffff0, 0xfffffff1 };
>>>> +VECT_VAR_DECL(expected,int,64,1) [] = { 0xfffffffffffffff0 };
>>>> +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
>>>> +                                       0xf4, 0xf5, 0xf6, 0xf7 };
>>>> +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3
>>>> };
>>>> +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffffff0, 0xfffffff1 };
>>>> +VECT_VAR_DECL(expected,uint,64,1) [] = { 0xfffffffffffffff0 };
>>>> +VECT_VAR_DECL(expected,poly,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
>>>> +                                       0xf4, 0xf5, 0xf6, 0xf7 };
>>>> +VECT_VAR_DECL(expected,poly,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3
>>>> };
>>>> +VECT_VAR_DECL(expected,float,32,2) [] = { -16, -15 };
>>>> +VECT_VAR_DECL(expected,int,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
>>>> +                                       0xf4, 0xf5, 0xf6, 0xf7,
>>>> +                                       0xf8, 0xf9, 0xfa, 0xfb,
>>>> +                                       0xfc, 0xfd, 0xfe, 0xff };
>>>> +VECT_VAR_DECL(expected,int,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3,
>>>> +                                       0xfff4, 0xfff5, 0xfff6, 0xfff7 };
>>>> +VECT_VAR_DECL(expected,int,32,4) [] = { 0xfffffff0, 0xfffffff1,
>>>> +                                       0xfffffff2, 0xfffffff3 };
>>>> +VECT_VAR_DECL(expected,int,64,2) [] = { 0xfffffffffffffff0,
>>>> +                                       0xfffffffffffffff1 };
>>>> +VECT_VAR_DECL(expected,uint,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
>>>> +                                        0xf4, 0xf5, 0xf6, 0xf7,
>>>> +                                        0xf8, 0xf9, 0xfa, 0xfb,
>>>> +                                        0xfc, 0xfd, 0xfe, 0xff };
>>>> +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xfff0, 0xfff1, 0xfff2,
>>>> +                                        0xfff3, 0xfff4, 0xfff5,
>>>> +                                        0xfff6, 0xfff7 };
>>>> +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xfffffff0, 0xfffffff1,
>>>> +                                        0xfffffff2, 0xfffffff3 };
>>>> +VECT_VAR_DECL(expected,uint,64,2) [] = { 0xfffffffffffffff0,
>>>> +                                        0xfffffffffffffff1 };
>>>> +VECT_VAR_DECL(expected,poly,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
>>>> +                                        0xf4, 0xf5, 0xf6, 0xf7,
>>>> +                                        0xf8, 0xf9, 0xfa, 0xfb,
>>>> +                                        0xfc, 0xfd, 0xfe, 0xff };
>>>> +VECT_VAR_DECL(expected,poly,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3,
>>>> +                                        0xfff4, 0xfff5, 0xfff6, 0xfff7 };
>>>> +VECT_VAR_DECL(expected,float,32,4) [] = { -16, -15, -14, -13 };
>>>> +
>>>> +#define TEST_MSG "VLD1/VLD1Q"
>>>> +void exec_vld1 (void)
>>>> +{
>>>> +  /* Basic test vec=vld1(buffer); then store vec: vst1(result, vector).
>>>> */
>>>> +  /* This test actually tests vld1 and vst1 at the same time.  */
>>>> +#define TEST_VLD1(VAR, BUF, Q, T1, T2, W, N)                           \
>>>> +  VECT_VAR(VAR, T1, W, N) = vld1##Q##_##T2##W(VECT_VAR(BUF, T1, W, N)); \
>>>> +  vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(VAR, T1, W, N))
>>>> +
>>>> +  DECL_VARIABLE_ALL_VARIANTS(vector);
>>>> +
>>>> +  clean_results ();
>>>> +
>>>> +  TEST_MACRO_ALL_VARIANTS_2_5(TEST_VLD1, vector, buffer);
>>>> +
>>>> +  TEST_VLD1(vector, buffer, , float, f, 32, 2);
>>>> +  TEST_VLD1(vector, buffer, q, float, f, 32, 4);
>>>> +
>>>> +  check_results (TEST_MSG, "");
>>>> +}
>>>> +
>>>> +int main (void)
>>>> +{
>>>> +  exec_vld1 ();
>>>> +  return 0;
>>>> +}
>>>>
>>>
>>>


--
Ramana Radhakrishnan
Principal Engineer
ARM Ltd.
Direct - +44 1223 400495

-- IMPORTANT NOTICE: The contents of this email and any attachments are confidential and may also be privileged. If you are not the intended recipient, please notify the sender immediately and do not disclose the contents to any other person, use it for any purpose, or store or copy the information in any medium.  Thank you.

ARM Limited, Registered office 110 Fulbourn Road, Cambridge CB1 9NJ, Registered in England & Wales, Company No:  2557590
ARM Holdings plc, Registered office 110 Fulbourn Road, Cambridge CB1 9NJ, Registered in England & Wales, Company No:  2548782
Christophe Lyon April 15, 2014, 5:38 p.m. UTC | #2
On 15 April 2014 16:18, Ramana Radhakrishnan
<ramana.radhakrishnan@arm.com> wrote:
> On 04/14/14 23:16, Christophe Lyon wrote:
>>
>> Hi Ramana,
>>
>> Here is an updated version of my proposal to include tests for Neon
>> intrinsics.
>>
>> With respect to my previous post, I have made a few changes:
>> - renamed the test files, removing the "ref_" prefix.
>> - removed the TEST_ prefix on some initialization macros
>> - use the c-torture framework
>>
>> I have run it successfully on the following configurations:
>>      aarch64-none-linux-gnu
>>      aarch64-none-elf
>>      aarch64_be-none-elf
>>      arm-none-linux-gnueabihf
>>      armeb-none-linux-gnueabihf
>>      arm-none-linux-gnueabi
>>      armeb-none-linux-gnueabi
>>      arm-none-eabi
>> using qemu for most of them and the Foundation Model for aarch64*elf
>
> I had a brief look at your patch: how does this run for AArch64 when
> you have such options in the testsuite?
>
>
> +++ b/gcc/testsuite/gcc.target/arm/neon-intrinsics/vaba.c
>
> @@ -0,0 +1,145 @@
> +/* { dg-do run } */
> +/* { dg-require-effective-target arm_neon_hw { target { "arm* } } } */
>
> +/* { dg-add-options arm_neon } */
> +
>

Good catch... in fact these lines are ignored when using c-torture, I
just forgot to clean them up.

> Additionally a README would help in terms of how one should add new tests.
OK

>> Any comments?
>>
>> Thanks,
>>
>> Christophe.
>>
>>
>> On 29 October 2013 19:09, Christophe Lyon <christophe.lyon@linaro.org>
>> wrote:
>>>
>>> On 29 October 2013 03:24, Ramana Radhakrishnan <ramrad01@arm.com> wrote:
>>>>
>>>> On 10/09/13 23:16, Christophe Lyon wrote:
>>>
>>>
>>>> Irrespective of our earlier conversations on this now I'm actually
>>>> wondering
>>>> if instead of doing this and integrating this in the GCC source base it
>>>> maybe easier to write a harness to test this cross on qemu or natively.
>>>> Additionally setting up an auto-tester to do this might be a more
>>>> productive
>>>> use of time rather than manually dejagnuizing this which appears to be a
>>>> tedious and slow process.
>>>
>>>
>>> This would be easy to set up, since the Makefile on gitorious is
>>> already targeting qemu. I used it occasionally on boards with
>>> minimal changes.
>>> This just means we'd have to agree on how to set up such an
>>> auto-tester, where do we send the results to, etc...
>
> If you are sufficiently motivated to do the transition, I'm not opposed
> to putting it into the testsuite as a basic regression testing framework
> for neon intrinsics.
>
I would really like to have all this converge to a good solution, so
yes I want to convert the whole testsuite to dejagnu.
I just want us to agree on the format before proceeding with the
other tests; that's why I've just posted a subset, hopefully
representative enough but easier to review.

> I'll try and play with this in some more detail with a couple of patches
> I'm doing in the area of neon intrinsics so it may be useful to cross check.

OK let me know if you have further comments.

As of now I understand that you are OK with this patch, modulo the
removal of the 3 dg-* lines, correct?


Thanks,

Christophe.

>
> regards
> Ramana
>
>
>>>
>>>>> I'd like your feedback before continuing, as there are a lot more
>>>>> files to come.
>>>>>
>>>>> I have made some cleanup to help review, but the two .h files will
>>>>> need to grow as more intrinsics will be added (see the original ones).
>>>>
>>>>
>>>> Which one should I compare this with in terms of the original file ?
>>>
>>>
>>> I have kept the same file names.
>>>
>>>
>>>>> I'd like to keep the modifications at a minimal level, to save my time
>>>>> when adapting each test (there are currently 145 test files, so 143
>>>>> left:-).
>>>>
>>>>
>>>>
>>>> On to the patch itself.
>>>>
>>>> The prefix TEST_ seems a bit misleading in that it suggests this is
>>>> testing
>>>> something when in reality this is initializing stuff.
>>>
>>> In fact, TEST_XXXX executes the XXXX intrinsics, and copies the
>>> results to memory when relevant. But I can easily change TEST_ to
>>> something else.
>>>
>>> So in the sample I posted:
>>>
>>> TEST_VABA: VAR=vaba(....); vst1(BUFFER,VAR)
>>> TEST_VLD1: VAR=vld1(....); vst1(BUFFER, VAR)
>>>
>>> VDUP is special in that it is a helper for other tests:
>>> TEST_VDUP: VAR1=vdup(VAR2,xxxx)
>>> and similarly for TEST_VLOAD and TEST_VSET_LANE
>>>
>>>>> +# Exit immediately if this isn't an ARM target.
>>>>> +if ![istarget arm*-*-*] then {
>>>>> +  return
>>>>> +}
>>>>
>>>>
>>>>
>>>> Also for aarch64*-*-* as all these intrinsics are compatible with the
>>>> aarch64 port. I would also prefer that this be tortured over multiple
>>>> optimization levels as many times we find issues with different
>>>> optimization
>>>> levels.
>>>
>>>
>>> OK, this sounds easy to do, and I agree. I preferred to post a simple
>>> version first.
>>> And given you told me about your plans to factorize arm and aarch64
>>> tests, I thought it was better to start with a simple version I knew
>>> was working.
>>>
>>>> More later I need to get back to something else and I need to play more
>>>> with
>>>> your original testsuite - but I'd like some discussion around some of
>>>> these
>>>> points anyway.
>>>>
>>>> Ramana
>>>
>>>
>>> OK thanks for the feedback. If we decide to go with auto-testers
>>> instead, the discussion will probably be shorter.
>>>
>>> Christophe
>>>
>>>>
>>>>> +
>>>>> +# Load support procs.
>>>>> +load_lib gcc-dg.exp
>>>>> +
>>>>> +# Initialize `dg'.
>>>>> +dg-init
>>>>> +
>>>>> +# Main loop.
>>>>> +gcc-dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/*.\[cCS\]]] \
>>>>> +       ""
>>>>> +
>>>>> +# All done.
>>>>> +dg-finish
>>>>> diff -rNup '--exclude=.git'
>>>>> gcc-fsf/gcc/testsuite/gcc.target/arm/neon-intrinsics/arm-neon-ref.h
>>>>>
>>>>> gcc-fsf-neontests/gcc/testsuite/gcc.target/arm/neon-intrinsics/arm-neon-ref.h
>>>>> --- gcc-fsf/gcc/testsuite/gcc.target/arm/neon-intrinsics/arm-neon-ref.h
>>>>> 1970-01-01 01:00:00.000000000 +0100
>>>>> +++
>>>>>
>>>>> gcc-fsf-neontests/gcc/testsuite/gcc.target/arm/neon-intrinsics/arm-neon-ref.h
>>>>> 2013-05-09 00:48:59.395628726 +0200
>>>>> @@ -0,0 +1,349 @@
>>>>> +#ifndef _ARM_NEON_REF_H_
>>>>> +#define _ARM_NEON_REF_H_
>>>>> +
>>>>> +#include <stdio.h>
>>>>> +#include <inttypes.h>
>>>>> +#include <string.h>
>>>>> +#include <stdlib.h>
>>>>> +
>>>>> +#define xSTR(X) #X
>>>>> +#define STR(X) xSTR(X)
>>>>> +
>>>>> +#define xNAME1(V,T) V ## _ ##  T
>>>>> +#define xNAME(V,T) xNAME1(V,T)
>>>>> +
>>>>> +#define VAR(V,T,W) xNAME(V,T##W)
>>>>> +#define VAR_DECL(V, T, W) T##W##_t VAR(V,T,W)
>>>>> +
>>>>> +#define VECT_NAME(T, W, N) T##W##x##N
>>>>> +#define VECT_ARRAY_NAME(T, W, N, L) T##W##x##N##x##L
>>>>> +#define VECT_TYPE(T, W, N) xNAME(VECT_NAME(T,W,N),t)
>>>>> +#define VECT_ARRAY_TYPE(T, W, N, L) xNAME(VECT_ARRAY_NAME(T,W,N,L),t)
>>>>> +
>>>>> +#define VECT_VAR(V,T,W,N) xNAME(V,VECT_NAME(T,W,N))
>>>>> +#define VECT_VAR_DECL(V, T, W, N) T##W##_t VECT_VAR(V,T,W,N)
>>>>> +
>>>>> +/* Array declarations.  */
>>>>> +#define ARRAY(V, T, W, N) VECT_VAR_DECL(V,T,W,N)[N]
>>>>> +
>>>>> +/* Check results vs expected values.  */
>>>>> +#define CHECK(MSG,T,W,N,FMT)
>>>>> \
>>>>> +  for(i=0; i<N ; i++)
>>>>> \
>>>>> +    {
>>>>> \
>>>>> +      if (VECT_VAR(result, T, W, N)[i] != VECT_VAR(expected, T, W,
>>>>> N)[i])
>>>>> { \
>>>>> +       fprintf(stderr,
>>>>> \
>>>>> +               "ERROR in %s at type %s index %d: 0x%" FMT " != 0x%"
>>>>> \
>>>>> +               FMT " (expected)\n",
>>>>> \
>>>>> +               MSG, STR(VECT_NAME(T, W, N)), i,
>>>>> \
>>>>> +               VECT_VAR(result, T, W, N)[i],
>>>>> \
>>>>> +               VECT_VAR(expected, T, W, N)[i]);
>>>>> \
>>>>> +       abort();
>>>>> \
>>>>> +      }
>>>>> \
>>>>> +    }
>>>>> +
>>>>> +/* Floating-point variant.  */
>>>>> +#define CHECK_FP(MSG,T,W,N,FMT)
>>>>> \
>>>>> +  for(i=0; i<N ; i++)
>>>>> \
>>>>> +    {
>>>>> \
>>>>> +      union fp_operand {
>>>>> \
>>>>> +       uint##W##_t i;
>>>>> \
>>>>> +       float##W##_t f;
>>>>> \
>>>>> +      } tmp_res, tmp_exp;
>>>>> \
>>>>> +      tmp_res.f = VECT_VAR(result, T, W, N)[i];
>>>>> \
>>>>> +      tmp_exp.f = VECT_VAR(expected, T, W, N)[i];
>>>>> \
>>>>> +      if (tmp_res.i != tmp_exp.i) {
>>>>> \
>>>>> +       fprintf(stderr,
>>>>> \
>>>>> +               "ERROR in %s at type %s index %d: 0x%" FMT " != 0x%"
>>>>> \
>>>>> +               FMT " (expected)\n",
>>>>> \
>>>>> +               MSG, STR(VECT_NAME(T, W, N)), i,
>>>>> \
>>>>> +               tmp_res.i,
>>>>> \
>>>>> +               tmp_exp.i);
>>>>> \
>>>>> +      abort();
>>>>> \
>>>>> +      }
>>>>> \
>>>>> +    }
>>>>> +
>>>>> +/* Clean buffer with a non-zero pattern.  */
>>>>> +#define CLEAN_PATTERN_8  0x33
>>>>> +
>>>>> +#define CLEAN(VAR,T,W,N)
>>>>> \
>>>>> +  memset(VECT_VAR(VAR, T, W, N),
>>>>> \
>>>>> +        CLEAN_PATTERN_8,
>>>>> \
>>>>> +        sizeof(VECT_VAR(VAR, T, W, N)));
>>>>> +
>>>>> +/* Input buffers, one of each size.  */
>>>>> +extern ARRAY(buffer, int, 8, 8);
>>>>> +extern ARRAY(buffer, int, 16, 4);
>>>>> +extern ARRAY(buffer, int, 32, 2);
>>>>> +extern ARRAY(buffer, int, 64, 1);
>>>>> +extern ARRAY(buffer, uint, 8, 8);
>>>>> +extern ARRAY(buffer, uint, 16, 4);
>>>>> +extern ARRAY(buffer, uint, 32, 2);
>>>>> +extern ARRAY(buffer, uint, 64, 1);
>>>>> +extern ARRAY(buffer, poly, 8, 8);
>>>>> +extern ARRAY(buffer, poly, 16, 4);
>>>>> +extern ARRAY(buffer, float, 32, 2);
>>>>> +extern ARRAY(buffer, int, 8, 16);
>>>>> +extern ARRAY(buffer, int, 16, 8);
>>>>> +extern ARRAY(buffer, int, 32, 4);
>>>>> +extern ARRAY(buffer, int, 64, 2);
>>>>> +extern ARRAY(buffer, uint, 8, 16);
>>>>> +extern ARRAY(buffer, uint, 16, 8);
>>>>> +extern ARRAY(buffer, uint, 32, 4);
>>>>> +extern ARRAY(buffer, uint, 64, 2);
>>>>> +extern ARRAY(buffer, poly, 8, 16);
>>>>> +extern ARRAY(buffer, poly, 16, 8);
>>>>> +extern ARRAY(buffer, float, 32, 4);
>>>>> +
>>>>> +/* Output buffers, one of each size.  */
>>>>> +static ARRAY(result, int, 8, 8);
>>>>> +static ARRAY(result, int, 16, 4);
>>>>> +static ARRAY(result, int, 32, 2);
>>>>> +static ARRAY(result, int, 64, 1);
>>>>> +static ARRAY(result, uint, 8, 8);
>>>>> +static ARRAY(result, uint, 16, 4);
>>>>> +static ARRAY(result, uint, 32, 2);
>>>>> +static ARRAY(result, uint, 64, 1);
>>>>> +static ARRAY(result, poly, 8, 8);
>>>>> +static ARRAY(result, poly, 16, 4);
>>>>> +static ARRAY(result, float, 32, 2);
>>>>> +static ARRAY(result, int, 8, 16);
>>>>> +static ARRAY(result, int, 16, 8);
>>>>> +static ARRAY(result, int, 32, 4);
>>>>> +static ARRAY(result, int, 64, 2);
>>>>> +static ARRAY(result, uint, 8, 16);
>>>>> +static ARRAY(result, uint, 16, 8);
>>>>> +static ARRAY(result, uint, 32, 4);
>>>>> +static ARRAY(result, uint, 64, 2);
>>>>> +static ARRAY(result, poly, 8, 16);
>>>>> +static ARRAY(result, poly, 16, 8);
>>>>> +static ARRAY(result, float, 32, 4);
>>>>> +
>>>>> +/* Expected results, one of each size.  */
>>>>> +extern ARRAY(expected, int, 8, 8);
>>>>> +extern ARRAY(expected, int, 16, 4);
>>>>> +extern ARRAY(expected, int, 32, 2);
>>>>> +extern ARRAY(expected, int, 64, 1);
>>>>> +extern ARRAY(expected, uint, 8, 8);
>>>>> +extern ARRAY(expected, uint, 16, 4);
>>>>> +extern ARRAY(expected, uint, 32, 2);
>>>>> +extern ARRAY(expected, uint, 64, 1);
>>>>> +extern ARRAY(expected, poly, 8, 8);
>>>>> +extern ARRAY(expected, poly, 16, 4);
>>>>> +extern ARRAY(expected, float, 32, 2);
>>>>> +extern ARRAY(expected, int, 8, 16);
>>>>> +extern ARRAY(expected, int, 16, 8);
>>>>> +extern ARRAY(expected, int, 32, 4);
>>>>> +extern ARRAY(expected, int, 64, 2);
>>>>> +extern ARRAY(expected, uint, 8, 16);
>>>>> +extern ARRAY(expected, uint, 16, 8);
>>>>> +extern ARRAY(expected, uint, 32, 4);
>>>>> +extern ARRAY(expected, uint, 64, 2);
>>>>> +extern ARRAY(expected, poly, 8, 16);
>>>>> +extern ARRAY(expected, poly, 16, 8);
>>>>> +extern ARRAY(expected, float, 32, 4);
>>>>> +
>>>>> +/* Check results.  */
>>>>> +static void check_results (const char*test_name, const char*  comment)
>>>>> +{
>>>>> +  int i;
>>>>> +
>>>>> +  CHECK(test_name, int, 8, 8, PRIx8);
>>>>> +  CHECK(test_name, int, 16, 4, PRIx16);
>>>>> +  CHECK(test_name, int, 32, 2, PRIx32);
>>>>> +  CHECK(test_name, int, 64, 1, PRIx64);
>>>>> +  CHECK(test_name, uint, 8, 8, PRIx8);
>>>>> +  CHECK(test_name, uint, 16, 4, PRIx16);
>>>>> +  CHECK(test_name, uint, 32, 2, PRIx32);
>>>>> +  CHECK(test_name, uint, 64, 1, PRIx64);
>>>>> +  CHECK(test_name, poly, 8, 8, PRIx8);
>>>>> +  CHECK(test_name, poly, 16, 4, PRIx16);
>>>>> +  CHECK_FP(test_name, float, 32, 2, PRIx32);
>>>>> +
>>>>> +  CHECK(test_name, int, 8, 16, PRIx8);
>>>>> +  CHECK(test_name, int, 16, 8, PRIx16);
>>>>> +  CHECK(test_name, int, 32, 4, PRIx32);
>>>>> +  CHECK(test_name, int, 64, 2, PRIx64);
>>>>> +  CHECK(test_name, uint, 8, 16, PRIx8);
>>>>> +  CHECK(test_name, uint, 16, 8, PRIx16);
>>>>> +  CHECK(test_name, uint, 32, 4, PRIx32);
>>>>> +  CHECK(test_name, uint, 64, 2, PRIx64);
>>>>> +  CHECK(test_name, poly, 8, 16, PRIx8);
>>>>> +  CHECK(test_name, poly, 16, 8, PRIx16);
>>>>> +  CHECK_FP(test_name, float, 32, 4, PRIx32);
>>>>> +}
>>>>> +
>>>>> +/* Clean output buffers before execution.  */
>>>>> +static void clean_results (void)
>>>>> +{
>>>>> +  CLEAN(result, int, 8, 8);
>>>>> +  CLEAN(result, int, 16, 4);
>>>>> +  CLEAN(result, int, 32, 2);
>>>>> +  CLEAN(result, int, 64, 1);
>>>>> +  CLEAN(result, uint, 8, 8);
>>>>> +  CLEAN(result, uint, 16, 4);
>>>>> +  CLEAN(result, uint, 32, 2);
>>>>> +  CLEAN(result, uint, 64, 1);
>>>>> +  CLEAN(result, poly, 8, 8);
>>>>> +  CLEAN(result, poly, 16, 4);
>>>>> +  CLEAN(result, float, 32, 2);
>>>>> +
>>>>> +  CLEAN(result, int, 8, 16);
>>>>> +  CLEAN(result, int, 16, 8);
>>>>> +  CLEAN(result, int, 32, 4);
>>>>> +  CLEAN(result, int, 64, 2);
>>>>> +  CLEAN(result, uint, 8, 16);
>>>>> +  CLEAN(result, uint, 16, 8);
>>>>> +  CLEAN(result, uint, 32, 4);
>>>>> +  CLEAN(result, uint, 64, 2);
>>>>> +  CLEAN(result, poly, 8, 16);
>>>>> +  CLEAN(result, poly, 16, 8);
>>>>> +  CLEAN(result, float, 32, 4);
>>>>> +}
>>>>> +
>>>>> +
>>>>> +/* Helpers to declare variables of various types.   */
>>>>> +#define DECL_VARIABLE(VAR, T1, W, N)           \
>>>>> +  VECT_TYPE(T1, W, N) VECT_VAR(VAR, T1, W, N)
>>>>> +
>>>>> +#define DECL_VARIABLE_64BITS_SIGNED_VARIANTS(VAR)      \
>>>>> +  DECL_VARIABLE(VAR, int, 8, 8);                       \
>>>>> +  DECL_VARIABLE(VAR, int, 16, 4);                      \
>>>>> +  DECL_VARIABLE(VAR, int, 32, 2);                      \
>>>>> +  DECL_VARIABLE(VAR, int, 64, 1)
>>>>> +
>>>>> +#define DECL_VARIABLE_64BITS_UNSIGNED_VARIANTS(VAR)    \
>>>>> +  DECL_VARIABLE(VAR, uint, 8, 8);                      \
>>>>> +  DECL_VARIABLE(VAR, uint, 16, 4);                     \
>>>>> +  DECL_VARIABLE(VAR, uint, 32, 2);                     \
>>>>> +  DECL_VARIABLE(VAR, uint, 64, 1)
>>>>> +
>>>>> +#define DECL_VARIABLE_128BITS_SIGNED_VARIANTS(VAR)     \
>>>>> +  DECL_VARIABLE(VAR, int, 8, 16);                      \
>>>>> +  DECL_VARIABLE(VAR, int, 16, 8);                      \
>>>>> +  DECL_VARIABLE(VAR, int, 32, 4);                      \
>>>>> +  DECL_VARIABLE(VAR, int, 64, 2)
>>>>> +
>>>>> +#define DECL_VARIABLE_128BITS_UNSIGNED_VARIANTS(VAR)   \
>>>>> +  DECL_VARIABLE(VAR, uint, 8, 16);                     \
>>>>> +  DECL_VARIABLE(VAR, uint, 16, 8);                     \
>>>>> +  DECL_VARIABLE(VAR, uint, 32, 4);                     \
>>>>> +  DECL_VARIABLE(VAR, uint, 64, 2)
>>>>> +
>>>>> +#define DECL_VARIABLE_64BITS_VARIANTS(VAR)     \
>>>>> +  DECL_VARIABLE_64BITS_SIGNED_VARIANTS(VAR);   \
>>>>> +  DECL_VARIABLE_64BITS_UNSIGNED_VARIANTS(VAR); \
>>>>> +  DECL_VARIABLE(VAR, poly, 8, 8);              \
>>>>> +  DECL_VARIABLE(VAR, poly, 16, 4);             \
>>>>> +  DECL_VARIABLE(VAR, float, 32, 2)
>>>>> +
>>>>> +#define DECL_VARIABLE_128BITS_VARIANTS(VAR)    \
>>>>> +  DECL_VARIABLE_128BITS_SIGNED_VARIANTS(VAR);  \
>>>>> +  DECL_VARIABLE_128BITS_UNSIGNED_VARIANTS(VAR);        \
>>>>> +  DECL_VARIABLE(VAR, poly, 8, 16);             \
>>>>> +  DECL_VARIABLE(VAR, poly, 16, 8);             \
>>>>> +  DECL_VARIABLE(VAR, float, 32, 4)
>>>>> +
>>>>> +#define DECL_VARIABLE_ALL_VARIANTS(VAR)                \
>>>>> +  DECL_VARIABLE_64BITS_VARIANTS(VAR);          \
>>>>> +  DECL_VARIABLE_128BITS_VARIANTS(VAR)
>>>>> +
>>>>> +/* Helpers to initialize vectors.  */
>>>>> +#define TEST_VDUP(VAR, Q, T1, T2, W, N, V)             \
>>>>> +  VECT_VAR(VAR, T1, W, N) = vdup##Q##_n_##T2##W(V)
>>>>> +
>>>>> +#define TEST_VSET_LANE(VAR, Q, T1, T2, W, N, L, V)
>>>>> \
>>>>> +  VECT_VAR(VAR, T1, W, N) = vset##Q##_lane_##T2##W(V,
>>>>> \
>>>>> +                                                  VECT_VAR(VAR, T1, W,
>>>>> N), \
>>>>> +                                                  L)
>>>>> +
>>>>> +/* We need to load initial values first, so rely on VLD1.  */
>>>>> +#define TEST_VLOAD(VAR, BUF, Q, T1, T2, W, N)
>>>>> \
>>>>> +  VECT_VAR(VAR, T1, W, N) = vld1##Q##_##T2##W(VECT_VAR(BUF, T1, W, N))
>>>>> +
>>>>> +/* Helpers for macros with 1 constant and 5 variable arguments.  */
>>>>> +#define TEST_MACRO_64BITS_SIGNED_VARIANTS_1_5(MACRO, VAR)      \
>>>>> +  MACRO(VAR, , int, s, 8, 8);                                  \
>>>>> +  MACRO(VAR, , int, s, 16, 4);                                 \
>>>>> +  MACRO(VAR, , int, s, 32, 2);                                 \
>>>>> +  MACRO(VAR, , int, s, 64, 1)
>>>>> +
>>>>> +#define TEST_MACRO_64BITS_UNSIGNED_VARIANTS_1_5(MACRO, VAR)    \
>>>>> +  MACRO(VAR, , uint, u, 8, 8);                                 \
>>>>> +  MACRO(VAR, , uint, u, 16, 4);
>>>>> \
>>>>> +  MACRO(VAR, , uint, u, 32, 2);
>>>>> \
>>>>> +  MACRO(VAR, , uint, u, 64, 1)
>>>>> +
>>>>> +#define TEST_MACRO_128BITS_SIGNED_VARIANTS_1_5(MACRO, VAR)     \
>>>>> +  MACRO(VAR, q, int, s, 8, 16);
>>>>> \
>>>>> +  MACRO(VAR, q, int, s, 16, 8);
>>>>> \
>>>>> +  MACRO(VAR, q, int, s, 32, 4);
>>>>> \
>>>>> +  MACRO(VAR, q, int, s, 64, 2)
>>>>> +
>>>>> +#define TEST_MACRO_128BITS_UNSIGNED_VARIANTS_1_5(MACRO,VAR)    \
>>>>> +  MACRO(VAR, q, uint, u, 8, 16);                               \
>>>>> +  MACRO(VAR, q, uint, u, 16, 8);                               \
>>>>> +  MACRO(VAR, q, uint, u, 32, 4);                               \
>>>>> +  MACRO(VAR, q, uint, u, 64, 2)
>>>>> +
>>>>> +#define TEST_MACRO_64BITS_VARIANTS_1_5(MACRO, VAR)     \
>>>>> +  TEST_MACRO_64BITS_SIGNED_VARIANTS_1_5(MACRO, VAR);   \
>>>>> +  TEST_MACRO_64BITS_UNSIGNED_VARIANTS_1_5(MACRO, VAR)
>>>>> +
>>>>> +#define TEST_MACRO_128BITS_VARIANTS_1_5(MACRO, VAR)    \
>>>>> +  TEST_MACRO_128BITS_SIGNED_VARIANTS_1_5(MACRO, VAR);  \
>>>>> +  TEST_MACRO_128BITS_UNSIGNED_VARIANTS_1_5(MACRO, VAR)
>>>>> +
>>>>> +#define TEST_MACRO_ALL_VARIANTS_1_5(MACRO, VAR)        \
>>>>> +  TEST_MACRO_64BITS_VARIANTS_1_5(MACRO, VAR);  \
>>>>> +  TEST_MACRO_128BITS_VARIANTS_1_5(MACRO, VAR)
>>>>> +
>>>>> +#define TEST_MACRO_SIGNED_VARIANTS_1_5(MACRO, VAR)     \
>>>>> +  TEST_MACRO_64BITS_SIGNED_VARIANTS_1_5(MACRO, VAR);   \
>>>>> +  TEST_MACRO_128BITS_SIGNED_VARIANTS_1_5(MACRO, VAR)
>>>>> +
>>>>> +/* Helpers for macros with 2 constant and 5 variable arguments.  */
>>>>> +#define TEST_MACRO_64BITS_SIGNED_VARIANTS_2_5(MACRO, VAR1, VAR2)
>>>>> \
>>>>> +  MACRO(VAR1, VAR2, , int, s, 8, 8);
>>>>> \
>>>>> +  MACRO(VAR1, VAR2, , int, s, 16, 4);
>>>>> \
>>>>> +  MACRO(VAR1, VAR2, , int, s, 32, 2);
>>>>> \
>>>>> +  MACRO(VAR1, VAR2 , , int, s, 64, 1)
>>>>> +
>>>>> +#define TEST_MACRO_64BITS_UNSIGNED_VARIANTS_2_5(MACRO, VAR1, VAR2)
>>>>> \
>>>>> +  MACRO(VAR1, VAR2, , uint, u, 8, 8);
>>>>> \
>>>>> +  MACRO(VAR1, VAR2, , uint, u, 16, 4);
>>>>> \
>>>>> +  MACRO(VAR1, VAR2, , uint, u, 32, 2);
>>>>> \
>>>>> +  MACRO(VAR1, VAR2, , uint, u, 64, 1)
>>>>> +
>>>>> +#define TEST_MACRO_128BITS_SIGNED_VARIANTS_2_5(MACRO, VAR1, VAR2)
>>>>> \
>>>>> +  MACRO(VAR1, VAR2, q, int, s, 8, 16);
>>>>> \
>>>>> +  MACRO(VAR1, VAR2, q, int, s, 16, 8);
>>>>> \
>>>>> +  MACRO(VAR1, VAR2, q, int, s, 32, 4);
>>>>> \
>>>>> +  MACRO(VAR1, VAR2, q, int, s, 64, 2)
>>>>> +
>>>>> +#define TEST_MACRO_128BITS_UNSIGNED_VARIANTS_2_5(MACRO, VAR1, VAR2)
>>>>> \
>>>>> +  MACRO(VAR1, VAR2, q, uint, u, 8, 16);
>>>>> \
>>>>> +  MACRO(VAR1, VAR2, q, uint, u, 16, 8);
>>>>> \
>>>>> +  MACRO(VAR1, VAR2, q, uint, u, 32, 4);
>>>>> \
>>>>> +  MACRO(VAR1, VAR2, q, uint, u, 64, 2)
>>>>> +
>>>>> +#define TEST_MACRO_64BITS_VARIANTS_2_5(MACRO, VAR1, VAR2)      \
>>>>> +  TEST_MACRO_64BITS_SIGNED_VARIANTS_2_5(MACRO, VAR1, VAR2);    \
>>>>> +  TEST_MACRO_64BITS_UNSIGNED_VARIANTS_2_5(MACRO, VAR1, VAR2);  \
>>>>> +  MACRO(VAR1, VAR2, , poly, p, 8, 8);                          \
>>>>> +  MACRO(VAR1, VAR2, , poly, p, 16, 4)
>>>>> +
>>>>> +#define TEST_MACRO_128BITS_VARIANTS_2_5(MACRO, VAR1, VAR2)     \
>>>>> +  TEST_MACRO_128BITS_SIGNED_VARIANTS_2_5(MACRO, VAR1, VAR2);   \
>>>>> +  TEST_MACRO_128BITS_UNSIGNED_VARIANTS_2_5(MACRO, VAR1, VAR2); \
>>>>> +  MACRO(VAR1, VAR2, q, poly, p, 8, 16);
>>>>> \
>>>>> +  MACRO(VAR1, VAR2, q, poly, p, 16, 8)
>>>>> +
>>>>> +#define TEST_MACRO_ALL_VARIANTS_2_5(MACRO, VAR1, VAR2) \
>>>>> +  TEST_MACRO_64BITS_VARIANTS_2_5(MACRO, VAR1, VAR2);   \
>>>>> +  TEST_MACRO_128BITS_VARIANTS_2_5(MACRO, VAR1, VAR2)
>>>>> +
>>>>> +#define TEST_MACRO_SIGNED_VARIANTS_2_5(MACRO, VAR1, VAR2)      \
>>>>> +  TEST_MACRO_64BITS_SIGNED_VARIANTS_2_5(MACRO, VAR1, VAR2);    \
>>>>> +  TEST_MACRO_128BITS_SIGNED_VARIANTS_2_5(MACRO, VAR1, VAR2)
>>>>> +
>>>>> +#endif /*_ARM_NEON_REF_H_  */
>>>>> diff -rNup '--exclude=.git'
>>>>> gcc-fsf/gcc/testsuite/gcc.target/arm/neon-intrinsics/compute_ref_data.h
>>>>>
>>>>> gcc-fsf-neontests/gcc/testsuite/gcc.target/arm/neon-intrinsics/compute_ref_data.h
>>>>> ---
>>>>> gcc-fsf/gcc/testsuite/gcc.target/arm/neon-intrinsics/compute_ref_data.h
>>>>> 1970-01-01 01:00:00.000000000 +0100
>>>>> +++
>>>>>
>>>>> gcc-fsf-neontests/gcc/testsuite/gcc.target/arm/neon-intrinsics/compute_ref_data.h
>>>>> 2013-05-09 00:31:03.563656926 +0200
>>>>> @@ -0,0 +1,80 @@
>>>>> +#include <arm_neon.h>
>>>>> +#include "arm-neon-ref.h"
>>>>> +
>>>>> +/* Initialization helpers; 4 slices are needed for vld2, vld3 and
>>>>> +   vld4.  */
>>>>> +#define MY_INIT_TAB(T,W,N) xNAME(INIT_TAB,N)(T##W##_t)
>>>>> +#define MY_INIT_TAB2(T,W,N) xNAME(INIT_TAB2,N)(T##W##_t)
>>>>> +#define MY_INIT_TAB3(T,W,N) xNAME(INIT_TAB3,N)(T##W##_t)
>>>>> +#define MY_INIT_TAB4(T,W,N) xNAME(INIT_TAB4,N)(T##W##_t)
>>>>> +
>>>>> +/* Initialized input buffers.  */
>>>>> +#define VECT_VAR_DECL_INIT(V, T, W, N)                 \
>>>>> +  VECT_VAR_DECL(V,T,W,N) [] = { MY_INIT_TAB(T,W,N) };
>>>>> +
>>>>> +/* Sample initialization vectors.  */
>>>>> +#define INIT_TAB_1(T)                          \
>>>>> +  (T)-16,
>>>>> +
>>>>> +#define INIT_TAB_2(T)                          \
>>>>> +  (T)-16, (T)-15,
>>>>> +
>>>>> +#define INIT_TAB_4(T)                          \
>>>>> +  (T)-16, (T)-15, (T)-14, (T)-13,
>>>>> +
>>>>> +#define INIT_TAB_8(T)
>>>>> \
>>>>> +  (T)-16, (T)-15, (T)-14, (T)-13, (T)-12, (T)-11, (T)-10, (T)-9,
>>>>> +
>>>>> +#define INIT_TAB_16(T)
>>>>> \
>>>>> +  (T)-16, (T)-15, (T)-14, (T)-13, (T)-12, (T)-11, (T)-10, (T)-9,
>>>>> \
>>>>> +  (T)-8, (T)-7, (T)-6, (T)-5, (T)-4, (T)-3, (T)-2, (T)-1,
>>>>> +
>>>>> +/* This one is used for padding between input buffers.  */
>>>>> +#define PAD(V, T, W, N) char VECT_VAR(V,T,W,N)=42;
>>>>> +
>>>>> +/* Input buffers, one of each size.  */
>>>>> +/* Insert some padding to try to exhibit out of bounds accesses.  */
>>>>> +VECT_VAR_DECL_INIT(buffer, int, 8, 8);
>>>>> +PAD(buffer_pad, int, 8, 8);
>>>>> +VECT_VAR_DECL_INIT(buffer, int, 16, 4);
>>>>> +PAD(buffer_pad, int, 16, 4);
>>>>> +VECT_VAR_DECL_INIT(buffer, int, 32, 2);
>>>>> +PAD(buffer_pad, int, 32, 2);
>>>>> +VECT_VAR_DECL_INIT(buffer, int, 64, 1);
>>>>> +PAD(buffer_pad, int, 64, 1);
>>>>> +VECT_VAR_DECL_INIT(buffer, uint, 8, 8);
>>>>> +PAD(buffer_pad, uint, 8, 8);
>>>>> +VECT_VAR_DECL_INIT(buffer, poly, 8, 8);
>>>>> +PAD(buffer_pad, poly, 8, 8);
>>>>> +VECT_VAR_DECL_INIT(buffer, poly, 16, 4);
>>>>> +PAD(buffer_pad, poly, 16, 4);
>>>>> +VECT_VAR_DECL_INIT(buffer, uint, 16, 4);
>>>>> +PAD(buffer_pad, uint, 16, 4);
>>>>> +VECT_VAR_DECL_INIT(buffer, uint, 32, 2);
>>>>> +PAD(buffer_pad, uint, 32, 2);
>>>>> +VECT_VAR_DECL_INIT(buffer, uint, 64, 1);
>>>>> +PAD(buffer_pad, uint, 64, 1);
>>>>> +VECT_VAR_DECL_INIT(buffer, float, 32, 2);
>>>>> +PAD(buffer_pad, float, 32, 2);
>>>>> +VECT_VAR_DECL_INIT(buffer, int, 8, 16);
>>>>> +PAD(buffer_pad, int, 8, 16);
>>>>> +VECT_VAR_DECL_INIT(buffer, int, 16, 8);
>>>>> +PAD(buffer_pad, int, 16, 8);
>>>>> +VECT_VAR_DECL_INIT(buffer, int, 32, 4);
>>>>> +PAD(buffer_pad, int, 32, 4);
>>>>> +VECT_VAR_DECL_INIT(buffer, int, 64, 2);
>>>>> +PAD(buffer_pad, int, 64, 2);
>>>>> +VECT_VAR_DECL_INIT(buffer, uint, 8, 16);
>>>>> +PAD(buffer_pad, uint, 8, 16);
>>>>> +VECT_VAR_DECL_INIT(buffer, uint, 16, 8);
>>>>> +PAD(buffer_pad, uint, 16, 8);
>>>>> +VECT_VAR_DECL_INIT(buffer, uint, 32, 4);
>>>>> +PAD(buffer_pad, uint, 32, 4);
>>>>> +VECT_VAR_DECL_INIT(buffer, uint, 64, 2);
>>>>> +PAD(buffer_pad, uint, 64, 2);
>>>>> +VECT_VAR_DECL_INIT(buffer, poly, 8, 16);
>>>>> +PAD(buffer_pad, poly, 8, 16);
>>>>> +VECT_VAR_DECL_INIT(buffer, poly, 16, 8);
>>>>> +PAD(buffer_pad, poly, 16, 8);
>>>>> +VECT_VAR_DECL_INIT(buffer, float, 32, 4);
>>>>> +PAD(buffer_pad, float, 32, 4);
>>>>> diff -rNup '--exclude=.git'
>>>>> gcc-fsf/gcc/testsuite/gcc.target/arm/neon-intrinsics/ref_vaba.c
>>>>>
>>>>> gcc-fsf-neontests/gcc/testsuite/gcc.target/arm/neon-intrinsics/ref_vaba.c
>>>>> --- gcc-fsf/gcc/testsuite/gcc.target/arm/neon-intrinsics/ref_vaba.c
>>>>> 1970-01-01 01:00:00.000000000 +0100
>>>>> +++
>>>>>
>>>>> gcc-fsf-neontests/gcc/testsuite/gcc.target/arm/neon-intrinsics/ref_vaba.c
>>>>> 2013-05-09 00:40:27.611642141 +0200
>>>>> @@ -0,0 +1,145 @@
>>>>> +/* { dg-do run } */
>>>>> +/* { dg-require-effective-target arm_neon_hw } */
>>>>> +/* { dg-add-options arm_neon } */
>>>>> +
>>>>> +#include <arm_neon.h>
>>>>> +#include "arm-neon-ref.h"
>>>>> +#include "compute_ref_data.h"
>>>>> +
>>>>> +VECT_VAR_DECL(expected,int,8,8) [] = { 0xf6, 0xf7, 0xf8, 0xf9,
>>>>> +                                      0xfa, 0xfb, 0xfc, 0xfd };
>>>>> +VECT_VAR_DECL(expected,int,16,4) [] = { 0x16, 0x17, 0x18, 0x19 };
>>>>> +VECT_VAR_DECL(expected,int,32,2) [] = { 0x20, 0x21 };
>>>>> +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 };
>>>>> +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x53, 0x54, 0x55, 0x56,
>>>>> +                                       0x57, 0x58, 0x59, 0x5a };
>>>>> +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x907, 0x908, 0x909, 0x90a };
>>>>> +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xffffffe7, 0xffffffe8 };
>>>>> +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 };
>>>>> +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33,
>>>>> +                                       0x33, 0x33, 0x33, 0x33 };
>>>>> +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333,
>>>>> 0x3333
>>>>> };
>>>>> +VECT_VAR_DECL(expected,float,32,2) [] = { 4.172325e-08, 4.172325e-08
>>>>> };
>>>>> +VECT_VAR_DECL(expected,int,8,16) [] = { 0x5e, 0x5f, 0x60, 0x61,
>>>>> +                                       0x62, 0x63, 0x64, 0x65,
>>>>> +                                       0x66, 0x67, 0x68, 0x69,
>>>>> +                                       0x6a, 0x6b, 0x6c, 0x6d };
>>>>> +VECT_VAR_DECL(expected,int,16,8) [] = { 0xb9c, 0xb9d, 0xb9e, 0xb9f,
>>>>> +                                       0xba0, 0xba1, 0xba2, 0xba3 };
>>>>> +VECT_VAR_DECL(expected,int,32,4) [] = { 0x26e0, 0x26e1, 0x26e2, 0x26e3
>>>>> };
>>>>> +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333,
>>>>> +                                       0x3333333333333333 };
>>>>> +VECT_VAR_DECL(expected,uint,8,16) [] = { 0xf8, 0xf9, 0xfa, 0xfb,
>>>>> +                                        0xfc, 0xfd, 0xfe, 0xff,
>>>>> +                                        0x0, 0x1, 0x2, 0x3,
>>>>> +                                        0x4, 0x5, 0x6, 0x7 };
>>>>> +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xfff9, 0xfffa, 0xfffb,
>>>>> 0xfffc,
>>>>> +                                        0xfffd, 0xfffe, 0xffff, 0x0 };
>>>>> +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xc, 0xd, 0xe, 0xf };
>>>>> +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333,
>>>>> +                                        0x3333333333333333 };
>>>>> +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
>>>>> +                                        0x33, 0x33, 0x33, 0x33,
>>>>> +                                        0x33, 0x33, 0x33, 0x33,
>>>>> +                                        0x33, 0x33, 0x33, 0x33 };
>>>>> +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333,
>>>>> 0x3333,
>>>>> +                                        0x3333, 0x3333, 0x3333, 0x3333
>>>>> };
>>>>> +VECT_VAR_DECL(expected,float,32,4) [] = { 4.172325e-08, 4.172325e-08,
>>>>> +                                         4.172325e-08, 4.172325e-08 };
>>>>> +
>>>>> +#define TEST_MSG "VABA/VABAQ"
>>>>> +void exec_vaba (void)
>>>>> +{
>>>>> +  /* Basic test: v4=vaba(v1,v2,v3), then store the result.  */
>>>>> +#define TEST_VABA(Q, T1, T2, W, N)
>>>>> \
>>>>> +  VECT_VAR(vector_res, T1, W, N) =
>>>>> \
>>>>> +    vaba##Q##_##T2##W(VECT_VAR(vector1, T1, W, N),
>>>>> \
>>>>> +                     VECT_VAR(vector2, T1, W, N),
>>>>> \
>>>>> +                     VECT_VAR(vector3, T1, W, N));
>>>>> \
>>>>> +  vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vector_res,
>>>>> T1,
>>>>> W, N))
>>>>> +
>>>>> +#define DECL_VABA_VAR(VAR)                     \
>>>>> +  DECL_VARIABLE(VAR, int, 8, 8);               \
>>>>> +  DECL_VARIABLE(VAR, int, 16, 4);              \
>>>>> +  DECL_VARIABLE(VAR, int, 32, 2);              \
>>>>> +  DECL_VARIABLE(VAR, uint, 8, 8);              \
>>>>> +  DECL_VARIABLE(VAR, uint, 16, 4);             \
>>>>> +  DECL_VARIABLE(VAR, uint, 32, 2);             \
>>>>> +  DECL_VARIABLE(VAR, int, 8, 16);              \
>>>>> +  DECL_VARIABLE(VAR, int, 16, 8);              \
>>>>> +  DECL_VARIABLE(VAR, int, 32, 4);              \
>>>>> +  DECL_VARIABLE(VAR, uint, 8, 16);             \
>>>>> +  DECL_VARIABLE(VAR, uint, 16, 8);             \
>>>>> +  DECL_VARIABLE(VAR, uint, 32, 4)
>>>>> +
>>>>> +  DECL_VABA_VAR(vector1);
>>>>> +  DECL_VABA_VAR(vector2);
>>>>> +  DECL_VABA_VAR(vector3);
>>>>> +  DECL_VABA_VAR(vector_res);
>>>>> +
>>>>> +  clean_results ();
>>>>> +
>>>>> +  /* Initialize input "vector" from "buffer".  */
>>>>> +  TEST_VLOAD(vector1, buffer, , int, s, 8, 8);
>>>>> +  TEST_VLOAD(vector1, buffer, , int, s, 16, 4);
>>>>> +  TEST_VLOAD(vector1, buffer, , int, s, 32, 2);
>>>>> +  TEST_VLOAD(vector1, buffer, , uint, u, 8, 8);
>>>>> +  TEST_VLOAD(vector1, buffer, , uint, u, 16, 4);
>>>>> +  TEST_VLOAD(vector1, buffer, , uint, u, 32, 2);
>>>>> +  TEST_VLOAD(vector1, buffer, q, int, s, 8, 16);
>>>>> +  TEST_VLOAD(vector1, buffer, q, int, s, 16, 8);
>>>>> +  TEST_VLOAD(vector1, buffer, q, int, s, 32, 4);
>>>>> +  TEST_VLOAD(vector1, buffer, q, uint, u, 8, 16);
>>>>> +  TEST_VLOAD(vector1, buffer, q, uint, u, 16, 8);
>>>>> +  TEST_VLOAD(vector1, buffer, q, uint, u, 32, 4);
>>>>> +
>>>>> +  /* Choose init value arbitrarily.  */
>>>>> +  TEST_VDUP(vector2, , int, s, 8, 8, 1);
>>>>> +  TEST_VDUP(vector2, , int, s, 16, 4, -13);
>>>>> +  TEST_VDUP(vector2, , int, s, 32, 2, 8);
>>>>> +  TEST_VDUP(vector2, , uint, u, 8, 8, 1);
>>>>> +  TEST_VDUP(vector2, , uint, u, 16, 4, 13);
>>>>> +  TEST_VDUP(vector2, , uint, u, 32, 2, 8);
>>>>> +  TEST_VDUP(vector2, q, int, s, 8, 16, 10);
>>>>> +  TEST_VDUP(vector2, q, int, s, 16, 8, -12);
>>>>> +  TEST_VDUP(vector2, q, int, s, 32, 4, 32);
>>>>> +  TEST_VDUP(vector2, q, uint, u, 8, 16, 10);
>>>>> +  TEST_VDUP(vector2, q, uint, u, 16, 8, 12);
>>>>> +  TEST_VDUP(vector2, q, uint, u, 32, 4, 32);
>>>>> +
>>>>> +  /* Choose init value arbitrarily.  */
>>>>> +  TEST_VDUP(vector3, , int, s, 8, 8, -5);
>>>>> +  TEST_VDUP(vector3, , int, s, 16, 4, 25);
>>>>> +  TEST_VDUP(vector3, , int, s, 32, 2, -40);
>>>>> +  TEST_VDUP(vector3, , uint, u, 8, 8, 100);
>>>>> +  TEST_VDUP(vector3, , uint, u, 16, 4, 2340);
>>>>> +  TEST_VDUP(vector3, , uint, u, 32, 2, 0xffffffff);
>>>>> +  TEST_VDUP(vector3, q, int, s, 8, 16, -100);
>>>>> +  TEST_VDUP(vector3, q, int, s, 16, 8, -3000);
>>>>> +  TEST_VDUP(vector3, q, int, s, 32, 4, 10000);
>>>>> +  TEST_VDUP(vector3, q, uint, u, 8, 16, 2);
>>>>> +  TEST_VDUP(vector3, q, uint, u, 16, 8, 3);
>>>>> +  TEST_VDUP(vector3, q, uint, u, 32, 4, 4);
>>>>> +
>>>>> +  /* Execute the tests.  */
>>>>> +  TEST_VABA(, int, s, 8, 8);
>>>>> +  TEST_VABA(, int, s, 16, 4);
>>>>> +  TEST_VABA(, int, s, 32, 2);
>>>>> +  TEST_VABA(, uint, u, 8, 8);
>>>>> +  TEST_VABA(, uint, u, 16, 4);
>>>>> +  TEST_VABA(, uint, u, 32, 2);
>>>>> +  TEST_VABA(q, int, s, 8, 16);
>>>>> +  TEST_VABA(q, int, s, 16, 8);
>>>>> +  TEST_VABA(q, int, s, 32, 4);
>>>>> +  TEST_VABA(q, uint, u, 8, 16);
>>>>> +  TEST_VABA(q, uint, u, 16, 8);
>>>>> +  TEST_VABA(q, uint, u, 32, 4);
>>>>> +
>>>>> +  check_results (TEST_MSG, "");
>>>>> +}
>>>>> +
>>>>> +int main (void)
>>>>> +{
>>>>> +  exec_vaba ();
>>>>> +  return 0;
>>>>> +}
>>>>> diff -rNup '--exclude=.git'
>>>>> gcc-fsf/gcc/testsuite/gcc.target/arm/neon-intrinsics/ref_vld1.c
>>>>>
>>>>> gcc-fsf-neontests/gcc/testsuite/gcc.target/arm/neon-intrinsics/ref_vld1.c
>>>>> --- gcc-fsf/gcc/testsuite/gcc.target/arm/neon-intrinsics/ref_vld1.c
>>>>> 1970-01-01 01:00:00.000000000 +0100
>>>>> +++
>>>>>
>>>>> gcc-fsf-neontests/gcc/testsuite/gcc.target/arm/neon-intrinsics/ref_vld1.c
>>>>> 2013-05-09 00:39:00.351644429 +0200
>>>>> @@ -0,0 +1,77 @@
>>>>> +/* { dg-do run } */
>>>>> +/* { dg-require-effective-target arm_neon_hw } */
>>>>> +/* { dg-add-options arm_neon } */
>>>>> +
>>>>> +#include <arm_neon.h>
>>>>> +#include "arm-neon-ref.h"
>>>>> +#include "compute_ref_data.h"
>>>>> +
>>>>> +VECT_VAR_DECL(expected,int,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
>>>>> +                                      0xf4, 0xf5, 0xf6, 0xf7 };
>>>>> +VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3
>>>>> };
>>>>> +VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffff0, 0xfffffff1 };
>>>>> +VECT_VAR_DECL(expected,int,64,1) [] = { 0xfffffffffffffff0 };
>>>>> +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
>>>>> +                                       0xf4, 0xf5, 0xf6, 0xf7 };
>>>>> +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfff0, 0xfff1, 0xfff2,
>>>>> 0xfff3
>>>>> };
>>>>> +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffffff0, 0xfffffff1 };
>>>>> +VECT_VAR_DECL(expected,uint,64,1) [] = { 0xfffffffffffffff0 };
>>>>> +VECT_VAR_DECL(expected,poly,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
>>>>> +                                       0xf4, 0xf5, 0xf6, 0xf7 };
>>>>> +VECT_VAR_DECL(expected,poly,16,4) [] = { 0xfff0, 0xfff1, 0xfff2,
>>>>> 0xfff3
>>>>> };
>>>>> +VECT_VAR_DECL(expected,float,32,2) [] = { -16, -15 };
>>>>> +VECT_VAR_DECL(expected,int,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
>>>>> +                                       0xf4, 0xf5, 0xf6, 0xf7,
>>>>> +                                       0xf8, 0xf9, 0xfa, 0xfb,
>>>>> +                                       0xfc, 0xfd, 0xfe, 0xff };
>>>>> +VECT_VAR_DECL(expected,int,16,8) [] = { 0xfff0, 0xfff1, 0xfff2,
>>>>> 0xfff3,
>>>>> +                                       0xfff4, 0xfff5, 0xfff6, 0xfff7
>>>>> };
>>>>> +VECT_VAR_DECL(expected,int,32,4) [] = { 0xfffffff0, 0xfffffff1,
>>>>> +                                       0xfffffff2, 0xfffffff3 };
>>>>> +VECT_VAR_DECL(expected,int,64,2) [] = { 0xfffffffffffffff0,
>>>>> +                                       0xfffffffffffffff1 };
>>>>> +VECT_VAR_DECL(expected,uint,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
>>>>> +                                        0xf4, 0xf5, 0xf6, 0xf7,
>>>>> +                                        0xf8, 0xf9, 0xfa, 0xfb,
>>>>> +                                        0xfc, 0xfd, 0xfe, 0xff };
>>>>> +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xfff0, 0xfff1, 0xfff2,
>>>>> +                                        0xfff3, 0xfff4, 0xfff5,
>>>>> +                                        0xfff6, 0xfff7 };
>>>>> +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xfffffff0, 0xfffffff1,
>>>>> +                                        0xfffffff2, 0xfffffff3 };
>>>>> +VECT_VAR_DECL(expected,uint,64,2) [] = { 0xfffffffffffffff0,
>>>>> +                                        0xfffffffffffffff1 };
>>>>> +VECT_VAR_DECL(expected,poly,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
>>>>> +                                        0xf4, 0xf5, 0xf6, 0xf7,
>>>>> +                                        0xf8, 0xf9, 0xfa, 0xfb,
>>>>> +                                        0xfc, 0xfd, 0xfe, 0xff };
>>>>> +VECT_VAR_DECL(expected,poly,16,8) [] = { 0xfff0, 0xfff1, 0xfff2,
>>>>> 0xfff3,
>>>>> +                                        0xfff4, 0xfff5, 0xfff6, 0xfff7
>>>>> };
>>>>> +VECT_VAR_DECL(expected,float,32,4) [] = { -16, -15, -14, -13 };
>>>>> +
>>>>> +#define TEST_MSG "VLD1/VLD1Q"
>>>>> +void exec_vld1 (void)
>>>>> +{
>>>>> +  /* Basic test vec=vld1(buffer); then store vec: vst1(result,
>>>>> vector).
>>>>> */
>>>>> +  /* This test actually tests vdl1 and vst1 at the same time.  */
>>>>> +#define TEST_VLD1(VAR, BUF, Q, T1, T2, W, N)
>>>>> \
>>>>> +  VECT_VAR(VAR, T1, W, N) = vld1##Q##_##T2##W(VECT_VAR(BUF, T1, W,
>>>>> N)); \
>>>>> +  vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(VAR, T1, W,
>>>>> N))
>>>>> +
>>>>> +  DECL_VARIABLE_ALL_VARIANTS(vector);
>>>>> +
>>>>> +  clean_results ();
>>>>> +
>>>>> +  TEST_MACRO_ALL_VARIANTS_2_5(TEST_VLD1, vector, buffer);
>>>>> +
>>>>> +  TEST_VLD1(vector, buffer, , float, f, 32, 2);
>>>>> +  TEST_VLD1(vector, buffer, q, float, f, 32, 4);
>>>>> +
>>>>> +  check_results (TEST_MSG, "");
>>>>> +}
>>>>> +
>>>>> +int main (void)
>>>>> +{
>>>>> +  exec_vld1 ();
>>>>> +  return 0;
>>>>> +}
>>>>>
>>>>
>>>>
>
>
> --
> Ramana Radhakrishnan
> Principal Engineer
> ARM Ltd.
> Direct - +44 1223 400495
>
> -- IMPORTANT NOTICE: The contents of this email and any attachments are
> confidential and may also be privileged. If you are not the intended
> recipient, please notify the sender immediately and do not disclose the
> contents to any other person, use it for any purpose, or store or copy the
> information in any medium.  Thank you.
>
> ARM Limited, Registered office 110 Fulbourn Road, Cambridge CB1 9NJ,
> Registered in England & Wales, Company No:  2557590
> ARM Holdings plc, Registered office 110 Fulbourn Road, Cambridge CB1 9NJ,
> Registered in England & Wales, Company No:  2548782
>
diff mbox

Patch

diff --git a/gcc/testsuite/gcc.target/arm/neon-intrinsics/arm-neon-ref.h b/gcc/testsuite/gcc.target/arm/neon-intrinsics/arm-neon-ref.h
new file mode 100644
index 0000000..597ecf0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/neon-intrinsics/arm-neon-ref.h
@@ -0,0 +1,350 @@ 
+#ifndef _ARM_NEON_REF_H_
+#define _ARM_NEON_REF_H_
+
+#include <stdio.h>
+#include <inttypes.h>
+
+extern void abort(void);
+extern void *memset(void *, int, size_t);
+
+#define xSTR(X) #X
+#define STR(X) xSTR(X)
+
+#define xNAME1(V,T) V ## _ ##  T
+#define xNAME(V,T) xNAME1(V,T)
+
+#define VAR(V,T,W) xNAME(V,T##W)
+#define VAR_DECL(V, T, W) T##W##_t VAR(V,T,W)
+
+#define VECT_NAME(T, W, N) T##W##x##N
+#define VECT_ARRAY_NAME(T, W, N, L) T##W##x##N##x##L
+#define VECT_TYPE(T, W, N) xNAME(VECT_NAME(T,W,N),t)
+#define VECT_ARRAY_TYPE(T, W, N, L) xNAME(VECT_ARRAY_NAME(T,W,N,L),t)
+
+#define VECT_VAR(V,T,W,N) xNAME(V,VECT_NAME(T,W,N))
+#define VECT_VAR_DECL(V, T, W, N) T##W##_t VECT_VAR(V,T,W,N)
+
+/* Array declarations.  */
+#define ARRAY(V, T, W, N) VECT_VAR_DECL(V,T,W,N)[N]
+
+/* Check results vs expected values.  */
+#define CHECK(MSG,T,W,N,FMT)						\
+  for(i=0; i<N ; i++)							\
+    {									\
+      if (VECT_VAR(result, T, W, N)[i] != VECT_VAR(expected, T, W, N)[i]) { \
+	fprintf(stderr,							\
+		"ERROR in %s at type %s index %d: 0x%" FMT " != 0x%"	\
+		FMT " (expected)\n",					\
+		MSG, STR(VECT_NAME(T, W, N)), i,			\
+		VECT_VAR(result, T, W, N)[i],				\
+		VECT_VAR(expected, T, W, N)[i]);			\
+	abort();							\
+      }									\
+    }
+
+/* Floating-point variant: compares bit patterns through a union.  */
+#define CHECK_FP(MSG,T,W,N,FMT)						\
+  for(i=0; i<N ; i++)							\
+    {									\
+      union fp_operand {						\
+	uint##W##_t i;							\
+	float##W##_t f;							\
+      } tmp_res, tmp_exp;						\
+      tmp_res.f = VECT_VAR(result, T, W, N)[i];				\
+      tmp_exp.f = VECT_VAR(expected, T, W, N)[i];			\
+      if (tmp_res.i != tmp_exp.i) {					\
+	fprintf(stderr,							\
+		"ERROR in %s at type %s index %d: 0x%" FMT " != 0x%"	\
+		FMT " (expected)\n",					\
+		MSG, STR(VECT_NAME(T, W, N)), i,			\
+		tmp_res.i,						\
+		tmp_exp.i);						\
+      abort();								\
+      }									\
+    }
+
+/* Fill a buffer with a non-zero byte pattern; CLEAN omits the final ';'.  */
+#define CLEAN_PATTERN_8  0x33
+
+#define CLEAN(VAR,T,W,N)						\
+  memset(VECT_VAR(VAR, T, W, N),					\
+	 CLEAN_PATTERN_8,						\
+	 sizeof(VECT_VAR(VAR, T, W, N)))
+
+/* Input buffers, one of each size.  */
+extern ARRAY(buffer, int, 8, 8);
+extern ARRAY(buffer, int, 16, 4);
+extern ARRAY(buffer, int, 32, 2);
+extern ARRAY(buffer, int, 64, 1);
+extern ARRAY(buffer, uint, 8, 8);
+extern ARRAY(buffer, uint, 16, 4);
+extern ARRAY(buffer, uint, 32, 2);
+extern ARRAY(buffer, uint, 64, 1);
+extern ARRAY(buffer, poly, 8, 8);
+extern ARRAY(buffer, poly, 16, 4);
+extern ARRAY(buffer, float, 32, 2);
+extern ARRAY(buffer, int, 8, 16);
+extern ARRAY(buffer, int, 16, 8);
+extern ARRAY(buffer, int, 32, 4);
+extern ARRAY(buffer, int, 64, 2);
+extern ARRAY(buffer, uint, 8, 16);
+extern ARRAY(buffer, uint, 16, 8);
+extern ARRAY(buffer, uint, 32, 4);
+extern ARRAY(buffer, uint, 64, 2);
+extern ARRAY(buffer, poly, 8, 16);
+extern ARRAY(buffer, poly, 16, 8);
+extern ARRAY(buffer, float, 32, 4);
+
+/* Output buffers, one of each size.  */
+static ARRAY(result, int, 8, 8);
+static ARRAY(result, int, 16, 4);
+static ARRAY(result, int, 32, 2);
+static ARRAY(result, int, 64, 1);
+static ARRAY(result, uint, 8, 8);
+static ARRAY(result, uint, 16, 4);
+static ARRAY(result, uint, 32, 2);
+static ARRAY(result, uint, 64, 1);
+static ARRAY(result, poly, 8, 8);
+static ARRAY(result, poly, 16, 4);
+static ARRAY(result, float, 32, 2);
+static ARRAY(result, int, 8, 16);
+static ARRAY(result, int, 16, 8);
+static ARRAY(result, int, 32, 4);
+static ARRAY(result, int, 64, 2);
+static ARRAY(result, uint, 8, 16);
+static ARRAY(result, uint, 16, 8);
+static ARRAY(result, uint, 32, 4);
+static ARRAY(result, uint, 64, 2);
+static ARRAY(result, poly, 8, 16);
+static ARRAY(result, poly, 16, 8);
+static ARRAY(result, float, 32, 4);
+
+/* Expected results, one of each size.  */
+extern ARRAY(expected, int, 8, 8);
+extern ARRAY(expected, int, 16, 4);
+extern ARRAY(expected, int, 32, 2);
+extern ARRAY(expected, int, 64, 1);
+extern ARRAY(expected, uint, 8, 8);
+extern ARRAY(expected, uint, 16, 4);
+extern ARRAY(expected, uint, 32, 2);
+extern ARRAY(expected, uint, 64, 1);
+extern ARRAY(expected, poly, 8, 8);
+extern ARRAY(expected, poly, 16, 4);
+extern ARRAY(expected, float, 32, 2);
+extern ARRAY(expected, int, 8, 16);
+extern ARRAY(expected, int, 16, 8);
+extern ARRAY(expected, int, 32, 4);
+extern ARRAY(expected, int, 64, 2);
+extern ARRAY(expected, uint, 8, 16);
+extern ARRAY(expected, uint, 16, 8);
+extern ARRAY(expected, uint, 32, 4);
+extern ARRAY(expected, uint, 64, 2);
+extern ARRAY(expected, poly, 8, 16);
+extern ARRAY(expected, poly, 16, 8);
+extern ARRAY(expected, float, 32, 4);
+
+/* Compare each result buffer with its expected values; abort on mismatch.  */
+static void check_results (const char *test_name, const char* comment)
+{
+  int i;
+
+  CHECK(test_name, int, 8, 8, PRIx8);
+  CHECK(test_name, int, 16, 4, PRIx16);
+  CHECK(test_name, int, 32, 2, PRIx32);
+  CHECK(test_name, int, 64, 1, PRIx64);
+  CHECK(test_name, uint, 8, 8, PRIx8);
+  CHECK(test_name, uint, 16, 4, PRIx16);
+  CHECK(test_name, uint, 32, 2, PRIx32);
+  CHECK(test_name, uint, 64, 1, PRIx64);
+  CHECK(test_name, poly, 8, 8, PRIx8);
+  CHECK(test_name, poly, 16, 4, PRIx16);
+  CHECK_FP(test_name, float, 32, 2, PRIx32);
+
+  CHECK(test_name, int, 8, 16, PRIx8);
+  CHECK(test_name, int, 16, 8, PRIx16);
+  CHECK(test_name, int, 32, 4, PRIx32);
+  CHECK(test_name, int, 64, 2, PRIx64);
+  CHECK(test_name, uint, 8, 16, PRIx8);
+  CHECK(test_name, uint, 16, 8, PRIx16);
+  CHECK(test_name, uint, 32, 4, PRIx32);
+  CHECK(test_name, uint, 64, 2, PRIx64);
+  CHECK(test_name, poly, 8, 16, PRIx8);
+  CHECK(test_name, poly, 16, 8, PRIx16);
+  CHECK_FP(test_name, float, 32, 4, PRIx32);
+}
+
+/* Clean output buffers before execution.  */
+static void clean_results (void)
+{
+  CLEAN(result, int, 8, 8);
+  CLEAN(result, int, 16, 4);
+  CLEAN(result, int, 32, 2);
+  CLEAN(result, int, 64, 1);
+  CLEAN(result, uint, 8, 8);
+  CLEAN(result, uint, 16, 4);
+  CLEAN(result, uint, 32, 2);
+  CLEAN(result, uint, 64, 1);
+  CLEAN(result, poly, 8, 8);
+  CLEAN(result, poly, 16, 4);
+  CLEAN(result, float, 32, 2);
+
+  CLEAN(result, int, 8, 16);
+  CLEAN(result, int, 16, 8);
+  CLEAN(result, int, 32, 4);
+  CLEAN(result, int, 64, 2);
+  CLEAN(result, uint, 8, 16);
+  CLEAN(result, uint, 16, 8);
+  CLEAN(result, uint, 32, 4);
+  CLEAN(result, uint, 64, 2);
+  CLEAN(result, poly, 8, 16);
+  CLEAN(result, poly, 16, 8);
+  CLEAN(result, float, 32, 4);
+}
+
+
+/* Helpers to declare variables of various types.   */
+#define DECL_VARIABLE(VAR, T1, W, N)		\
+  VECT_TYPE(T1, W, N) VECT_VAR(VAR, T1, W, N)
+
+#define DECL_VARIABLE_64BITS_SIGNED_VARIANTS(VAR)	\
+  DECL_VARIABLE(VAR, int, 8, 8);			\
+  DECL_VARIABLE(VAR, int, 16, 4);			\
+  DECL_VARIABLE(VAR, int, 32, 2);			\
+  DECL_VARIABLE(VAR, int, 64, 1)
+
+#define DECL_VARIABLE_64BITS_UNSIGNED_VARIANTS(VAR)	\
+  DECL_VARIABLE(VAR, uint, 8, 8);			\
+  DECL_VARIABLE(VAR, uint, 16, 4);			\
+  DECL_VARIABLE(VAR, uint, 32, 2);			\
+  DECL_VARIABLE(VAR, uint, 64, 1)
+
+#define DECL_VARIABLE_128BITS_SIGNED_VARIANTS(VAR)	\
+  DECL_VARIABLE(VAR, int, 8, 16);			\
+  DECL_VARIABLE(VAR, int, 16, 8);			\
+  DECL_VARIABLE(VAR, int, 32, 4);			\
+  DECL_VARIABLE(VAR, int, 64, 2)
+
+#define DECL_VARIABLE_128BITS_UNSIGNED_VARIANTS(VAR)	\
+  DECL_VARIABLE(VAR, uint, 8, 16);			\
+  DECL_VARIABLE(VAR, uint, 16, 8);			\
+  DECL_VARIABLE(VAR, uint, 32, 4);			\
+  DECL_VARIABLE(VAR, uint, 64, 2)
+
+#define DECL_VARIABLE_64BITS_VARIANTS(VAR)	\
+  DECL_VARIABLE_64BITS_SIGNED_VARIANTS(VAR);	\
+  DECL_VARIABLE_64BITS_UNSIGNED_VARIANTS(VAR);	\
+  DECL_VARIABLE(VAR, poly, 8, 8);		\
+  DECL_VARIABLE(VAR, poly, 16, 4);		\
+  DECL_VARIABLE(VAR, float, 32, 2)
+
+#define DECL_VARIABLE_128BITS_VARIANTS(VAR)	\
+  DECL_VARIABLE_128BITS_SIGNED_VARIANTS(VAR);	\
+  DECL_VARIABLE_128BITS_UNSIGNED_VARIANTS(VAR);	\
+  DECL_VARIABLE(VAR, poly, 8, 16);		\
+  DECL_VARIABLE(VAR, poly, 16, 8);		\
+  DECL_VARIABLE(VAR, float, 32, 4)
+
+#define DECL_VARIABLE_ALL_VARIANTS(VAR)		\
+  DECL_VARIABLE_64BITS_VARIANTS(VAR);		\
+  DECL_VARIABLE_128BITS_VARIANTS(VAR)
+
+/* Helpers to initialize vectors.  */
+#define VDUP(VAR, Q, T1, T2, W, N, V)			\
+  VECT_VAR(VAR, T1, W, N) = vdup##Q##_n_##T2##W(V)
+
+#define VSET_LANE(VAR, Q, T1, T2, W, N, L, V)				\
+  VECT_VAR(VAR, T1, W, N) = vset##Q##_lane_##T2##W(V,			\
+						   VECT_VAR(VAR, T1, W, N), \
+						   L)
+
+/* We need to load initial values first, so rely on VLD1.  */
+#define VLOAD(VAR, BUF, Q, T1, T2, W, N)				\
+  VECT_VAR(VAR, T1, W, N) = vld1##Q##_##T2##W(VECT_VAR(BUF, T1, W, N))
+
+/* Helpers for macros with 1 constant and 5 variable arguments.  */
+#define TEST_MACRO_64BITS_SIGNED_VARIANTS_1_5(MACRO, VAR)	\
+  MACRO(VAR, , int, s, 8, 8);					\
+  MACRO(VAR, , int, s, 16, 4);					\
+  MACRO(VAR, , int, s, 32, 2);					\
+  MACRO(VAR, , int, s, 64, 1)
+
+#define TEST_MACRO_64BITS_UNSIGNED_VARIANTS_1_5(MACRO, VAR)	\
+  MACRO(VAR, , uint, u, 8, 8);					\
+  MACRO(VAR, , uint, u, 16, 4);					\
+  MACRO(VAR, , uint, u, 32, 2);					\
+  MACRO(VAR, , uint, u, 64, 1)
+
+#define TEST_MACRO_128BITS_SIGNED_VARIANTS_1_5(MACRO, VAR)	\
+  MACRO(VAR, q, int, s, 8, 16);					\
+  MACRO(VAR, q, int, s, 16, 8);					\
+  MACRO(VAR, q, int, s, 32, 4);					\
+  MACRO(VAR, q, int, s, 64, 2)
+
+#define TEST_MACRO_128BITS_UNSIGNED_VARIANTS_1_5(MACRO,VAR)	\
+  MACRO(VAR, q, uint, u, 8, 16);				\
+  MACRO(VAR, q, uint, u, 16, 8);				\
+  MACRO(VAR, q, uint, u, 32, 4);				\
+  MACRO(VAR, q, uint, u, 64, 2)
+
+#define TEST_MACRO_64BITS_VARIANTS_1_5(MACRO, VAR)	\
+  TEST_MACRO_64BITS_SIGNED_VARIANTS_1_5(MACRO, VAR);	\
+  TEST_MACRO_64BITS_UNSIGNED_VARIANTS_1_5(MACRO, VAR)
+
+#define TEST_MACRO_128BITS_VARIANTS_1_5(MACRO, VAR)	\
+  TEST_MACRO_128BITS_SIGNED_VARIANTS_1_5(MACRO, VAR);	\
+  TEST_MACRO_128BITS_UNSIGNED_VARIANTS_1_5(MACRO, VAR)
+
+#define TEST_MACRO_ALL_VARIANTS_1_5(MACRO, VAR)	\
+  TEST_MACRO_64BITS_VARIANTS_1_5(MACRO, VAR);	\
+  TEST_MACRO_128BITS_VARIANTS_1_5(MACRO, VAR)
+
+#define TEST_MACRO_SIGNED_VARIANTS_1_5(MACRO, VAR)	\
+  TEST_MACRO_64BITS_SIGNED_VARIANTS_1_5(MACRO, VAR);	\
+  TEST_MACRO_128BITS_SIGNED_VARIANTS_1_5(MACRO, VAR)
+
+/* Helpers for macros with 2 constant and 5 variable arguments.  */
+#define TEST_MACRO_64BITS_SIGNED_VARIANTS_2_5(MACRO, VAR1, VAR2)	\
+  MACRO(VAR1, VAR2, , int, s, 8, 8);					\
+  MACRO(VAR1, VAR2, , int, s, 16, 4);					\
+  MACRO(VAR1, VAR2, , int, s, 32, 2);					\
+  MACRO(VAR1, VAR2 , , int, s, 64, 1)
+
+#define TEST_MACRO_64BITS_UNSIGNED_VARIANTS_2_5(MACRO, VAR1, VAR2)	\
+  MACRO(VAR1, VAR2, , uint, u, 8, 8);					\
+  MACRO(VAR1, VAR2, , uint, u, 16, 4);					\
+  MACRO(VAR1, VAR2, , uint, u, 32, 2);					\
+  MACRO(VAR1, VAR2, , uint, u, 64, 1)
+
+#define TEST_MACRO_128BITS_SIGNED_VARIANTS_2_5(MACRO, VAR1, VAR2)	\
+  MACRO(VAR1, VAR2, q, int, s, 8, 16);					\
+  MACRO(VAR1, VAR2, q, int, s, 16, 8);					\
+  MACRO(VAR1, VAR2, q, int, s, 32, 4);					\
+  MACRO(VAR1, VAR2, q, int, s, 64, 2)
+
+#define TEST_MACRO_128BITS_UNSIGNED_VARIANTS_2_5(MACRO, VAR1, VAR2)	\
+  MACRO(VAR1, VAR2, q, uint, u, 8, 16);					\
+  MACRO(VAR1, VAR2, q, uint, u, 16, 8);					\
+  MACRO(VAR1, VAR2, q, uint, u, 32, 4);					\
+  MACRO(VAR1, VAR2, q, uint, u, 64, 2)
+
+#define TEST_MACRO_64BITS_VARIANTS_2_5(MACRO, VAR1, VAR2)	\
+  TEST_MACRO_64BITS_SIGNED_VARIANTS_2_5(MACRO, VAR1, VAR2);	\
+  TEST_MACRO_64BITS_UNSIGNED_VARIANTS_2_5(MACRO, VAR1, VAR2);	\
+  MACRO(VAR1, VAR2, , poly, p, 8, 8);				\
+  MACRO(VAR1, VAR2, , poly, p, 16, 4)
+
+#define TEST_MACRO_128BITS_VARIANTS_2_5(MACRO, VAR1, VAR2)	\
+  TEST_MACRO_128BITS_SIGNED_VARIANTS_2_5(MACRO, VAR1, VAR2);	\
+  TEST_MACRO_128BITS_UNSIGNED_VARIANTS_2_5(MACRO, VAR1, VAR2);	\
+  MACRO(VAR1, VAR2, q, poly, p, 8, 16);				\
+  MACRO(VAR1, VAR2, q, poly, p, 16, 8)
+
+#define TEST_MACRO_ALL_VARIANTS_2_5(MACRO, VAR1, VAR2)	\
+  TEST_MACRO_64BITS_VARIANTS_2_5(MACRO, VAR1, VAR2);	\
+  TEST_MACRO_128BITS_VARIANTS_2_5(MACRO, VAR1, VAR2)
+
+#define TEST_MACRO_SIGNED_VARIANTS_2_5(MACRO, VAR1, VAR2)	\
+  TEST_MACRO_64BITS_SIGNED_VARIANTS_2_5(MACRO, VAR1, VAR2);	\
+  TEST_MACRO_128BITS_SIGNED_VARIANTS_2_5(MACRO, VAR1, VAR2)
+
+#endif /* _ARM_NEON_REF_H_ */
diff --git a/gcc/testsuite/gcc.target/arm/neon-intrinsics/compute_ref_data.h b/gcc/testsuite/gcc.target/arm/neon-intrinsics/compute_ref_data.h
new file mode 100644
index 0000000..2a11b7e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/neon-intrinsics/compute_ref_data.h
@@ -0,0 +1,80 @@ 
+#include <arm_neon.h>
+#include "arm-neon-ref.h"
+
+/* Initialization helpers; 4 slices are needed for vld2, vld3 and
+   vld4.  */
+#define MY_INIT_TAB(T,W,N) xNAME(INIT_TAB,N)(T##W##_t)
+#define MY_INIT_TAB2(T,W,N) xNAME(INIT_TAB2,N)(T##W##_t)
+#define MY_INIT_TAB3(T,W,N) xNAME(INIT_TAB3,N)(T##W##_t)
+#define MY_INIT_TAB4(T,W,N) xNAME(INIT_TAB4,N)(T##W##_t)
+
+/* Define initialized input buffer V; the caller supplies the final ';'.  */
+#define VECT_VAR_DECL_INIT(V, T, W, N)			\
+  VECT_VAR_DECL(V,T,W,N) [] = { MY_INIT_TAB(T,W,N) }
+
+/* Sample initialization vectors.  */
+#define INIT_TAB_1(T)				\
+  (T)-16,
+
+#define INIT_TAB_2(T)				\
+  (T)-16, (T)-15,
+
+#define INIT_TAB_4(T)				\
+  (T)-16, (T)-15, (T)-14, (T)-13,
+
+#define INIT_TAB_8(T)							\
+  (T)-16, (T)-15, (T)-14, (T)-13, (T)-12, (T)-11, (T)-10, (T)-9,
+
+#define INIT_TAB_16(T)							\
+  (T)-16, (T)-15, (T)-14, (T)-13, (T)-12, (T)-11, (T)-10, (T)-9,	\
+  (T)-8, (T)-7, (T)-6, (T)-5, (T)-4, (T)-3, (T)-2, (T)-1,
+
+/* Padding byte between input buffers; the caller supplies the ';'.  */
+#define PAD(V, T, W, N) char VECT_VAR(V,T,W,N)=42
+
+/* Input buffers, one of each size.  */
+/* Insert some padding to try to exhibit out of bounds accesses.  */
+VECT_VAR_DECL_INIT(buffer, int, 8, 8);
+PAD(buffer_pad, int, 8, 8);
+VECT_VAR_DECL_INIT(buffer, int, 16, 4);
+PAD(buffer_pad, int, 16, 4);
+VECT_VAR_DECL_INIT(buffer, int, 32, 2);
+PAD(buffer_pad, int, 32, 2);
+VECT_VAR_DECL_INIT(buffer, int, 64, 1);
+PAD(buffer_pad, int, 64, 1);
+VECT_VAR_DECL_INIT(buffer, uint, 8, 8);
+PAD(buffer_pad, uint, 8, 8);
+VECT_VAR_DECL_INIT(buffer, poly, 8, 8);
+PAD(buffer_pad, poly, 8, 8);
+VECT_VAR_DECL_INIT(buffer, poly, 16, 4);
+PAD(buffer_pad, poly, 16, 4);
+VECT_VAR_DECL_INIT(buffer, uint, 16, 4);
+PAD(buffer_pad, uint, 16, 4);
+VECT_VAR_DECL_INIT(buffer, uint, 32, 2);
+PAD(buffer_pad, uint, 32, 2);
+VECT_VAR_DECL_INIT(buffer, uint, 64, 1);
+PAD(buffer_pad, uint, 64, 1);
+VECT_VAR_DECL_INIT(buffer, float, 32, 2);
+PAD(buffer_pad, float, 32, 2);
+VECT_VAR_DECL_INIT(buffer, int, 8, 16);
+PAD(buffer_pad, int, 8, 16);
+VECT_VAR_DECL_INIT(buffer, int, 16, 8);
+PAD(buffer_pad, int, 16, 8);
+VECT_VAR_DECL_INIT(buffer, int, 32, 4);
+PAD(buffer_pad, int, 32, 4);
+VECT_VAR_DECL_INIT(buffer, int, 64, 2);
+PAD(buffer_pad, int, 64, 2);
+VECT_VAR_DECL_INIT(buffer, uint, 8, 16);
+PAD(buffer_pad, uint, 8, 16);
+VECT_VAR_DECL_INIT(buffer, uint, 16, 8);
+PAD(buffer_pad, uint, 16, 8);
+VECT_VAR_DECL_INIT(buffer, uint, 32, 4);
+PAD(buffer_pad, uint, 32, 4);
+VECT_VAR_DECL_INIT(buffer, uint, 64, 2);
+PAD(buffer_pad, uint, 64, 2);
+VECT_VAR_DECL_INIT(buffer, poly, 8, 16);
+PAD(buffer_pad, poly, 8, 16);
+VECT_VAR_DECL_INIT(buffer, poly, 16, 8);
+PAD(buffer_pad, poly, 16, 8);
+VECT_VAR_DECL_INIT(buffer, float, 32, 4);
+PAD(buffer_pad, float, 32, 4);
diff --git a/gcc/testsuite/gcc.target/arm/neon-intrinsics/neon-intrinsics.exp b/gcc/testsuite/gcc.target/arm/neon-intrinsics/neon-intrinsics.exp
new file mode 100644
index 0000000..0ef4640
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/neon-intrinsics/neon-intrinsics.exp
@@ -0,0 +1,49 @@ 
+# Copyright (C) 2013 Free Software Foundation, Inc.
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCC; see the file COPYING3.  If not see
+# <http://www.gnu.org/licenses/>.
+
+# Neon intrinsics execution tests, run through the c-torture driver.
+
+# Exit immediately if this isn't an ARM or AArch64 target.
+if {![istarget arm*-*-*]
+    && ![istarget aarch64*-*-*]} then {
+  return
+}
+
+# Load support procs.
+load_lib gcc-dg.exp
+
+# Load the torture and target-support libraries.
+load_lib c-torture.exp
+load_lib target-supports.exp
+load_lib torture-options.exp
+
+torture-init
+set-torture-options $C_TORTURE_OPTIONS {{}} $LTO_TORTURE_OPTIONS
+
+set additional_flags [add_options_for_arm_neon ""]
+
+# Main loop: execute each $srcdir/$subdir/*.c file, in sorted order.
+foreach src [lsort [glob -nocomplain $srcdir/$subdir/*.c]] {
+    # If we're only testing specific files and this isn't one of them, skip it.
+    if ![runtest_file_p $runtests $src] then {
+        continue
+    }
+
+    c-torture-execute $src $additional_flags
+}
+
+# All done.
+torture-finish
diff --git a/gcc/testsuite/gcc.target/arm/neon-intrinsics/vaba.c b/gcc/testsuite/gcc.target/arm/neon-intrinsics/vaba.c
new file mode 100644
index 0000000..62b1e67
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/neon-intrinsics/vaba.c
@@ -0,0 +1,145 @@ 
+/* { dg-do run } */
+/* { dg-require-effective-target arm_neon_hw { target { "arm*-*-*" } } } */
+/* { dg-add-options arm_neon } */
+
+#include <arm_neon.h>
+#include "arm-neon-ref.h"
+#include "compute_ref_data.h"
+/* Expected results.  exec_vaba runs no 64-bit, poly or float variant.  */
+VECT_VAR_DECL(expected,int,8,8) [] = { 0xf6, 0xf7, 0xf8, 0xf9,
+				       0xfa, 0xfb, 0xfc, 0xfd };
+VECT_VAR_DECL(expected,int,16,4) [] = { 0x16, 0x17, 0x18, 0x19 };
+VECT_VAR_DECL(expected,int,32,2) [] = { 0x20, 0x21 };
+VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 };
+VECT_VAR_DECL(expected,uint,8,8) [] = { 0x53, 0x54, 0x55, 0x56,
+					0x57, 0x58, 0x59, 0x5a };
+VECT_VAR_DECL(expected,uint,16,4) [] = { 0x907, 0x908, 0x909, 0x90a };
+VECT_VAR_DECL(expected,uint,32,2) [] = { 0xffffffe7, 0xffffffe8 };
+VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 };
+VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33,
+					0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,float,32,2) [] = { 4.172325e-08, 4.172325e-08 };
+VECT_VAR_DECL(expected,int,8,16) [] = { 0x5e, 0x5f, 0x60, 0x61,
+					0x62, 0x63, 0x64, 0x65,
+					0x66, 0x67, 0x68, 0x69,
+					0x6a, 0x6b, 0x6c, 0x6d };
+VECT_VAR_DECL(expected,int,16,8) [] = { 0xb9c, 0xb9d, 0xb9e, 0xb9f,
+					0xba0, 0xba1, 0xba2, 0xba3 };
+VECT_VAR_DECL(expected,int,32,4) [] = { 0x26e0, 0x26e1, 0x26e2, 0x26e3 };
+VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333,
+					0x3333333333333333 };
+VECT_VAR_DECL(expected,uint,8,16) [] = { 0xf8, 0xf9, 0xfa, 0xfb,
+					 0xfc, 0xfd, 0xfe, 0xff,
+					 0x0, 0x1, 0x2, 0x3,
+					 0x4, 0x5, 0x6, 0x7 };
+VECT_VAR_DECL(expected,uint,16,8) [] = { 0xfff9, 0xfffa, 0xfffb, 0xfffc,
+					 0xfffd, 0xfffe, 0xffff, 0x0 };
+VECT_VAR_DECL(expected,uint,32,4) [] = { 0xc, 0xd, 0xe, 0xf };
+VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333,
+					 0x3333333333333333 };
+VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+					 0x33, 0x33, 0x33, 0x33,
+					 0x33, 0x33, 0x33, 0x33,
+					 0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333,
+					 0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,float,32,4) [] = { 4.172325e-08, 4.172325e-08,
+					  4.172325e-08, 4.172325e-08 };
+
+#define TEST_MSG "VABA/VABAQ"
+void exec_vaba (void)
+{
+  /* Basic test: res=vaba(v1,v2,v3), then vst1 res into result.  */
+#define TEST_VABA(Q, T1, T2, W, N)					\
+  VECT_VAR(vector_res, T1, W, N) =					\
+    vaba##Q##_##T2##W(VECT_VAR(vector1, T1, W, N),			\
+		      VECT_VAR(vector2, T1, W, N),			\
+		      VECT_VAR(vector3, T1, W, N));			\
+  vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vector_res, T1, W, N))
+
+#define DECL_VABA_VAR(VAR)			\
+  DECL_VARIABLE(VAR, int, 8, 8);		\
+  DECL_VARIABLE(VAR, int, 16, 4);		\
+  DECL_VARIABLE(VAR, int, 32, 2);		\
+  DECL_VARIABLE(VAR, uint, 8, 8);		\
+  DECL_VARIABLE(VAR, uint, 16, 4);		\
+  DECL_VARIABLE(VAR, uint, 32, 2);		\
+  DECL_VARIABLE(VAR, int, 8, 16);		\
+  DECL_VARIABLE(VAR, int, 16, 8);		\
+  DECL_VARIABLE(VAR, int, 32, 4);		\
+  DECL_VARIABLE(VAR, uint, 8, 16);		\
+  DECL_VARIABLE(VAR, uint, 16, 8);		\
+  DECL_VARIABLE(VAR, uint, 32, 4)
+
+  DECL_VABA_VAR(vector1);
+  DECL_VABA_VAR(vector2);
+  DECL_VABA_VAR(vector3);
+  DECL_VABA_VAR(vector_res);
+
+  clean_results ();
+
+  /* Initialize the "vector1" inputs from the "buffer" variables.  */
+  VLOAD(vector1, buffer, , int, s, 8, 8);
+  VLOAD(vector1, buffer, , int, s, 16, 4);
+  VLOAD(vector1, buffer, , int, s, 32, 2);
+  VLOAD(vector1, buffer, , uint, u, 8, 8);
+  VLOAD(vector1, buffer, , uint, u, 16, 4);
+  VLOAD(vector1, buffer, , uint, u, 32, 2);
+  VLOAD(vector1, buffer, q, int, s, 8, 16);
+  VLOAD(vector1, buffer, q, int, s, 16, 8);
+  VLOAD(vector1, buffer, q, int, s, 32, 4);
+  VLOAD(vector1, buffer, q, uint, u, 8, 16);
+  VLOAD(vector1, buffer, q, uint, u, 16, 8);
+  VLOAD(vector1, buffer, q, uint, u, 32, 4);
+
+  /* Init values for vector2 are chosen arbitrarily.  */
+  VDUP(vector2, , int, s, 8, 8, 1);
+  VDUP(vector2, , int, s, 16, 4, -13);
+  VDUP(vector2, , int, s, 32, 2, 8);
+  VDUP(vector2, , uint, u, 8, 8, 1);
+  VDUP(vector2, , uint, u, 16, 4, 13);
+  VDUP(vector2, , uint, u, 32, 2, 8);
+  VDUP(vector2, q, int, s, 8, 16, 10);
+  VDUP(vector2, q, int, s, 16, 8, -12);
+  VDUP(vector2, q, int, s, 32, 4, 32);
+  VDUP(vector2, q, uint, u, 8, 16, 10);
+  VDUP(vector2, q, uint, u, 16, 8, 12);
+  VDUP(vector2, q, uint, u, 32, 4, 32);
+
+  /* Init values for vector3 are chosen arbitrarily.  */
+  VDUP(vector3, , int, s, 8, 8, -5);
+  VDUP(vector3, , int, s, 16, 4, 25);
+  VDUP(vector3, , int, s, 32, 2, -40);
+  VDUP(vector3, , uint, u, 8, 8, 100);
+  VDUP(vector3, , uint, u, 16, 4, 2340);
+  VDUP(vector3, , uint, u, 32, 2, 0xffffffff);
+  VDUP(vector3, q, int, s, 8, 16, -100);
+  VDUP(vector3, q, int, s, 16, 8, -3000);
+  VDUP(vector3, q, int, s, 32, 4, 10000);
+  VDUP(vector3, q, uint, u, 8, 16, 2);
+  VDUP(vector3, q, uint, u, 16, 8, 3);
+  VDUP(vector3, q, uint, u, 32, 4, 4);
+
+  /* Execute the tests for each declared variant (vaba and vabaq forms).  */
+  TEST_VABA(, int, s, 8, 8);
+  TEST_VABA(, int, s, 16, 4);
+  TEST_VABA(, int, s, 32, 2);
+  TEST_VABA(, uint, u, 8, 8);
+  TEST_VABA(, uint, u, 16, 4);
+  TEST_VABA(, uint, u, 32, 2);
+  TEST_VABA(q, int, s, 8, 16);
+  TEST_VABA(q, int, s, 16, 8);
+  TEST_VABA(q, int, s, 32, 4);
+  TEST_VABA(q, uint, u, 8, 16);
+  TEST_VABA(q, uint, u, 16, 8);
+  TEST_VABA(q, uint, u, 32, 4);
+
+  check_results (TEST_MSG, "");
+}
+
+int main (void)
+{
+  exec_vaba ();  /* Run the VABA/VABAQ test.  */
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/arm/neon-intrinsics/vld1.c b/gcc/testsuite/gcc.target/arm/neon-intrinsics/vld1.c
new file mode 100644
index 0000000..859db92
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/neon-intrinsics/vld1.c
@@ -0,0 +1,77 @@ 
+/* { dg-do run } */
+/* { dg-require-effective-target arm_neon_hw { target { "arm*-*-*" } } } */
+/* { dg-add-options arm_neon } */
+
+#include <arm_neon.h>
+#include "arm-neon-ref.h"
+#include "compute_ref_data.h"
+/* Expected results; presumably the input buffer contents (TODO confirm).  */
+VECT_VAR_DECL(expected,int,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
+				       0xf4, 0xf5, 0xf6, 0xf7 };
+VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 };
+VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffff0, 0xfffffff1 };
+VECT_VAR_DECL(expected,int,64,1) [] = { 0xfffffffffffffff0 };
+VECT_VAR_DECL(expected,uint,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
+					0xf4, 0xf5, 0xf6, 0xf7 };
+VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 };
+VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffffff0, 0xfffffff1 };
+VECT_VAR_DECL(expected,uint,64,1) [] = { 0xfffffffffffffff0 };
+VECT_VAR_DECL(expected,poly,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
+					0xf4, 0xf5, 0xf6, 0xf7 };
+VECT_VAR_DECL(expected,poly,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 };
+VECT_VAR_DECL(expected,float,32,2) [] = { -16, -15 };
+VECT_VAR_DECL(expected,int,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
+					0xf4, 0xf5, 0xf6, 0xf7,
+					0xf8, 0xf9, 0xfa, 0xfb,
+					0xfc, 0xfd, 0xfe, 0xff };
+VECT_VAR_DECL(expected,int,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3,
+					0xfff4, 0xfff5, 0xfff6, 0xfff7 };
+VECT_VAR_DECL(expected,int,32,4) [] = { 0xfffffff0, 0xfffffff1,
+					0xfffffff2, 0xfffffff3 };
+VECT_VAR_DECL(expected,int,64,2) [] = { 0xfffffffffffffff0,
+					0xfffffffffffffff1 };
+VECT_VAR_DECL(expected,uint,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
+					 0xf4, 0xf5, 0xf6, 0xf7,
+					 0xf8, 0xf9, 0xfa, 0xfb,
+					 0xfc, 0xfd, 0xfe, 0xff };
+VECT_VAR_DECL(expected,uint,16,8) [] = { 0xfff0, 0xfff1, 0xfff2,
+					 0xfff3, 0xfff4, 0xfff5,
+					 0xfff6, 0xfff7 };
+VECT_VAR_DECL(expected,uint,32,4) [] = { 0xfffffff0, 0xfffffff1,
+					 0xfffffff2, 0xfffffff3 };
+VECT_VAR_DECL(expected,uint,64,2) [] = { 0xfffffffffffffff0,
+					 0xfffffffffffffff1 };
+VECT_VAR_DECL(expected,poly,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
+					 0xf4, 0xf5, 0xf6, 0xf7,
+					 0xf8, 0xf9, 0xfa, 0xfb,
+					 0xfc, 0xfd, 0xfe, 0xff };
+VECT_VAR_DECL(expected,poly,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3,
+					 0xfff4, 0xfff5, 0xfff6, 0xfff7 };
+VECT_VAR_DECL(expected,float,32,4) [] = { -16, -15, -14, -13 };
+
+#define TEST_MSG "VLD1/VLD1Q"
+void exec_vld1 (void)
+{
+  /* Basic test: vec=vld1(buffer), then store vec with vst1(result, vec).  */
+  /* This actually tests vld1 and vst1 at the same time.  */
+#define TEST_VLD1(VAR, BUF, Q, T1, T2, W, N)				\
+  VECT_VAR(VAR, T1, W, N) = vld1##Q##_##T2##W(VECT_VAR(BUF, T1, W, N)); \
+  vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(VAR, T1, W, N))
+
+  DECL_VARIABLE_ALL_VARIANTS(vector);
+
+  clean_results ();
+
+  TEST_MACRO_ALL_VARIANTS_2_5(TEST_VLD1, vector, buffer);
+
+  TEST_VLD1(vector, buffer, , float, f, 32, 2);
+  TEST_VLD1(vector, buffer, q, float, f, 32, 4);
+
+  check_results (TEST_MSG, "");
+}
+
+int main (void)
+{
+  exec_vld1 ();  /* Run the VLD1/VLD1Q test.  */
+  return 0;
+}