diff mbox

[[ARM/AArch64,testsuite] 03/36] Add vmax, vmin, vhadd, vhsub and vrhadd tests.

Message ID 1421162314-25779-4-git-send-email-christophe.lyon@linaro.org
State New
Headers show

Commit Message

Christophe Lyon Jan. 13, 2015, 3:18 p.m. UTC
* gcc.target/aarch64/advsimd-intrinsics/binary_op_no64.inc: New file.
	* gcc.target/aarch64/advsimd-intrinsics/vhadd.c: New file.
	* gcc.target/aarch64/advsimd-intrinsics/vhsub.c: New file.
	* gcc.target/aarch64/advsimd-intrinsics/vmax.c: New file.
	* gcc.target/aarch64/advsimd-intrinsics/vmin.c: New file.
	* gcc.target/aarch64/advsimd-intrinsics/vrhadd.c: New file.

Comments

Christophe Lyon Jan. 16, 2015, 4:21 p.m. UTC | #1
On 16 January 2015 at 14:56, Tejas Belagod <tejas.belagod@arm.com> wrote:
>> +#ifndef NO_FLOAT_VARIANT
>> +  VLOAD(vector, buffer, , float, f, 32, 2);
>> +  VLOAD(vector, buffer, q, float, f, 32, 4);
>> +#endif
>>
> ....
>>
>> +#ifndef NO_FLOAT_VARIANT
>> +  VDUP(vector2, , float, f, 32, 2, -15.5f);
>> +  VDUP(vector2, q, float, f, 32, 4, -14.5f);
>> +#endif
>> +
>> +#ifndef NO_FLOAT_VARIANT
>> +#define FLOAT_VARIANT(MACRO, VAR)                      \
>> +  MACRO(VAR, , float, f, 32, 2);                       \
>> +  MACRO(VAR, q, float, f, 32, 4)
>> +#else
>> +#define FLOAT_VARIANT(MACRO, VAR)
>> +#endif
>
>
> Double negative! :-) Probably easier on the reader to avoid it, but your
> call.

Oh yes... I am importing my existing code, so I try to minimize changes.

>> diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmax.c
>> b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmax.c
>> new file mode 100644
>> index 0000000..2591b16
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmax.c
>> @@ -0,0 +1,64 @@
>> +#include <arm_neon.h>
>> +#include "arm-neon-ref.h"
>> +#include "compute-ref-data.h"
>> +
>> +#define INSN_NAME vmax
>> +#define TEST_MSG "VMAX/VMAXQ"
>> +
>> +/* Expected results.  */
>> +VECT_VAR_DECL(expected,int,8,8) [] = { 0xf3, 0xf3, 0xf3, 0xf3,
>> +                                      0xf4, 0xf5, 0xf6, 0xf7 };
>> +VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff2, 0xfff2, 0xfff2, 0xfff3 };
>> +VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffff0, 0xfffffff1 };
>> +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 };
>> +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xf3, 0xf3, 0xf3, 0xf3,
>> +                                       0xf4, 0xf5, 0xf6, 0xf7 };
>> +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfff1, 0xfff1, 0xfff2, 0xfff3
>> };
>> +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffffff0, 0xfffffff1 };
>> +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 };
>> +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33,
>> +                                       0x33, 0x33, 0x33, 0x33 };
>> +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333
>> };
>> +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1780000, 0xc1700000 };
>> +VECT_VAR_DECL(expected,int,8,16) [] = { 0xf4, 0xf4, 0xf4, 0xf4,
>> +                                       0xf4, 0xf5, 0xf6, 0xf7,
>> +                                       0xf8, 0xf9, 0xfa, 0xfb,
>> +                                       0xfc, 0xfd, 0xfe, 0xff };
>> +VECT_VAR_DECL(expected,int,16,8) [] = { 0xfff3, 0xfff3, 0xfff3, 0xfff3,
>> +                                       0xfff4, 0xfff5, 0xfff6, 0xfff7 };
>> +VECT_VAR_DECL(expected,int,32,4) [] = { 0xfffffff1, 0xfffffff1,
>> +                                       0xfffffff2, 0xfffffff3 };
>> +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333,
>> +                                       0x3333333333333333 };
>> +VECT_VAR_DECL(expected,uint,8,16) [] = { 0xf9, 0xf9, 0xf9, 0xf9,
>> +                                        0xf9, 0xf9, 0xf9, 0xf9,
>> +                                        0xf9, 0xf9, 0xfa, 0xfb,
>> +                                        0xfc, 0xfd, 0xfe, 0xff };
>> +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xfff2, 0xfff2, 0xfff2, 0xfff3,
>> +                                        0xfff4, 0xfff5, 0xfff6, 0xfff7 };
>> +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xfffffff1, 0xfffffff1,
>> +                                        0xfffffff2, 0xfffffff3 };
>> +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333,
>> +                                        0x3333333333333333 };
>> +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
>> +                                        0x33, 0x33, 0x33, 0x33,
>> +                                        0x33, 0x33, 0x33, 0x33,
>> +                                        0x33, 0x33, 0x33, 0x33 };
>> +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333,
>> +                                        0x3333, 0x3333, 0x3333, 0x3333 };
>> +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc1680000, 0xc1680000,
>> +                                          0xc1600000, 0xc1500000 };
>> +
>> +/* Expected results with special FP values.  */
>> +VECT_VAR_DECL(expected_nan,hfloat,32,4) [] = { 0x7fc00000, 0x7fc00000,
>> +                                              0x7fc00000, 0x7fc00000 };
>> +VECT_VAR_DECL(expected_mnan,hfloat,32,4) [] = { 0x7fc00000, 0x7fc00000,
>> +                                               0x7fc00000, 0x7fc00000 };
>> +VECT_VAR_DECL(expected_inf,hfloat,32,4) [] = { 0x7f800000, 0x7f800000,
>> +                                              0x7f800000, 0x7f800000 };
>> +VECT_VAR_DECL(expected_minf,hfloat,32,4) [] = { 0x3f800000, 0x3f800000,
>> +                                               0x3f800000, 0x3f800000 };
>> +VECT_VAR_DECL(expected_zero1,hfloat,32,4) [] = { 0x0, 0x0, 0x0, 0x0 };
>> +VECT_VAR_DECL(expected_zero2,hfloat,32,4) [] = { 0x0, 0x0, 0x0, 0x0 };
>> +
>> +#include "binary_op_no64.inc"
>> diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmin.c
>> b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmin.c
>> new file mode 100644
>> index 0000000..2b5e87c
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmin.c
>> @@ -0,0 +1,66 @@
>> +#include <arm_neon.h>
>> +#include "arm-neon-ref.h"
>> +#include "compute-ref-data.h"
>> +
>> +#define INSN_NAME vmin
>> +#define TEST_MSG "VMIN/VMINQ"
>> +
>> +/* Expected results.  */
>> +VECT_VAR_DECL(expected,int,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
>> +                                      0xf3, 0xf3, 0xf3, 0xf3 };
>> +VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff2 };
>> +VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffff0, 0xfffffff0 };
>> +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 };
>> +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
>> +                                       0xf3, 0xf3, 0xf3, 0xf3 };
>> +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfff0, 0xfff1, 0xfff1, 0xfff1
>> };
>> +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffffff0, 0xfffffff0 };
>> +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 };
>> +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33,
>> +                                       0x33, 0x33, 0x33, 0x33 };
>> +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333
>> };
>> +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1800000, 0xc1780000 };
>> +VECT_VAR_DECL(expected,int,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
>> +                                       0xf4, 0xf4, 0xf4, 0xf4,
>> +                                       0xf4, 0xf4, 0xf4, 0xf4,
>> +                                       0xf4, 0xf4, 0xf4, 0xf4 };
>> +VECT_VAR_DECL(expected,int,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3,
>> +                                       0xfff3, 0xfff3, 0xfff3, 0xfff3 };
>> +VECT_VAR_DECL(expected,int,32,4) [] = { 0xfffffff0, 0xfffffff1,
>> +                                       0xfffffff1, 0xfffffff1 };
>> +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333,
>> +                                       0x3333333333333333 };
>> +VECT_VAR_DECL(expected,uint,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
>> +                                        0xf4, 0xf5, 0xf6, 0xf7,
>> +                                        0xf8, 0xf9, 0xf9, 0xf9,
>> +                                        0xf9, 0xf9, 0xf9, 0xf9 };
>> +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff2,
>> +                                        0xfff2, 0xfff2, 0xfff2, 0xfff2 };
>> +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xfffffff0, 0xfffffff1,
>> +                                        0xfffffff1, 0xfffffff1 };
>> +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333,
>> +                                        0x3333333333333333 };
>> +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
>> +                                        0x33, 0x33, 0x33, 0x33,
>> +                                        0x33, 0x33, 0x33, 0x33,
>> +                                        0x33, 0x33, 0x33, 0x33 };
>> +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333,
>> +                                        0x3333, 0x3333, 0x3333, 0x3333 };
>> +
>> +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc1800000, 0xc1700000,
>> +                                          0xc1680000, 0xc1680000 };
>> +/* Expected results with special FP values.  */
>> +VECT_VAR_DECL(expected_nan,hfloat,32,4) [] = { 0x7fc00000, 0x7fc00000,
>> +                                              0x7fc00000, 0x7fc00000 };
>> +VECT_VAR_DECL(expected_mnan,hfloat,32,4) [] = { 0x7fc00000, 0x7fc00000,
>> +                                               0x7fc00000, 0x7fc00000 };
>> +VECT_VAR_DECL(expected_inf,hfloat,32,4) [] = { 0x3f800000, 0x3f800000,
>> +                                              0x3f800000, 0x3f800000 };
>> +VECT_VAR_DECL(expected_minf,hfloat,32,4) [] = { 0xff800000, 0xff800000,
>> +                                               0xff800000, 0xff800000 };
>> +VECT_VAR_DECL(expected_zero1,hfloat,32,4) [] = { 0x80000000, 0x80000000,
>> +                                                0x80000000, 0x80000000 };
>> +VECT_VAR_DECL(expected_zero2,hfloat,32,4) [] = { 0x80000000, 0x80000000,
>> +                                                0x80000000, 0x80000000 };
>> +
>> +#include "binary_op_no64.inc"
>
>
> vmax and vmin do have v<maxmin>_f64 and v<maxmin>q_f64 variants.

My existing tests only cover armv7 so far.
I do plan to expand them once they are all in GCC.

> Otherwise, they look good to me(but I can't approve it).
>
> Tejas.
>
Christophe Lyon Jan. 16, 2015, 5:52 p.m. UTC | #2
On 16 January 2015 at 18:14, Marcus Shawcroft
<marcus.shawcroft@gmail.com> wrote:
> On 16 January 2015 at 16:21, Christophe Lyon <christophe.lyon@linaro.org> wrote:
>
>> My existing tests only cover armv7 so far.
>> I do plan to expand them once they are all in GCC.
>>
>>> Otherwise, they look good to me(but I can't approve it).
>>>
>>> Tejas.
>>>
>
> OK provided, as per the previous couple, that we don;t regression or
> introduce new fails on aarch64[_be] or aarch32.

This patch shows failures on aarch64 and aarch64_be for vmax and vmin
when the input is -NaN.
It's a corner case, and my reading of the ARM ARM is that the result
should the same as on aarch32.
I haven't had time to look at it in more details though.
So, not OK?

> /Marcus
Christophe Lyon Jan. 19, 2015, 3:43 p.m. UTC | #3
On 19 January 2015 at 14:29, Marcus Shawcroft
<marcus.shawcroft@gmail.com> wrote:
> On 16 January 2015 at 17:52, Christophe Lyon <christophe.lyon@linaro.org> wrote:
>
>>> OK provided, as per the previous couple, that we don;t regression or
>>> introduce new fails on aarch64[_be] or aarch32.
>>
>> This patch shows failures on aarch64 and aarch64_be for vmax and vmin
>> when the input is -NaN.
>> It's a corner case, and my reading of the ARM ARM is that the result
>> should the same as on aarch32.
>> I haven't had time to look at it in more details though.
>> So, not OK?
>
> They should have the same behaviour in aarch32 and aarch64. Did you
> test on HW or a model?
>
I ran the tests on qemu for aarch32 and aarch64-linux, and on the
foundation model for aarch64*-elf.

> /Marcus
Christophe Lyon Jan. 21, 2015, 3:07 p.m. UTC | #4
On 19 January 2015 at 17:54, Marcus Shawcroft
<marcus.shawcroft@gmail.com> wrote:
> On 19 January 2015 at 15:43, Christophe Lyon <christophe.lyon@linaro.org> wrote:
>> On 19 January 2015 at 14:29, Marcus Shawcroft
>> <marcus.shawcroft@gmail.com> wrote:
>>> On 16 January 2015 at 17:52, Christophe Lyon <christophe.lyon@linaro.org> wrote:
>>>
>>>>> OK provided, as per the previous couple, that we don;t regression or
>>>>> introduce new fails on aarch64[_be] or aarch32.
>>>>
>>>> This patch shows failures on aarch64 and aarch64_be for vmax and vmin
>>>> when the input is -NaN.
>>>> It's a corner case, and my reading of the ARM ARM is that the result
>>>> should the same as on aarch32.
>>>> I haven't had time to look at it in more details though.
>>>> So, not OK?
>>>
>>> They should have the same behaviour in aarch32 and aarch64. Did you
>>> test on HW or a model?
>>>
>> I ran the tests on qemu for aarch32 and aarch64-linux, and on the
>> foundation model for aarch64*-elf.
>
> Leave this one out until we understand why it fails. /Marcus

I've looked at this a bit more.
We have
fmax    v0.4s, v0.4s, v1.4s
where v0 is a vector of -NaN (0xffc00000) and v1 is a vector of 1.

The output is still -NaN (0xffc00000), while the test expects
defaultNaN (0x7fc00000).

I have executed the test under GDB on AArch64 HW, and noticed that fpcr was 0.
I forced it to have DN==1:
set $fpcr=0x1000000
but this didn't change the result.

Does setting fpcr.dn under gdb actually work?

Christophe.
Christophe Lyon Jan. 22, 2015, 2:28 p.m. UTC | #5
On 22 January 2015 at 12:19, Tejas Belagod <tejas.belagod@arm.com> wrote:
> On 21/01/15 15:07, Christophe Lyon wrote:
>>
>> On 19 January 2015 at 17:54, Marcus Shawcroft
>> <marcus.shawcroft@gmail.com> wrote:
>>>
>>> On 19 January 2015 at 15:43, Christophe Lyon <christophe.lyon@linaro.org>
>>> wrote:
>>>>
>>>> On 19 January 2015 at 14:29, Marcus Shawcroft
>>>> <marcus.shawcroft@gmail.com> wrote:
>>>>>
>>>>> On 16 January 2015 at 17:52, Christophe Lyon
>>>>> <christophe.lyon@linaro.org> wrote:
>>>>>
>>>>>>> OK provided, as per the previous couple, that we don;t regression or
>>>>>>> introduce new fails on aarch64[_be] or aarch32.
>>>>>>
>>>>>>
>>>>>> This patch shows failures on aarch64 and aarch64_be for vmax and vmin
>>>>>> when the input is -NaN.
>>>>>> It's a corner case, and my reading of the ARM ARM is that the result
>>>>>> should the same as on aarch32.
>>>>>> I haven't had time to look at it in more details though.
>>>>>> So, not OK?
>>>>>
>>>>>
>>>>> They should have the same behaviour in aarch32 and aarch64. Did you
>>>>> test on HW or a model?
>>>>>
>>>> I ran the tests on qemu for aarch32 and aarch64-linux, and on the
>>>> foundation model for aarch64*-elf.
>>>
>>>
>>> Leave this one out until we understand why it fails. /Marcus
>>
>>
>> I've looked at this a bit more.
>> We have
>> fmax    v0.4s, v0.4s, v1.4s
>> where v0 is a vector of -NaN (0xffc00000) and v1 is a vector of 1.
>>
>> The output is still -NaN (0xffc00000), while the test expects
>> defaultNaN (0x7fc00000).
>>
>
> In the AArch32 execution state, Advanced SIMD FP arithmetic always uses the
> DefaultNaN setting regardless of the DN-bit value in the FPSCR. In AArch64
> execution state, result of Advanced SIMD FP arithmetic operations depend on
> the value of the DN-bit i.e. either propagate the input NaN or generate
> DefaultNaN depending on the value of DN.

Maybe I'm using an outdated doc. On page 2282 of ARMv8 ARM rev C, I
can see only the latter (no diff between aarch32 and aarch64 in
FPProcessNan pseudo-code)

> If you're running your test in the AArch64 execution state, you'd want to
> define the DN bit and modify the expected results accordingly or have the
> test poll at runtime what the DN-bit is set to and check expected results
> dynamically.
Makes sense, I hadn't noticed the different aarch64 spec here.

> I think the test already has expected behaviour for AArch32 execution state
> by expecting DefaultNaN regardless.
Yes.

>> I have executed the test under GDB on AArch64 HW, and noticed that fpcr
>> was 0.
>> I forced it to have DN==1:
>> set $fpcr=0x1000000
>> but this didn't change the result.
>>
>> Does setting fpcr.dn under gdb actually work?
>>
>
> It should. Possibly a bug, patches welcome :-).
>
:-)
Christophe Lyon Jan. 22, 2015, 9:31 p.m. UTC | #6
On 22 January 2015 at 16:22, Tejas Belagod <tejas.belagod@arm.com> wrote:
> On 22/01/15 14:28, Christophe Lyon wrote:
>>
>> On 22 January 2015 at 12:19, Tejas Belagod <tejas.belagod@arm.com> wrote:
>>>
>>> On 21/01/15 15:07, Christophe Lyon wrote:
>>>>
>>>>
>>>> On 19 January 2015 at 17:54, Marcus Shawcroft
>>>> <marcus.shawcroft@gmail.com> wrote:
>>>>>
>>>>>
>>>>> On 19 January 2015 at 15:43, Christophe Lyon
>>>>> <christophe.lyon@linaro.org>
>>>>> wrote:
>>>>>>
>>>>>>
>>>>>> On 19 January 2015 at 14:29, Marcus Shawcroft
>>>>>> <marcus.shawcroft@gmail.com> wrote:
>>>>>>>
>>>>>>>
>>>>>>> On 16 January 2015 at 17:52, Christophe Lyon
>>>>>>> <christophe.lyon@linaro.org> wrote:
>>>>>>>
>>>>>>>>> OK provided, as per the previous couple, that we don;t regression
>>>>>>>>> or
>>>>>>>>> introduce new fails on aarch64[_be] or aarch32.
>>>>>>>>
>>>>>>>>
>>>>>>>>
>>>>>>>> This patch shows failures on aarch64 and aarch64_be for vmax and
>>>>>>>> vmin
>>>>>>>> when the input is -NaN.
>>>>>>>> It's a corner case, and my reading of the ARM ARM is that the result
>>>>>>>> should the same as on aarch32.
>>>>>>>> I haven't had time to look at it in more details though.
>>>>>>>> So, not OK?
>>>>>>>
>>>>>>>
>>>>>>>
>>>>>>> They should have the same behaviour in aarch32 and aarch64. Did you
>>>>>>> test on HW or a model?
>>>>>>>
>>>>>> I ran the tests on qemu for aarch32 and aarch64-linux, and on the
>>>>>> foundation model for aarch64*-elf.
>>>>>
>>>>>
>>>>>
>>>>> Leave this one out until we understand why it fails. /Marcus
>>>>
>>>>
>>>>
>>>> I've looked at this a bit more.
>>>> We have
>>>> fmax    v0.4s, v0.4s, v1.4s
>>>> where v0 is a vector of -NaN (0xffc00000) and v1 is a vector of 1.
>>>>
>>>> The output is still -NaN (0xffc00000), while the test expects
>>>> defaultNaN (0x7fc00000).
>>>>
>>>
>>> In the AArch32 execution state, Advanced SIMD FP arithmetic always uses
>>> the
>>> DefaultNaN setting regardless of the DN-bit value in the FPSCR. In
>>> AArch64
>>> execution state, result of Advanced SIMD FP arithmetic operations depend
>>> on
>>> the value of the DN-bit i.e. either propagate the input NaN or generate
>>> DefaultNaN depending on the value of DN.
>>
>>
>> Maybe I'm using an outdated doc. On page 2282 of ARMv8 ARM rev C, I
>> can see only the latter (no diff between aarch32 and aarch64 in
>> FPProcessNan pseudo-code)
>>
>
> If you see pg. 4005 in the same doc(rev C), you'll see the FPSCR spec -
> under DN:
>
> "The value of this bit only controls scalar floating-point arithmetic.
> Advanced SIMD arithmetic always uses the Default NaN setting, regardless of
> the value of the DN bit."
>
> Also on page 3180 for the description of VMAX(vector FP), it says:
> "
> *  max(+0.0, -0.0) = +0.0
> * If any input is a NaN, the corresponding result element is the default
> NaN.
> "
>
Oops I was looking at FMAX (vector) pg 936.

> The pseudocode for FPMax () on pg. 3180 passes StandardFPSCRValue() to
> FPMax() which is on pg. 2285
>
> // StandardFPSCRValue()
> // ====================
> FPCRType StandardFPSCRValue()
> return ‘00000’ : FPSCR.AHP : ‘11000000000000000000000000’
>
> Here bit-25(FPSCR.DN) is set to 1.
>

So, we should get defaultNaN too on aarch64, and no need to try to
force DN to 1 in gdb?

What can be wrong?

> Thanks,
> Tejas.
>
>
>>> If you're running your test in the AArch64 execution state, you'd want to
>>> define the DN bit and modify the expected results accordingly or have the
>>> test poll at runtime what the DN-bit is set to and check expected results
>>> dynamically.
>>
>> Makes sense, I hadn't noticed the different aarch64 spec here.
>>
>>> I think the test already has expected behaviour for AArch32 execution
>>> state
>>> by expecting DefaultNaN regardless.
>>
>> Yes.
>>
>>>> I have executed the test under GDB on AArch64 HW, and noticed that fpcr
>>>> was 0.
>>>> I forced it to have DN==1:
>>>> set $fpcr=0x1000000
>>>> but this didn't change the result.
>>>>
>>>> Does setting fpcr.dn under gdb actually work?
>>>>
>>>
>>> It should. Possibly a bug, patches welcome :-).
>>>
>> :-)
>>
>
>
Christophe Lyon Jan. 23, 2015, 11:42 a.m. UTC | #7
On 23 January 2015 at 11:18, Tejas Belagod <tejas.belagod@arm.com> wrote:
> On 22/01/15 21:31, Christophe Lyon wrote:
>>
>> On 22 January 2015 at 16:22, Tejas Belagod <tejas.belagod@arm.com> wrote:
>>>
>>> On 22/01/15 14:28, Christophe Lyon wrote:
>>>>
>>>>
>>>> On 22 January 2015 at 12:19, Tejas Belagod <tejas.belagod@arm.com>
>>>> wrote:
>>>>>
>>>>>
>>>>> On 21/01/15 15:07, Christophe Lyon wrote:
>>>>>>
>>>>>>
>>>>>>
>>>>>> On 19 January 2015 at 17:54, Marcus Shawcroft
>>>>>> <marcus.shawcroft@gmail.com> wrote:
>>>>>>>
>>>>>>>
>>>>>>>
>>>>>>> On 19 January 2015 at 15:43, Christophe Lyon
>>>>>>> <christophe.lyon@linaro.org>
>>>>>>> wrote:
>>>>>>>>
>>>>>>>>
>>>>>>>>
>>>>>>>> On 19 January 2015 at 14:29, Marcus Shawcroft
>>>>>>>> <marcus.shawcroft@gmail.com> wrote:
>>>>>>>>>
>>>>>>>>>
>>>>>>>>>
>>>>>>>>> On 16 January 2015 at 17:52, Christophe Lyon
>>>>>>>>> <christophe.lyon@linaro.org> wrote:
>>>>>>>>>
>>>>>>>>>>> OK provided, as per the previous couple, that we don;t regression
>>>>>>>>>>> or
>>>>>>>>>>> introduce new fails on aarch64[_be] or aarch32.
>>>>>>>>>>
>>>>>>>>>>
>>>>>>>>>>
>>>>>>>>>>
>>>>>>>>>> This patch shows failures on aarch64 and aarch64_be for vmax and
>>>>>>>>>> vmin
>>>>>>>>>> when the input is -NaN.
>>>>>>>>>> It's a corner case, and my reading of the ARM ARM is that the
>>>>>>>>>> result
>>>>>>>>>> should the same as on aarch32.
>>>>>>>>>> I haven't had time to look at it in more details though.
>>>>>>>>>> So, not OK?
>>>>>>>>>
>>>>>>>>>
>>>>>>>>>
>>>>>>>>>
>>>>>>>>> They should have the same behaviour in aarch32 and aarch64. Did you
>>>>>>>>> test on HW or a model?
>>>>>>>>>
>>>>>>>> I ran the tests on qemu for aarch32 and aarch64-linux, and on the
>>>>>>>> foundation model for aarch64*-elf.
>>>>>>>
>>>>>>>
>>>>>>>
>>>>>>>
>>>>>>> Leave this one out until we understand why it fails. /Marcus
>>>>>>
>>>>>>
>>>>>>
>>>>>>
>>>>>> I've looked at this a bit more.
>>>>>> We have
>>>>>> fmax    v0.4s, v0.4s, v1.4s
>>>>>> where v0 is a vector of -NaN (0xffc00000) and v1 is a vector of 1.
>>>>>>
>>>>>> The output is still -NaN (0xffc00000), while the test expects
>>>>>> defaultNaN (0x7fc00000).
>>>>>>
>>>>>
>>>>> In the AArch32 execution state, Advanced SIMD FP arithmetic always uses
>>>>> the
>>>>> DefaultNaN setting regardless of the DN-bit value in the FPSCR. In
>>>>> AArch64
>>>>> execution state, result of Advanced SIMD FP arithmetic operations
>>>>> depend
>>>>> on
>>>>> the value of the DN-bit i.e. either propagate the input NaN or generate
>>>>> DefaultNaN depending on the value of DN.
>>>>
>>>>
>>>>
>>>> Maybe I'm using an outdated doc. On page 2282 of ARMv8 ARM rev C, I
>>>> can see only the latter (no diff between aarch32 and aarch64 in
>>>> FPProcessNan pseudo-code)
>>>>
>>>
>>> If you see pg. 4005 in the same doc(rev C), you'll see the FPSCR spec -
>>> under DN:
>>>
>>> "The value of this bit only controls scalar floating-point arithmetic.
>>> Advanced SIMD arithmetic always uses the Default NaN setting, regardless
>>> of
>>> the value of the DN bit."
>>>
>>> Also on page 3180 for the description of VMAX(vector FP), it says:
>>> "
>>> *  max(+0.0, -0.0) = +0.0
>>> * If any input is a NaN, the corresponding result element is the default
>>> NaN.
>>> "
>>>
>> Oops I was looking at FMAX (vector) pg 936.
>>
>>> The pseudocode for FPMax () on pg. 3180 passes StandardFPSCRValue() to
>>> FPMax() which is on pg. 2285
>>>
>>> // StandardFPSCRValue()
>>> // ====================
>>> FPCRType StandardFPSCRValue()
>>> return ‘00000’ : FPSCR.AHP : ‘11000000000000000000000000’
>>>
>>> Here bit-25(FPSCR.DN) is set to 1.
>>>
>>
>> So, we should get defaultNaN too on aarch64, and no need to try to
>> force DN to 1 in gdb?
>>
>> What can be wrong?
>>
>
> On pg 3180, I see VMAX(FPSIMD) for A32/T32, not A64. I hope we're reading
> the same document.
>
> Regardless of the page number, if you see the pseudocode for VMAX(FPSIMD)
> for AArch32, StandardFPSCRValue() (i.e. DN = 1) is passed to FPMax() which
> means generate DefaultNaN() regardless.
>
> OTOH, on pg 936, you have FMAX(vector) for A64 where FPMax() in the
> pseudocode gets just FPCR.
>
>
Ok, that was my initial understanding but our discussion confused me.

And that's why I tried to force DN = 1 in gdb before single-stepping over
fmax    v0.4s, v0.4s, v1.4s

but it changed nothing :-(
Hence my question about a gdb possible bug or misuse.

I'll try modifying the test to have it force DN=1.

> Thanks,
> Tejas.
>
>
>>> Thanks,
>>> Tejas.
>>>
>>>
>>>>> If you're running your test in the AArch64 execution state, you'd want
>>>>> to
>>>>> define the DN bit and modify the expected results accordingly or have
>>>>> the
>>>>> test poll at runtime what the DN-bit is set to and check expected
>>>>> results
>>>>> dynamically.
>>>>
>>>>
>>>> Makes sense, I hadn't noticed the different aarch64 spec here.
>>>>
>>>>> I think the test already has expected behaviour for AArch32 execution
>>>>> state
>>>>> by expecting DefaultNaN regardless.
>>>>
>>>>
>>>> Yes.
>>>>
>>>>>> I have executed the test under GDB on AArch64 HW, and noticed that
>>>>>> fpcr
>>>>>> was 0.
>>>>>> I forced it to have DN==1:
>>>>>> set $fpcr=0x1000000
>>>>>> but this didn't change the result.
>>>>>>
>>>>>> Does setting fpcr.dn under gdb actually work?
>>>>>>
>>>>>
>>>>> It should. Possibly a bug, patches welcome :-).
>>>>>
>>>> :-)
>>>>
>>>
>>>
>>
>
>
Christophe Lyon Jan. 23, 2015, 1:44 p.m. UTC | #8
On 23 January 2015 at 12:42, Christophe Lyon <christophe.lyon@linaro.org> wrote:
> On 23 January 2015 at 11:18, Tejas Belagod <tejas.belagod@arm.com> wrote:
>> On 22/01/15 21:31, Christophe Lyon wrote:
>>>
>>> On 22 January 2015 at 16:22, Tejas Belagod <tejas.belagod@arm.com> wrote:
>>>>
>>>> On 22/01/15 14:28, Christophe Lyon wrote:
>>>>>
>>>>>
>>>>> On 22 January 2015 at 12:19, Tejas Belagod <tejas.belagod@arm.com>
>>>>> wrote:
>>>>>>
>>>>>>
>>>>>> On 21/01/15 15:07, Christophe Lyon wrote:
>>>>>>>
>>>>>>>
>>>>>>>
>>>>>>> On 19 January 2015 at 17:54, Marcus Shawcroft
>>>>>>> <marcus.shawcroft@gmail.com> wrote:
>>>>>>>>
>>>>>>>>
>>>>>>>>
>>>>>>>> On 19 January 2015 at 15:43, Christophe Lyon
>>>>>>>> <christophe.lyon@linaro.org>
>>>>>>>> wrote:
>>>>>>>>>
>>>>>>>>>
>>>>>>>>>
>>>>>>>>> On 19 January 2015 at 14:29, Marcus Shawcroft
>>>>>>>>> <marcus.shawcroft@gmail.com> wrote:
>>>>>>>>>>
>>>>>>>>>>
>>>>>>>>>>
>>>>>>>>>> On 16 January 2015 at 17:52, Christophe Lyon
>>>>>>>>>> <christophe.lyon@linaro.org> wrote:
>>>>>>>>>>
>>>>>>>>>>>> OK provided, as per the previous couple, that we don;t regression
>>>>>>>>>>>> or
>>>>>>>>>>>> introduce new fails on aarch64[_be] or aarch32.
>>>>>>>>>>>
>>>>>>>>>>>
>>>>>>>>>>>
>>>>>>>>>>>
>>>>>>>>>>> This patch shows failures on aarch64 and aarch64_be for vmax and
>>>>>>>>>>> vmin
>>>>>>>>>>> when the input is -NaN.
>>>>>>>>>>> It's a corner case, and my reading of the ARM ARM is that the
>>>>>>>>>>> result
>>>>>>>>>>> should the same as on aarch32.
>>>>>>>>>>> I haven't had time to look at it in more details though.
>>>>>>>>>>> So, not OK?
>>>>>>>>>>
>>>>>>>>>>
>>>>>>>>>>
>>>>>>>>>>
>>>>>>>>>> They should have the same behaviour in aarch32 and aarch64. Did you
>>>>>>>>>> test on HW or a model?
>>>>>>>>>>
>>>>>>>>> I ran the tests on qemu for aarch32 and aarch64-linux, and on the
>>>>>>>>> foundation model for aarch64*-elf.
>>>>>>>>
>>>>>>>>
>>>>>>>>
>>>>>>>>
>>>>>>>> Leave this one out until we understand why it fails. /Marcus
>>>>>>>
>>>>>>>
>>>>>>>
>>>>>>>
>>>>>>> I've looked at this a bit more.
>>>>>>> We have
>>>>>>> fmax    v0.4s, v0.4s, v1.4s
>>>>>>> where v0 is a vector of -NaN (0xffc00000) and v1 is a vector of 1.
>>>>>>>
>>>>>>> The output is still -NaN (0xffc00000), while the test expects
>>>>>>> defaultNaN (0x7fc00000).
>>>>>>>
>>>>>>
>>>>>> In the AArch32 execution state, Advanced SIMD FP arithmetic always uses
>>>>>> the
>>>>>> DefaultNaN setting regardless of the DN-bit value in the FPSCR. In
>>>>>> AArch64
>>>>>> execution state, result of Advanced SIMD FP arithmetic operations
>>>>>> depend
>>>>>> on
>>>>>> the value of the DN-bit i.e. either propagate the input NaN or generate
>>>>>> DefaultNaN depending on the value of DN.
>>>>>
>>>>>
>>>>>
>>>>> Maybe I'm using an outdated doc. On page 2282 of ARMv8 ARM rev C, I
>>>>> can see only the latter (no diff between aarch32 and aarch64 in
>>>>> FPProcessNan pseudo-code)
>>>>>
>>>>
>>>> If you see pg. 4005 in the same doc(rev C), you'll see the FPSCR spec -
>>>> under DN:
>>>>
>>>> "The value of this bit only controls scalar floating-point arithmetic.
>>>> Advanced SIMD arithmetic always uses the Default NaN setting, regardless
>>>> of
>>>> the value of the DN bit."
>>>>
>>>> Also on page 3180 for the description of VMAX(vector FP), it says:
>>>> "
>>>> *  max(+0.0, -0.0) = +0.0
>>>> * If any input is a NaN, the corresponding result element is the default
>>>> NaN.
>>>> "
>>>>
>>> Oops I was looking at FMAX (vector) pg 936.
>>>
>>>> The pseudocode for FPMax () on pg. 3180 passes StandardFPSCRValue() to
>>>> FPMax() which is on pg. 2285
>>>>
>>>> // StandardFPSCRValue()
>>>> // ====================
>>>> FPCRType StandardFPSCRValue()
>>>> return ‘00000’ : FPSCR.AHP : ‘11000000000000000000000000’
>>>>
>>>> Here bit-25(FPSCR.DN) is set to 1.
>>>>
>>>
>>> So, we should get defaultNaN too on aarch64, and no need to try to
>>> force DN to 1 in gdb?
>>>
>>> What can be wrong?
>>>
>>
>> On pg 3180, I see VMAX(FPSIMD) for A32/T32, not A64. I hope we're reading
>> the same document.
>>
>> Regardless of the page number, if you see the pseudocode for VMAX(FPSIMD)
>> for AArch32, StandardFPSCRValue() (i.e. DN = 1) is passed to FPMax() which
>> means generate DefaultNaN() regardless.
>>
>> OTOH, on pg 936, you have FMAX(vector) for A64 where FPMax() in the
>> pseudocode gets just FPCR.
>>
>>
> Ok, that was my initial understanding but our discussion confused me.
>
> And that's why I tried to force DN = 1 in gdb before single-stepping over
> fmax    v0.4s, v0.4s, v1.4s
>
> but it changed nothing :-(
> Hence my question about a gdb possible bug or misuse.

Hmm... user error, I missed one bit
set $fpcr=0x2000000
works under gdb.

> I'll try modifying the test to have it force DN=1.
>
Forcing DN=1 in the test makes it pass.

I am going to look at adding that cleanly to my test, and resubmit it.

Thanks, and sorry for the noise.

>> Thanks,
>> Tejas.
>>
>>
>>>> Thanks,
>>>> Tejas.
>>>>
>>>>
>>>>>> If you're running your test in the AArch64 execution state, you'd want
>>>>>> to
>>>>>> define the DN bit and modify the expected results accordingly or have
>>>>>> the
>>>>>> test poll at runtime what the DN-bit is set to and check expected
>>>>>> results
>>>>>> dynamically.
>>>>>
>>>>>
>>>>> Makes sense, I hadn't noticed the different aarch64 spec here.
>>>>>
>>>>>> I think the test already has expected behaviour for AArch32 execution
>>>>>> state
>>>>>> by expecting DefaultNaN regardless.
>>>>>
>>>>>
>>>>> Yes.
>>>>>
>>>>>>> I have executed the test under GDB on AArch64 HW, and noticed that
>>>>>>> fpcr
>>>>>>> was 0.
>>>>>>> I forced it to have DN==1:
>>>>>>> set $fpcr=0x1000000
>>>>>>> but this didn't change the result.
>>>>>>>
>>>>>>> Does setting fpcr.dn under gdb actually work?
>>>>>>>
>>>>>>
>>>>>> It should. Possibly a bug, patches welcome :-).
>>>>>>
>>>>> :-)
>>>>>
>>>>
>>>>
>>>
>>
>>
diff mbox

Patch

diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/binary_op_no64.inc b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/binary_op_no64.inc
new file mode 100644
index 0000000..36efe3a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/binary_op_no64.inc
@@ -0,0 +1,120 @@ 
+/* Can't use the standard binary_op.inc template because vmax has no
+   64 bits variant.  */
+
+#include <math.h>
+
+#define FNNAME1(NAME) exec_ ## NAME
+#define FNNAME(NAME) FNNAME1(NAME)
+
+void FNNAME (INSN_NAME) (void)
+{
+  int i;
+
+  /* Basic test: y=vmax(x,x), then store the result.  */
+#define TEST_BINARY_OP1(INSN, Q, T1, T2, W, N)				\
+  VECT_VAR(vector_res, T1, W, N) =                                      \
+    INSN##Q##_##T2##W(VECT_VAR(vector, T1, W, N),                       \
+                      VECT_VAR(vector2, T1, W, N));                     \
+  vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vector_res, T1, W, N))
+
+#define TEST_BINARY_OP(INSN, Q, T1, T2, W, N)   \
+  TEST_BINARY_OP1(INSN, Q, T1, T2, W, N)        \
+
+  DECL_VARIABLE_ALL_VARIANTS(vector);
+  DECL_VARIABLE_ALL_VARIANTS(vector2);
+  DECL_VARIABLE_ALL_VARIANTS(vector_res);
+
+  clean_results ();
+
+  /* Initialize input "vector" from "buffer".  */
+  TEST_MACRO_ALL_VARIANTS_2_5(VLOAD, vector, buffer);
+#ifndef NO_FLOAT_VARIANT
+  VLOAD(vector, buffer, , float, f, 32, 2);
+  VLOAD(vector, buffer, q, float, f, 32, 4);
+#endif
+
+  /* Choose init value arbitrarily, will be used as comparison value.  */
+  VDUP(vector2, , int, s, 8, 8, -13);
+  VDUP(vector2, , int, s, 16, 4, -14);
+  VDUP(vector2, , int, s, 32, 2, -16);
+  VDUP(vector2, , uint, u, 8, 8, 0xf3);
+  VDUP(vector2, , uint, u, 16, 4, 0xfff1);
+  VDUP(vector2, , uint, u, 32, 2, 0xfffffff0);
+  VDUP(vector2, q, int, s, 8, 16, -12);
+  VDUP(vector2, q, int, s, 16, 8, -13);
+  VDUP(vector2, q, int, s, 32, 4, -15);
+  VDUP(vector2, q, uint, u, 8, 16, 0xf9);
+  VDUP(vector2, q, uint, u, 16, 8, 0xfff2);
+  VDUP(vector2, q, uint, u, 32, 4, 0xfffffff1);
+#ifndef NO_FLOAT_VARIANT
+  VDUP(vector2, , float, f, 32, 2, -15.5f);
+  VDUP(vector2, q, float, f, 32, 4, -14.5f);
+#endif
+
+#ifndef NO_FLOAT_VARIANT
+#define FLOAT_VARIANT(MACRO, VAR)			\
+  MACRO(VAR, , float, f, 32, 2);			\
+  MACRO(VAR, q, float, f, 32, 4)
+#else
+#define FLOAT_VARIANT(MACRO, VAR)
+#endif
+
+#define TEST_MACRO_NO64BIT_VARIANT_1_5(MACRO, VAR)	\
+  MACRO(VAR, , int, s, 8, 8);				\
+  MACRO(VAR, , int, s, 16, 4);				\
+  MACRO(VAR, , int, s, 32, 2);				\
+  MACRO(VAR, , uint, u, 8, 8);				\
+  MACRO(VAR, , uint, u, 16, 4);				\
+  MACRO(VAR, , uint, u, 32, 2);				\
+  MACRO(VAR, q, int, s, 8, 16);				\
+  MACRO(VAR, q, int, s, 16, 8);				\
+  MACRO(VAR, q, int, s, 32, 4);				\
+  MACRO(VAR, q, uint, u, 8, 16);			\
+  MACRO(VAR, q, uint, u, 16, 8);			\
+  MACRO(VAR, q, uint, u, 32, 4);			\
+  FLOAT_VARIANT(MACRO, VAR)
+
+  /* Apply a binary operator named INSN_NAME.  */
+  TEST_MACRO_NO64BIT_VARIANT_1_5(TEST_BINARY_OP, INSN_NAME);
+
+  CHECK_RESULTS (TEST_MSG, "");
+
+#ifndef NO_FLOAT_VARIANT
+  /* Extra FP tests with special values (NaN, ....)  */
+  VDUP(vector, q, float, f, 32, 4, 1.0f);
+  VDUP(vector2, q, float, f, 32, 4, NAN);
+  TEST_BINARY_OP(INSN_NAME, q, float, f, 32, 4);
+  CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected_nan, " FP special (NaN)");
+
+  VDUP(vector, q, float, f, 32, 4, -NAN);
+  VDUP(vector2, q, float, f, 32, 4, 1.0f);
+  TEST_BINARY_OP(INSN_NAME, q, float, f, 32, 4);
+  CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected_mnan, " FP special (-NaN)");
+
+  VDUP(vector, q, float, f, 32, 4, 1.0f);
+  VDUP(vector2, q, float, f, 32, 4, HUGE_VALF);
+  TEST_BINARY_OP(INSN_NAME, q, float, f, 32, 4);
+  CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected_inf, " FP special (inf)");
+
+  VDUP(vector, q, float, f, 32, 4, -HUGE_VALF);
+  VDUP(vector2, q, float, f, 32, 4, 1.0f);
+  TEST_BINARY_OP(INSN_NAME, q, float, f, 32, 4);
+  CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected_minf, " FP special (-inf)");
+
+  VDUP(vector, q, float, f, 32, 4, 0.0f);
+  VDUP(vector2, q, float, f, 32, 4, -0.0f);
+  TEST_BINARY_OP(INSN_NAME, q, float, f, 32, 4);
+  CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected_zero1, " FP special (-0.0)");
+
+  VDUP(vector, q, float, f, 32, 4, -0.0f);
+  VDUP(vector2, q, float, f, 32, 4, 0.0f);
+  TEST_BINARY_OP(INSN_NAME, q, float, f, 32, 4);
+  CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected_zero2, " FP special (-0.0)");
+#endif
+}
+
+int main (void)
+{
+  FNNAME (INSN_NAME) ();
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vhadd.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vhadd.c
new file mode 100644
index 0000000..0c67df9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vhadd.c
@@ -0,0 +1,54 @@ 
+#include <arm_neon.h>
+#include "arm-neon-ref.h"
+#include "compute-ref-data.h"
+
+#define INSN_NAME vhadd
+#define TEST_MSG "VHADD/VHADDQ"
+
+#define NO_FLOAT_VARIANT
+
+/* Expected results.  */
+VECT_VAR_DECL(expected,int,8,8) [] = { 0xf1, 0xf2, 0xf2, 0xf3,
+				       0xf3, 0xf4, 0xf4, 0xf5 };
+VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff1, 0xfff1, 0xfff2, 0xfff2 };
+VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffff0, 0xfffffff0 };
+VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 };
+VECT_VAR_DECL(expected,uint,8,8) [] = { 0xf1, 0xf2, 0xf2, 0xf3,
+					0xf3, 0xf4, 0xf4, 0xf5 };
+VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfff0, 0xfff1, 0xfff1, 0xfff2 };
+VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffffff0, 0xfffffff0 };
+VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 };
+VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33,
+					0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 };
+VECT_VAR_DECL(expected,int,8,16) [] = { 0xf2, 0xf2, 0xf3, 0xf3,
+					0xf4, 0xf4, 0xf5, 0xf5,
+					0xf6, 0xf6, 0xf7, 0xf7,
+					0xf8, 0xf8, 0xf9, 0xf9 };
+VECT_VAR_DECL(expected,int,16,8) [] = { 0xfff1, 0xfff2, 0xfff2, 0xfff3,
+					0xfff3, 0xfff4, 0xfff4, 0xfff5 };
+VECT_VAR_DECL(expected,int,32,4) [] = { 0xfffffff0, 0xfffffff1,
+					0xfffffff1, 0xfffffff2 };
+VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333,
+					0x3333333333333333 };
+VECT_VAR_DECL(expected,uint,8,16) [] = { 0xf4, 0xf5, 0xf5, 0xf6,
+					 0xf6, 0xf7, 0xf7, 0xf8,
+					 0xf8, 0xf9, 0xf9, 0xfa,
+					 0xfa, 0xfb, 0xfb, 0xfc };
+VECT_VAR_DECL(expected,uint,16,8) [] = { 0xfff1, 0xfff1, 0xfff2, 0xfff2,
+					 0xfff3, 0xfff3, 0xfff4, 0xfff4 };
+VECT_VAR_DECL(expected,uint,32,4) [] = { 0xfffffff0, 0xfffffff1,
+					 0xfffffff1, 0xfffffff2 };
+VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333,
+					 0x3333333333333333 };
+VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+					 0x33, 0x33, 0x33, 0x33,
+					 0x33, 0x33, 0x33, 0x33,
+					 0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333,
+					 0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333,
+					   0x33333333, 0x33333333 };
+
+#include "binary_op_no64.inc"
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vhsub.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vhsub.c
new file mode 100644
index 0000000..2431288
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vhsub.c
@@ -0,0 +1,52 @@ 
+#include <arm_neon.h>
+#include "arm-neon-ref.h"
+#include "compute-ref-data.h"
+
+#define INSN_NAME vhsub
+#define TEST_MSG "VHSUB/VHSUBQ"
+
+#define NO_FLOAT_VARIANT
+
+/* Expected results.  */
+VECT_VAR_DECL(expected,int,8,8) [] = { 0xfe, 0xff, 0xff, 0x0,
+				       0x0, 0x1, 0x1, 0x2 };
+VECT_VAR_DECL(expected,int,16,4) [] = { 0xffff, 0xffff, 0x0, 0x0 };
+VECT_VAR_DECL(expected,int,32,2) [] = { 0x0, 0x0 };
+VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 };
+VECT_VAR_DECL(expected,uint,8,8) [] = { 0xfe, 0xff, 0xff, 0x0,
+					0x0, 0x1, 0x1, 0x2 };
+VECT_VAR_DECL(expected,uint,16,4) [] = { 0xffff, 0x0, 0x0, 0x1 };
+VECT_VAR_DECL(expected,uint,32,2) [] = { 0x0, 0x0 };
+VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 };
+VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33,
+					0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 };
+VECT_VAR_DECL(expected,int,8,16) [] = { 0xfe, 0xfe, 0xff, 0xff,
+					0x0, 0x0, 0x1, 0x1,
+					0x2, 0x2, 0x3, 0x3,
+					0x4, 0x4, 0x5, 0x5 };
+VECT_VAR_DECL(expected,int,16,8) [] = { 0xfffe, 0xffff, 0xffff, 0x0,
+					0x0, 0x1, 0x1, 0x2 };
+VECT_VAR_DECL(expected,int,32,4) [] = { 0xffffffff, 0x0, 0x0, 0x1 };
+VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333,
+					0x3333333333333333 };
+VECT_VAR_DECL(expected,uint,8,16) [] = { 0xfb, 0xfc, 0xfc, 0xfd,
+					 0xfd, 0xfe, 0xfe, 0xff,
+					 0xff, 0x0, 0x0, 0x1,
+					 0x1, 0x2, 0x2, 0x3 };
+VECT_VAR_DECL(expected,uint,16,8) [] = { 0xffff, 0xffff, 0x0, 0x0,
+					 0x1, 0x1, 0x2, 0x2 };
+VECT_VAR_DECL(expected,uint,32,4) [] = { 0xffffffff, 0x0, 0x0, 0x1 };
+VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333,
+					 0x3333333333333333 };
+VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+					 0x33, 0x33, 0x33, 0x33,
+					 0x33, 0x33, 0x33, 0x33,
+					 0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333,
+					 0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333,
+					   0x33333333, 0x33333333 };
+
+#include "binary_op_no64.inc"
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmax.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmax.c
new file mode 100644
index 0000000..2591b16
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmax.c
@@ -0,0 +1,64 @@ 
+#include <arm_neon.h>
+#include "arm-neon-ref.h"
+#include "compute-ref-data.h"
+
+#define INSN_NAME vmax
+#define TEST_MSG "VMAX/VMAXQ"
+
+/* Expected results.  */
+VECT_VAR_DECL(expected,int,8,8) [] = { 0xf3, 0xf3, 0xf3, 0xf3,
+				       0xf4, 0xf5, 0xf6, 0xf7 };
+VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff2, 0xfff2, 0xfff2, 0xfff3 };
+VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffff0, 0xfffffff1 };
+VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 };
+VECT_VAR_DECL(expected,uint,8,8) [] = { 0xf3, 0xf3, 0xf3, 0xf3,
+					0xf4, 0xf5, 0xf6, 0xf7 };
+VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfff1, 0xfff1, 0xfff2, 0xfff3 };
+VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffffff0, 0xfffffff1 };
+VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 };
+VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33,
+					0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1780000, 0xc1700000 };
+VECT_VAR_DECL(expected,int,8,16) [] = { 0xf4, 0xf4, 0xf4, 0xf4,
+					0xf4, 0xf5, 0xf6, 0xf7,
+					0xf8, 0xf9, 0xfa, 0xfb,
+					0xfc, 0xfd, 0xfe, 0xff };
+VECT_VAR_DECL(expected,int,16,8) [] = { 0xfff3, 0xfff3, 0xfff3, 0xfff3,
+					0xfff4, 0xfff5, 0xfff6, 0xfff7 };
+VECT_VAR_DECL(expected,int,32,4) [] = { 0xfffffff1, 0xfffffff1,
+					0xfffffff2, 0xfffffff3 };
+VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333,
+					0x3333333333333333 };
+VECT_VAR_DECL(expected,uint,8,16) [] = { 0xf9, 0xf9, 0xf9, 0xf9,
+					 0xf9, 0xf9, 0xf9, 0xf9,
+					 0xf9, 0xf9, 0xfa, 0xfb,
+					 0xfc, 0xfd, 0xfe, 0xff };
+VECT_VAR_DECL(expected,uint,16,8) [] = { 0xfff2, 0xfff2, 0xfff2, 0xfff3,
+					 0xfff4, 0xfff5, 0xfff6, 0xfff7 };
+VECT_VAR_DECL(expected,uint,32,4) [] = { 0xfffffff1, 0xfffffff1,
+					 0xfffffff2, 0xfffffff3 };
+VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333,
+					 0x3333333333333333 };
+VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+					 0x33, 0x33, 0x33, 0x33,
+					 0x33, 0x33, 0x33, 0x33,
+					 0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333,
+					 0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc1680000, 0xc1680000,
+					   0xc1600000, 0xc1500000 };
+
+/* Expected results with special FP values.  */
+VECT_VAR_DECL(expected_nan,hfloat,32,4) [] = { 0x7fc00000, 0x7fc00000,
+					       0x7fc00000, 0x7fc00000 };
+VECT_VAR_DECL(expected_mnan,hfloat,32,4) [] = { 0x7fc00000, 0x7fc00000,
+						0x7fc00000, 0x7fc00000 };
+VECT_VAR_DECL(expected_inf,hfloat,32,4) [] = { 0x7f800000, 0x7f800000,
+					       0x7f800000, 0x7f800000 };
+VECT_VAR_DECL(expected_minf,hfloat,32,4) [] = { 0x3f800000, 0x3f800000,
+						0x3f800000, 0x3f800000 };
+VECT_VAR_DECL(expected_zero1,hfloat,32,4) [] = { 0x0, 0x0, 0x0, 0x0 };
+VECT_VAR_DECL(expected_zero2,hfloat,32,4) [] = { 0x0, 0x0, 0x0, 0x0 };
+
+#include "binary_op_no64.inc"
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmin.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmin.c
new file mode 100644
index 0000000..2b5e87c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmin.c
@@ -0,0 +1,66 @@ 
+#include <arm_neon.h>
+#include "arm-neon-ref.h"
+#include "compute-ref-data.h"
+
+#define INSN_NAME vmin
+#define TEST_MSG "VMIN/VMINQ"
+
+/* Expected results.  */
+VECT_VAR_DECL(expected,int,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
+				       0xf3, 0xf3, 0xf3, 0xf3 };
+VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff2 };
+VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffff0, 0xfffffff0 };
+VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 };
+VECT_VAR_DECL(expected,uint,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
+					0xf3, 0xf3, 0xf3, 0xf3 };
+VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfff0, 0xfff1, 0xfff1, 0xfff1 };
+VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffffff0, 0xfffffff0 };
+VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 };
+VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33,
+					0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1800000, 0xc1780000 };
+VECT_VAR_DECL(expected,int,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
+					0xf4, 0xf4, 0xf4, 0xf4,
+					0xf4, 0xf4, 0xf4, 0xf4,
+					0xf4, 0xf4, 0xf4, 0xf4 };
+VECT_VAR_DECL(expected,int,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3,
+					0xfff3, 0xfff3, 0xfff3, 0xfff3 };
+VECT_VAR_DECL(expected,int,32,4) [] = { 0xfffffff0, 0xfffffff1,
+					0xfffffff1, 0xfffffff1 };
+VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333,
+					0x3333333333333333 };
+VECT_VAR_DECL(expected,uint,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
+					 0xf4, 0xf5, 0xf6, 0xf7,
+					 0xf8, 0xf9, 0xf9, 0xf9,
+					 0xf9, 0xf9, 0xf9, 0xf9 };
+VECT_VAR_DECL(expected,uint,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff2,
+					 0xfff2, 0xfff2, 0xfff2, 0xfff2 };
+VECT_VAR_DECL(expected,uint,32,4) [] = { 0xfffffff0, 0xfffffff1,
+					 0xfffffff1, 0xfffffff1 };
+VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333,
+					 0x3333333333333333 };
+VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+					 0x33, 0x33, 0x33, 0x33,
+					 0x33, 0x33, 0x33, 0x33,
+					 0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333,
+					 0x3333, 0x3333, 0x3333, 0x3333 };
+
+VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc1800000, 0xc1700000,
+					   0xc1680000, 0xc1680000 };
+/* Expected results with special FP values.  */
+VECT_VAR_DECL(expected_nan,hfloat,32,4) [] = { 0x7fc00000, 0x7fc00000,
+					       0x7fc00000, 0x7fc00000 };
+VECT_VAR_DECL(expected_mnan,hfloat,32,4) [] = { 0x7fc00000, 0x7fc00000,
+						0x7fc00000, 0x7fc00000 };
+VECT_VAR_DECL(expected_inf,hfloat,32,4) [] = { 0x3f800000, 0x3f800000,
+					       0x3f800000, 0x3f800000 };
+VECT_VAR_DECL(expected_minf,hfloat,32,4) [] = { 0xff800000, 0xff800000,
+						0xff800000, 0xff800000 };
+VECT_VAR_DECL(expected_zero1,hfloat,32,4) [] = { 0x80000000, 0x80000000,
+						 0x80000000, 0x80000000 };
+VECT_VAR_DECL(expected_zero2,hfloat,32,4) [] = { 0x80000000, 0x80000000,
+						 0x80000000, 0x80000000 };
+
+#include "binary_op_no64.inc"
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vrhadd.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vrhadd.c
new file mode 100644
index 0000000..8629beb
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vrhadd.c
@@ -0,0 +1,54 @@ 
+#include <arm_neon.h>
+#include "arm-neon-ref.h"
+#include "compute-ref-data.h"
+
+#define INSN_NAME vrhadd
+#define TEST_MSG "VRHADD/VRHADDQ"
+
+#define NO_FLOAT_VARIANT
+
+/* Expected results.  */
+VECT_VAR_DECL(expected,int,8,8) [] = { 0xf2, 0xf2, 0xf3, 0xf3,
+				       0xf4, 0xf4, 0xf5, 0xf5 };
+VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff1, 0xfff2, 0xfff2, 0xfff3 };
+VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffff0, 0xfffffff1 };
+VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 };
+VECT_VAR_DECL(expected,uint,8,8) [] = { 0xf2, 0xf2, 0xf3, 0xf3,
+					0xf4, 0xf4, 0xf5, 0xf5 };
+VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfff1, 0xfff1, 0xfff2, 0xfff2 };
+VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffffff0, 0xfffffff1 };
+VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 };
+VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33,
+					0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 };
+VECT_VAR_DECL(expected,int,8,16) [] = { 0xf2, 0xf3, 0xf3, 0xf4,
+					0xf4, 0xf5, 0xf5, 0xf6,
+					0xf6, 0xf7, 0xf7, 0xf8,
+					0xf8, 0xf9, 0xf9, 0xfa };
+VECT_VAR_DECL(expected,int,16,8) [] = { 0xfff2, 0xfff2, 0xfff3, 0xfff3,
+					0xfff4, 0xfff4, 0xfff5, 0xfff5 };
+VECT_VAR_DECL(expected,int,32,4) [] = { 0xfffffff1, 0xfffffff1,
+					0xfffffff2, 0xfffffff2 };
+VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333,
+					0x3333333333333333 };
+VECT_VAR_DECL(expected,uint,8,16) [] = { 0xf5, 0xf5, 0xf6, 0xf6,
+					 0xf7, 0xf7, 0xf8, 0xf8,
+					 0xf9, 0xf9, 0xfa, 0xfa,
+					 0xfb, 0xfb, 0xfc, 0xfc };
+VECT_VAR_DECL(expected,uint,16,8) [] = { 0xfff1, 0xfff2, 0xfff2, 0xfff3,
+					 0xfff3, 0xfff4, 0xfff4, 0xfff5 };
+VECT_VAR_DECL(expected,uint,32,4) [] = { 0xfffffff1, 0xfffffff1,
+					 0xfffffff2, 0xfffffff2 };
+VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333,
+					 0x3333333333333333 };
+VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+					 0x33, 0x33, 0x33, 0x33,
+					 0x33, 0x33, 0x33, 0x33,
+					 0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333,
+					 0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333,
+					   0x33333333, 0x33333333 };
+
+#include "binary_op_no64.inc"