[ARM,AArch64] more poly64 intrinsics and tests

Message ID CAKdteObQJA=UH0snnyPBnOJxQDaeaQMGxYDQj56=do3=FF4ZmQ@mail.gmail.com
State Accepted
Commit 052ef81d2dbef6727f9684733cfbb70f7adcf066

Commit Message

Christophe Lyon Dec. 12, 2016, 4:03 p.m. UTC
Hi,

After the recent update from Tamar, I noticed a few discrepancies
between ARM and AArch64 regarding a few poly64 intrinsics.

This patch:
- adds vtst_p64 and vtstq_p64 to AArch64's arm_neon.h
- adds vgetq_lane_p64, vset_lane_p64 and vsetq_lane_p64 to ARM's arm_neon.h
  (vget_lane_p64 was already there)
- adds the corresponding tests, and moves the vget_lane_p64 ones out
of the #ifdef __aarch64__ zone.
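
As a quick illustration (not part of the patch, and assuming a GCC with
these intrinsics available on an AArch64 target; the values below are
arbitrary), here is a minimal usage sketch of vtst_p64, vsetq_lane_p64 and
vgetq_lane_p64:

  #include <arm_neon.h>
  #include <stdio.h>
  #include <inttypes.h>

  int main (void)
  {
    /* vtst_p64: each result lane is all-ones if (a & b) is non-zero,
       all-zeros otherwise.  Here a & b == 0, so the lane is 0.  */
    poly64x1_t a = vcreate_p64 (0x0fULL);
    poly64x1_t b = vcreate_p64 (0xf0ULL);
    uint64x1_t t = vtst_p64 (a, b);

    /* vsetq_lane_p64 / vgetq_lane_p64: insert and extract poly64 lanes.  */
    poly64x2_t v = vdupq_n_p64 (0);
    v = vsetq_lane_p64 ((poly64_t) 0x88, v, 1);

    printf ("vtst_p64:       0x%" PRIx64 "\n", vget_lane_u64 (t, 0));
    printf ("vgetq_lane_p64: 0x%" PRIx64 "\n",
            (uint64_t) vgetq_lane_p64 (v, 1));
    return 0;
  }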

Cross-tested on arm* and aarch64* targets.

OK?

Christophe
gcc/ChangeLog:

2016-12-12  Christophe Lyon  <christophe.lyon@linaro.org>

	* config/aarch64/arm_neon.h (vtst_p64): New.
	(vtstq_p64): New.
	* config/arm/arm_neon.h (vgetq_lane_p64): New.
	(vset_lane_p64): New.
	(vsetq_lane_p64): New.

gcc/testsuite/ChangeLog:

2016-12-12  Christophe Lyon  <christophe.lyon@linaro.org>

	* gcc.target/aarch64/advsimd-intrinsics/p64_p128.c
	(vget_lane_expected, vset_lane_expected, vtst_expected_poly64x1):
	New.
	(vmov_n_expected0, vmov_n_expected1, vmov_n_expected2)
	(expected_vld_st2_0, expected_vld_st2_1, expected_vld_st3_0)
	(expected_vld_st3_1, expected_vld_st3_2, expected_vld_st4_0)
	(expected_vld_st4_1, expected_vld_st4_2, expected_vld_st4_3)
	(vtst_expected_poly64x2): Move to aarch64-only section.
	(vget_lane_p64, vgetq_lane_p64, vset_lane_p64, vsetq_lane_p64)
	(vtst_p64, vtstq_p64): New tests.

Comments

James Greenhalgh Dec. 14, 2016, 4:55 p.m. UTC | #1
On Mon, Dec 12, 2016 at 05:03:31PM +0100, Christophe Lyon wrote:
> OK?

The AArch64 parts of this look fine to me, but I do have one question on
your inline assembly implementation for vtstq_p64:

> +__extension__ extern __inline uint64x2_t
> +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> +vtstq_p64 (poly64x2_t a, poly64x2_t b)
> +{
> +  uint64x2_t result;
> +  __asm__ ("cmtst %0.2d, %1.2d, %2.2d"
> +           : "=w"(result)
> +           : "w"(a), "w"(b)
> +           : /* No clobbers */);
> +  return result;
> +}
> +

Why can this not be written as many of the other vtstq intrinsics are; e.g.:

   __extension__ extern __inline uint64x2_t
  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
  vtstq_p64 (poly64x2_t __a, poly64x2_t __b)
  {
    return (uint64x2_t) ((((uint64x2_t) __a) & ((uint64x2_t) __b))
                          != __AARCH64_INT64_C (0));
  }

Thanks,
James

Christophe Lyon Dec. 14, 2016, 10:09 p.m. UTC | #2
On 14 December 2016 at 17:55, James Greenhalgh <james.greenhalgh@arm.com> wrote:
> Why can this not be written as many of the other vtstq intrinsics are; e.g.:

I don't know, I just followed the pattern used for vtstq_p8 and vtstq_p16
just above...


Christophe Lyon Jan. 3, 2017, 3:47 p.m. UTC | #3
Ping?


Christophe Lyon Jan. 11, 2017, 10:13 a.m. UTC | #4
Ping?

James, I'm not sure whether your comment was a request for a new
version of my patch or just FYI?


Christophe Lyon Feb. 2, 2017, 8:31 p.m. UTC | #5
Hello,

Is it too late for this patch?

James Greenhalgh Feb. 4, 2017, 9:54 p.m. UTC | #6
On Wed, Jan 11, 2017 at 11:13:07AM +0100, Christophe Lyon wrote:
> Ping?
>
> James, I'm not sure whether your comment was a request for a new
> version of my patch or just FYI?

Sorry that this was unclear. I was looking for a new version of the patch
covering this comment. Otherwise we just have debt to go fix it in future.

With the suggested change, the AArch64 parts of this patch are OK - adding
missing intrinsics is very safe (even in Stage 4).

Please post an updated patch, and give Richard and Marcus a reasonable
amount of time to object to taking the patch this late (and you need an
AArch32 OK too).

Thanks,
James

Christophe Lyon Feb. 6, 2017, 8:31 a.m. UTC | #7
On 4 February 2017 at 22:54, James Greenhalgh <james.greenhalgh@arm.com> wrote:
>> James, I'm not sure whether your comment was a request for a new
>> version of my patch or just FYI?
>
> Sorry that this was unclear. I was looking for a new version of the patch
> covering this comment. Otherwise we just have debt to go fix it in future.
>
> With the suggested change, the AArch64 parts of this patch are OK - adding
> missing intrinsics is very safe (even in Stage 4).
>
> Please post an updated patch, and give Richard and Marcus a reasonable
> amount of time to object to taking the patch this late (and you need an
> AArch32 OK too).

Hi James,

Thanks for the clarification, here is an updated patch.

I had to make a few changes after rebasing, and I also took the opportunity to
rewrite the existing vtst_p8, vtst_p16, vtstq_p8 and vtstq_p16 without an
asm() statement.

As before, the aarch64 and aarch32 updates to arm_neon.h are independent,
but I found it simpler to group them, as they imply updates to the same test.

Tested as usual, cross-testing on several arm* and aarch64* configurations,
no regression.

OK?

Thanks,

Christophe

gcc/ChangeLog:

2017-02-06  Christophe Lyon  <christophe.lyon@linaro.org>

	* config/aarch64/arm_neon.h (vtst_p8): Rewrite without asm.
	(vtst_p16): Likewise.
	(vtstq_p8): Likewise.
	(vtstq_p16): Likewise.
	(vtst_p64): New.
	(vtstq_p64): Likewise.
	* config/arm/arm_neon.h (vgetq_lane_p64): New.
	(vset_lane_p64): New.
	(vsetq_lane_p64): New.

gcc/testsuite/ChangeLog:

2017-02-06  Christophe Lyon  <christophe.lyon@linaro.org>

	* gcc.target/aarch64/advsimd-intrinsics/p64_p128.c
	(vget_lane_expected, vset_lane_expected, vtst_expected_poly64x1):
	New.
	(vmov_n_expected0, vmov_n_expected1, vmov_n_expected2)
	(expected_vld_st2_0, expected_vld_st2_1, expected_vld_st3_0)
	(expected_vld_st3_1, expected_vld_st3_2, expected_vld_st4_0)
	(expected_vld_st4_1, expected_vld_st4_2, expected_vld_st4_3)
	(vtst_expected_poly64x2): Move to aarch64-only section.
	(vget_lane_p64, vgetq_lane_p64, vset_lane_p64, vsetq_lane_p64)
	(vtst_p64, vtstq_p64): New tests.
commit d8eebfd0523115ad743a3a273f6dbf79e3d41d5c
Author: Christophe Lyon <christophe.lyon@linaro.org>
Date:   Sun Feb 5 20:43:13 2017 +0000

    ARM/AArch64: add missing poly64 intrinsics (vtst on aarch64, vget_lane on arm)
    
    Change-Id: I334e0fa6ab07d473609ed96d9ab8cb56ebd521ac

diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h
index a54c0be..0753da3 100644
--- a/gcc/config/aarch64/arm_neon.h
+++ b/gcc/config/aarch64/arm_neon.h
@@ -10862,48 +10862,47 @@ __extension__ extern __inline uint8x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vtst_p8 (poly8x8_t a, poly8x8_t b)
 {
-  uint8x8_t result;
-  __asm__ ("cmtst %0.8b, %1.8b, %2.8b"
-           : "=w"(result)
-           : "w"(a), "w"(b)
-           : /* No clobbers */);
-  return result;
+  return (uint8x8_t) ((((uint8x8_t) a) & ((uint8x8_t) b))
+		       != 0);
 }
 
 __extension__ extern __inline uint16x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vtst_p16 (poly16x4_t a, poly16x4_t b)
 {
-  uint16x4_t result;
-  __asm__ ("cmtst %0.4h, %1.4h, %2.4h"
-           : "=w"(result)
-           : "w"(a), "w"(b)
-           : /* No clobbers */);
-  return result;
+  return (uint16x4_t) ((((uint16x4_t) a) & ((uint16x4_t) b))
+		       != 0);
+}
+
+__extension__ extern __inline uint64x1_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vtst_p64 (poly64x1_t a, poly64x1_t b)
+{
+  return (uint64x1_t) ((a & b) != __AARCH64_INT64_C (0));
 }
 
 __extension__ extern __inline uint8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vtstq_p8 (poly8x16_t a, poly8x16_t b)
 {
-  uint8x16_t result;
-  __asm__ ("cmtst %0.16b, %1.16b, %2.16b"
-           : "=w"(result)
-           : "w"(a), "w"(b)
-           : /* No clobbers */);
-  return result;
+  return (uint8x16_t) ((((uint8x16_t) a) & ((uint8x16_t) b))
+		       != 0);
 }
 
 __extension__ extern __inline uint16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vtstq_p16 (poly16x8_t a, poly16x8_t b)
 {
-  uint16x8_t result;
-  __asm__ ("cmtst %0.8h, %1.8h, %2.8h"
-           : "=w"(result)
-           : "w"(a), "w"(b)
-           : /* No clobbers */);
-  return result;
+  return (uint16x8_t) ((((uint16x8_t) a) & ((uint16x8_t) b))
+		       != 0);
+}
+
+__extension__ extern __inline uint64x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vtstq_p64 (poly64x2_t a, poly64x2_t b)
+{
+  return (uint64x2_t) ((((uint64x2_t) a) & ((uint64x2_t) b))
+		       != __AARCH64_INT64_C (0));
 }
 
 /* End of temporary inline asm implementations.  */
diff --git a/gcc/config/arm/arm_neon.h b/gcc/config/arm/arm_neon.h
index 16bf8c5..f81d77e 100644
--- a/gcc/config/arm/arm_neon.h
+++ b/gcc/config/arm/arm_neon.h
@@ -6309,6 +6309,16 @@ vgetq_lane_s64 (int64x2_t __a, const int __b)
   return (int64_t)__builtin_neon_vget_lanev2di (__a, __b);
 }
 
+#pragma GCC push_options
+#pragma GCC target ("fpu=crypto-neon-fp-armv8")
+__extension__ extern __inline poly64_t
+__attribute__  ((__always_inline__, __gnu_inline__, __artificial__))
+vgetq_lane_p64 (poly64x2_t __a, const int __b)
+{
+  return (poly64_t)__builtin_neon_vget_lanev2di ((int64x2_t) __a, __b);
+}
+
+#pragma GCC pop_options
 __extension__ extern __inline uint64_t
 __attribute__  ((__always_inline__, __gnu_inline__, __artificial__))
 vgetq_lane_u64 (uint64x2_t __a, const int __b)
@@ -6405,6 +6415,16 @@ vset_lane_u64 (uint64_t __a, uint64x1_t __b, const int __c)
   return (uint64x1_t)__builtin_neon_vset_lanedi ((__builtin_neon_di) __a, (int64x1_t) __b, __c);
 }
 
+#pragma GCC push_options
+#pragma GCC target ("fpu=crypto-neon-fp-armv8")
+__extension__ extern __inline poly64x1_t
+__attribute__  ((__always_inline__, __gnu_inline__, __artificial__))
+vset_lane_p64 (poly64_t __a, poly64x1_t __b, const int __c)
+{
+  return (poly64x1_t)__builtin_neon_vset_lanedi ((__builtin_neon_di) __a, (int64x1_t) __b, __c);
+}
+
+#pragma GCC pop_options
 __extension__ extern __inline int8x16_t
 __attribute__  ((__always_inline__, __gnu_inline__, __artificial__))
 vsetq_lane_s8 (int8_t __a, int8x16_t __b, const int __c)
@@ -6496,6 +6516,13 @@ vsetq_lane_u64 (uint64_t __a, uint64x2_t __b, const int __c)
 
 #pragma GCC push_options
 #pragma GCC target ("fpu=crypto-neon-fp-armv8")
+__extension__ extern __inline poly64x2_t
+__attribute__  ((__always_inline__, __gnu_inline__, __artificial__))
+vsetq_lane_p64 (poly64_t __a, poly64x2_t __b, const int __c)
+{
+  return (poly64x2_t)__builtin_neon_vset_lanev2di ((__builtin_neon_di) __a, (int64x2_t) __b, __c);
+}
+
 __extension__ extern __inline poly64x1_t
 __attribute__  ((__always_inline__, __gnu_inline__, __artificial__))
 vcreate_p64 (uint64_t __a)
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/p64_p128.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/p64_p128.c
index 7c5bca2..a3210a9 100644
--- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/p64_p128.c
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/p64_p128.c
@@ -39,17 +39,6 @@ VECT_VAR_DECL(vdup_n_expected2,poly,64,1) [] = { 0xfffffffffffffff2 };
 VECT_VAR_DECL(vdup_n_expected2,poly,64,2) [] = { 0xfffffffffffffff2,
 						 0xfffffffffffffff2 };
 
-/* Expected results: vmov_n.  */
-VECT_VAR_DECL(vmov_n_expected0,poly,64,1) [] = { 0xfffffffffffffff0 };
-VECT_VAR_DECL(vmov_n_expected0,poly,64,2) [] = { 0xfffffffffffffff0,
-						 0xfffffffffffffff0 };
-VECT_VAR_DECL(vmov_n_expected1,poly,64,1) [] = { 0xfffffffffffffff1 };
-VECT_VAR_DECL(vmov_n_expected1,poly,64,2) [] = { 0xfffffffffffffff1,
-						 0xfffffffffffffff1 };
-VECT_VAR_DECL(vmov_n_expected2,poly,64,1) [] = { 0xfffffffffffffff2 };
-VECT_VAR_DECL(vmov_n_expected2,poly,64,2) [] = { 0xfffffffffffffff2,
-						 0xfffffffffffffff2 };
-
 /* Expected results: vext.  */
 VECT_VAR_DECL(vext_expected,poly,64,1) [] = { 0xfffffffffffffff0 };
 VECT_VAR_DECL(vext_expected,poly,64,2) [] = { 0xfffffffffffffff1, 0x88 };
@@ -124,6 +113,29 @@ VECT_VAR_DECL(vst1_lane_expected,poly,64,1) [] = { 0xfffffffffffffff0 };
 VECT_VAR_DECL(vst1_lane_expected,poly,64,2) [] = { 0xfffffffffffffff0,
 						   0x3333333333333333 };
 
+/* Expected results: vget_lane.  */
+VECT_VAR_DECL(vget_lane_expected,poly,64,1) = 0xfffffffffffffff0;
+VECT_VAR_DECL(vget_lane_expected,poly,64,2) = 0xfffffffffffffff0;
+
+/* Expected results: vset_lane.  */
+VECT_VAR_DECL(vset_lane_expected,poly,64,1) [] = { 0x88 };
+VECT_VAR_DECL(vset_lane_expected,poly,64,2) [] = { 0xfffffffffffffff0, 0x11 };
+
+/* Expected results: vtst.  */
+VECT_VAR_DECL(vtst_expected,uint,64,1) [] = { 0x0 };
+
+#ifdef __aarch64__
+/* Expected results: vmov_n.  */
+VECT_VAR_DECL(vmov_n_expected0,poly,64,1) [] = { 0xfffffffffffffff0 };
+VECT_VAR_DECL(vmov_n_expected0,poly,64,2) [] = { 0xfffffffffffffff0,
+						 0xfffffffffffffff0 };
+VECT_VAR_DECL(vmov_n_expected1,poly,64,1) [] = { 0xfffffffffffffff1 };
+VECT_VAR_DECL(vmov_n_expected1,poly,64,2) [] = { 0xfffffffffffffff1,
+						 0xfffffffffffffff1 };
+VECT_VAR_DECL(vmov_n_expected2,poly,64,1) [] = { 0xfffffffffffffff2 };
+VECT_VAR_DECL(vmov_n_expected2,poly,64,2) [] = { 0xfffffffffffffff2,
+						 0xfffffffffffffff2 };
+
 /* Expected results: vldX_lane.  */
 VECT_VAR_DECL(expected_vld_st2_0,poly,64,1) [] = { 0xfffffffffffffff0 };
 VECT_VAR_DECL(expected_vld_st2_0,poly,64,2) [] = { 0xfffffffffffffff0,
@@ -153,9 +165,9 @@ VECT_VAR_DECL(expected_vld_st4_3,poly,64,1) [] = { 0xfffffffffffffff3 };
 VECT_VAR_DECL(expected_vld_st4_3,poly,64,2) [] = { 0xaaaaaaaaaaaaaaaa,
 						   0xaaaaaaaaaaaaaaaa };
 
-/* Expected results: vget_lane.  */
-VECT_VAR_DECL(vget_lane_expected,poly,64,1) = 0xfffffffffffffff0;
-VECT_VAR_DECL(vget_lane_expected,poly,64,2) = 0xfffffffffffffff0;
+/* Expected results: vtst.  */
+VECT_VAR_DECL(vtst_expected,uint,64,2) [] = { 0x0, 0xffffffffffffffff };
+#endif
 
 int main (void)
 {
@@ -727,7 +739,105 @@ int main (void)
   CHECK_POLY(TEST_MSG, poly, 64, 1, PRIx64, vst1_lane_expected, "");
   CHECK_POLY(TEST_MSG, poly, 64, 2, PRIx64, vst1_lane_expected, "");
 
+  /* vget_lane_p64 tests.  */
+#undef TEST_MSG
+#define TEST_MSG "VGET_LANE/VGETQ_LANE"
+
+#define TEST_VGET_LANE(Q, T1, T2, W, N, L)				   \
+  VECT_VAR(vget_lane_vector, T1, W, N) = vget##Q##_lane_##T2##W(VECT_VAR(vget_lane_vector1, T1, W, N), L); \
+  if (VECT_VAR(vget_lane_vector, T1, W, N) != VECT_VAR(vget_lane_expected, T1, W, N)) {		\
+    fprintf(stderr,							   \
+	    "ERROR in %s (%s line %d in result '%s') at type %s "	   \
+	    "got 0x%" PRIx##W " != 0x%" PRIx##W "\n",			   \
+	    TEST_MSG, __FILE__, __LINE__,				   \
+	    STR(VECT_VAR(vget_lane_expected, T1, W, N)),		   \
+	    STR(VECT_NAME(T1, W, N)),					   \
+	    VECT_VAR(vget_lane_vector, T1, W, N),			   \
+	    VECT_VAR(vget_lane_expected, T1, W, N));			   \
+    abort ();								   \
+  }
+
+  /* Initialize input values.  */
+  DECL_VARIABLE(vget_lane_vector1, poly, 64, 1);
+  DECL_VARIABLE(vget_lane_vector1, poly, 64, 2);
+
+  VLOAD(vget_lane_vector1, buffer,  , poly, p, 64, 1);
+  VLOAD(vget_lane_vector1, buffer, q, poly, p, 64, 2);
+
+  VECT_VAR_DECL(vget_lane_vector, poly, 64, 1);
+  VECT_VAR_DECL(vget_lane_vector, poly, 64, 2);
+
+  TEST_VGET_LANE( , poly, p, 64, 1, 0);
+  TEST_VGET_LANE(q, poly, p, 64, 2, 0);
+
+
+  /* vset_lane_p64 tests.  */
+#undef TEST_MSG
+#define TEST_MSG "VSET_LANE/VSETQ_LANE"
+
+#define TEST_VSET_LANE(Q, T1, T2, W, N, V, L)				\
+  VECT_VAR(vset_lane_vector, T1, W, N) =						\
+    vset##Q##_lane_##T2##W(V,						\
+			   VECT_VAR(vset_lane_vector, T1, W, N),			\
+			   L);						\
+  vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vset_lane_vector, T1, W, N))
+
+  /* Initialize input values.  */
+  DECL_VARIABLE(vset_lane_vector, poly, 64, 1);
+  DECL_VARIABLE(vset_lane_vector, poly, 64, 2);
+
+  CLEAN(result, uint, 64, 1);
+  CLEAN(result, uint, 64, 2);
+
+  VLOAD(vset_lane_vector, buffer, , poly, p, 64, 1);
+  VLOAD(vset_lane_vector, buffer, q, poly, p, 64, 2);
+
+  /* Choose value and lane arbitrarily.  */
+  TEST_VSET_LANE(, poly, p, 64, 1, 0x88, 0);
+  TEST_VSET_LANE(q, poly, p, 64, 2, 0x11, 1);
+
+  CHECK(TEST_MSG, poly, 64, 1, PRIx64, vset_lane_expected, "");
+  CHECK(TEST_MSG, poly, 64, 2, PRIx64, vset_lane_expected, "");
+
+
+  /* vtst_p64 tests.  */
+#undef TEST_MSG
+#define TEST_MSG "VTST"
+  
+#define TEST_VTST1(INSN, Q, T1, T2, W, N)			\
+  VECT_VAR(vtst_vector_res, uint, W, N) =			\
+    INSN##Q##_##T2##W(VECT_VAR(vtst_vector, T1, W, N),		\
+		      VECT_VAR(vtst_vector2, T1, W, N));	\
+    vst1##Q##_u##W(VECT_VAR(result, uint, W, N),		\
+		   VECT_VAR(vtst_vector_res, uint, W, N))
+
+#define TEST_VTST(INSN, Q, T1, T2, W, N)	\
+  TEST_VTST1(INSN, Q, T1, T2, W, N)		\
+
+  /* Initialize input values.  */
+  DECL_VARIABLE(vtst_vector, poly, 64, 1);
+  DECL_VARIABLE(vtst_vector2, poly, 64, 1);
+  DECL_VARIABLE(vtst_vector_res, uint, 64, 1);
+
+  CLEAN(result, uint, 64, 1);
+
+  VLOAD(vtst_vector, buffer,  , poly, p, 64, 1);
+  VDUP(vtst_vector2, , poly, p, 64, 1, 5);
+
+  TEST_VTST(vtst, , poly, p, 64, 1);
+
+  CHECK(TEST_MSG, uint, 64, 1, PRIx64, vtst_expected, "");
+
+  /* vtstq_p64 is supported by aarch64 only.  */
 #ifdef __aarch64__
+  DECL_VARIABLE(vtst_vector, poly, 64, 2);
+  DECL_VARIABLE(vtst_vector2, poly, 64, 2);
+  DECL_VARIABLE(vtst_vector_res, uint, 64, 2);
+  CLEAN(result, uint, 64, 2);
+  VLOAD(vtst_vector, buffer, q, poly, p, 64, 2);
+  VDUP(vtst_vector2, q, poly, p, 64, 2, 5);
+  TEST_VTST(vtst, q, poly, p, 64, 2);
+  CHECK(TEST_MSG, uint, 64, 2, PRIx64, vtst_expected, "");
 
   /* vmov_n_p64 tests.  */
 #undef TEST_MSG
@@ -767,37 +877,6 @@ int main (void)
     }
   }
 
-  /* vget_lane_p64 tests.  */
-#undef TEST_MSG
-#define TEST_MSG "VGET_LANE/VGETQ_LANE"
-
-#define TEST_VGET_LANE(Q, T1, T2, W, N, L)				   \
-  VECT_VAR(vget_lane_vector, T1, W, N) = vget##Q##_lane_##T2##W(VECT_VAR(vector, T1, W, N), L); \
-  if (VECT_VAR(vget_lane_vector, T1, W, N) != VECT_VAR(vget_lane_expected, T1, W, N)) {		\
-    fprintf(stderr,							   \
-	    "ERROR in %s (%s line %d in result '%s') at type %s "	   \
-	    "got 0x%" PRIx##W " != 0x%" PRIx##W "\n",			   \
-	    TEST_MSG, __FILE__, __LINE__,				   \
-	    STR(VECT_VAR(vget_lane_expected, T1, W, N)),		   \
-	    STR(VECT_NAME(T1, W, N)),					   \
-	    (uint##W##_t)VECT_VAR(vget_lane_vector, T1, W, N),		   \
-	    (uint##W##_t)VECT_VAR(vget_lane_expected, T1, W, N));	   \
-    abort ();								   \
-  }
-
-  /* Initialize input values.  */
-  DECL_VARIABLE(vector, poly, 64, 1);
-  DECL_VARIABLE(vector, poly, 64, 2);
-
-  VLOAD(vector, buffer,  , poly, p, 64, 1);
-  VLOAD(vector, buffer, q, poly, p, 64, 2);
-
-  VECT_VAR_DECL(vget_lane_vector, poly, 64, 1);
-  VECT_VAR_DECL(vget_lane_vector, poly, 64, 2);
-
-  TEST_VGET_LANE( , poly, p, 64, 1, 0);
-  TEST_VGET_LANE(q, poly, p, 64, 2, 0);
-
   /* vldx_lane_p64 tests.  */
 #undef TEST_MSG
 #define TEST_MSG "VLDX_LANE/VLDXQ_LANE"

Kyrill Tkachov Feb. 6, 2017, 9:23 a.m. UTC | #8
Hi Christophe,

On 06/02/17 08:31, Christophe Lyon wrote:
> Tested as usual, cross-testing on several arm* and aarch64* configurations,
> no regression.
>
> OK?

Ok for arm.

Thanks,
Kyrill


Patch

diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h
index b846644..74d163e 100644
--- a/gcc/config/aarch64/arm_neon.h
+++ b/gcc/config/aarch64/arm_neon.h
@@ -10882,6 +10882,13 @@  vtst_p16 (poly16x4_t a, poly16x4_t b)
   return result;
 }
 
+__extension__ extern __inline uint64x1_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vtst_p64 (poly64x1_t a, poly64x1_t b)
+{
+  return (uint64x1_t) ((a & b) != __AARCH64_INT64_C (0));
+}
+
 __extension__ extern __inline uint8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vtstq_p8 (poly8x16_t a, poly8x16_t b)
@@ -10906,6 +10913,18 @@  vtstq_p16 (poly16x8_t a, poly16x8_t b)
   return result;
 }
 
+__extension__ extern __inline uint64x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vtstq_p64 (poly64x2_t a, poly64x2_t b)
+{
+  uint64x2_t result;
+  __asm__ ("cmtst %0.2d, %1.2d, %2.2d"
+           : "=w"(result)
+           : "w"(a), "w"(b)
+           : /* No clobbers */);
+  return result;
+}
+
 /* End of temporary inline asm implementations.  */
 
 /* Start of temporary inline asm for vldn, vstn and friends.  */
diff --git a/gcc/config/arm/arm_neon.h b/gcc/config/arm/arm_neon.h
index ab29da7..d199b41 100644
--- a/gcc/config/arm/arm_neon.h
+++ b/gcc/config/arm/arm_neon.h
@@ -5497,6 +5497,15 @@  vgetq_lane_s64 (int64x2_t __a, const int __b)
   return (int64_t)__builtin_neon_vget_lanev2di (__a, __b);
 }
 
+#pragma GCC push_options
+#pragma GCC target ("fpu=crypto-neon-fp-armv8")
+__extension__ static __inline poly64_t __attribute__ ((__always_inline__))
+vgetq_lane_p64 (poly64x2_t __a, const int __b)
+{
+  return (poly64_t)__builtin_neon_vget_lanev2di ((int64x2_t) __a, __b);
+}
+
+#pragma GCC pop_options
 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
 vgetq_lane_u64 (uint64x2_t __a, const int __b)
 {
@@ -5581,6 +5590,15 @@  vset_lane_u64 (uint64_t __a, uint64x1_t __b, const int __c)
   return (uint64x1_t)__builtin_neon_vset_lanedi ((__builtin_neon_di) __a, (int64x1_t) __b, __c);
 }
 
+#pragma GCC push_options
+#pragma GCC target ("fpu=crypto-neon-fp-armv8")
+__extension__ static __inline poly64x1_t __attribute__ ((__always_inline__))
+vset_lane_p64 (poly64_t __a, poly64x1_t __b, const int __c)
+{
+  return (poly64x1_t)__builtin_neon_vset_lanedi ((__builtin_neon_di) __a, (int64x1_t) __b, __c);
+}
+
+#pragma GCC pop_options
 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
 vsetq_lane_s8 (int8_t __a, int8x16_t __b, const int __c)
 {
@@ -5661,6 +5679,12 @@  vsetq_lane_u64 (uint64_t __a, uint64x2_t __b, const int __c)
 
 #pragma GCC push_options
 #pragma GCC target ("fpu=crypto-neon-fp-armv8")
+__extension__ static __inline poly64x2_t __attribute__ ((__always_inline__))
+vsetq_lane_p64 (poly64_t __a, poly64x2_t __b, const int __c)
+{
+  return (poly64x2_t)__builtin_neon_vset_lanev2di ((__builtin_neon_di) __a, (int64x2_t) __b, __c);
+}
+
 __extension__ static __inline poly64x1_t __attribute__ ((__always_inline__))
 vcreate_p64 (uint64_t __a)
 {
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/p64_p128.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/p64_p128.c
index 8907b38..ba8fbeb 100644
--- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/p64_p128.c
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/p64_p128.c
@@ -39,17 +39,6 @@  VECT_VAR_DECL(vdup_n_expected2,poly,64,1) [] = { 0xfffffffffffffff2 };
 VECT_VAR_DECL(vdup_n_expected2,poly,64,2) [] = { 0xfffffffffffffff2,
 						 0xfffffffffffffff2 };
 
-/* Expected results: vmov_n.  */
-VECT_VAR_DECL(vmov_n_expected0,poly,64,1) [] = { 0xfffffffffffffff0 };
-VECT_VAR_DECL(vmov_n_expected0,poly,64,2) [] = { 0xfffffffffffffff0,
-						 0xfffffffffffffff0 };
-VECT_VAR_DECL(vmov_n_expected1,poly,64,1) [] = { 0xfffffffffffffff1 };
-VECT_VAR_DECL(vmov_n_expected1,poly,64,2) [] = { 0xfffffffffffffff1,
-						 0xfffffffffffffff1 };
-VECT_VAR_DECL(vmov_n_expected2,poly,64,1) [] = { 0xfffffffffffffff2 };
-VECT_VAR_DECL(vmov_n_expected2,poly,64,2) [] = { 0xfffffffffffffff2,
-						 0xfffffffffffffff2 };
-
 /* Expected results: vext.  */
 VECT_VAR_DECL(vext_expected,poly,64,1) [] = { 0xfffffffffffffff0 };
 VECT_VAR_DECL(vext_expected,poly,64,2) [] = { 0xfffffffffffffff1, 0x88 };
@@ -124,6 +113,29 @@  VECT_VAR_DECL(vst1_lane_expected,poly,64,1) [] = { 0xfffffffffffffff0 };
 VECT_VAR_DECL(vst1_lane_expected,poly,64,2) [] = { 0xfffffffffffffff0,
 						   0x3333333333333333 };
 
+/* Expected results: vget_lane.  */
+VECT_VAR_DECL(vget_lane_expected,poly,64,1) = 0xfffffffffffffff0;
+VECT_VAR_DECL(vget_lane_expected,poly,64,2) = 0xfffffffffffffff0;
+
+/* Expected results: vset_lane.  */
+VECT_VAR_DECL(vset_lane_expected,poly,64,1) [] = { 0x88 };
+VECT_VAR_DECL(vset_lane_expected,poly,64,2) [] = { 0xfffffffffffffff0, 0x11 };
+
+/* Expected results: vtst.  */
+VECT_VAR_DECL(vtst_expected,uint,64,1) [] = { 0x0 };
+
+#ifdef __aarch64__
+/* Expected results: vmov_n.  */
+VECT_VAR_DECL(vmov_n_expected0,poly,64,1) [] = { 0xfffffffffffffff0 };
+VECT_VAR_DECL(vmov_n_expected0,poly,64,2) [] = { 0xfffffffffffffff0,
+						 0xfffffffffffffff0 };
+VECT_VAR_DECL(vmov_n_expected1,poly,64,1) [] = { 0xfffffffffffffff1 };
+VECT_VAR_DECL(vmov_n_expected1,poly,64,2) [] = { 0xfffffffffffffff1,
+						 0xfffffffffffffff1 };
+VECT_VAR_DECL(vmov_n_expected2,poly,64,1) [] = { 0xfffffffffffffff2 };
+VECT_VAR_DECL(vmov_n_expected2,poly,64,2) [] = { 0xfffffffffffffff2,
+						 0xfffffffffffffff2 };
+
 /* Expected results: vldX_lane.  */
 VECT_VAR_DECL(expected_vld_st2_0,poly,64,1) [] = { 0xfffffffffffffff0 };
 VECT_VAR_DECL(expected_vld_st2_0,poly,64,2) [] = { 0xfffffffffffffff0,
@@ -153,9 +165,9 @@  VECT_VAR_DECL(expected_vld_st4_3,poly,64,1) [] = { 0xfffffffffffffff3 };
 VECT_VAR_DECL(expected_vld_st4_3,poly,64,2) [] = { 0xaaaaaaaaaaaaaaaa,
 						   0xaaaaaaaaaaaaaaaa };
 
-/* Expected results: vget_lane.  */
-VECT_VAR_DECL(vget_lane_expected,poly,64,1) = 0xfffffffffffffff0;
-VECT_VAR_DECL(vget_lane_expected,poly,64,2) = 0xfffffffffffffff0;
+/* Expected results: vtst.  */
+VECT_VAR_DECL(vtst_expected,uint,64,2) [] = { 0x0, 0xffffffffffffffff };
+#endif
 
 int main (void)
 {
@@ -727,6 +739,107 @@  int main (void)
   CHECK(TEST_MSG, poly, 64, 1, PRIx64, vst1_lane_expected, "");
   CHECK(TEST_MSG, poly, 64, 2, PRIx64, vst1_lane_expected, "");
 
+  /* vget_lane_p64 tests.  */
+#undef TEST_MSG
+#define TEST_MSG "VGET_LANE/VGETQ_LANE"
+
+#define TEST_VGET_LANE(Q, T1, T2, W, N, L)				   \
+  VECT_VAR(vget_lane_vector, T1, W, N) = vget##Q##_lane_##T2##W(VECT_VAR(vget_lane_vector1, T1, W, N), L); \
+  if (VECT_VAR(vget_lane_vector, T1, W, N) != VECT_VAR(vget_lane_expected, T1, W, N)) {		\
+    fprintf(stderr,							   \
+	    "ERROR in %s (%s line %d in result '%s') at type %s "	   \
+	    "got 0x%" PRIx##W " != 0x%" PRIx##W "\n",			   \
+	    TEST_MSG, __FILE__, __LINE__,				   \
+	    STR(VECT_VAR(vget_lane_expected, T1, W, N)),		   \
+	    STR(VECT_NAME(T1, W, N)),					   \
+	    VECT_VAR(vget_lane_vector, T1, W, N),			   \
+	    VECT_VAR(vget_lane_expected, T1, W, N));			   \
+    abort ();								   \
+  }
+
+  /* Initialize input values.  */
+  DECL_VARIABLE(vget_lane_vector1, poly, 64, 1);
+  DECL_VARIABLE(vget_lane_vector1, poly, 64, 2);
+
+  VLOAD(vget_lane_vector1, buffer,  , poly, p, 64, 1);
+  VLOAD(vget_lane_vector1, buffer, q, poly, p, 64, 2);
+
+  VECT_VAR_DECL(vget_lane_vector, poly, 64, 1);
+  VECT_VAR_DECL(vget_lane_vector, poly, 64, 2);
+
+  TEST_VGET_LANE( , poly, p, 64, 1, 0);
+  TEST_VGET_LANE(q, poly, p, 64, 2, 0);
+
+
+  /* vset_lane_p64 tests.  */
+#undef TEST_MSG
+#define TEST_MSG "VSET_LANE/VSETQ_LANE"
+
+#define TEST_VSET_LANE(Q, T1, T2, W, N, V, L)				\
+  VECT_VAR(vset_lane_vector, T1, W, N) =						\
+    vset##Q##_lane_##T2##W(V,						\
+			   VECT_VAR(vset_lane_vector, T1, W, N),			\
+			   L);						\
+  vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vset_lane_vector, T1, W, N))
+
+  /* Initialize input values.  */
+  DECL_VARIABLE(vset_lane_vector, poly, 64, 1);
+  DECL_VARIABLE(vset_lane_vector, poly, 64, 2);
+
+  CLEAN(result, uint, 64, 1);
+  CLEAN(result, uint, 64, 2);
+
+  VLOAD(vset_lane_vector, buffer, , poly, p, 64, 1);
+  VLOAD(vset_lane_vector, buffer, q, poly, p, 64, 2);
+
+  /* Choose value and lane arbitrarily.  */
+  TEST_VSET_LANE(, poly, p, 64, 1, 0x88, 0);
+  TEST_VSET_LANE(q, poly, p, 64, 2, 0x11, 1);
+
+  CHECK(TEST_MSG, poly, 64, 1, PRIx64, vset_lane_expected, "");
+  CHECK(TEST_MSG, poly, 64, 2, PRIx64, vset_lane_expected, "");
+
+
+  /* vtst_p64 tests.  */
+#undef TEST_MSG
+#define TEST_MSG "VTST"
+  
+#define TEST_VTST1(INSN, Q, T1, T2, W, N)			\
+  VECT_VAR(vtst_vector_res, uint, W, N) =			\
+    INSN##Q##_##T2##W(VECT_VAR(vtst_vector, T1, W, N),		\
+		      VECT_VAR(vtst_vector2, T1, W, N));	\
+    vst1##Q##_u##W(VECT_VAR(result, uint, W, N),		\
+		   VECT_VAR(vtst_vector_res, uint, W, N))
+
+#define TEST_VTST(INSN, Q, T1, T2, W, N)	\
+  TEST_VTST1(INSN, Q, T1, T2, W, N)		\
+
+  /* Initialize input values.  */
+  DECL_VARIABLE(vtst_vector, poly, 64, 1);
+  DECL_VARIABLE(vtst_vector2, poly, 64, 1);
+  DECL_VARIABLE(vtst_vector_res, uint, 64, 1);
+
+  CLEAN(result, uint, 64, 1);
+
+  VLOAD(vtst_vector, buffer,  , poly, p, 64, 1);
+  VDUP(vtst_vector2, , poly, p, 64, 1, 5);
+
+  TEST_VTST(vtst, , poly, p, 64, 1);
+
+  CHECK(TEST_MSG, uint, 64, 1, PRIx64, vtst_expected, "");
+
+  /* vtstq_p64 is supported by aarch64 only.  */
+#ifdef __aarch64__
+  DECL_VARIABLE(vtst_vector, poly, 64, 2);
+  DECL_VARIABLE(vtst_vector2, poly, 64, 2);
+  DECL_VARIABLE(vtst_vector_res, uint, 64, 2);
+  CLEAN(result, uint, 64, 2);
+  VLOAD(vtst_vector, buffer, q, poly, p, 64, 2);
+  VDUP(vtst_vector2, q, poly, p, 64, 2, 5);
+  TEST_VTST(vtst, q, poly, p, 64, 2);
+  CHECK(TEST_MSG, uint, 64, 2, PRIx64, vtst_expected, "");
+#endif
+
 #ifdef __aarch64__
 
   /* vmov_n_p64 tests.  */
@@ -767,37 +880,6 @@  int main (void)
     }
   }
 
-  /* vget_lane_p64 tests.  */
-#undef TEST_MSG
-#define TEST_MSG "VGET_LANE/VGETQ_LANE"
-
-#define TEST_VGET_LANE(Q, T1, T2, W, N, L)				   \
-  VECT_VAR(vget_lane_vector, T1, W, N) = vget##Q##_lane_##T2##W(VECT_VAR(vector, T1, W, N), L); \
-  if (VECT_VAR(vget_lane_vector, T1, W, N) != VECT_VAR(vget_lane_expected, T1, W, N)) {		\
-    fprintf(stderr,							   \
-	    "ERROR in %s (%s line %d in result '%s') at type %s "	   \
-	    "got 0x%" PRIx##W " != 0x%" PRIx##W "\n",			   \
-	    TEST_MSG, __FILE__, __LINE__,				   \
-	    STR(VECT_VAR(vget_lane_expected, T1, W, N)),		   \
-	    STR(VECT_NAME(T1, W, N)),					   \
-	    VECT_VAR(vget_lane_vector, T1, W, N),			   \
-	    VECT_VAR(vget_lane_expected, T1, W, N));			   \
-    abort ();								   \
-  }
-
-  /* Initialize input values.  */
-  DECL_VARIABLE(vector, poly, 64, 1);
-  DECL_VARIABLE(vector, poly, 64, 2);
-
-  VLOAD(vector, buffer,  , poly, p, 64, 1);
-  VLOAD(vector, buffer, q, poly, p, 64, 2);
-
-  VECT_VAR_DECL(vget_lane_vector, poly, 64, 1);
-  VECT_VAR_DECL(vget_lane_vector, poly, 64, 2);
-
-  TEST_VGET_LANE( , poly, p, 64, 1, 0);
-  TEST_VGET_LANE(q, poly, p, 64, 2, 0);
-
   /* vldx_lane_p64 tests.  */
 #undef TEST_MSG
 #define TEST_MSG "VLDX_LANE/VLDXQ_LANE"