Message ID | 874lmry4c1.fsf@linaro.org |
---|---|
State | New |
Headers | show |
Series | [AArch64] Add SVE mul_highpart patterns | expand |
On 8 February 2018 at 14:54, Richard Sandiford <richard.sandiford@linaro.org> wrote: > One advantage of the new permute handling compared to the old way is > that we can now easily take advantage of the vectoriser's divmod patterns > for SVE. > > I realise we're in stage 4, but this is entirely SVE-specific. > > Tested on aarch64-linux-gnu and aarch64_be-elf. OK to install? > > Richard > Hi Richard, > > 2018-02-08 Richard Sandiford <richard.sandiford@linaro.org> > > gcc/ > * config/aarch64/iterators.md (UNSPEC_SMUL_HIGHPART) > (UNSPEC_UMUL_HIGHPART): New constants. > (MUL_HIGHPART): New int iterator. > (su): Handle UNSPEC_SMUL_HIGHPART and UNSPEC_UMUL_HIGHPART. > * config/aarch64/aarch64-sve.md (<su>mul<mode>3_highpart): New > define_expand. > (*<su>mul<mode>3_highpart): New define_insn. > > gcc/testsuite/ > * gcc.target/aarch64/sve/mul_highpart_1.c: New test. > * gcc.target/aarch64/sve/mul_highpart_1_run.c: Likewise. > > Index: gcc/config/aarch64/iterators.md > =================================================================== > --- gcc/config/aarch64/iterators.md 2018-01-26 15:14:35.386171048 +0000 > +++ gcc/config/aarch64/iterators.md 2018-02-08 13:51:56.252511923 +0000 > @@ -438,6 +438,8 @@ (define_c_enum "unspec" > UNSPEC_ANDF ; Used in aarch64-sve.md. > UNSPEC_IORF ; Used in aarch64-sve.md. > UNSPEC_XORF ; Used in aarch64-sve.md. > + UNSPEC_SMUL_HIGHPART ; Used in aarch64-sve.md. > + UNSPEC_UMUL_HIGHPART ; Used in aarch64-sve.md. > UNSPEC_COND_ADD ; Used in aarch64-sve.md. > UNSPEC_COND_SUB ; Used in aarch64-sve.md. > UNSPEC_COND_SMAX ; Used in aarch64-sve.md. 
> @@ -1467,6 +1469,8 @@ (define_int_iterator UNPACK [UNSPEC_UNPA > > (define_int_iterator UNPACK_UNSIGNED [UNSPEC_UNPACKULO UNSPEC_UNPACKUHI]) > > +(define_int_iterator MUL_HIGHPART [UNSPEC_SMUL_HIGHPART UNSPEC_UMUL_HIGHPART]) > + > (define_int_iterator SVE_COND_INT_OP [UNSPEC_COND_ADD UNSPEC_COND_SUB > UNSPEC_COND_SMAX UNSPEC_COND_UMAX > UNSPEC_COND_SMIN UNSPEC_COND_UMIN > @@ -1558,7 +1562,9 @@ (define_int_attr logicalf_op [(UNSPEC_AN > (define_int_attr su [(UNSPEC_UNPACKSHI "s") > (UNSPEC_UNPACKUHI "u") > (UNSPEC_UNPACKSLO "s") > - (UNSPEC_UNPACKULO "u")]) > + (UNSPEC_UNPACKULO "u") > + (UNSPEC_SMUL_HIGHPART "s") > + (UNSPEC_UMUL_HIGHPART "u")]) > > (define_int_attr sur [(UNSPEC_SHADD "s") (UNSPEC_UHADD "u") > (UNSPEC_SRHADD "sr") (UNSPEC_URHADD "ur") > Index: gcc/config/aarch64/aarch64-sve.md > =================================================================== > --- gcc/config/aarch64/aarch64-sve.md 2018-02-01 11:04:16.723192040 +0000 > +++ gcc/config/aarch64/aarch64-sve.md 2018-02-08 13:51:56.252511923 +0000 > @@ -980,6 +980,34 @@ (define_insn "*msub<mode>3" > mls\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>" > ) > > +;; Unpredicated highpart multiplication. > +(define_expand "<su>mul<mode>3_highpart" > + [(set (match_operand:SVE_I 0 "register_operand") > + (unspec:SVE_I > + [(match_dup 3) > + (unspec:SVE_I [(match_operand:SVE_I 1 "register_operand") > + (match_operand:SVE_I 2 "register_operand")] > + MUL_HIGHPART)] > + UNSPEC_MERGE_PTRUE))] > + "TARGET_SVE" > + { > + operands[3] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode)); > + } > +) > + > +;; Predicated highpart multiplication. 
> +(define_insn "*<su>mul<mode>3_highpart" > + [(set (match_operand:SVE_I 0 "register_operand" "=w") > + (unspec:SVE_I > + [(match_operand:<VPRED> 1 "register_operand" "Upl") > + (unspec:SVE_I [(match_operand:SVE_I 2 "register_operand" "%0") > + (match_operand:SVE_I 3 "register_operand" "w")] > + MUL_HIGHPART)] > + UNSPEC_MERGE_PTRUE))] > + "TARGET_SVE" > + "<su>mulh\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>" > +) > + > ;; Unpredicated NEG, NOT and POPCOUNT. > (define_expand "<optab><mode>2" > [(set (match_operand:SVE_I 0 "register_operand") > Index: gcc/testsuite/gcc.target/aarch64/sve/mul_highpart_1.c > =================================================================== > --- /dev/null 2018-02-08 11:17:10.862716283 +0000 > +++ gcc/testsuite/gcc.target/aarch64/sve/mul_highpart_1.c 2018-02-08 13:51:56.252511923 +0000 > @@ -0,0 +1,25 @@ > +/* { dg-do assemble { target aarch64_asm_sve_ok } } */ > +/* { dg-options "-O2 -ftree-vectorize -fno-vect-cost-model --save-temps" } */ > + > +#include <stdint.h> > + > +#define DEF_LOOP(TYPE) \ > +void __attribute__ ((noipa)) \ > +mod_##TYPE (TYPE *dst, TYPE *src, int count) \ > +{ \ > + for (int i = 0; i < count; ++i) \ > + dst[i] = src[i] % 17; \ > +} > + > +#define TEST_ALL(T) \ > + T (int32_t) \ > + T (uint32_t) \ > + T (int64_t) \ > + T (uint64_t) > + > +TEST_ALL (DEF_LOOP) > + > +/* { dg-final { scan-assembler-times {\tsmulh\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */ > +/* { dg-final { scan-assembler-times {\tumulh\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */ > +/* { dg-final { scan-assembler-times {\tsmulh\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 1 } } */ > +/* { dg-final { scan-assembler-times {\tumulh\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 1 } } */ > Index: gcc/testsuite/gcc.target/aarch64/sve/mul_highpart_1_run.c > =================================================================== > --- /dev/null 2018-02-08 11:17:10.862716283 +0000 > +++ 
gcc/testsuite/gcc.target/aarch64/sve/mul_highpart_1_run.c 2018-02-08 13:51:56.253511883 +0000 > @@ -0,0 +1,29 @@ > +/* { dg-do run } */ You forgot to include an effective target to prevent the test from being run on non-SVE-capable HW. I suppose check_effective_target_aarch64_sve_hw would work, but I'm not sure it's sufficient to prevent the test from being compiled with old binutils that do not support SVE: maybe you also need to add aarch64_asm_sve_ok as in the other testcase? Thanks, Christophe > +/* { dg-options "-O2 -ftree-vectorize -fno-vect-cost-model --save-temps" } */ > + > +#include "mul_highpart_1.c" > + > +#define N 79 > + > +#define TEST_LOOP(TYPE) \ > + { \ > + TYPE dst[N], src[N]; \ > + for (int i = 0; i < N; ++i) \ > + { \ > + src[i] = i * 7 + i % 3; \ > + if (i % 11 > 7) \ > + src[i] = -src[i]; \ > + asm volatile ("" ::: "memory"); \ > + } \ > + mod_##TYPE (dst, src, N); \ > + for (int i = 0; i < N; ++i) \ > + if (dst[i] != src[i] % 17) \ > + __builtin_abort (); \ > + } > + > +int > +main (void) > +{ > + TEST_ALL (TEST_LOOP); > + return 0; > +}
Christophe Lyon <christophe.lyon@linaro.org> writes: > On 8 February 2018 at 14:54, Richard Sandiford > <richard.sandiford@linaro.org> wrote: >> Index: gcc/testsuite/gcc.target/aarch64/sve/mul_highpart_1_run.c >> =================================================================== >> --- /dev/null 2018-02-08 11:17:10.862716283 +0000 >> +++ gcc/testsuite/gcc.target/aarch64/sve/mul_highpart_1_run.c 2018-02-08 13:51:56.253511883 +0000 >> @@ -0,0 +1,29 @@ >> +/* { dg-do run } */ > You forgot to include an effective target to prevent trying to run on > non-SVE capable HW. Oops, sorry about that. I had the fix in the tree I use for non-SVE testing but forgot to update the commit tree. Applied as below. > I suppose check_effective_target_aarch64_sve_hw would work, but I'm > not sure it's sufficient to prevent from compiling the test with old > binutils non supporting sve: maybe you also need to add > aarch64_asm_sve_ok as in the other testcase? It should be OK. aarch64_sve_hw is supposed to imply aarch64_asm_sve_ok, since it needs to both assemble and run SVE code. Thanks, Richard 2018-03-14 Richard Sandiford <richard.sandiford@linaro.org> gcc/testsuite/ * gcc.target/aarch64/sve/mul_highpart_1_run.c: Restrict to aarch64_sve_hw. Index: gcc/testsuite/gcc.target/aarch64/sve/mul_highpart_1_run.c =================================================================== --- gcc/testsuite/gcc.target/aarch64/sve/mul_highpart_1_run.c 2018-03-13 15:11:55.402370138 +0000 +++ gcc/testsuite/gcc.target/aarch64/sve/mul_highpart_1_run.c 2018-03-14 09:06:36.946848641 +0000 @@ -1,4 +1,4 @@ -/* { dg-do run } */ +/* { dg-do run { target aarch64_sve_hw } } */ /* { dg-options "-O2 -ftree-vectorize -fno-vect-cost-model --save-temps" } */ #include "mul_highpart_1.c"
Index: gcc/config/aarch64/iterators.md =================================================================== --- gcc/config/aarch64/iterators.md 2018-01-26 15:14:35.386171048 +0000 +++ gcc/config/aarch64/iterators.md 2018-02-08 13:51:56.252511923 +0000 @@ -438,6 +438,8 @@ (define_c_enum "unspec" UNSPEC_ANDF ; Used in aarch64-sve.md. UNSPEC_IORF ; Used in aarch64-sve.md. UNSPEC_XORF ; Used in aarch64-sve.md. + UNSPEC_SMUL_HIGHPART ; Used in aarch64-sve.md. + UNSPEC_UMUL_HIGHPART ; Used in aarch64-sve.md. UNSPEC_COND_ADD ; Used in aarch64-sve.md. UNSPEC_COND_SUB ; Used in aarch64-sve.md. UNSPEC_COND_SMAX ; Used in aarch64-sve.md. @@ -1467,6 +1469,8 @@ (define_int_iterator UNPACK [UNSPEC_UNPA (define_int_iterator UNPACK_UNSIGNED [UNSPEC_UNPACKULO UNSPEC_UNPACKUHI]) +(define_int_iterator MUL_HIGHPART [UNSPEC_SMUL_HIGHPART UNSPEC_UMUL_HIGHPART]) + (define_int_iterator SVE_COND_INT_OP [UNSPEC_COND_ADD UNSPEC_COND_SUB UNSPEC_COND_SMAX UNSPEC_COND_UMAX UNSPEC_COND_SMIN UNSPEC_COND_UMIN @@ -1558,7 +1562,9 @@ (define_int_attr logicalf_op [(UNSPEC_AN (define_int_attr su [(UNSPEC_UNPACKSHI "s") (UNSPEC_UNPACKUHI "u") (UNSPEC_UNPACKSLO "s") - (UNSPEC_UNPACKULO "u")]) + (UNSPEC_UNPACKULO "u") + (UNSPEC_SMUL_HIGHPART "s") + (UNSPEC_UMUL_HIGHPART "u")]) (define_int_attr sur [(UNSPEC_SHADD "s") (UNSPEC_UHADD "u") (UNSPEC_SRHADD "sr") (UNSPEC_URHADD "ur") Index: gcc/config/aarch64/aarch64-sve.md =================================================================== --- gcc/config/aarch64/aarch64-sve.md 2018-02-01 11:04:16.723192040 +0000 +++ gcc/config/aarch64/aarch64-sve.md 2018-02-08 13:51:56.252511923 +0000 @@ -980,6 +980,34 @@ (define_insn "*msub<mode>3" mls\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>" ) +;; Unpredicated highpart multiplication. 
+(define_expand "<su>mul<mode>3_highpart" + [(set (match_operand:SVE_I 0 "register_operand") + (unspec:SVE_I + [(match_dup 3) + (unspec:SVE_I [(match_operand:SVE_I 1 "register_operand") + (match_operand:SVE_I 2 "register_operand")] + MUL_HIGHPART)] + UNSPEC_MERGE_PTRUE))] + "TARGET_SVE" + { + operands[3] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode)); + } +) + +;; Predicated highpart multiplication. +(define_insn "*<su>mul<mode>3_highpart" + [(set (match_operand:SVE_I 0 "register_operand" "=w") + (unspec:SVE_I + [(match_operand:<VPRED> 1 "register_operand" "Upl") + (unspec:SVE_I [(match_operand:SVE_I 2 "register_operand" "%0") + (match_operand:SVE_I 3 "register_operand" "w")] + MUL_HIGHPART)] + UNSPEC_MERGE_PTRUE))] + "TARGET_SVE" + "<su>mulh\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>" +) + ;; Unpredicated NEG, NOT and POPCOUNT. (define_expand "<optab><mode>2" [(set (match_operand:SVE_I 0 "register_operand") Index: gcc/testsuite/gcc.target/aarch64/sve/mul_highpart_1.c =================================================================== --- /dev/null 2018-02-08 11:17:10.862716283 +0000 +++ gcc/testsuite/gcc.target/aarch64/sve/mul_highpart_1.c 2018-02-08 13:51:56.252511923 +0000 @@ -0,0 +1,25 @@ +/* { dg-do assemble { target aarch64_asm_sve_ok } } */ +/* { dg-options "-O2 -ftree-vectorize -fno-vect-cost-model --save-temps" } */ + +#include <stdint.h> + +#define DEF_LOOP(TYPE) \ +void __attribute__ ((noipa)) \ +mod_##TYPE (TYPE *dst, TYPE *src, int count) \ +{ \ + for (int i = 0; i < count; ++i) \ + dst[i] = src[i] % 17; \ +} + +#define TEST_ALL(T) \ + T (int32_t) \ + T (uint32_t) \ + T (int64_t) \ + T (uint64_t) + +TEST_ALL (DEF_LOOP) + +/* { dg-final { scan-assembler-times {\tsmulh\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tumulh\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tsmulh\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 1 } } */ +/* { dg-final 
{ scan-assembler-times {\tumulh\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 1 } } */ Index: gcc/testsuite/gcc.target/aarch64/sve/mul_highpart_1_run.c =================================================================== --- /dev/null 2018-02-08 11:17:10.862716283 +0000 +++ gcc/testsuite/gcc.target/aarch64/sve/mul_highpart_1_run.c 2018-02-08 13:51:56.253511883 +0000 @@ -0,0 +1,29 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -ftree-vectorize -fno-vect-cost-model --save-temps" } */ + +#include "mul_highpart_1.c" + +#define N 79 + +#define TEST_LOOP(TYPE) \ + { \ + TYPE dst[N], src[N]; \ + for (int i = 0; i < N; ++i) \ + { \ + src[i] = i * 7 + i % 3; \ + if (i % 11 > 7) \ + src[i] = -src[i]; \ + asm volatile ("" ::: "memory"); \ + } \ + mod_##TYPE (dst, src, N); \ + for (int i = 0; i < N; ++i) \ + if (dst[i] != src[i] % 17) \ + __builtin_abort (); \ + } + +int +main (void) +{ + TEST_ALL (TEST_LOOP); + return 0; +}