[AArch64] Add SVE mul_highpart patterns

Message ID 874lmry4c1.fsf@linaro.org
State New
Headers show
Series
  • [AArch64] Add SVE mul_highpart patterns
Related show

Commit Message

Richard Sandiford Feb. 8, 2018, 1:54 p.m.
One advantage of the new permute handling compared to the old way is
that we can now easily take advantage of the vectoriser's divmod patterns
for SVE.

I realise we're in stage 4, but this is entirely SVE-specific.

Tested on aarch64-linux-gnu and aarch64_be-elf.  OK to install?

Richard


2018-02-08  Richard Sandiford  <richard.sandiford@linaro.org>

gcc/
	* config/aarch64/iterators.md (UNSPEC_SMUL_HIGHPART)
	(UNSPEC_UMUL_HIGHPART): New constants.
	(MUL_HIGHPART): New int iterator.
	(su): Handle UNSPEC_SMUL_HIGHPART and UNSPEC_UMUL_HIGHPART.
	* config/aarch64/aarch64-sve.md (<su>mul<mode>3_highpart): New
	define_expand.
	(*<su>mul<mode>3_highpart): New define_insn.

gcc/testsuite/
	* gcc.target/aarch64/sve/mul_highpart_1.c: New test.
	* gcc.target/aarch64/sve/mul_highpart_1_run.c: Likewise.

Comments

Christophe Lyon March 14, 2018, 8:32 a.m. | #1
On 8 February 2018 at 14:54, Richard Sandiford
<richard.sandiford@linaro.org> wrote:
> One advantage of the new permute handling compared to the old way is

> that we can now easily take advantage of the vectoriser's divmod patterns

> for SVE.

>

> I realise we're in stage 4, but this is entirely SVE-specific.

>

> Tested on aarch64-linux-gnu and aarch64_be-elf.  OK to install?

>

> Richard

>

Hi Richard,

>

> 2018-02-08  Richard Sandiford  <richard.sandiford@linaro.org>

>

> gcc/

>         * config/aarch64/iterators.md (UNSPEC_SMUL_HIGHPART)

>         (UNSPEC_UMUL_HIGHPART): New constants.

>         (MUL_HIGHPART): New int iterator.

>         (su): Handle UNSPEC_SMUL_HIGHPART and UNSPEC_UMUL_HIGHPART.

>         * config/aarch64/aarch64-sve.md (<su>mul<mode>3_highpart): New

>         define_expand.

>         (*<su>mul<mode>3_highpart): New define_insn.

>

> gcc/testsuite/

>         * gcc.target/aarch64/sve/mul_highpart_1.c: New test.

>         * gcc.target/aarch64/sve/mul_highpart_1_run.c: Likewise.

>

> Index: gcc/config/aarch64/iterators.md

> ===================================================================

> --- gcc/config/aarch64/iterators.md     2018-01-26 15:14:35.386171048 +0000

> +++ gcc/config/aarch64/iterators.md     2018-02-08 13:51:56.252511923 +0000

> @@ -438,6 +438,8 @@ (define_c_enum "unspec"

>      UNSPEC_ANDF                ; Used in aarch64-sve.md.

>      UNSPEC_IORF                ; Used in aarch64-sve.md.

>      UNSPEC_XORF                ; Used in aarch64-sve.md.

> +    UNSPEC_SMUL_HIGHPART ; Used in aarch64-sve.md.

> +    UNSPEC_UMUL_HIGHPART ; Used in aarch64-sve.md.

>      UNSPEC_COND_ADD    ; Used in aarch64-sve.md.

>      UNSPEC_COND_SUB    ; Used in aarch64-sve.md.

>      UNSPEC_COND_SMAX   ; Used in aarch64-sve.md.

> @@ -1467,6 +1469,8 @@ (define_int_iterator UNPACK [UNSPEC_UNPA

>

>  (define_int_iterator UNPACK_UNSIGNED [UNSPEC_UNPACKULO UNSPEC_UNPACKUHI])

>

> +(define_int_iterator MUL_HIGHPART [UNSPEC_SMUL_HIGHPART UNSPEC_UMUL_HIGHPART])

> +

>  (define_int_iterator SVE_COND_INT_OP [UNSPEC_COND_ADD UNSPEC_COND_SUB

>                                       UNSPEC_COND_SMAX UNSPEC_COND_UMAX

>                                       UNSPEC_COND_SMIN UNSPEC_COND_UMIN

> @@ -1558,7 +1562,9 @@ (define_int_attr logicalf_op [(UNSPEC_AN

>  (define_int_attr su [(UNSPEC_UNPACKSHI "s")

>                      (UNSPEC_UNPACKUHI "u")

>                      (UNSPEC_UNPACKSLO "s")

> -                    (UNSPEC_UNPACKULO "u")])

> +                    (UNSPEC_UNPACKULO "u")

> +                    (UNSPEC_SMUL_HIGHPART "s")

> +                    (UNSPEC_UMUL_HIGHPART "u")])

>

>  (define_int_attr sur [(UNSPEC_SHADD "s") (UNSPEC_UHADD "u")

>                       (UNSPEC_SRHADD "sr") (UNSPEC_URHADD "ur")

> Index: gcc/config/aarch64/aarch64-sve.md

> ===================================================================

> --- gcc/config/aarch64/aarch64-sve.md   2018-02-01 11:04:16.723192040 +0000

> +++ gcc/config/aarch64/aarch64-sve.md   2018-02-08 13:51:56.252511923 +0000

> @@ -980,6 +980,34 @@ (define_insn "*msub<mode>3"

>     mls\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>"

>  )

>

> +;; Unpredicated highpart multiplication.

> +(define_expand "<su>mul<mode>3_highpart"

> +  [(set (match_operand:SVE_I 0 "register_operand")

> +       (unspec:SVE_I

> +         [(match_dup 3)

> +          (unspec:SVE_I [(match_operand:SVE_I 1 "register_operand")

> +                         (match_operand:SVE_I 2 "register_operand")]

> +                        MUL_HIGHPART)]

> +         UNSPEC_MERGE_PTRUE))]

> +  "TARGET_SVE"

> +  {

> +    operands[3] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));

> +  }

> +)

> +

> +;; Predicated highpart multiplication.

> +(define_insn "*<su>mul<mode>3_highpart"

> +  [(set (match_operand:SVE_I 0 "register_operand" "=w")

> +       (unspec:SVE_I

> +         [(match_operand:<VPRED> 1 "register_operand" "Upl")

> +          (unspec:SVE_I [(match_operand:SVE_I 2 "register_operand" "%0")

> +                         (match_operand:SVE_I 3 "register_operand" "w")]

> +                        MUL_HIGHPART)]

> +         UNSPEC_MERGE_PTRUE))]

> +  "TARGET_SVE"

> +  "<su>mulh\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"

> +)

> +

>  ;; Unpredicated NEG, NOT and POPCOUNT.

>  (define_expand "<optab><mode>2"

>    [(set (match_operand:SVE_I 0 "register_operand")

> Index: gcc/testsuite/gcc.target/aarch64/sve/mul_highpart_1.c

> ===================================================================

> --- /dev/null   2018-02-08 11:17:10.862716283 +0000

> +++ gcc/testsuite/gcc.target/aarch64/sve/mul_highpart_1.c       2018-02-08 13:51:56.252511923 +0000

> @@ -0,0 +1,25 @@

> +/* { dg-do assemble { target aarch64_asm_sve_ok } } */

> +/* { dg-options "-O2 -ftree-vectorize -fno-vect-cost-model --save-temps" } */

> +

> +#include <stdint.h>

> +

> +#define DEF_LOOP(TYPE)                         \

> +void __attribute__ ((noipa))                   \

> +mod_##TYPE (TYPE *dst, TYPE *src, int count)   \

> +{                                              \

> +  for (int i = 0; i < count; ++i)              \

> +    dst[i] = src[i] % 17;                      \

> +}

> +

> +#define TEST_ALL(T) \

> +  T (int32_t) \

> +  T (uint32_t) \

> +  T (int64_t) \

> +  T (uint64_t)

> +

> +TEST_ALL (DEF_LOOP)

> +

> +/* { dg-final { scan-assembler-times {\tsmulh\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */

> +/* { dg-final { scan-assembler-times {\tumulh\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */

> +/* { dg-final { scan-assembler-times {\tsmulh\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 1 } } */

> +/* { dg-final { scan-assembler-times {\tumulh\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 1 } } */

> Index: gcc/testsuite/gcc.target/aarch64/sve/mul_highpart_1_run.c

> ===================================================================

> --- /dev/null   2018-02-08 11:17:10.862716283 +0000

> +++ gcc/testsuite/gcc.target/aarch64/sve/mul_highpart_1_run.c   2018-02-08 13:51:56.253511883 +0000

> @@ -0,0 +1,29 @@

> +/* { dg-do run } */

You forgot to include an effective target to prevent the test from
being run on hardware that is not SVE-capable.

I suppose check_effective_target_aarch64_sve_hw would work, but I'm
not sure it's sufficient to prevent the test from being compiled with
old binutils that do not support SVE: maybe you also need to add
aarch64_asm_sve_ok as in the other testcase?

Thanks,

Christophe

> +/* { dg-options "-O2 -ftree-vectorize -fno-vect-cost-model --save-temps" } */

> +

> +#include "mul_highpart_1.c"

> +

> +#define N 79

> +

> +#define TEST_LOOP(TYPE)                                \

> +  {                                            \

> +    TYPE dst[N], src[N];                       \

> +    for (int i = 0; i < N; ++i)                        \

> +      {                                                \

> +       src[i] = i * 7 + i % 3;                 \

> +       if (i % 11 > 7)                         \

> +         src[i] = -src[i];                     \

> +       asm volatile ("" ::: "memory");         \

> +      }                                                \

> +    mod_##TYPE (dst, src, N);                  \

> +    for (int i = 0; i < N; ++i)                        \

> +      if (dst[i] != src[i] % 17)               \

> +       __builtin_abort ();                     \

> +  }

> +

> +int

> +main (void)

> +{

> +  TEST_ALL (TEST_LOOP);

> +  return 0;

> +}
Richard Sandiford March 14, 2018, 9:13 a.m. | #2
Christophe Lyon <christophe.lyon@linaro.org> writes:
> On 8 February 2018 at 14:54, Richard Sandiford

> <richard.sandiford@linaro.org> wrote:

>> Index: gcc/testsuite/gcc.target/aarch64/sve/mul_highpart_1_run.c

>> ===================================================================

>> --- /dev/null   2018-02-08 11:17:10.862716283 +0000

>> +++ gcc/testsuite/gcc.target/aarch64/sve/mul_highpart_1_run.c   2018-02-08 13:51:56.253511883 +0000

>> @@ -0,0 +1,29 @@

>> +/* { dg-do run } */

> You forgot to include an effective target to prevent trying to run on

> non-SVE capable HW.


Oops, sorry about that.  I had the fix in the tree I use for non-SVE
testing but forgot to update the commit tree.  Applied as below.

> I suppose check_effective_target_aarch64_sve_hw would work, but I 'm

> not sure it's sufficient to prevent from compiling the test with old

> binutils non supporting sve: maybe you also need to add

> aarch64_asm_sve_ok as in the other testcase?


It should be OK.  aarch64_sve_hw is supposed to imply aarch64_asm_sve_ok,
since it needs to both assemble and run SVE code.

Thanks,
Richard


2018-03-14  Richard Sandiford  <richard.sandiford@linaro.org>

gcc/testsuite/
	* gcc.target/aarch64/sve/mul_highpart_1_run.c: Restrict to
	aarch64_sve_hw.

Index: gcc/testsuite/gcc.target/aarch64/sve/mul_highpart_1_run.c
===================================================================
--- gcc/testsuite/gcc.target/aarch64/sve/mul_highpart_1_run.c	2018-03-13 15:11:55.402370138 +0000
+++ gcc/testsuite/gcc.target/aarch64/sve/mul_highpart_1_run.c	2018-03-14 09:06:36.946848641 +0000
@@ -1,4 +1,4 @@
-/* { dg-do run } */
+/* { dg-do run { target aarch64_sve_hw } } */
 /* { dg-options "-O2 -ftree-vectorize -fno-vect-cost-model --save-temps" } */
 
 #include "mul_highpart_1.c"

Patch

Index: gcc/config/aarch64/iterators.md
===================================================================
--- gcc/config/aarch64/iterators.md	2018-01-26 15:14:35.386171048 +0000
+++ gcc/config/aarch64/iterators.md	2018-02-08 13:51:56.252511923 +0000
@@ -438,6 +438,8 @@  (define_c_enum "unspec"
     UNSPEC_ANDF		; Used in aarch64-sve.md.
     UNSPEC_IORF		; Used in aarch64-sve.md.
     UNSPEC_XORF		; Used in aarch64-sve.md.
+    UNSPEC_SMUL_HIGHPART ; Used in aarch64-sve.md.
+    UNSPEC_UMUL_HIGHPART ; Used in aarch64-sve.md.
     UNSPEC_COND_ADD	; Used in aarch64-sve.md.
     UNSPEC_COND_SUB	; Used in aarch64-sve.md.
     UNSPEC_COND_SMAX	; Used in aarch64-sve.md.
@@ -1467,6 +1469,8 @@  (define_int_iterator UNPACK [UNSPEC_UNPA
 
 (define_int_iterator UNPACK_UNSIGNED [UNSPEC_UNPACKULO UNSPEC_UNPACKUHI])
 
+(define_int_iterator MUL_HIGHPART [UNSPEC_SMUL_HIGHPART UNSPEC_UMUL_HIGHPART])
+
 (define_int_iterator SVE_COND_INT_OP [UNSPEC_COND_ADD UNSPEC_COND_SUB
 				      UNSPEC_COND_SMAX UNSPEC_COND_UMAX
 				      UNSPEC_COND_SMIN UNSPEC_COND_UMIN
@@ -1558,7 +1562,9 @@  (define_int_attr logicalf_op [(UNSPEC_AN
 (define_int_attr su [(UNSPEC_UNPACKSHI "s")
 		     (UNSPEC_UNPACKUHI "u")
 		     (UNSPEC_UNPACKSLO "s")
-		     (UNSPEC_UNPACKULO "u")])
+		     (UNSPEC_UNPACKULO "u")
+		     (UNSPEC_SMUL_HIGHPART "s")
+		     (UNSPEC_UMUL_HIGHPART "u")])
 
 (define_int_attr sur [(UNSPEC_SHADD "s") (UNSPEC_UHADD "u")
 		      (UNSPEC_SRHADD "sr") (UNSPEC_URHADD "ur")
Index: gcc/config/aarch64/aarch64-sve.md
===================================================================
--- gcc/config/aarch64/aarch64-sve.md	2018-02-01 11:04:16.723192040 +0000
+++ gcc/config/aarch64/aarch64-sve.md	2018-02-08 13:51:56.252511923 +0000
@@ -980,6 +980,34 @@  (define_insn "*msub<mode>3"
    mls\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>"
 )
 
+;; Unpredicated highpart multiplication.
+(define_expand "<su>mul<mode>3_highpart"
+  [(set (match_operand:SVE_I 0 "register_operand")
+	(unspec:SVE_I
+	  [(match_dup 3)
+	   (unspec:SVE_I [(match_operand:SVE_I 1 "register_operand")
+			  (match_operand:SVE_I 2 "register_operand")]
+			 MUL_HIGHPART)]
+	  UNSPEC_MERGE_PTRUE))]
+  "TARGET_SVE"
+  {
+    operands[3] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
+  }
+)
+
+;; Predicated highpart multiplication.
+(define_insn "*<su>mul<mode>3_highpart"
+  [(set (match_operand:SVE_I 0 "register_operand" "=w")
+	(unspec:SVE_I
+	  [(match_operand:<VPRED> 1 "register_operand" "Upl")
+	   (unspec:SVE_I [(match_operand:SVE_I 2 "register_operand" "%0")
+			  (match_operand:SVE_I 3 "register_operand" "w")]
+			 MUL_HIGHPART)]
+	  UNSPEC_MERGE_PTRUE))]
+  "TARGET_SVE"
+  "<su>mulh\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
+)
+
 ;; Unpredicated NEG, NOT and POPCOUNT.
 (define_expand "<optab><mode>2"
   [(set (match_operand:SVE_I 0 "register_operand")
Index: gcc/testsuite/gcc.target/aarch64/sve/mul_highpart_1.c
===================================================================
--- /dev/null	2018-02-08 11:17:10.862716283 +0000
+++ gcc/testsuite/gcc.target/aarch64/sve/mul_highpart_1.c	2018-02-08 13:51:56.252511923 +0000
@@ -0,0 +1,25 @@ 
+/* { dg-do assemble { target aarch64_asm_sve_ok } } */
+/* { dg-options "-O2 -ftree-vectorize -fno-vect-cost-model --save-temps" } */
+
+#include <stdint.h>
+
+#define DEF_LOOP(TYPE)				\
+void __attribute__ ((noipa))			\
+mod_##TYPE (TYPE *dst, TYPE *src, int count)	\
+{						\
+  for (int i = 0; i < count; ++i)		\
+    dst[i] = src[i] % 17;			\
+}
+
+#define TEST_ALL(T) \
+  T (int32_t) \
+  T (uint32_t) \
+  T (int64_t) \
+  T (uint64_t)
+
+TEST_ALL (DEF_LOOP)
+
+/* { dg-final { scan-assembler-times {\tsmulh\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tumulh\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tsmulh\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tumulh\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 1 } } */
Index: gcc/testsuite/gcc.target/aarch64/sve/mul_highpart_1_run.c
===================================================================
--- /dev/null	2018-02-08 11:17:10.862716283 +0000
+++ gcc/testsuite/gcc.target/aarch64/sve/mul_highpart_1_run.c	2018-02-08 13:51:56.253511883 +0000
@@ -0,0 +1,29 @@ 
+/* { dg-do run } */
+/* { dg-options "-O2 -ftree-vectorize -fno-vect-cost-model --save-temps" } */
+
+#include "mul_highpart_1.c"
+
+#define N 79
+
+#define TEST_LOOP(TYPE)				\
+  {						\
+    TYPE dst[N], src[N];			\
+    for (int i = 0; i < N; ++i)			\
+      {						\
+	src[i] = i * 7 + i % 3;			\
+	if (i % 11 > 7)				\
+	  src[i] = -src[i];			\
+	asm volatile ("" ::: "memory");		\
+      }						\
+    mod_##TYPE (dst, src, N);			\
+    for (int i = 0; i < N; ++i)			\
+      if (dst[i] != src[i] % 17)		\
+	__builtin_abort ();			\
+  }
+
+int
+main (void)
+{
+  TEST_ALL (TEST_LOOP);
+  return 0;
+}