[committed,AArch64] Add SVE support for integer division

Message ID: 874liwcei8.fsf@linaro.org
State: New
Headers show
Series
  • [committed,AArch64] Add SVE support for integer division
Related show

Commit Message

Richard Sandiford May 25, 2018, 8:42 a.m.
After the previous patch to prevent pessimisation of divisions
by constants, this patch adds support for the SVE integer division
instructions.

Tested on aarch64-linux-gnu (with and without SVE) and aarch64_be-elf.
Committed as r260712.

Richard


2018-05-25  Richard Sandiford  <richard.sandiford@linaro.org>

gcc/
	* config/aarch64/iterators.md (SVE_INT_BINARY_SD): New code iterator.
	(optab, sve_int_op): Handle div and udiv.
	* config/aarch64/aarch64-sve.md (<optab><mode>3): New expander
	for SVE_INT_BINARY_SD.
	(*<optab><mode>3): New insn for the same.

gcc/testsuite/
	* gcc.target/aarch64/sve/div_1.c: New test.
	* gcc.target/aarch64/sve/div_1_run.c: Likewise.
	* gcc.target/aarch64/sve/mul_highpart_2.c: Likewise.
	* gcc.target/aarch64/sve/mul_highpart_2_run.c: Likewise.

Patch

Index: gcc/config/aarch64/iterators.md
===================================================================
--- gcc/config/aarch64/iterators.md	2018-05-25 09:08:34.079950643 +0100
+++ gcc/config/aarch64/iterators.md	2018-05-25 09:28:55.825421996 +0100
@@ -1207,6 +1207,8 @@  (define_code_iterator SVE_INT_BINARY [pl
 
 (define_code_iterator SVE_INT_BINARY_REV [minus])
 
+(define_code_iterator SVE_INT_BINARY_SD [div udiv])
+
 ;; SVE integer comparisons.
 (define_code_iterator SVE_INT_CMP [lt le eq ne ge gt ltu leu geu gtu])
 
@@ -1237,6 +1239,8 @@  (define_code_attr optab [(ashift "ashl")
 			 (neg "neg")
 			 (plus "add")
 			 (minus "sub")
+			 (div "div")
+			 (udiv "udiv")
 			 (ss_plus "qadd")
 			 (us_plus "qadd")
 			 (ss_minus "qsub")
@@ -1378,6 +1382,8 @@  (define_mode_attr lconst_atomic [(QI "K"
 ;; The integer SVE instruction that implements an rtx code.
 (define_code_attr sve_int_op [(plus "add")
 			      (minus "sub")
+			      (div "sdiv")
+			      (udiv "udiv")
 			      (neg "neg")
 			      (smin "smin")
 			      (smax "smax")
Index: gcc/config/aarch64/aarch64-sve.md
===================================================================
--- gcc/config/aarch64/aarch64-sve.md	2018-05-25 09:08:34.077950721 +0100
+++ gcc/config/aarch64/aarch64-sve.md	2018-05-25 09:28:55.825421996 +0100
@@ -1008,6 +1008,36 @@  (define_insn "*<su>mul<mode>3_highpart"
   "<su>mulh\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
 )
 
+;; Unpredicated division.
+(define_expand "<optab><mode>3"
+  [(set (match_operand:SVE_SDI 0 "register_operand")
+	(unspec:SVE_SDI
+	  [(match_dup 3)
+	   (SVE_INT_BINARY_SD:SVE_SDI
+	     (match_operand:SVE_SDI 1 "register_operand")
+	     (match_operand:SVE_SDI 2 "register_operand"))]
+	  UNSPEC_MERGE_PTRUE))]
+  "TARGET_SVE"
+  {
+    operands[3] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
+  }
+)
+
+;; Division predicated with a PTRUE.
+(define_insn "*<optab><mode>3"
+  [(set (match_operand:SVE_SDI 0 "register_operand" "=w, w")
+	(unspec:SVE_SDI
+	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
+	   (SVE_INT_BINARY_SD:SVE_SDI
+	     (match_operand:SVE_SDI 2 "register_operand" "0, w")
+	     (match_operand:SVE_SDI 3 "aarch64_sve_mul_operand" "w, 0"))]
+	  UNSPEC_MERGE_PTRUE))]
+  "TARGET_SVE"
+  "@
+   <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
+   <sve_int_op>r\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>"
+)
+
 ;; Unpredicated NEG, NOT and POPCOUNT.
 (define_expand "<optab><mode>2"
   [(set (match_operand:SVE_I 0 "register_operand")
Index: gcc/testsuite/gcc.target/aarch64/sve/div_1.c
===================================================================
--- /dev/null	2018-04-20 16:19:46.369131350 +0100
+++ gcc/testsuite/gcc.target/aarch64/sve/div_1.c	2018-05-25 09:28:55.826421957 +0100
@@ -0,0 +1,26 @@ 
+/* { dg-do assemble { target aarch64_asm_sve_ok } } */
+/* { dg-options "-O2 -ftree-vectorize -fno-vect-cost-model --save-temps" } */
+
+#include <stdint.h>
+
+#define DEF_LOOP(TYPE)					\
+void __attribute__ ((noipa))				\
+mod_##TYPE (TYPE *restrict dst, TYPE *restrict src1,	\
+	    TYPE *restrict src2, int count)		\
+{							\
+  for (int i = 0; i < count; ++i)			\
+    dst[i] = src1[i] / src2[i];				\
+}
+
+#define TEST_ALL(T) \
+  T (int32_t) \
+  T (uint32_t) \
+  T (int64_t) \
+  T (uint64_t)
+
+TEST_ALL (DEF_LOOP)
+
+/* { dg-final { scan-assembler-times {\tsdiv\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tudiv\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tsdiv\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tudiv\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 1 } } */
Index: gcc/testsuite/gcc.target/aarch64/sve/div_1_run.c
===================================================================
--- /dev/null	2018-04-20 16:19:46.369131350 +0100
+++ gcc/testsuite/gcc.target/aarch64/sve/div_1_run.c	2018-05-25 09:28:55.826421957 +0100
@@ -0,0 +1,30 @@ 
+/* { dg-do run { target aarch64_sve_hw } } */
+/* { dg-options "-O2 -ftree-vectorize -fno-vect-cost-model --save-temps" } */
+
+#include "div_1.c"
+
+#define N 79
+
+#define TEST_LOOP(TYPE)				\
+  {						\
+    TYPE dst[N], src1[N], src2[N];		\
+    for (int i = 0; i < N; ++i)			\
+      {						\
+	src1[i] = i * 7 + i % 3;		\
+	if (i % 11 > 7)				\
+	  src1[i] = -src1[i];			\
+	src2[i] = 5 + (i % 5);			\
+	asm volatile ("" ::: "memory");		\
+      }						\
+    mod_##TYPE (dst, src1, src2, N);		\
+    for (int i = 0; i < N; ++i)			\
+      if (dst[i] != src1[i] / src2[i])		\
+	__builtin_abort ();			\
+  }
+
+int
+main (void)
+{
+  TEST_ALL (TEST_LOOP);
+  return 0;
+}
Index: gcc/testsuite/gcc.target/aarch64/sve/mul_highpart_2.c
===================================================================
--- /dev/null	2018-04-20 16:19:46.369131350 +0100
+++ gcc/testsuite/gcc.target/aarch64/sve/mul_highpart_2.c	2018-05-25 09:28:55.826421957 +0100
@@ -0,0 +1,25 @@ 
+/* { dg-do assemble { target aarch64_asm_sve_ok } } */
+/* { dg-options "-O2 -ftree-vectorize -fno-vect-cost-model --save-temps" } */
+
+#include <stdint.h>
+
+#define DEF_LOOP(TYPE)				\
+void __attribute__ ((noipa))			\
+mod_##TYPE (TYPE *dst, TYPE *src, int count)	\
+{						\
+  for (int i = 0; i < count; ++i)		\
+    dst[i] = src[i] / 17;			\
+}
+
+#define TEST_ALL(T) \
+  T (int32_t) \
+  T (uint32_t) \
+  T (int64_t) \
+  T (uint64_t)
+
+TEST_ALL (DEF_LOOP)
+
+/* { dg-final { scan-assembler-times {\tsmulh\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tumulh\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tsmulh\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tumulh\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 1 } } */
Index: gcc/testsuite/gcc.target/aarch64/sve/mul_highpart_2_run.c
===================================================================
--- /dev/null	2018-04-20 16:19:46.369131350 +0100
+++ gcc/testsuite/gcc.target/aarch64/sve/mul_highpart_2_run.c	2018-05-25 09:28:55.826421957 +0100
@@ -0,0 +1,29 @@ 
+/* { dg-do run { target aarch64_sve_hw } } */
+/* { dg-options "-O2 -ftree-vectorize -fno-vect-cost-model --save-temps" } */
+
+#include "mul_highpart_2.c"
+
+#define N 79
+
+#define TEST_LOOP(TYPE)				\
+  {						\
+    TYPE dst[N], src[N];			\
+    for (int i = 0; i < N; ++i)			\
+      {						\
+	src[i] = i * 7 + i % 3;			\
+	if (i % 11 > 7)				\
+	  src[i] = -src[i];			\
+	asm volatile ("" ::: "memory");		\
+      }						\
+    mod_##TYPE (dst, src, N);			\
+    for (int i = 0; i < N; ++i)			\
+      if (dst[i] != src[i] / 17)		\
+	__builtin_abort ();			\
+  }
+
+int
+main (void)
+{
+  TEST_ALL (TEST_LOOP);
+  return 0;
+}