diff mbox series

Fix folding of vector mask EQ/NE expressions

Message ID 87po8vwhpb.fsf@linaro.org
State Accepted
Commit 779fed5fdb6098e67213a82dfd27f5b326a75e88
Headers show
Series Fix folding of vector mask EQ/NE expressions | expand

Commit Message

Richard Sandiford Nov. 6, 2017, 3:27 p.m. UTC
fold_binary_loc assumed that if the type of the result wasn't a vector,
the operands wouldn't be either.  This isn't necessarily true for
EQ_EXPR and NE_EXPR of vector masks, which can return a single scalar
for the mask as a whole.

Tested on aarch64-linux-gnu, x86_64-linux-gnu and powerpc64-linux-gnu.
OK to install?

Richard


2017-11-06  Richard Sandiford  <richard.sandiford@linaro.org>
	    Alan Hayward  <alan.hayward@arm.com>
	    David Sherwood  <david.sherwood@arm.com>

gcc/
	* fold-const.c (fold_binary_loc): Check the argument types
	rather than the result type when testing for a vector operation.

gcc/testsuite/
	* gcc.target/aarch64/sve_vec_bool_cmp_1.c: New test.
	* gcc.target/aarch64/sve_vec_bool_cmp_1_run.c: Likweise.

Comments

Marc Glisse Nov. 6, 2017, 7:19 p.m. UTC | #1
On Mon, 6 Nov 2017, Richard Sandiford wrote:

> fold_binary_loc assumed that if the type of the result wasn't a vector,

> the operands wouldn't be either.  This isn't necessarily true for

> EQ_EXPR and NE_EXPR of vector masks, which can return a single scalar

> for the mask as a whole.


Spell it VECTOR_TYPE_P? This looks clearly better, but that whole block of 
code smells like something that should disappear eventually...

-- 
Marc Glisse
Richard Biener Nov. 7, 2017, 9:54 a.m. UTC | #2
On Mon, Nov 6, 2017 at 8:19 PM, Marc Glisse <marc.glisse@inria.fr> wrote:
> On Mon, 6 Nov 2017, Richard Sandiford wrote:

>

>> fold_binary_loc assumed that if the type of the result wasn't a vector,

>> the operands wouldn't be either.  This isn't necessarily true for

>> EQ_EXPR and NE_EXPR of vector masks, which can return a single scalar

>> for the mask as a whole.

>

>

> Spell it VECTOR_TYPE_P? This looks clearly better, but that whole block of

> code smells like something that should disappear eventually...


Ok with using VECTOR_TYPE_P.

Thanks,
Richard.

> --

> Marc Glisse
diff mbox series

Patch

Index: gcc/fold-const.c
===================================================================
--- gcc/fold-const.c	2017-11-06 15:26:35.441288554 +0000
+++ gcc/fold-const.c	2017-11-06 15:26:35.595423552 +0000
@@ -9392,7 +9392,7 @@  fold_binary_loc (location_t loc,
 
   if ((code == BIT_AND_EXPR || code == BIT_IOR_EXPR
        || code == EQ_EXPR || code == NE_EXPR)
-      && TREE_CODE (type) != VECTOR_TYPE
+      && TREE_CODE (TREE_TYPE (arg0)) != VECTOR_TYPE
       && ((truth_value_p (TREE_CODE (arg0))
 	   && (truth_value_p (TREE_CODE (arg1))
 	       || (TREE_CODE (arg1) == BIT_AND_EXPR
Index: gcc/testsuite/gcc.target/aarch64/sve_vec_bool_cmp_1.c
===================================================================
--- /dev/null	2017-11-03 22:04:05.605699023 +0000
+++ gcc/testsuite/gcc.target/aarch64/sve_vec_bool_cmp_1.c	2017-11-06 15:26:35.595423552 +0000
@@ -0,0 +1,40 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */
+
+#include <stdint.h>
+#include <stdbool.h>
+
+#define VEC_BOOL(NAME, OP, VARTYPE, INDUCTYPE)				\
+void __attribute__ ((noinline, noclone))				\
+vec_bool_##NAME##_##VARTYPE##_##INDUCTYPE (VARTYPE *dst, VARTYPE *src,	\
+					   INDUCTYPE start,		\
+					   INDUCTYPE n,			\
+					   INDUCTYPE mask)		\
+{									\
+  for (INDUCTYPE i = 0; i < n; i++)					\
+    {									\
+      bool lhs = i >= start;						\
+      bool rhs = (i & mask) != 0x3D;					\
+      if (lhs OP rhs)							\
+        dst[i] = src[i];						\
+    }									\
+}
+
+#define TEST_OP(T, NAME, OP)			\
+  T (NAME, OP, uint8_t, uint8_t)		\
+  T (NAME, OP, uint16_t, uint16_t)		\
+  T (NAME, OP, uint32_t, uint32_t)		\
+  T (NAME, OP, uint64_t, uint64_t)		\
+  T (NAME, OP, float, uint32_t)			\
+  T (NAME, OP, double, uint64_t)
+
+#define TEST_ALL(T)				\
+  TEST_OP (T, cmpeq, ==)			\
+  TEST_OP (T, cmpne, !=)
+
+TEST_ALL (VEC_BOOL)
+
+/* Both cmpne and cmpeq loops will contain an exclusive predicate or.  */
+/* { dg-final { scan-assembler-times {\teors?\tp[0-9]*\.b, p[0-7]/z, p[0-9]*\.b, p[0-9]*\.b\n} 12 } } */
+/* cmpeq will also contain a predicate not operation.  */
+/* { dg-final { scan-assembler-times {\tnot\tp[0-9]*\.b, p[0-7]/z, p[0-9]*\.b\n} 6 } } */
Index: gcc/testsuite/gcc.target/aarch64/sve_vec_bool_cmp_1_run.c
===================================================================
--- /dev/null	2017-11-03 22:04:05.605699023 +0000
+++ gcc/testsuite/gcc.target/aarch64/sve_vec_bool_cmp_1_run.c	2017-11-06 15:26:35.595423552 +0000
@@ -0,0 +1,37 @@ 
+/* { dg-do run { target { aarch64_sve_hw } } } */
+/* { dg-options "-O3 -fno-inline -march=armv8-a+sve" } */
+
+#include "sve_vec_bool_cmp_1.c"
+
+#define N 103
+
+#define TEST_VEC_BOOL(NAME, OP, VARTYPE, INDUCTYPE)		\
+{								\
+  INDUCTYPE i;							\
+  VARTYPE src[N];						\
+  VARTYPE dst[N];						\
+  for (i = 0; i < N; i++)					\
+    {								\
+      src[i] = i;						\
+      dst[i] = i * 2;						\
+      asm volatile ("" ::: "memory");				\
+    }								\
+  vec_bool_##NAME##_##VARTYPE##_##INDUCTYPE (dst, src, 13,	\
+					     97, 0xFF);		\
+  for (i = 0; i < 13; i++)					\
+    if (dst[i] != (VARTYPE) (0 OP 1 ? i : i * 2))		\
+      __builtin_abort ();					\
+  for (i = 13; i < 97; i++)					\
+    if (dst[i] != (VARTYPE) (1 OP (i != 0x3D) ? i : i * 2))	\
+      __builtin_abort ();					\
+  for (i = 97; i < N; i++)					\
+    if (dst[i] != (i * 2))					\
+      __builtin_abort ();					\
+}
+
+int __attribute__ ((optimize (1)))
+main ()
+{
+  TEST_ALL (TEST_VEC_BOOL)
+  return 0;
+}