diff mbox series

[SVE] PR88839

Message ID CAAgBjMnXtt8b4icE=usPtTdyHEe6YY+21nbJw0Uf0L=dBc9Lzg@mail.gmail.com
State New
Headers show
Series [SVE] PR88839 | expand

Commit Message

Prathamesh Kulkarni Aug. 20, 2019, 6:32 p.m. UTC
Hi,
The attached patch is a fix for PR88839 ported from sve-acle-branch.
OK to commit to trunk ?

Thanks,
Prathamesh
2019-08-21  Prathamesh Kulkarni  <prathamesh.kulkarni@linaro.org>
	    Richard Sandiford  <richard.sandiford@arm.com>

	PR target/88839
	* config/aarch64/aarch64.c (aarch64_evpc_sel): New function.
	(aarch64_expand_vec_perm_const_1): Call aarch64_evpc_sel.

testsuite/
	* gcc.target/aarch64/sve/sel_1.c: New test.
	* gcc.target/aarch64/sve/sel_2.c: Likewise.
	* gcc.target/aarch64/sve/sel_3.c: Likewise.
	* gcc.target/aarch64/sve/sel_4.c: Likewise.
	* gcc.target/aarch64/sve/sel_5.c: Likewise.
	* gcc.target/aarch64/sve/sel_6.c: Likewise.

Comments

Richard Sandiford Aug. 21, 2019, 9:48 a.m. UTC | #1
Prathamesh Kulkarni <prathamesh.kulkarni@linaro.org> writes:
> Hi,

> The attached patch is a fix for PR88839 ported from sve-acle-branch.

> OK to commit to trunk ?

>

> Thanks,

> Prathamesh

>

> 2019-08-21  Prathamesh Kulkarni  <prathamesh.kulkarni@linaro.org>

> 	    Richard Sandiford  <richard.sandiford@arm.com>

>

> 	PR target/88839

> 	* config/aarch64/aarch64.c (aarch64_evpc_sel): New function.

> 	(aarch64_expand_vec_perm_const_1): Call aarch64_evpc_sel.

>

> testsuite/

> 	* gcc.target/aarch64/sve/sel_1.c: New test.

> 	* gcc.target/aarch64/sve/sel_2.c: Likewise.

> 	* gcc.target/aarch64/sve/sel_3.c: Likewise.

> 	* gcc.target/aarch64/sve/sel_4.c: Likewise.

> 	* gcc.target/aarch64/sve/sel_5.c: Likewise.

> 	* gcc.target/aarch64/sve/sel_6.c: Likewise.

>

> diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c

> index ec60e972f5f..f8d5270b982 100644

> --- a/gcc/config/aarch64/aarch64.c

> +++ b/gcc/config/aarch64/aarch64.c

> @@ -16632,6 +16632,50 @@ aarch64_evpc_sve_tbl (struct expand_vec_perm_d *d)

>    return true;

>  }

>  

> +/* Try to implement D using SVE SEL instruction.  */

> +

> +static bool

> +aarch64_evpc_sel (struct expand_vec_perm_d *d)

> +{

> +  machine_mode vmode = d->vmode;

> +  int nunits = GET_MODE_UNIT_SIZE (vmode);


Sorry for not noticing last time, but this should be "unit_size"
rather than "nunits".

OK with that change, thanks.

Richard
Prathamesh Kulkarni Aug. 21, 2019, 11:47 a.m. UTC | #2
On Wed, 21 Aug 2019 at 15:18, Richard Sandiford
<richard.sandiford@arm.com> wrote:
>

> Prathamesh Kulkarni <prathamesh.kulkarni@linaro.org> writes:

> > Hi,

> > The attached patch is a fix for PR88839 ported from sve-acle-branch.

> > OK to commit to trunk ?

> >

> > Thanks,

> > Prathamesh

> >

> > 2019-08-21  Prathamesh Kulkarni  <prathamesh.kulkarni@linaro.org>

> >           Richard Sandiford  <richard.sandiford@arm.com>

> >

> >       PR target/88839

> >       * config/aarch64/aarch64.c (aarch64_evpc_sel): New function.

> >       (aarch64_expand_vec_perm_const_1): Call aarch64_evpc_sel.

> >

> > testsuite/

> >       * gcc.target/aarch64/sve/sel_1.c: New test.

> >       * gcc.target/aarch64/sve/sel_2.c: Likewise.

> >       * gcc.target/aarch64/sve/sel_3.c: Likewise.

> >       * gcc.target/aarch64/sve/sel_4.c: Likewise.

> >       * gcc.target/aarch64/sve/sel_5.c: Likewise.

> >       * gcc.target/aarch64/sve/sel_6.c: Likewise.

> >

> > diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c

> > index ec60e972f5f..f8d5270b982 100644

> > --- a/gcc/config/aarch64/aarch64.c

> > +++ b/gcc/config/aarch64/aarch64.c

> > @@ -16632,6 +16632,50 @@ aarch64_evpc_sve_tbl (struct expand_vec_perm_d *d)

> >    return true;

> >  }

> >

> > +/* Try to implement D using SVE SEL instruction.  */

> > +

> > +static bool

> > +aarch64_evpc_sel (struct expand_vec_perm_d *d)

> > +{

> > +  machine_mode vmode = d->vmode;

> > +  int nunits = GET_MODE_UNIT_SIZE (vmode);

>

> Sorry for not noticing last time, but this should be "unit_size"

> rather than "nunits".

Oops, sorry about that.
>

> OK with that change, thanks.

Thanks, will commit the patch after validating the patch on trunk.

Thanks,
Prathamesh
>

> Richard
diff mbox series

Patch

diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index ec60e972f5f..f8d5270b982 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -16632,6 +16632,63 @@  aarch64_evpc_sve_tbl (struct expand_vec_perm_d *d)
   return true;
 }
 
+/* Try to implement D using the SVE SEL instruction.
+
+   SEL takes each result element from the same-numbered element of one of
+   its two data inputs, so D is accepted only if result element I selects
+   either index I (element I of D->op0) or index LENGTH + I (element I of
+   D->op1), where LENGTH is the number of elements in D->perm.  Return
+   true if D has that form.  Unless D->testing_p, also emit the governing
+   predicate and the select that writes D->target.  */
+
+static bool
+aarch64_evpc_sel (struct expand_vec_perm_d *d)
+{
+  machine_mode vmode = d->vmode;
+  int unit_size = GET_MODE_UNIT_SIZE (vmode);
+
+  if (d->vec_flags != VEC_SVE_DATA
+      || unit_size > 8)
+    return false;
+
+  int n_patterns = d->perm.encoding ().npatterns ();
+  poly_int64 vec_len = d->perm.length ();
+
+  /* The first element of each pattern must be lane-preserving.  */
+  for (int i = 0; i < n_patterns; ++i)
+    if (!known_eq (d->perm[i], i)
+	&& !known_eq (d->perm[i], vec_len + i))
+      return false;
+
+  /* From the second element of each pattern onwards, the indices must
+     stay lane-preserving and must all come from the same input.  */
+  for (int i = n_patterns; i < n_patterns * 2; i++)
+    if (!d->perm.series_p (i, n_patterns, i, n_patterns)
+	&& !d->perm.series_p (i, n_patterns, vec_len + i, n_patterns))
+      return false;
+
+  if (d->testing_p)
+    return true;
+
+  machine_mode pred_mode = aarch64_sve_pred_mode (unit_size).require ();
+
+  /* Encode the predicate from the first two elements of each pattern;
+     it is true for the elements that come from D->op0.  */
+  rtx_vector_builder builder (pred_mode, n_patterns, 2);
+  for (int i = 0; i < n_patterns * 2; i++)
+    {
+      rtx elem = known_eq (d->perm[i], i) ? CONST1_RTX (BImode)
+					  : CONST0_RTX (BImode);
+      builder.quick_push (elem);
+    }
+
+  rtx const_vec = builder.build ();
+  rtx pred = force_reg (pred_mode, const_vec);
+  /* TARGET = PRED ? OP0 : OP1.  */
+  emit_insn (gen_vcond_mask (vmode, vmode, d->target, d->op0, d->op1, pred));
+  return true;
+}
+
 static bool
 aarch64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
 {
@@ -16664,6 +16721,8 @@  aarch64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
 	return true;
       else if (aarch64_evpc_trn (d))
 	return true;
+      else if (aarch64_evpc_sel (d))
+	return true;
       if (d->vec_flags == VEC_SVE_DATA)
 	return aarch64_evpc_sve_tbl (d);
       else if (d->vec_flags == VEC_ADVSIMD)
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/sel_1.c b/gcc/testsuite/gcc.target/aarch64/sve/sel_1.c
new file mode 100644
index 00000000000..e651e5b93b6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/sel_1.c
@@ -0,0 +1,27 @@ 
+/* { dg-do assemble { target aarch64_asm_sve_ok } } */
+/* { dg-options "-O2 -msve-vector-bits=256 --save-temps" } */
+
+#include <stdint.h>
+
+typedef int8_t vnx16qi __attribute__((vector_size (32)));
+
+/* Predicate vector: 1 0 1 0 ... */
+
+#define MASK_32		{ 0, 33, 2, 35, 4, 37, 6, 39, 8, 41,			\
+			  10, 43, 12, 45, 14, 47, 16, 49, 18, 51, 		\
+			  20, 53, 22, 55, 24, 57, 26, 59, 28, 61, 30, 63 }
+
+#define INDEX_32 vnx16qi
+
+#define PERMUTE(type, nunits)						\
+type permute_##type (type x, type y)					\
+{									\
+  return __builtin_shuffle (x, y, (INDEX_##nunits) MASK_##nunits);	\
+}
+
+PERMUTE(vnx16qi, 32)
+
+/* { dg-final { scan-assembler-not {\ttbl\t} } } */
+
+/* { dg-final { scan-assembler-times {\tsel\tz[0-9]+\.b, p[0-9]+, z[0-9]+\.b, z[0-9]+\.b\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.h, vl16\n} 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/sel_2.c b/gcc/testsuite/gcc.target/aarch64/sve/sel_2.c
new file mode 100644
index 00000000000..05391474a92
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/sel_2.c
@@ -0,0 +1,41 @@ 
+/* { dg-do assemble { target aarch64_asm_sve_ok } } */
+/* { dg-options "-O2 -msve-vector-bits=256 --save-temps" } */
+
+#include <stdint.h>
+
+typedef int8_t vnx16qi __attribute__((vector_size (32)));
+typedef int16_t vnx8hi __attribute__((vector_size (32)));
+typedef int32_t vnx4si __attribute__((vector_size (32)));
+
+typedef _Float16 vnx8hf __attribute__((vector_size (32)));
+typedef float vnx4sf __attribute__((vector_size (32)));
+
+/* Predicate vector: 1 0 0 0 ... */
+
+#define MASK_32		{ 0, 33, 34, 35, 4, 37, 38, 39, 8, 41, 42, 43, 12,		\
+			  45, 46, 47, 16, 49, 50, 51, 20, 53, 54, 55, 24,		\
+			  57, 58, 59, 28, 61, 62, 63 } 
+
+/* Predicate vector: 1 0 1 0 ... */
+
+#define MASK_16		{0, 17, 2, 19, 4, 21, 6, 23, 8, 25, 10, 27, 12, 29, 14, 31}
+
+#define INDEX_32 vnx16qi
+#define INDEX_16 vnx8hi
+
+#define PERMUTE(type, nunits)						\
+type permute_##type (type x, type y)					\
+{									\
+  return __builtin_shuffle (x, y, (INDEX_##nunits) MASK_##nunits);	\
+}
+
+PERMUTE(vnx16qi, 32)
+PERMUTE(vnx8hi, 16)
+PERMUTE(vnx8hf, 16)
+
+/* { dg-final { scan-assembler-not {\ttbl\t} } } */
+
+/* { dg-final { scan-assembler-times {\tsel\tz[0-9]+\.b, p[0-9]+, z[0-9]+\.b, z[0-9]+\.b\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tsel\tz[0-9]+\.h, p[0-9]+, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */
+
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.s, vl8\n} 3 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/sel_3.c b/gcc/testsuite/gcc.target/aarch64/sve/sel_3.c
new file mode 100644
index 00000000000..a87492d9df1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/sel_3.c
@@ -0,0 +1,50 @@ 
+/* { dg-do assemble { target aarch64_asm_sve_ok } } */
+/* { dg-options "-O2 -msve-vector-bits=256 --save-temps" } */
+
+#include <stdint.h>
+
+typedef int8_t vnx16qi __attribute__((vector_size (32)));
+typedef int16_t vnx8hi __attribute__((vector_size (32)));
+typedef int32_t vnx4si __attribute__((vector_size (32)));
+typedef _Float16 vnx8hf __attribute__((vector_size (32)));
+typedef float vnx4sf __attribute__((vector_size (32)));
+
+/* Predicate vector: 1 0 0 0 0 0 0 0 ... */
+
+#define MASK_32		{ 0, 33, 34, 35, 36, 37, 38, 39,  \
+			  8, 41, 42, 43, 44, 45, 46, 47,  \
+			  16, 49, 50, 51, 52, 53, 54, 55, \
+			  24, 57, 58, 59, 60, 61, 62, 63  }
+
+/* Predicate vector: 1 0 0 0 ... */
+
+#define MASK_16		{ 0, 17, 18, 19, 4, 21, 22, 23, \
+			  8, 25, 26, 27, 12, 29, 30, 31 } 
+
+/* Predicate vector: 1 0 ... */
+
+#define MASK_8		{ 0, 9, 2, 11, 4, 13, 6, 15 }
+
+#define INDEX_32 vnx16qi
+#define INDEX_16 vnx8hi
+#define INDEX_8 vnx4si
+
+#define PERMUTE(type, nunits)						\
+type permute_##type (type x, type y)					\
+{									\
+  return __builtin_shuffle (x, y, (INDEX_##nunits) MASK_##nunits);	\
+}
+
+PERMUTE(vnx16qi, 32)
+PERMUTE(vnx8hi, 16)
+PERMUTE(vnx4si, 8)
+PERMUTE(vnx8hf, 16)
+PERMUTE(vnx4sf, 8)
+
+/* { dg-final { scan-assembler-not {\ttbl\t} } } */
+
+/* { dg-final { scan-assembler-times {\tsel\tz[0-9]+\.b, p[0-9]+, z[0-9]+\.b, z[0-9]+\.b\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tsel\tz[0-9]+\.h, p[0-9]+, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tsel\tz[0-9]+\.s, p[0-9]+, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */
+
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.d, vl4\n} 5 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/sel_4.c b/gcc/testsuite/gcc.target/aarch64/sve/sel_4.c
new file mode 100644
index 00000000000..e9bbc5527d3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/sel_4.c
@@ -0,0 +1,50 @@ 
+/* { dg-do assemble { target aarch64_asm_sve_ok } } */
+/* { dg-options "-O2 -msve-vector-bits=256 --save-temps" } */
+
+#include <stdint.h>
+
+typedef int8_t vnx16qi __attribute__((vector_size (32)));
+typedef int16_t vnx8hi __attribute__((vector_size (32)));
+typedef int32_t vnx4si __attribute__((vector_size (32)));
+typedef int64_t vnx2di __attribute__((vector_size (32)));
+
+typedef _Float16 vnx8hf __attribute__((vector_size (32)));
+typedef float vnx4sf __attribute__((vector_size (32)));
+typedef double vnx2df __attribute__((vector_size (32)));
+
+/* Predicate vector: 1 1 0 0 ... */
+
+#define MASK_32		{ 0, 1, 34, 35, 4, 5, 38, 39, 8, 9, 42, 43, 12, 13,	\
+			  46, 47, 16, 17, 50, 51, 20, 21, 54, 55, 24, 25,	\
+			  58, 59, 28, 29, 62, 63 } 
+
+#define MASK_16		{0, 1, 18, 19, 4, 5, 22, 23, 8, 9, 26, 27, 12, 13, 30, 31} 
+#define MASK_8		{0, 1, 10, 11, 4, 5, 14, 15} 
+#define MASK_4		{0, 1, 6, 7}
+
+#define INDEX_32 vnx16qi
+#define INDEX_16 vnx8hi
+#define INDEX_8 vnx4si
+#define INDEX_4 vnx2di
+
+#define PERMUTE(type, nunits)						\
+type permute_##type (type x, type y)					\
+{									\
+  return __builtin_shuffle (x, y, (INDEX_##nunits) MASK_##nunits);	\
+}
+
+PERMUTE(vnx16qi, 32)
+PERMUTE(vnx8hi, 16)
+PERMUTE(vnx4si, 8)
+PERMUTE(vnx2di, 4)
+
+PERMUTE(vnx8hf, 16)
+PERMUTE(vnx4sf, 8)
+PERMUTE(vnx2df, 4)
+
+/* { dg-final { scan-assembler-not {\ttbl\t} } } */
+
+/* { dg-final { scan-assembler-times {\tsel\tz[0-9]+\.b, p[0-9]+, z[0-9]+\.b, z[0-9]+\.b\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tsel\tz[0-9]+\.h, p[0-9]+, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tsel\tz[0-9]+\.s, p[0-9]+, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tsel\tz[0-9]+\.d, p[0-9]+, z[0-9]+\.d, z[0-9]+\.d\n} 2 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/sel_5.c b/gcc/testsuite/gcc.target/aarch64/sve/sel_5.c
new file mode 100644
index 00000000000..935abb54dd1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/sel_5.c
@@ -0,0 +1,50 @@ 
+/* { dg-do assemble { target aarch64_asm_sve_ok } } */
+/* { dg-options "-O2 -msve-vector-bits=256 --save-temps" } */
+
+#include <stdint.h>
+
+typedef int8_t vnx16qi __attribute__((vector_size (32)));
+typedef int16_t vnx8hi __attribute__((vector_size (32)));
+typedef int32_t vnx4si __attribute__((vector_size (32)));
+typedef int64_t vnx2di __attribute__((vector_size (32)));
+
+typedef _Float16 vnx8hf __attribute__((vector_size (32)));
+typedef float vnx4sf __attribute__((vector_size (32)));
+typedef double vnx2df __attribute__((vector_size (32)));
+
+/* Predicate vector: 1 0 0 1 ... */
+
+#define MASK_32		{ 0, 33, 34, 3, 4, 37, 38, 7, 8, 41, 42, 11, 12, 45, 46,	\
+			  15, 16, 49, 50, 19, 20, 53, 54, 23, 24, 57, 58, 27, 28,	\
+			  61, 62, 31 } 
+
+#define MASK_16		{0, 17, 18, 3, 4, 21, 22, 7, 8, 25, 26, 11, 12, 29, 30, 15}
+#define MASK_8		{0, 9, 10, 3, 4, 13, 14, 7} 
+#define MASK_4		{0, 5, 6, 3}
+
+#define INDEX_32 vnx16qi
+#define INDEX_16 vnx8hi
+#define INDEX_8 vnx4si
+#define INDEX_4 vnx2di
+
+#define PERMUTE(type, nunits)						\
+type permute_##type (type x, type y)					\
+{									\
+  return __builtin_shuffle (x, y, (INDEX_##nunits) MASK_##nunits);	\
+}
+
+PERMUTE(vnx16qi, 32)
+PERMUTE(vnx8hi, 16)
+PERMUTE(vnx4si, 8)
+PERMUTE(vnx2di, 4)
+
+PERMUTE(vnx8hf, 16)
+PERMUTE(vnx4sf, 8)
+PERMUTE(vnx2df, 4)
+
+/* { dg-final { scan-assembler-not {\ttbl\t} } } */
+
+/* { dg-final { scan-assembler-times {\tsel\tz[0-9]+\.b, p[0-9]+, z[0-9]+\.b, z[0-9]+\.b\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tsel\tz[0-9]+\.h, p[0-9]+, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tsel\tz[0-9]+\.s, p[0-9]+, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tsel\tz[0-9]+\.d, p[0-9]+, z[0-9]+\.d, z[0-9]+\.d\n} 2 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/sel_6.c b/gcc/testsuite/gcc.target/aarch64/sve/sel_6.c
new file mode 100644
index 00000000000..772938f68a7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/sel_6.c
@@ -0,0 +1,42 @@ 
+/* { dg-do assemble { target aarch64_asm_sve_ok } } */
+/* { dg-options "-O2 -msve-vector-bits=256 --save-temps" } */
+
+#include <stdint.h>
+
+typedef int32_t vnx4si __attribute__((vector_size (32)));
+typedef int64_t vnx2di __attribute__((vector_size (32)));
+
+typedef float vnx4sf __attribute__((vector_size (32)));
+typedef double vnx2df __attribute__((vector_size (32)));
+
+/* Predicate vector: 1 0 0 0 ... */
+
+#define MASK_32		{ 0, 33, 34, 35, 4, 37, 38, 39, 8, 41, 42, 43, 12,		\
+			  45, 46, 47, 16, 49, 50, 51, 20, 53, 54, 55, 24,		\
+			  57, 58, 59, 28, 61, 62, 63 } 
+
+#define MASK_16		{0, 17, 18, 19, 4, 21, 22, 23, 8, 25, 26, 27, 12, 29, 30, 31} 
+#define MASK_8		{0, 9, 10, 11, 4, 13, 14, 15}
+#define MASK_4		{0, 5, 6, 7}
+
+#define INDEX_8 vnx4si
+#define INDEX_4 vnx2di
+
+#define PERMUTE(type, nunits)						\
+type permute_##type (type x, type y)					\
+{									\
+  return __builtin_shuffle (x, y, (INDEX_##nunits) MASK_##nunits);	\
+}
+
+PERMUTE(vnx4si, 8)
+PERMUTE(vnx2di, 4)
+
+PERMUTE(vnx4sf, 8)
+PERMUTE(vnx2df, 4)
+
+/* { dg-final { scan-assembler-not {\ttbl\t} } } */
+
+/* { dg-final { scan-assembler-times {\tsel\tz[0-9]+\.s, p[0-9]+, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tsel\tz[0-9]+\.d, p[0-9]+, z[0-9]+\.d, z[0-9]+\.d\n} 2 } } */
+
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.d, vl4\n} 2 } } */