diff mbox series

[01/nn,AArch64] Generate permute patterns using rtx builders

Message ID 87y3nwbu8w.fsf@linaro.org
State New
Headers show
Series [01/nn,AArch64] Generate permute patterns using rtx builders | expand

Commit Message

Richard Sandiford Oct. 27, 2017, 1:22 p.m. UTC
This patch replaces switch statements that call specific generator
functions with code that constructs the rtl pattern directly.
This seemed to scale better to SVE and also seems less error-prone.

As a side-effect, the patch fixes the REV handling for diff==1,
vmode==E_V4HFmode and adds missing support for diff==3,
vmode==E_V4HFmode.

To compensate for the lack of switches that check for specific modes,
the patch makes aarch64_expand_vec_perm_const_1 reject permutes on
single-element vectors (specifically V1DImode).


2017-10-27  Richard Sandiford  <richard.sandiford@linaro.org>
	    Alan Hayward  <alan.hayward@arm.com>
	    David Sherwood  <david.sherwood@arm.com>

gcc/
	* config/aarch64/aarch64.c (aarch64_evpc_trn, aarch64_evpc_uzp)
	(aarch64_evpc_zip, aarch64_evpc_ext, aarch64_evpc_rev)
	(aarch64_evpc_dup): Generate rtl direcly, rather than using
	named expanders.
	(aarch64_expand_vec_perm_const_1): Explicitly check for permutes
	of a single element.

Comments

James Greenhalgh Oct. 31, 2017, 5:59 p.m. UTC | #1
On Fri, Oct 27, 2017 at 02:22:39PM +0100, Richard Sandiford wrote:
> This patch replaces switch statements that call specific generator

> functions with code that constructs the rtl pattern directly.

> This seemed to scale better to SVE and also seems less error-prone.

> 

> As a side-effect, the patch fixes the REV handling for diff==1,

> vmode==E_V4HFmode and adds missing support for diff==3,

> vmode==E_V4HFmode.

> 

> To compensate for the lack of switches that check for specific modes,

> the patch makes aarch64_expand_vec_perm_const_1 reject permutes on

> single-element vectors (specifically V1DImode).


OK.

Would you mind placing a comment somewhere near both the unspecs, and the
patterns using these unspecs to warn that the calls constructing the
RTX here *MUST* be kept in sync?

Some of these patterns are probably used rarely enough that we could easily
miss an unreconizable insn.

Reviewed-by: James Greenhalgh <james.greenhalgh@arm.com>


Thanks,
James

> 

> 

> 2017-10-27  Richard Sandiford  <richard.sandiford@linaro.org>

> 	    Alan Hayward  <alan.hayward@arm.com>

> 	    David Sherwood  <david.sherwood@arm.com>

> 

> gcc/

> 	* config/aarch64/aarch64.c (aarch64_evpc_trn, aarch64_evpc_uzp)

> 	(aarch64_evpc_zip, aarch64_evpc_ext, aarch64_evpc_rev)

> 	(aarch64_evpc_dup): Generate rtl direcly, rather than using

> 	named expanders.

> 	(aarch64_expand_vec_perm_const_1): Explicitly check for permutes

> 	of a single element.

>
Richard Sandiford Nov. 2, 2017, 9:03 a.m. UTC | #2
James Greenhalgh <james.greenhalgh@arm.com> writes:
> On Fri, Oct 27, 2017 at 02:22:39PM +0100, Richard Sandiford wrote:

>> This patch replaces switch statements that call specific generator

>> functions with code that constructs the rtl pattern directly.

>> This seemed to scale better to SVE and also seems less error-prone.

>> 

>> As a side-effect, the patch fixes the REV handling for diff==1,

>> vmode==E_V4HFmode and adds missing support for diff==3,

>> vmode==E_V4HFmode.

>> 

>> To compensate for the lack of switches that check for specific modes,

>> the patch makes aarch64_expand_vec_perm_const_1 reject permutes on

>> single-element vectors (specifically V1DImode).

>

> OK.

>

> Would you mind placing a comment somewhere near both the unspecs, and the

> patterns using these unspecs to warn that the calls constructing the

> RTX here *MUST* be kept in sync?


OK, here's what I committed.

Thanks for the reviews,

Richard


2017-11-01  Richard Sandiford  <richard.sandiford@linaro.org>
	    Alan Hayward  <alan.hayward@arm.com>
	    David Sherwood  <david.sherwood@arm.com>

gcc/
	* config/aarch64/aarch64.c (aarch64_evpc_trn, aarch64_evpc_uzp)
	(aarch64_evpc_zip, aarch64_evpc_ext, aarch64_evpc_rev)
	(aarch64_evpc_dup): Generate rtl direcly, rather than using
	named expanders.
	(aarch64_expand_vec_perm_const_1): Explicitly check for permutes
	of a single element.
	* config/aarch64/iterators.md: Add a comment above the permute
	unspecs to say that they are generated directly by
	aarch64_expand_vec_perm_const.
	* config/aarch64/aarch64-simd.md: Likewise the permute instructions.

Index: gcc/config/aarch64/aarch64.c
===================================================================
--- gcc/config/aarch64/aarch64.c	2017-11-01 09:20:07.343478870 +0000
+++ gcc/config/aarch64/aarch64.c	2017-11-01 20:35:54.431165938 +0000
@@ -13263,7 +13263,6 @@ aarch64_evpc_trn (struct expand_vec_perm
 {
   unsigned int i, odd, mask, nelt = d->perm.length ();
   rtx out, in0, in1, x;
-  rtx (*gen) (rtx, rtx, rtx);
   machine_mode vmode = d->vmode;
 
   if (GET_MODE_UNIT_SIZE (vmode) > 8)
@@ -13300,48 +13299,8 @@ aarch64_evpc_trn (struct expand_vec_perm
     }
   out = d->target;
 
-  if (odd)
-    {
-      switch (vmode)
-	{
-	case E_V16QImode: gen = gen_aarch64_trn2v16qi; break;
-	case E_V8QImode: gen = gen_aarch64_trn2v8qi; break;
-	case E_V8HImode: gen = gen_aarch64_trn2v8hi; break;
-	case E_V4HImode: gen = gen_aarch64_trn2v4hi; break;
-	case E_V4SImode: gen = gen_aarch64_trn2v4si; break;
-	case E_V2SImode: gen = gen_aarch64_trn2v2si; break;
-	case E_V2DImode: gen = gen_aarch64_trn2v2di; break;
-	case E_V4HFmode: gen = gen_aarch64_trn2v4hf; break;
-	case E_V8HFmode: gen = gen_aarch64_trn2v8hf; break;
-	case E_V4SFmode: gen = gen_aarch64_trn2v4sf; break;
-	case E_V2SFmode: gen = gen_aarch64_trn2v2sf; break;
-	case E_V2DFmode: gen = gen_aarch64_trn2v2df; break;
-	default:
-	  return false;
-	}
-    }
-  else
-    {
-      switch (vmode)
-	{
-	case E_V16QImode: gen = gen_aarch64_trn1v16qi; break;
-	case E_V8QImode: gen = gen_aarch64_trn1v8qi; break;
-	case E_V8HImode: gen = gen_aarch64_trn1v8hi; break;
-	case E_V4HImode: gen = gen_aarch64_trn1v4hi; break;
-	case E_V4SImode: gen = gen_aarch64_trn1v4si; break;
-	case E_V2SImode: gen = gen_aarch64_trn1v2si; break;
-	case E_V2DImode: gen = gen_aarch64_trn1v2di; break;
-	case E_V4HFmode: gen = gen_aarch64_trn1v4hf; break;
-	case E_V8HFmode: gen = gen_aarch64_trn1v8hf; break;
-	case E_V4SFmode: gen = gen_aarch64_trn1v4sf; break;
-	case E_V2SFmode: gen = gen_aarch64_trn1v2sf; break;
-	case E_V2DFmode: gen = gen_aarch64_trn1v2df; break;
-	default:
-	  return false;
-	}
-    }
-
-  emit_insn (gen (out, in0, in1));
+  emit_set_insn (out, gen_rtx_UNSPEC (vmode, gen_rtvec (2, in0, in1),
+				      odd ? UNSPEC_TRN2 : UNSPEC_TRN1));
   return true;
 }
 
@@ -13351,7 +13310,6 @@ aarch64_evpc_uzp (struct expand_vec_perm
 {
   unsigned int i, odd, mask, nelt = d->perm.length ();
   rtx out, in0, in1, x;
-  rtx (*gen) (rtx, rtx, rtx);
   machine_mode vmode = d->vmode;
 
   if (GET_MODE_UNIT_SIZE (vmode) > 8)
@@ -13387,48 +13345,8 @@ aarch64_evpc_uzp (struct expand_vec_perm
     }
   out = d->target;
 
-  if (odd)
-    {
-      switch (vmode)
-	{
-	case E_V16QImode: gen = gen_aarch64_uzp2v16qi; break;
-	case E_V8QImode: gen = gen_aarch64_uzp2v8qi; break;
-	case E_V8HImode: gen = gen_aarch64_uzp2v8hi; break;
-	case E_V4HImode: gen = gen_aarch64_uzp2v4hi; break;
-	case E_V4SImode: gen = gen_aarch64_uzp2v4si; break;
-	case E_V2SImode: gen = gen_aarch64_uzp2v2si; break;
-	case E_V2DImode: gen = gen_aarch64_uzp2v2di; break;
-	case E_V4HFmode: gen = gen_aarch64_uzp2v4hf; break;
-	case E_V8HFmode: gen = gen_aarch64_uzp2v8hf; break;
-	case E_V4SFmode: gen = gen_aarch64_uzp2v4sf; break;
-	case E_V2SFmode: gen = gen_aarch64_uzp2v2sf; break;
-	case E_V2DFmode: gen = gen_aarch64_uzp2v2df; break;
-	default:
-	  return false;
-	}
-    }
-  else
-    {
-      switch (vmode)
-	{
-	case E_V16QImode: gen = gen_aarch64_uzp1v16qi; break;
-	case E_V8QImode: gen = gen_aarch64_uzp1v8qi; break;
-	case E_V8HImode: gen = gen_aarch64_uzp1v8hi; break;
-	case E_V4HImode: gen = gen_aarch64_uzp1v4hi; break;
-	case E_V4SImode: gen = gen_aarch64_uzp1v4si; break;
-	case E_V2SImode: gen = gen_aarch64_uzp1v2si; break;
-	case E_V2DImode: gen = gen_aarch64_uzp1v2di; break;
-	case E_V4HFmode: gen = gen_aarch64_uzp1v4hf; break;
-	case E_V8HFmode: gen = gen_aarch64_uzp1v8hf; break;
-	case E_V4SFmode: gen = gen_aarch64_uzp1v4sf; break;
-	case E_V2SFmode: gen = gen_aarch64_uzp1v2sf; break;
-	case E_V2DFmode: gen = gen_aarch64_uzp1v2df; break;
-	default:
-	  return false;
-	}
-    }
-
-  emit_insn (gen (out, in0, in1));
+  emit_set_insn (out, gen_rtx_UNSPEC (vmode, gen_rtvec (2, in0, in1),
+				      odd ? UNSPEC_UZP2 : UNSPEC_UZP1));
   return true;
 }
 
@@ -13438,7 +13356,6 @@ aarch64_evpc_zip (struct expand_vec_perm
 {
   unsigned int i, high, mask, nelt = d->perm.length ();
   rtx out, in0, in1, x;
-  rtx (*gen) (rtx, rtx, rtx);
   machine_mode vmode = d->vmode;
 
   if (GET_MODE_UNIT_SIZE (vmode) > 8)
@@ -13479,48 +13396,8 @@ aarch64_evpc_zip (struct expand_vec_perm
     }
   out = d->target;
 
-  if (high)
-    {
-      switch (vmode)
-	{
-	case E_V16QImode: gen = gen_aarch64_zip2v16qi; break;
-	case E_V8QImode: gen = gen_aarch64_zip2v8qi; break;
-	case E_V8HImode: gen = gen_aarch64_zip2v8hi; break;
-	case E_V4HImode: gen = gen_aarch64_zip2v4hi; break;
-	case E_V4SImode: gen = gen_aarch64_zip2v4si; break;
-	case E_V2SImode: gen = gen_aarch64_zip2v2si; break;
-	case E_V2DImode: gen = gen_aarch64_zip2v2di; break;
-	case E_V4HFmode: gen = gen_aarch64_zip2v4hf; break;
-	case E_V8HFmode: gen = gen_aarch64_zip2v8hf; break;
-	case E_V4SFmode: gen = gen_aarch64_zip2v4sf; break;
-	case E_V2SFmode: gen = gen_aarch64_zip2v2sf; break;
-	case E_V2DFmode: gen = gen_aarch64_zip2v2df; break;
-	default:
-	  return false;
-	}
-    }
-  else
-    {
-      switch (vmode)
-	{
-	case E_V16QImode: gen = gen_aarch64_zip1v16qi; break;
-	case E_V8QImode: gen = gen_aarch64_zip1v8qi; break;
-	case E_V8HImode: gen = gen_aarch64_zip1v8hi; break;
-	case E_V4HImode: gen = gen_aarch64_zip1v4hi; break;
-	case E_V4SImode: gen = gen_aarch64_zip1v4si; break;
-	case E_V2SImode: gen = gen_aarch64_zip1v2si; break;
-	case E_V2DImode: gen = gen_aarch64_zip1v2di; break;
-	case E_V4HFmode: gen = gen_aarch64_zip1v4hf; break;
-	case E_V8HFmode: gen = gen_aarch64_zip1v8hf; break;
-	case E_V4SFmode: gen = gen_aarch64_zip1v4sf; break;
-	case E_V2SFmode: gen = gen_aarch64_zip1v2sf; break;
-	case E_V2DFmode: gen = gen_aarch64_zip1v2df; break;
-	default:
-	  return false;
-	}
-    }
-
-  emit_insn (gen (out, in0, in1));
+  emit_set_insn (out, gen_rtx_UNSPEC (vmode, gen_rtvec (2, in0, in1),
+				      high ? UNSPEC_ZIP2 : UNSPEC_ZIP1));
   return true;
 }
 
@@ -13530,7 +13407,6 @@ aarch64_evpc_zip (struct expand_vec_perm
 aarch64_evpc_ext (struct expand_vec_perm_d *d)
 {
   unsigned int i, nelt = d->perm.length ();
-  rtx (*gen) (rtx, rtx, rtx, rtx);
   rtx offset;
 
   unsigned int location = d->perm[0]; /* Always < nelt.  */
@@ -13548,24 +13424,6 @@ aarch64_evpc_ext (struct expand_vec_perm
         return false;
     }
 
-  switch (d->vmode)
-    {
-    case E_V16QImode: gen = gen_aarch64_extv16qi; break;
-    case E_V8QImode: gen = gen_aarch64_extv8qi; break;
-    case E_V4HImode: gen = gen_aarch64_extv4hi; break;
-    case E_V8HImode: gen = gen_aarch64_extv8hi; break;
-    case E_V2SImode: gen = gen_aarch64_extv2si; break;
-    case E_V4SImode: gen = gen_aarch64_extv4si; break;
-    case E_V4HFmode: gen = gen_aarch64_extv4hf; break;
-    case E_V8HFmode: gen = gen_aarch64_extv8hf; break;
-    case E_V2SFmode: gen = gen_aarch64_extv2sf; break;
-    case E_V4SFmode: gen = gen_aarch64_extv4sf; break;
-    case E_V2DImode: gen = gen_aarch64_extv2di; break;
-    case E_V2DFmode: gen = gen_aarch64_extv2df; break;
-    default:
-      return false;
-    }
-
   /* Success! */
   if (d->testing_p)
     return true;
@@ -13584,7 +13442,10 @@ aarch64_evpc_ext (struct expand_vec_perm
     }
 
   offset = GEN_INT (location);
-  emit_insn (gen (d->target, d->op0, d->op1, offset));
+  emit_set_insn (d->target,
+		 gen_rtx_UNSPEC (d->vmode,
+				 gen_rtvec (3, d->op0, d->op1, offset),
+				 UNSPEC_EXT));
   return true;
 }
 
@@ -13593,55 +13454,21 @@ aarch64_evpc_ext (struct expand_vec_perm
 static bool
 aarch64_evpc_rev (struct expand_vec_perm_d *d)
 {
-  unsigned int i, j, diff, nelt = d->perm.length ();
-  rtx (*gen) (rtx, rtx);
+  unsigned int i, j, diff, size, unspec, nelt = d->perm.length ();
 
   if (!d->one_vector_p)
     return false;
 
   diff = d->perm[0];
-  switch (diff)
-    {
-    case 7:
-      switch (d->vmode)
-	{
-	case E_V16QImode: gen = gen_aarch64_rev64v16qi; break;
-	case E_V8QImode: gen = gen_aarch64_rev64v8qi;  break;
-	default:
-	  return false;
-	}
-      break;
-    case 3:
-      switch (d->vmode)
-	{
-	case E_V16QImode: gen = gen_aarch64_rev32v16qi; break;
-	case E_V8QImode: gen = gen_aarch64_rev32v8qi;  break;
-	case E_V8HImode: gen = gen_aarch64_rev64v8hi;  break;
-	case E_V4HImode: gen = gen_aarch64_rev64v4hi;  break;
-	default:
-	  return false;
-	}
-      break;
-    case 1:
-      switch (d->vmode)
-	{
-	case E_V16QImode: gen = gen_aarch64_rev16v16qi; break;
-	case E_V8QImode: gen = gen_aarch64_rev16v8qi;  break;
-	case E_V8HImode: gen = gen_aarch64_rev32v8hi;  break;
-	case E_V4HImode: gen = gen_aarch64_rev32v4hi;  break;
-	case E_V4SImode: gen = gen_aarch64_rev64v4si;  break;
-	case E_V2SImode: gen = gen_aarch64_rev64v2si;  break;
-	case E_V4SFmode: gen = gen_aarch64_rev64v4sf;  break;
-	case E_V2SFmode: gen = gen_aarch64_rev64v2sf;  break;
-	case E_V8HFmode: gen = gen_aarch64_rev64v8hf;  break;
-	case E_V4HFmode: gen = gen_aarch64_rev64v4hf;  break;
-	default:
-	  return false;
-	}
-      break;
-    default:
-      return false;
-    }
+  size = (diff + 1) * GET_MODE_UNIT_SIZE (d->vmode);
+  if (size == 8)
+    unspec = UNSPEC_REV64;
+  else if (size == 4)
+    unspec = UNSPEC_REV32;
+  else if (size == 2)
+    unspec = UNSPEC_REV16;
+  else
+    return false;
 
   for (i = 0; i < nelt ; i += diff + 1)
     for (j = 0; j <= diff; j += 1)
@@ -13660,14 +13487,14 @@ aarch64_evpc_rev (struct expand_vec_perm
   if (d->testing_p)
     return true;
 
-  emit_insn (gen (d->target, d->op0));
+  emit_set_insn (d->target, gen_rtx_UNSPEC (d->vmode, gen_rtvec (1, d->op0),
+					    unspec));
   return true;
 }
 
 static bool
 aarch64_evpc_dup (struct expand_vec_perm_d *d)
 {
-  rtx (*gen) (rtx, rtx, rtx);
   rtx out = d->target;
   rtx in0;
   machine_mode vmode = d->vmode;
@@ -13689,25 +13516,9 @@ aarch64_evpc_dup (struct expand_vec_perm
   in0 = d->op0;
   lane = GEN_INT (elt); /* The pattern corrects for big-endian.  */
 
-  switch (vmode)
-    {
-    case E_V16QImode: gen = gen_aarch64_dup_lanev16qi; break;
-    case E_V8QImode: gen = gen_aarch64_dup_lanev8qi; break;
-    case E_V8HImode: gen = gen_aarch64_dup_lanev8hi; break;
-    case E_V4HImode: gen = gen_aarch64_dup_lanev4hi; break;
-    case E_V4SImode: gen = gen_aarch64_dup_lanev4si; break;
-    case E_V2SImode: gen = gen_aarch64_dup_lanev2si; break;
-    case E_V2DImode: gen = gen_aarch64_dup_lanev2di; break;
-    case E_V8HFmode: gen = gen_aarch64_dup_lanev8hf; break;
-    case E_V4HFmode: gen = gen_aarch64_dup_lanev4hf; break;
-    case E_V4SFmode: gen = gen_aarch64_dup_lanev4sf; break;
-    case E_V2SFmode: gen = gen_aarch64_dup_lanev2sf; break;
-    case E_V2DFmode: gen = gen_aarch64_dup_lanev2df; break;
-    default:
-      return false;
-    }
-
-  emit_insn (gen (out, in0, lane));
+  rtx parallel = gen_rtx_PARALLEL (vmode, gen_rtvec (1, lane));
+  rtx select = gen_rtx_VEC_SELECT (GET_MODE_INNER (vmode), in0, parallel);
+  emit_set_insn (out, gen_rtx_VEC_DUPLICATE (vmode, select));
   return true;
 }
 
@@ -13760,7 +13571,7 @@ aarch64_expand_vec_perm_const_1 (struct
       std::swap (d->op0, d->op1);
     }
 
-  if (TARGET_SIMD)
+  if (TARGET_SIMD && nelt > 1)
     {
       if (aarch64_evpc_rev (d))
 	return true;
Index: gcc/config/aarch64/iterators.md
===================================================================
--- gcc/config/aarch64/iterators.md	2017-11-01 08:07:13.560976713 +0000
+++ gcc/config/aarch64/iterators.md	2017-11-01 20:35:54.431165938 +0000
@@ -322,16 +322,21 @@ (define_c_enum "unspec"
     UNSPEC_TBL		; Used in vector permute patterns.
     UNSPEC_TBX		; Used in vector permute patterns.
     UNSPEC_CONCAT	; Used in vector permute patterns.
+
+    ;; The following permute unspecs are generated directly by
+    ;; aarch64_expand_vec_perm_const, so any changes to the underlying
+    ;; instructions would need a corresponding change there.
     UNSPEC_ZIP1		; Used in vector permute patterns.
     UNSPEC_ZIP2		; Used in vector permute patterns.
     UNSPEC_UZP1		; Used in vector permute patterns.
     UNSPEC_UZP2		; Used in vector permute patterns.
     UNSPEC_TRN1		; Used in vector permute patterns.
     UNSPEC_TRN2		; Used in vector permute patterns.
-    UNSPEC_EXT		; Used in aarch64-simd.md.
+    UNSPEC_EXT		; Used in vector permute patterns.
     UNSPEC_REV64	; Used in vector reverse patterns (permute).
     UNSPEC_REV32	; Used in vector reverse patterns (permute).
     UNSPEC_REV16	; Used in vector reverse patterns (permute).
+
     UNSPEC_AESE		; Used in aarch64-simd.md.
     UNSPEC_AESD         ; Used in aarch64-simd.md.
     UNSPEC_AESMC        ; Used in aarch64-simd.md.
Index: gcc/config/aarch64/aarch64-simd.md
===================================================================
--- gcc/config/aarch64/aarch64-simd.md	2017-11-01 08:07:13.561934013 +0000
+++ gcc/config/aarch64/aarch64-simd.md	2017-11-01 20:35:54.427167006 +0000
@@ -5369,6 +5369,9 @@ (define_insn_and_split "aarch64_combinev
 [(set_attr "type" "multiple")]
 )
 
+;; This instruction's pattern is generated directly by
+;; aarch64_expand_vec_perm_const, so any changes to the pattern would
+;; need corresponding changes there.
 (define_insn "aarch64_<PERMUTE:perm_insn><PERMUTE:perm_hilo><mode>"
   [(set (match_operand:VALL_F16 0 "register_operand" "=w")
 	(unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")
@@ -5379,7 +5382,10 @@ (define_insn "aarch64_<PERMUTE:perm_insn
   [(set_attr "type" "neon_permute<q>")]
 )
 
-;; Note immediate (third) operand is lane index not byte index.
+;; This instruction's pattern is generated directly by
+;; aarch64_expand_vec_perm_const, so any changes to the pattern would
+;; need corresponding changes there.  Note that the immediate (third)
+;; operand is a lane index not a byte index.
 (define_insn "aarch64_ext<mode>"
   [(set (match_operand:VALL_F16 0 "register_operand" "=w")
         (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")
@@ -5395,6 +5401,9 @@ (define_insn "aarch64_ext<mode>"
   [(set_attr "type" "neon_ext<q>")]
 )
 
+;; This instruction's pattern is generated directly by
+;; aarch64_expand_vec_perm_const, so any changes to the pattern would
+;; need corresponding changes there.
 (define_insn "aarch64_rev<REVERSE:rev_op><mode>"
   [(set (match_operand:VALL_F16 0 "register_operand" "=w")
 	(unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")]
diff mbox series

Patch

Index: gcc/config/aarch64/aarch64.c
===================================================================
--- gcc/config/aarch64/aarch64.c	2017-10-27 14:10:08.337833963 +0100
+++ gcc/config/aarch64/aarch64.c	2017-10-27 14:10:14.622293803 +0100
@@ -13475,7 +13475,6 @@  aarch64_evpc_trn (struct expand_vec_perm
 {
   unsigned int i, odd, mask, nelt = d->perm.length ();
   rtx out, in0, in1, x;
-  rtx (*gen) (rtx, rtx, rtx);
   machine_mode vmode = d->vmode;
 
   if (GET_MODE_UNIT_SIZE (vmode) > 8)
@@ -13512,48 +13511,8 @@  aarch64_evpc_trn (struct expand_vec_perm
     }
   out = d->target;
 
-  if (odd)
-    {
-      switch (vmode)
-	{
-	case E_V16QImode: gen = gen_aarch64_trn2v16qi; break;
-	case E_V8QImode: gen = gen_aarch64_trn2v8qi; break;
-	case E_V8HImode: gen = gen_aarch64_trn2v8hi; break;
-	case E_V4HImode: gen = gen_aarch64_trn2v4hi; break;
-	case E_V4SImode: gen = gen_aarch64_trn2v4si; break;
-	case E_V2SImode: gen = gen_aarch64_trn2v2si; break;
-	case E_V2DImode: gen = gen_aarch64_trn2v2di; break;
-	case E_V4HFmode: gen = gen_aarch64_trn2v4hf; break;
-	case E_V8HFmode: gen = gen_aarch64_trn2v8hf; break;
-	case E_V4SFmode: gen = gen_aarch64_trn2v4sf; break;
-	case E_V2SFmode: gen = gen_aarch64_trn2v2sf; break;
-	case E_V2DFmode: gen = gen_aarch64_trn2v2df; break;
-	default:
-	  return false;
-	}
-    }
-  else
-    {
-      switch (vmode)
-	{
-	case E_V16QImode: gen = gen_aarch64_trn1v16qi; break;
-	case E_V8QImode: gen = gen_aarch64_trn1v8qi; break;
-	case E_V8HImode: gen = gen_aarch64_trn1v8hi; break;
-	case E_V4HImode: gen = gen_aarch64_trn1v4hi; break;
-	case E_V4SImode: gen = gen_aarch64_trn1v4si; break;
-	case E_V2SImode: gen = gen_aarch64_trn1v2si; break;
-	case E_V2DImode: gen = gen_aarch64_trn1v2di; break;
-	case E_V4HFmode: gen = gen_aarch64_trn1v4hf; break;
-	case E_V8HFmode: gen = gen_aarch64_trn1v8hf; break;
-	case E_V4SFmode: gen = gen_aarch64_trn1v4sf; break;
-	case E_V2SFmode: gen = gen_aarch64_trn1v2sf; break;
-	case E_V2DFmode: gen = gen_aarch64_trn1v2df; break;
-	default:
-	  return false;
-	}
-    }
-
-  emit_insn (gen (out, in0, in1));
+  emit_set_insn (out, gen_rtx_UNSPEC (vmode, gen_rtvec (2, in0, in1),
+				      odd ? UNSPEC_TRN2 : UNSPEC_TRN1));
   return true;
 }
 
@@ -13563,7 +13522,6 @@  aarch64_evpc_uzp (struct expand_vec_perm
 {
   unsigned int i, odd, mask, nelt = d->perm.length ();
   rtx out, in0, in1, x;
-  rtx (*gen) (rtx, rtx, rtx);
   machine_mode vmode = d->vmode;
 
   if (GET_MODE_UNIT_SIZE (vmode) > 8)
@@ -13599,48 +13557,8 @@  aarch64_evpc_uzp (struct expand_vec_perm
     }
   out = d->target;
 
-  if (odd)
-    {
-      switch (vmode)
-	{
-	case E_V16QImode: gen = gen_aarch64_uzp2v16qi; break;
-	case E_V8QImode: gen = gen_aarch64_uzp2v8qi; break;
-	case E_V8HImode: gen = gen_aarch64_uzp2v8hi; break;
-	case E_V4HImode: gen = gen_aarch64_uzp2v4hi; break;
-	case E_V4SImode: gen = gen_aarch64_uzp2v4si; break;
-	case E_V2SImode: gen = gen_aarch64_uzp2v2si; break;
-	case E_V2DImode: gen = gen_aarch64_uzp2v2di; break;
-	case E_V4HFmode: gen = gen_aarch64_uzp2v4hf; break;
-	case E_V8HFmode: gen = gen_aarch64_uzp2v8hf; break;
-	case E_V4SFmode: gen = gen_aarch64_uzp2v4sf; break;
-	case E_V2SFmode: gen = gen_aarch64_uzp2v2sf; break;
-	case E_V2DFmode: gen = gen_aarch64_uzp2v2df; break;
-	default:
-	  return false;
-	}
-    }
-  else
-    {
-      switch (vmode)
-	{
-	case E_V16QImode: gen = gen_aarch64_uzp1v16qi; break;
-	case E_V8QImode: gen = gen_aarch64_uzp1v8qi; break;
-	case E_V8HImode: gen = gen_aarch64_uzp1v8hi; break;
-	case E_V4HImode: gen = gen_aarch64_uzp1v4hi; break;
-	case E_V4SImode: gen = gen_aarch64_uzp1v4si; break;
-	case E_V2SImode: gen = gen_aarch64_uzp1v2si; break;
-	case E_V2DImode: gen = gen_aarch64_uzp1v2di; break;
-	case E_V4HFmode: gen = gen_aarch64_uzp1v4hf; break;
-	case E_V8HFmode: gen = gen_aarch64_uzp1v8hf; break;
-	case E_V4SFmode: gen = gen_aarch64_uzp1v4sf; break;
-	case E_V2SFmode: gen = gen_aarch64_uzp1v2sf; break;
-	case E_V2DFmode: gen = gen_aarch64_uzp1v2df; break;
-	default:
-	  return false;
-	}
-    }
-
-  emit_insn (gen (out, in0, in1));
+  emit_set_insn (out, gen_rtx_UNSPEC (vmode, gen_rtvec (2, in0, in1),
+				      odd ? UNSPEC_UZP2 : UNSPEC_UZP1));
   return true;
 }
 
@@ -13650,7 +13568,6 @@  aarch64_evpc_zip (struct expand_vec_perm
 {
   unsigned int i, high, mask, nelt = d->perm.length ();
   rtx out, in0, in1, x;
-  rtx (*gen) (rtx, rtx, rtx);
   machine_mode vmode = d->vmode;
 
   if (GET_MODE_UNIT_SIZE (vmode) > 8)
@@ -13691,48 +13608,8 @@  aarch64_evpc_zip (struct expand_vec_perm
     }
   out = d->target;
 
-  if (high)
-    {
-      switch (vmode)
-	{
-	case E_V16QImode: gen = gen_aarch64_zip2v16qi; break;
-	case E_V8QImode: gen = gen_aarch64_zip2v8qi; break;
-	case E_V8HImode: gen = gen_aarch64_zip2v8hi; break;
-	case E_V4HImode: gen = gen_aarch64_zip2v4hi; break;
-	case E_V4SImode: gen = gen_aarch64_zip2v4si; break;
-	case E_V2SImode: gen = gen_aarch64_zip2v2si; break;
-	case E_V2DImode: gen = gen_aarch64_zip2v2di; break;
-	case E_V4HFmode: gen = gen_aarch64_zip2v4hf; break;
-	case E_V8HFmode: gen = gen_aarch64_zip2v8hf; break;
-	case E_V4SFmode: gen = gen_aarch64_zip2v4sf; break;
-	case E_V2SFmode: gen = gen_aarch64_zip2v2sf; break;
-	case E_V2DFmode: gen = gen_aarch64_zip2v2df; break;
-	default:
-	  return false;
-	}
-    }
-  else
-    {
-      switch (vmode)
-	{
-	case E_V16QImode: gen = gen_aarch64_zip1v16qi; break;
-	case E_V8QImode: gen = gen_aarch64_zip1v8qi; break;
-	case E_V8HImode: gen = gen_aarch64_zip1v8hi; break;
-	case E_V4HImode: gen = gen_aarch64_zip1v4hi; break;
-	case E_V4SImode: gen = gen_aarch64_zip1v4si; break;
-	case E_V2SImode: gen = gen_aarch64_zip1v2si; break;
-	case E_V2DImode: gen = gen_aarch64_zip1v2di; break;
-	case E_V4HFmode: gen = gen_aarch64_zip1v4hf; break;
-	case E_V8HFmode: gen = gen_aarch64_zip1v8hf; break;
-	case E_V4SFmode: gen = gen_aarch64_zip1v4sf; break;
-	case E_V2SFmode: gen = gen_aarch64_zip1v2sf; break;
-	case E_V2DFmode: gen = gen_aarch64_zip1v2df; break;
-	default:
-	  return false;
-	}
-    }
-
-  emit_insn (gen (out, in0, in1));
+  emit_set_insn (out, gen_rtx_UNSPEC (vmode, gen_rtvec (2, in0, in1),
+				      high ? UNSPEC_ZIP2 : UNSPEC_ZIP1));
   return true;
 }
 
@@ -13742,7 +13619,6 @@  aarch64_evpc_zip (struct expand_vec_perm
 aarch64_evpc_ext (struct expand_vec_perm_d *d)
 {
   unsigned int i, nelt = d->perm.length ();
-  rtx (*gen) (rtx, rtx, rtx, rtx);
   rtx offset;
 
   unsigned int location = d->perm[0]; /* Always < nelt.  */
@@ -13760,24 +13636,6 @@  aarch64_evpc_ext (struct expand_vec_perm
         return false;
     }
 
-  switch (d->vmode)
-    {
-    case E_V16QImode: gen = gen_aarch64_extv16qi; break;
-    case E_V8QImode: gen = gen_aarch64_extv8qi; break;
-    case E_V4HImode: gen = gen_aarch64_extv4hi; break;
-    case E_V8HImode: gen = gen_aarch64_extv8hi; break;
-    case E_V2SImode: gen = gen_aarch64_extv2si; break;
-    case E_V4SImode: gen = gen_aarch64_extv4si; break;
-    case E_V4HFmode: gen = gen_aarch64_extv4hf; break;
-    case E_V8HFmode: gen = gen_aarch64_extv8hf; break;
-    case E_V2SFmode: gen = gen_aarch64_extv2sf; break;
-    case E_V4SFmode: gen = gen_aarch64_extv4sf; break;
-    case E_V2DImode: gen = gen_aarch64_extv2di; break;
-    case E_V2DFmode: gen = gen_aarch64_extv2df; break;
-    default:
-      return false;
-    }
-
   /* Success! */
   if (d->testing_p)
     return true;
@@ -13796,7 +13654,10 @@  aarch64_evpc_ext (struct expand_vec_perm
     }
 
   offset = GEN_INT (location);
-  emit_insn (gen (d->target, d->op0, d->op1, offset));
+  emit_set_insn (d->target,
+		 gen_rtx_UNSPEC (d->vmode,
+				 gen_rtvec (3, d->op0, d->op1, offset),
+				 UNSPEC_EXT));
   return true;
 }
 
@@ -13805,55 +13666,21 @@  aarch64_evpc_ext (struct expand_vec_perm
 static bool
 aarch64_evpc_rev (struct expand_vec_perm_d *d)
 {
-  unsigned int i, j, diff, nelt = d->perm.length ();
-  rtx (*gen) (rtx, rtx);
+  unsigned int i, j, diff, size, unspec, nelt = d->perm.length ();
 
   if (!d->one_vector_p)
     return false;
 
   diff = d->perm[0];
-  switch (diff)
-    {
-    case 7:
-      switch (d->vmode)
-	{
-	case E_V16QImode: gen = gen_aarch64_rev64v16qi; break;
-	case E_V8QImode: gen = gen_aarch64_rev64v8qi;  break;
-	default:
-	  return false;
-	}
-      break;
-    case 3:
-      switch (d->vmode)
-	{
-	case E_V16QImode: gen = gen_aarch64_rev32v16qi; break;
-	case E_V8QImode: gen = gen_aarch64_rev32v8qi;  break;
-	case E_V8HImode: gen = gen_aarch64_rev64v8hi;  break;
-	case E_V4HImode: gen = gen_aarch64_rev64v4hi;  break;
-	default:
-	  return false;
-	}
-      break;
-    case 1:
-      switch (d->vmode)
-	{
-	case E_V16QImode: gen = gen_aarch64_rev16v16qi; break;
-	case E_V8QImode: gen = gen_aarch64_rev16v8qi;  break;
-	case E_V8HImode: gen = gen_aarch64_rev32v8hi;  break;
-	case E_V4HImode: gen = gen_aarch64_rev32v4hi;  break;
-	case E_V4SImode: gen = gen_aarch64_rev64v4si;  break;
-	case E_V2SImode: gen = gen_aarch64_rev64v2si;  break;
-	case E_V4SFmode: gen = gen_aarch64_rev64v4sf;  break;
-	case E_V2SFmode: gen = gen_aarch64_rev64v2sf;  break;
-	case E_V8HFmode: gen = gen_aarch64_rev64v8hf;  break;
-	case E_V4HFmode: gen = gen_aarch64_rev64v4hf;  break;
-	default:
-	  return false;
-	}
-      break;
-    default:
-      return false;
-    }
+  size = (diff + 1) * GET_MODE_UNIT_SIZE (d->vmode);
+  if (size == 8)
+    unspec = UNSPEC_REV64;
+  else if (size == 4)
+    unspec = UNSPEC_REV32;
+  else if (size == 2)
+    unspec = UNSPEC_REV16;
+  else
+    return false;
 
   for (i = 0; i < nelt ; i += diff + 1)
     for (j = 0; j <= diff; j += 1)
@@ -13872,14 +13699,14 @@  aarch64_evpc_rev (struct expand_vec_perm
   if (d->testing_p)
     return true;
 
-  emit_insn (gen (d->target, d->op0));
+  emit_set_insn (d->target, gen_rtx_UNSPEC (d->vmode, gen_rtvec (1, d->op0),
+					    unspec));
   return true;
 }
 
 static bool
 aarch64_evpc_dup (struct expand_vec_perm_d *d)
 {
-  rtx (*gen) (rtx, rtx, rtx);
   rtx out = d->target;
   rtx in0;
   machine_mode vmode = d->vmode;
@@ -13901,25 +13728,9 @@  aarch64_evpc_dup (struct expand_vec_perm
   in0 = d->op0;
   lane = GEN_INT (elt); /* The pattern corrects for big-endian.  */
 
-  switch (vmode)
-    {
-    case E_V16QImode: gen = gen_aarch64_dup_lanev16qi; break;
-    case E_V8QImode: gen = gen_aarch64_dup_lanev8qi; break;
-    case E_V8HImode: gen = gen_aarch64_dup_lanev8hi; break;
-    case E_V4HImode: gen = gen_aarch64_dup_lanev4hi; break;
-    case E_V4SImode: gen = gen_aarch64_dup_lanev4si; break;
-    case E_V2SImode: gen = gen_aarch64_dup_lanev2si; break;
-    case E_V2DImode: gen = gen_aarch64_dup_lanev2di; break;
-    case E_V8HFmode: gen = gen_aarch64_dup_lanev8hf; break;
-    case E_V4HFmode: gen = gen_aarch64_dup_lanev4hf; break;
-    case E_V4SFmode: gen = gen_aarch64_dup_lanev4sf; break;
-    case E_V2SFmode: gen = gen_aarch64_dup_lanev2sf; break;
-    case E_V2DFmode: gen = gen_aarch64_dup_lanev2df; break;
-    default:
-      return false;
-    }
-
-  emit_insn (gen (out, in0, lane));
+  rtx parallel = gen_rtx_PARALLEL (vmode, gen_rtvec (1, lane));
+  rtx select = gen_rtx_VEC_SELECT (GET_MODE_INNER (vmode), in0, parallel);
+  emit_set_insn (out, gen_rtx_VEC_DUPLICATE (vmode, select));
   return true;
 }
 
@@ -13972,7 +13783,7 @@  aarch64_expand_vec_perm_const_1 (struct
       std::swap (d->op0, d->op1);
     }
 
-  if (TARGET_SIMD)
+  if (TARGET_SIMD && nelt > 1)
     {
       if (aarch64_evpc_rev (d))
 	return true;