diff mbox

[AArch64,PR65375] Fix RTX cost for vector SET

Message ID 5530EC32.4030806@linaro.org
State New
Headers show

Commit Message

Kugan Vivekanandarajah April 17, 2015, 11:19 a.m. UTC
>> My point is that adding your patch while keeping the logic at the top
>> which claims to catch ALL vector operations makes for less readable
>> code.
>>
>> At the very least you'll need to update this comment:
>>
>>   /* TODO: The cost infrastructure currently does not handle
>>      vector operations.  Assume that all vector operations
>>      are equally expensive.  */
>>
>> to make it clear that this doesn't catch vector set operations.
>>
>> But fixing the comment doesn't improve the messy code so I'd certainly
>> prefer to see one of the other approaches which have been discussed.
> 
> I see your point. Let me work on this based on your suggestions above.

Hi James,

Here is an attempt along this line. Is this what you have in mind?
Trying to keep functionality as before so that we can tune the
parameters later. Not fully tested yet.

Thanks,
Kugan
diff mbox

Patch

diff --git a/gcc/config/aarch64/aarch64-cost-tables.h b/gcc/config/aarch64/aarch64-cost-tables.h
index ae2b547..ed9432e 100644
--- a/gcc/config/aarch64/aarch64-cost-tables.h
+++ b/gcc/config/aarch64/aarch64-cost-tables.h
@@ -121,7 +121,9 @@  const struct cpu_cost_table thunderx_extra_costs =
   },
   /* Vector */
   {
-    COSTS_N_INSNS (1)	/* Alu.  */
+    COSTS_N_INSNS (1),	/* Alu.  */
+    COSTS_N_INSNS (1),	/* Load.  */
+    COSTS_N_INSNS (1)	/* Store.  */
   }
 };
 
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index cba3c1a..c2d4a53 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -5499,16 +5499,6 @@  aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED,
      above this default.  */
   *cost = COSTS_N_INSNS (1);
 
-  /* TODO: The cost infrastructure currently does not handle
-     vector operations.  Assume that all vector operations
-     are equally expensive.  */
-  if (VECTOR_MODE_P (mode))
-    {
-      if (speed)
-	*cost += extra_cost->vect.alu;
-      return true;
-    }
-
   switch (code)
     {
     case SET:
@@ -5523,7 +5513,9 @@  aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED,
 	  if (speed)
 	    {
 	      rtx address = XEXP (op0, 0);
-	      if (GET_MODE_CLASS (mode) == MODE_INT)
+	      if (VECTOR_MODE_P (mode))
+		*cost += extra_cost->vect.store;
+	      else if (GET_MODE_CLASS (mode) == MODE_INT)
 		*cost += extra_cost->ldst.store;
 	      else if (mode == SFmode)
 		*cost += extra_cost->ldst.storef;
@@ -5544,10 +5536,17 @@  aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED,
 
 	  /* Fall through.  */
 	case REG:
+	  if (VECTOR_MODE_P (GET_MODE (op0)) && REG_P (op1))
+	    {
+              /* The cost is 1 per vector-register copied.  */
+              int n_minus_1 = (GET_MODE_SIZE (GET_MODE (op0)) - 1)
+			      / GET_MODE_SIZE (V4SImode);
+              *cost = COSTS_N_INSNS (n_minus_1 + 1);
+	    }
 	  /* const0_rtx is in general free, but we will use an
 	     instruction to set a register to 0.  */
-          if (REG_P (op1) || op1 == const0_rtx)
-            {
+	  else if (REG_P (op1) || op1 == const0_rtx)
+	    {
               /* The cost is 1 per register copied.  */
               int n_minus_1 = (GET_MODE_SIZE (GET_MODE (op0)) - 1)
 			      / UNITS_PER_WORD;
@@ -5570,6 +5569,7 @@  aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED,
 	      && (GET_MODE_BITSIZE (GET_MODE (XEXP (op1, 0)))
 		  >= INTVAL (XEXP (op0, 1))))
 	    op1 = XEXP (op1, 0);
+	  gcc_assert (!VECTOR_MODE_P (mode));
 
           if (CONST_INT_P (op1))
             {
@@ -5621,8 +5621,10 @@  aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED,
     case CONST_DOUBLE:
       if (speed)
 	{
+	  if (VECTOR_MODE_P (mode))
+	    *cost += extra_cost->vect.alu;
 	  /* mov[df,sf]_aarch64.  */
-	  if (aarch64_float_const_representable_p (x))
+	  else if (aarch64_float_const_representable_p (x))
 	    /* FMOV (scalar immediate).  */
 	    *cost += extra_cost->fp[mode == DFmode].fpconst;
 	  else if (!aarch64_float_const_zero_rtx_p (x))
@@ -5650,7 +5652,9 @@  aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED,
 	     approximation for the additional cost of the addressing
 	     mode.  */
 	  rtx address = XEXP (x, 0);
-	  if (GET_MODE_CLASS (mode) == MODE_INT)
+	  if (VECTOR_MODE_P (mode))
+	    *cost += extra_cost->vect.load;
+	  else if (GET_MODE_CLASS (mode) == MODE_INT)
 	    *cost += extra_cost->ldst.load;
 	  else if (mode == SFmode)
 	    *cost += extra_cost->ldst.loadf;
@@ -5705,7 +5709,12 @@  aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED,
     case CLRSB:
     case CLZ:
       if (speed)
-        *cost += extra_cost->alu.clz;
+	{
+	  if (VECTOR_MODE_P (mode))
+	    *cost += extra_cost->vect.alu;
+	  else
+	    *cost += extra_cost->alu.clz;
+	}
 
       return false;
 
@@ -5790,6 +5799,13 @@  aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED,
           return false;
         }
 
+      /* VCMP.  */
+      if (VECTOR_MODE_P (mode))
+	{
+	  if (speed)
+	    *cost += extra_cost->vect.alu;
+	  return true;
+	}
       return false;
 
     case MINUS:
@@ -5808,8 +5824,13 @@  cost_minus:
 	    *cost += rtx_cost (op0, MINUS, 0, speed);
 
 	    if (speed)
-	      /* SUB(S) (immediate).  */
-	      *cost += extra_cost->alu.arith;
+	      {
+		if (VECTOR_MODE_P (mode))
+		  *cost += extra_cost->vect.alu;
+		/* SUB(S) (immediate).  */
+		else
+		  *cost += extra_cost->alu.arith;
+	      }
 	    return true;
 
 	  }
@@ -5818,8 +5839,12 @@  cost_minus:
         if (aarch64_rtx_arith_op_extract_p (op1, mode))
 	  {
 	    if (speed)
-	      *cost += extra_cost->alu.arith_shift;
-
+	      {
+		if (VECTOR_MODE_P (mode))
+		  *cost += extra_cost->vect.alu;
+		else
+		  *cost += extra_cost->alu.arith_shift;
+	      }
 	    *cost += rtx_cost (XEXP (XEXP (op1, 0), 0),
 			       (enum rtx_code) GET_CODE (op1),
 			       0, speed);
@@ -5844,7 +5869,10 @@  cost_minus:
 
 	if (speed)
 	  {
-	    if (GET_MODE_CLASS (mode) == MODE_INT)
+	    if (VECTOR_MODE_P (mode))
+	      /* Vector SUB.  */
+	      *cost += extra_cost->vect.alu;
+	    else if (GET_MODE_CLASS (mode) == MODE_INT)
 	      /* SUB(S).  */
 	      *cost += extra_cost->alu.arith;
 	    else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
@@ -5878,8 +5906,13 @@  cost_plus:
 	    *cost += rtx_cost (op0, PLUS, 0, speed);
 
 	    if (speed)
-	      /* ADD (immediate).  */
-	      *cost += extra_cost->alu.arith;
+	      {
+		if (VECTOR_MODE_P (mode))
+		  *cost += extra_cost->vect.alu;
+		/* ADD (immediate).  */
+		else
+		  *cost += extra_cost->alu.arith;
+	      }
 	    return true;
 	  }
 
@@ -5887,8 +5920,12 @@  cost_plus:
         if (aarch64_rtx_arith_op_extract_p (op0, mode))
 	  {
 	    if (speed)
-	      *cost += extra_cost->alu.arith_shift;
-
+	      {
+		if (VECTOR_MODE_P (mode))
+		  *cost += extra_cost->vect.alu;
+		else
+		  *cost += extra_cost->alu.arith_shift;
+	      }
 	    *cost += rtx_cost (XEXP (XEXP (op0, 0), 0),
 			       (enum rtx_code) GET_CODE (op0),
 			       0, speed);
@@ -5913,7 +5950,10 @@  cost_plus:
 
 	if (speed)
 	  {
-	    if (GET_MODE_CLASS (mode) == MODE_INT)
+	    if (VECTOR_MODE_P (mode))
+	      /* Vector ADD.  */
+	      *cost += extra_cost->vect.alu;
+	    else if (GET_MODE_CLASS (mode) == MODE_INT)
 	      /* ADD.  */
 	      *cost += extra_cost->alu.arith;
 	    else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
@@ -5927,8 +5967,12 @@  cost_plus:
       *cost = COSTS_N_INSNS (1);
 
       if (speed)
-        *cost += extra_cost->alu.rev;
-
+	{
+	  if (VECTOR_MODE_P (mode))
+	    *cost += extra_cost->vect.alu;
+	  else
+	    *cost += extra_cost->alu.rev;
+	}
       return false;
 
     case IOR:
@@ -5936,10 +5980,14 @@  cost_plus:
         {
           *cost = COSTS_N_INSNS (1);
 
-          if (speed)
-            *cost += extra_cost->alu.rev;
-
-          return true;
+	  if (speed)
+	    {
+	      if (VECTOR_MODE_P (mode))
+		*cost += extra_cost->vect.alu;
+	      else
+		*cost += extra_cost->alu.rev;
+	    }
+	  return true;
         }
     /* Fall through.  */
     case XOR:
@@ -5948,6 +5996,13 @@  cost_plus:
       op0 = XEXP (x, 0);
       op1 = XEXP (x, 1);
 
+      if (VECTOR_MODE_P (mode))
+	{
+	  if (speed)
+	    *cost += extra_cost->vect.alu;
+	  return true;
+	}
+
       if (code == AND
           && GET_CODE (op0) == MULT
           && CONST_INT_P (XEXP (op0, 1))
@@ -6013,10 +6068,15 @@  cost_plus:
       return false;
 
     case NOT:
-      /* MVN.  */
       if (speed)
-	*cost += extra_cost->alu.logical;
-
+	{
+	  /* VNEG.  */
+	  if (VECTOR_MODE_P (mode))
+	    *cost += extra_cost->vect.alu;
+	  /* MVN.  */
+	  else
+	    *cost += extra_cost->alu.logical;
+	}
       /* The logical instruction could have the shifted register form,
          but the cost is the same if the shift is processed as a separate
          instruction, so we don't bother with it here.  */
@@ -6057,13 +6117,18 @@  cost_plus:
 
       /* UXTB/UXTH.  */
       if (speed)
-	*cost += extra_cost->alu.extend;
-
+	{
+	  if (VECTOR_MODE_P (mode))
+	    *cost += extra_cost->vect.alu;
+	  else
+	    *cost += extra_cost->alu.extend;
+	}
       return false;
 
     case SIGN_EXTEND:
       if (MEM_P (XEXP (x, 0)))
 	{
+	  gcc_assert (!VECTOR_MODE_P (mode));
 	  /* LDRSH.  */
 	  if (speed)
 	    {
@@ -6078,7 +6143,12 @@  cost_plus:
 	}
 
       if (speed)
-	*cost += extra_cost->alu.extend;
+	{
+	  if (VECTOR_MODE_P (mode))
+	    *cost += extra_cost->vect.alu;
+	  else
+	    *cost += extra_cost->alu.extend;
+	}
       return false;
 
     case ASHIFT:
@@ -6087,10 +6157,16 @@  cost_plus:
 
       if (CONST_INT_P (op1))
         {
-	  /* LSL (immediate), UBMF, UBFIZ and friends.  These are all
-	     aliases.  */
 	  if (speed)
-	    *cost += extra_cost->alu.shift;
+	    {
+	      /* VSHL (immediate).  */
+	      if (VECTOR_MODE_P (mode))
+		*cost += extra_cost->vect.alu;
+	      /* LSL (immediate), UBMF, UBFIZ and friends.  These are all
+		 aliases.  */
+	      else
+		*cost += extra_cost->alu.shift;
+	    }
 
           /* We can incorporate zero/sign extend for free.  */
           if (GET_CODE (op0) == ZERO_EXTEND
@@ -6102,10 +6178,15 @@  cost_plus:
         }
       else
         {
-	  /* LSLV.  */
 	  if (speed)
-	    *cost += extra_cost->alu.shift_reg;
-
+	    {
+	      /* VSHL (register).  */
+	      if (VECTOR_MODE_P (mode))
+		*cost += extra_cost->vect.alu;
+	      /* LSLV.  */
+	      else
+		*cost += extra_cost->alu.shift_reg;
+	    }
 	  return false;  /* All arguments need to be in registers.  */
         }
 
@@ -6120,18 +6201,27 @@  cost_plus:
 	{
 	  /* ASR (immediate) and friends.  */
 	  if (speed)
-	    *cost += extra_cost->alu.shift;
+	    {
+	      if (VECTOR_MODE_P (mode))
+		*cost += extra_cost->vect.alu;
+	      else
+		*cost += extra_cost->alu.shift;
+	    }
 
 	  *cost += rtx_cost (op0, (enum rtx_code) code, 0, speed);
 	  return true;
 	}
       else
 	{
-
-	  /* ASR (register) and friends.  */
 	  if (speed)
-	    *cost += extra_cost->alu.shift_reg;
-
+	    {
+	      /* VAHR (register).  */
+	      if (VECTOR_MODE_P (mode))
+		*cost += extra_cost->vect.alu;
+	      /* ASR (register) and friends.  */
+	      else
+		*cost += extra_cost->alu.shift_reg;
+	    }
 	  return false;  /* All arguments need to be in registers.  */
 	}
 
@@ -6179,7 +6269,12 @@  cost_plus:
     case SIGN_EXTRACT:
       /* UBFX/SBFX.  */
       if (speed)
-	*cost += extra_cost->alu.bfx;
+	{
+	  if (VECTOR_MODE_P (mode))
+	    *cost += extra_cost->vect.alu;
+	  else
+	    *cost += extra_cost->alu.bfx;
+	}
 
       /* We can trust that the immediates used will be correct (there
 	 are no by-register forms), so we need only cost op0.  */
@@ -6196,7 +6291,9 @@  cost_plus:
     case UMOD:
       if (speed)
 	{
-	  if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
+	  if (VECTOR_MODE_P (mode))
+	    *cost += extra_cost->vect.alu;
+	  else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
 	    *cost += (extra_cost->mult[GET_MODE (x) == DImode].add
 		      + extra_cost->mult[GET_MODE (x) == DImode].idiv);
 	  else if (GET_MODE (x) == DFmode)
@@ -6213,7 +6310,9 @@  cost_plus:
     case SQRT:
       if (speed)
 	{
-	  if (GET_MODE_CLASS (mode) == MODE_INT)
+	  if (VECTOR_MODE_P (mode))
+	    *cost += extra_cost->vect.alu;
+	  else if (GET_MODE_CLASS (mode) == MODE_INT)
 	    /* There is no integer SQRT, so only DIV and UDIV can get
 	       here.  */
 	    *cost += extra_cost->mult[mode == DImode].idiv;
@@ -6245,7 +6344,12 @@  cost_plus:
       op2 = XEXP (x, 2);
 
       if (speed)
-	*cost += extra_cost->fp[mode == DFmode].fma;
+	{
+	  if (VECTOR_MODE_P (mode))
+	    *cost += extra_cost->vect.alu;
+	  else
+	    *cost += extra_cost->fp[mode == DFmode].fma;
+	}
 
       /* FMSUB, FNMADD, and FNMSUB are free.  */
       if (GET_CODE (op0) == NEG)
@@ -6285,7 +6389,13 @@  cost_plus:
 
     case FLOAT_EXTEND:
       if (speed)
-	*cost += extra_cost->fp[mode == DFmode].widen;
+	{
+	  if (VECTOR_MODE_P (mode))
+	    /*Vector convertion.  */
+	    *cost += extra_cost->vect.alu;
+	  else
+	    *cost += extra_cost->fp[mode == DFmode].widen;
+	}
       return false;
 
     case FLOAT_TRUNCATE:
@@ -6311,8 +6421,13 @@  cost_plus:
         }
 
       if (speed)
-        *cost += extra_cost->fp[GET_MODE (x) == DFmode].toint;
-
+	{
+	  /* FCVT.  */
+	  if (VECTOR_MODE_P (mode))
+	    *cost += extra_cost->vect.alu;
+	  else
+	    *cost += extra_cost->fp[GET_MODE (x) == DFmode].toint;
+	}
       *cost += rtx_cost (x, (enum rtx_code) code, 0, speed);
       return true;
 
@@ -6321,7 +6436,12 @@  cost_plus:
 	{
 	  /* FABS and FNEG are analogous.  */
 	  if (speed)
-	    *cost += extra_cost->fp[mode == DFmode].neg;
+	    {
+	      if (VECTOR_MODE_P (mode))
+		*cost += extra_cost->vect.alu;
+	      else
+		*cost += extra_cost->fp[mode == DFmode].neg;
+	    }
 	}
       else
 	{
@@ -6338,10 +6458,13 @@  cost_plus:
     case SMIN:
       if (speed)
 	{
+	  if (VECTOR_MODE_P (mode))
+	    *cost += extra_cost->vect.alu;
 	  /* FMAXNM/FMINNM/FMAX/FMIN.
 	     TODO: This may not be accurate for all implementations, but
 	     we do not model this in the cost tables.  */
-	  *cost += extra_cost->fp[mode == DFmode].addsub;
+	  else
+	    *cost += extra_cost->fp[mode == DFmode].addsub;
 	}
       return false;
 
diff --git a/gcc/config/arm/aarch-common-protos.h b/gcc/config/arm/aarch-common-protos.h
index 3ee7ebf..c8e1d2e 100644
--- a/gcc/config/arm/aarch-common-protos.h
+++ b/gcc/config/arm/aarch-common-protos.h
@@ -124,6 +124,8 @@  struct fp_cost_table
 struct vector_cost_table
 {
   const int alu;
+  const int load;
+  const int store;
 };
 
 struct cpu_cost_table
diff --git a/gcc/config/arm/aarch-cost-tables.h b/gcc/config/arm/aarch-cost-tables.h
index 05e96a9..257902c 100644
--- a/gcc/config/arm/aarch-cost-tables.h
+++ b/gcc/config/arm/aarch-cost-tables.h
@@ -119,7 +119,9 @@  const struct cpu_cost_table generic_extra_costs =
   },
   /* Vector */
   {
-    COSTS_N_INSNS (1)	/* alu.  */
+    COSTS_N_INSNS (1),	/* alu.  */
+    COSTS_N_INSNS (1),	/* Load.  */
+    COSTS_N_INSNS (1)	/* Store.  */
   }
 };
 
@@ -220,7 +222,9 @@  const struct cpu_cost_table cortexa53_extra_costs =
   },
   /* Vector */
   {
-    COSTS_N_INSNS (1)	/* alu.  */
+    COSTS_N_INSNS (1),	/* alu.  */
+    COSTS_N_INSNS (1),	/* Load.  */
+    COSTS_N_INSNS (1)	/* Store.  */
   }
 };
 
@@ -321,7 +325,9 @@  const struct cpu_cost_table cortexa57_extra_costs =
   },
   /* Vector */
   {
-    COSTS_N_INSNS (1)  /* alu.  */
+    COSTS_N_INSNS (1),  /* alu.  */
+    COSTS_N_INSNS (1),  /* Load.  */
+    COSTS_N_INSNS (1)   /* Store.  */
   }
 };
 
@@ -422,7 +428,9 @@  const struct cpu_cost_table xgene1_extra_costs =
   },
   /* Vector */
   {
-    COSTS_N_INSNS (2)  /* alu.  */
+    COSTS_N_INSNS (2),  /* alu.  */
+    COSTS_N_INSNS (1),  /* Load.  */
+    COSTS_N_INSNS (1),  /* Store.  */
   }
 };