@@ -121,7 +121,9 @@ const struct cpu_cost_table thunderx_extra_costs =
},
/* Vector */
{
- COSTS_N_INSNS (1) /* Alu. */
+ COSTS_N_INSNS (1), /* Alu. */
+ COSTS_N_INSNS (1), /* Load. */
+ COSTS_N_INSNS (1) /* Store. */
}
};
@@ -5499,16 +5499,6 @@ aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED,
above this default. */
*cost = COSTS_N_INSNS (1);
- /* TODO: The cost infrastructure currently does not handle
- vector operations. Assume that all vector operations
- are equally expensive. */
- if (VECTOR_MODE_P (mode))
- {
- if (speed)
- *cost += extra_cost->vect.alu;
- return true;
- }
-
switch (code)
{
case SET:
@@ -5523,7 +5513,9 @@ aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED,
if (speed)
{
rtx address = XEXP (op0, 0);
- if (GET_MODE_CLASS (mode) == MODE_INT)
+ if (VECTOR_MODE_P (mode))
+ *cost += extra_cost->vect.store;
+ else if (GET_MODE_CLASS (mode) == MODE_INT)
*cost += extra_cost->ldst.store;
else if (mode == SFmode)
*cost += extra_cost->ldst.storef;
@@ -5544,10 +5536,17 @@ aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED,
/* Fall through. */
case REG:
+ if (VECTOR_MODE_P (GET_MODE (op0)) && REG_P (op1))
+ {
+ /* The cost is 1 per vector-register copied. */
+ int n_minus_1 = (GET_MODE_SIZE (GET_MODE (op0)) - 1)
+ / GET_MODE_SIZE (V4SImode);
+ *cost = COSTS_N_INSNS (n_minus_1 + 1);
+ }
/* const0_rtx is in general free, but we will use an
instruction to set a register to 0. */
- if (REG_P (op1) || op1 == const0_rtx)
- {
+ else if (REG_P (op1) || op1 == const0_rtx)
+ {
/* The cost is 1 per register copied. */
int n_minus_1 = (GET_MODE_SIZE (GET_MODE (op0)) - 1)
/ UNITS_PER_WORD;
@@ -5570,6 +5569,7 @@ aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED,
&& (GET_MODE_BITSIZE (GET_MODE (XEXP (op1, 0)))
>= INTVAL (XEXP (op0, 1))))
op1 = XEXP (op1, 0);
+ gcc_assert (!VECTOR_MODE_P (mode));
if (CONST_INT_P (op1))
{
@@ -5621,8 +5621,10 @@ aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED,
case CONST_DOUBLE:
if (speed)
{
+ if (VECTOR_MODE_P (mode))
+ *cost += extra_cost->vect.alu;
/* mov[df,sf]_aarch64. */
- if (aarch64_float_const_representable_p (x))
+ else if (aarch64_float_const_representable_p (x))
/* FMOV (scalar immediate). */
*cost += extra_cost->fp[mode == DFmode].fpconst;
else if (!aarch64_float_const_zero_rtx_p (x))
@@ -5650,7 +5652,9 @@ aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED,
approximation for the additional cost of the addressing
mode. */
rtx address = XEXP (x, 0);
- if (GET_MODE_CLASS (mode) == MODE_INT)
+ if (VECTOR_MODE_P (mode))
+ *cost += extra_cost->vect.load;
+ else if (GET_MODE_CLASS (mode) == MODE_INT)
*cost += extra_cost->ldst.load;
else if (mode == SFmode)
*cost += extra_cost->ldst.loadf;
@@ -5705,7 +5709,12 @@ aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED,
case CLRSB:
case CLZ:
if (speed)
- *cost += extra_cost->alu.clz;
+ {
+ if (VECTOR_MODE_P (mode))
+ *cost += extra_cost->vect.alu;
+ else
+ *cost += extra_cost->alu.clz;
+ }
return false;
@@ -5790,6 +5799,13 @@ aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED,
return false;
}
+ /* VCMP. */
+ if (VECTOR_MODE_P (mode))
+ {
+ if (speed)
+ *cost += extra_cost->vect.alu;
+ return true;
+ }
return false;
case MINUS:
@@ -5808,8 +5824,13 @@ cost_minus:
*cost += rtx_cost (op0, MINUS, 0, speed);
if (speed)
- /* SUB(S) (immediate). */
- *cost += extra_cost->alu.arith;
+ {
+ if (VECTOR_MODE_P (mode))
+ *cost += extra_cost->vect.alu;
+ /* SUB(S) (immediate). */
+ else
+ *cost += extra_cost->alu.arith;
+ }
return true;
}
@@ -5818,8 +5839,12 @@ cost_minus:
if (aarch64_rtx_arith_op_extract_p (op1, mode))
{
if (speed)
- *cost += extra_cost->alu.arith_shift;
-
+ {
+ if (VECTOR_MODE_P (mode))
+ *cost += extra_cost->vect.alu;
+ else
+ *cost += extra_cost->alu.arith_shift;
+ }
*cost += rtx_cost (XEXP (XEXP (op1, 0), 0),
(enum rtx_code) GET_CODE (op1),
0, speed);
@@ -5844,7 +5869,10 @@ cost_minus:
if (speed)
{
- if (GET_MODE_CLASS (mode) == MODE_INT)
+ if (VECTOR_MODE_P (mode))
+ /* Vector SUB. */
+ *cost += extra_cost->vect.alu;
+ else if (GET_MODE_CLASS (mode) == MODE_INT)
/* SUB(S). */
*cost += extra_cost->alu.arith;
else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
@@ -5878,8 +5906,13 @@ cost_plus:
*cost += rtx_cost (op0, PLUS, 0, speed);
if (speed)
- /* ADD (immediate). */
- *cost += extra_cost->alu.arith;
+ {
+ if (VECTOR_MODE_P (mode))
+ *cost += extra_cost->vect.alu;
+ /* ADD (immediate). */
+ else
+ *cost += extra_cost->alu.arith;
+ }
return true;
}
@@ -5887,8 +5920,12 @@ cost_plus:
if (aarch64_rtx_arith_op_extract_p (op0, mode))
{
if (speed)
- *cost += extra_cost->alu.arith_shift;
-
+ {
+ if (VECTOR_MODE_P (mode))
+ *cost += extra_cost->vect.alu;
+ else
+ *cost += extra_cost->alu.arith_shift;
+ }
*cost += rtx_cost (XEXP (XEXP (op0, 0), 0),
(enum rtx_code) GET_CODE (op0),
0, speed);
@@ -5913,7 +5950,10 @@ cost_plus:
if (speed)
{
- if (GET_MODE_CLASS (mode) == MODE_INT)
+ if (VECTOR_MODE_P (mode))
+ /* Vector ADD. */
+ *cost += extra_cost->vect.alu;
+ else if (GET_MODE_CLASS (mode) == MODE_INT)
/* ADD. */
*cost += extra_cost->alu.arith;
else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
@@ -5927,8 +5967,12 @@ cost_plus:
*cost = COSTS_N_INSNS (1);
if (speed)
- *cost += extra_cost->alu.rev;
-
+ {
+ if (VECTOR_MODE_P (mode))
+ *cost += extra_cost->vect.alu;
+ else
+ *cost += extra_cost->alu.rev;
+ }
return false;
case IOR:
@@ -5936,10 +5980,14 @@ cost_plus:
{
*cost = COSTS_N_INSNS (1);
- if (speed)
- *cost += extra_cost->alu.rev;
-
- return true;
+ if (speed)
+ {
+ if (VECTOR_MODE_P (mode))
+ *cost += extra_cost->vect.alu;
+ else
+ *cost += extra_cost->alu.rev;
+ }
+ return true;
}
/* Fall through. */
case XOR:
@@ -5948,6 +5996,13 @@ cost_plus:
op0 = XEXP (x, 0);
op1 = XEXP (x, 1);
+ if (VECTOR_MODE_P (mode))
+ {
+ if (speed)
+ *cost += extra_cost->vect.alu;
+ return true;
+ }
+
if (code == AND
&& GET_CODE (op0) == MULT
&& CONST_INT_P (XEXP (op0, 1))
@@ -6013,10 +6068,15 @@ cost_plus:
return false;
case NOT:
- /* MVN. */
if (speed)
- *cost += extra_cost->alu.logical;
-
+ {
+	/* VMVN.  */
+ if (VECTOR_MODE_P (mode))
+ *cost += extra_cost->vect.alu;
+ /* MVN. */
+ else
+ *cost += extra_cost->alu.logical;
+ }
/* The logical instruction could have the shifted register form,
but the cost is the same if the shift is processed as a separate
instruction, so we don't bother with it here. */
@@ -6057,13 +6117,18 @@ cost_plus:
/* UXTB/UXTH. */
if (speed)
- *cost += extra_cost->alu.extend;
-
+ {
+ if (VECTOR_MODE_P (mode))
+ *cost += extra_cost->vect.alu;
+ else
+ *cost += extra_cost->alu.extend;
+ }
return false;
case SIGN_EXTEND:
if (MEM_P (XEXP (x, 0)))
{
+ gcc_assert (!VECTOR_MODE_P (mode));
/* LDRSH. */
if (speed)
{
@@ -6078,7 +6143,12 @@ cost_plus:
}
if (speed)
- *cost += extra_cost->alu.extend;
+ {
+ if (VECTOR_MODE_P (mode))
+ *cost += extra_cost->vect.alu;
+ else
+ *cost += extra_cost->alu.extend;
+ }
return false;
case ASHIFT:
@@ -6087,10 +6157,16 @@ cost_plus:
if (CONST_INT_P (op1))
{
- /* LSL (immediate), UBMF, UBFIZ and friends. These are all
- aliases. */
if (speed)
- *cost += extra_cost->alu.shift;
+ {
+ /* VSHL (immediate). */
+ if (VECTOR_MODE_P (mode))
+ *cost += extra_cost->vect.alu;
+ /* LSL (immediate), UBMF, UBFIZ and friends. These are all
+ aliases. */
+ else
+ *cost += extra_cost->alu.shift;
+ }
/* We can incorporate zero/sign extend for free. */
if (GET_CODE (op0) == ZERO_EXTEND
@@ -6102,10 +6178,15 @@ cost_plus:
}
else
{
- /* LSLV. */
if (speed)
- *cost += extra_cost->alu.shift_reg;
-
+ {
+ /* VSHL (register). */
+ if (VECTOR_MODE_P (mode))
+ *cost += extra_cost->vect.alu;
+ /* LSLV. */
+ else
+ *cost += extra_cost->alu.shift_reg;
+ }
return false; /* All arguments need to be in registers. */
}
@@ -6120,18 +6201,27 @@ cost_plus:
{
/* ASR (immediate) and friends. */
if (speed)
- *cost += extra_cost->alu.shift;
+ {
+ if (VECTOR_MODE_P (mode))
+ *cost += extra_cost->vect.alu;
+ else
+ *cost += extra_cost->alu.shift;
+ }
*cost += rtx_cost (op0, (enum rtx_code) code, 0, speed);
return true;
}
else
{
-
- /* ASR (register) and friends. */
if (speed)
- *cost += extra_cost->alu.shift_reg;
-
+ {
+	      /* VSHR (register).  */
+ if (VECTOR_MODE_P (mode))
+ *cost += extra_cost->vect.alu;
+ /* ASR (register) and friends. */
+ else
+ *cost += extra_cost->alu.shift_reg;
+ }
return false; /* All arguments need to be in registers. */
}
@@ -6179,7 +6269,12 @@ cost_plus:
case SIGN_EXTRACT:
/* UBFX/SBFX. */
if (speed)
- *cost += extra_cost->alu.bfx;
+ {
+ if (VECTOR_MODE_P (mode))
+ *cost += extra_cost->vect.alu;
+ else
+ *cost += extra_cost->alu.bfx;
+ }
/* We can trust that the immediates used will be correct (there
are no by-register forms), so we need only cost op0. */
@@ -6196,7 +6291,9 @@ cost_plus:
case UMOD:
if (speed)
{
- if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
+ if (VECTOR_MODE_P (mode))
+ *cost += extra_cost->vect.alu;
+ else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
*cost += (extra_cost->mult[GET_MODE (x) == DImode].add
+ extra_cost->mult[GET_MODE (x) == DImode].idiv);
else if (GET_MODE (x) == DFmode)
@@ -6213,7 +6310,9 @@ cost_plus:
case SQRT:
if (speed)
{
- if (GET_MODE_CLASS (mode) == MODE_INT)
+ if (VECTOR_MODE_P (mode))
+ *cost += extra_cost->vect.alu;
+ else if (GET_MODE_CLASS (mode) == MODE_INT)
/* There is no integer SQRT, so only DIV and UDIV can get
here. */
*cost += extra_cost->mult[mode == DImode].idiv;
@@ -6245,7 +6344,12 @@ cost_plus:
op2 = XEXP (x, 2);
if (speed)
- *cost += extra_cost->fp[mode == DFmode].fma;
+ {
+ if (VECTOR_MODE_P (mode))
+ *cost += extra_cost->vect.alu;
+ else
+ *cost += extra_cost->fp[mode == DFmode].fma;
+ }
/* FMSUB, FNMADD, and FNMSUB are free. */
if (GET_CODE (op0) == NEG)
@@ -6285,7 +6389,13 @@ cost_plus:
case FLOAT_EXTEND:
if (speed)
- *cost += extra_cost->fp[mode == DFmode].widen;
+ {
+ if (VECTOR_MODE_P (mode))
+	  /* Vector conversion.  */
+ *cost += extra_cost->vect.alu;
+ else
+ *cost += extra_cost->fp[mode == DFmode].widen;
+ }
return false;
case FLOAT_TRUNCATE:
@@ -6311,8 +6421,13 @@ cost_plus:
}
if (speed)
- *cost += extra_cost->fp[GET_MODE (x) == DFmode].toint;
-
+ {
+ /* FCVT. */
+ if (VECTOR_MODE_P (mode))
+ *cost += extra_cost->vect.alu;
+ else
+ *cost += extra_cost->fp[GET_MODE (x) == DFmode].toint;
+ }
*cost += rtx_cost (x, (enum rtx_code) code, 0, speed);
return true;
@@ -6321,7 +6436,12 @@ cost_plus:
{
/* FABS and FNEG are analogous. */
if (speed)
- *cost += extra_cost->fp[mode == DFmode].neg;
+ {
+ if (VECTOR_MODE_P (mode))
+ *cost += extra_cost->vect.alu;
+ else
+ *cost += extra_cost->fp[mode == DFmode].neg;
+ }
}
else
{
@@ -6338,10 +6458,13 @@ cost_plus:
case SMIN:
if (speed)
{
+ if (VECTOR_MODE_P (mode))
+ *cost += extra_cost->vect.alu;
/* FMAXNM/FMINNM/FMAX/FMIN.
TODO: This may not be accurate for all implementations, but
we do not model this in the cost tables. */
- *cost += extra_cost->fp[mode == DFmode].addsub;
+ else
+ *cost += extra_cost->fp[mode == DFmode].addsub;
}
return false;
@@ -124,6 +124,8 @@ struct fp_cost_table
struct vector_cost_table
{
const int alu;
+ const int load;
+ const int store;
};
struct cpu_cost_table
@@ -119,7 +119,9 @@ const struct cpu_cost_table generic_extra_costs =
},
/* Vector */
{
- COSTS_N_INSNS (1) /* alu. */
+ COSTS_N_INSNS (1), /* alu. */
+ COSTS_N_INSNS (1), /* Load. */
+ COSTS_N_INSNS (1) /* Store. */
}
};
@@ -220,7 +222,9 @@ const struct cpu_cost_table cortexa53_extra_costs =
},
/* Vector */
{
- COSTS_N_INSNS (1) /* alu. */
+ COSTS_N_INSNS (1), /* alu. */
+ COSTS_N_INSNS (1), /* Load. */
+ COSTS_N_INSNS (1) /* Store. */
}
};
@@ -321,7 +325,9 @@ const struct cpu_cost_table cortexa57_extra_costs =
},
/* Vector */
{
- COSTS_N_INSNS (1) /* alu. */
+ COSTS_N_INSNS (1), /* alu. */
+ COSTS_N_INSNS (1), /* Load. */
+ COSTS_N_INSNS (1) /* Store. */
}
};
@@ -422,7 +428,9 @@ const struct cpu_cost_table xgene1_extra_costs =
},
/* Vector */
{
- COSTS_N_INSNS (2) /* alu. */
+ COSTS_N_INSNS (2), /* alu. */
+ COSTS_N_INSNS (1), /* Load. */
+    COSTS_N_INSNS (1)  /* Store.  */
}
};