@@ -83,7 +83,9 @@ const struct cpu_cost_table thunderx_extra_costs =
0, /* N/A: Stm_regs_per_insn_subsequent. */
0, /* Storef. */
0, /* Stored. */
- COSTS_N_INSNS (1) /* Store_unaligned. */
+ COSTS_N_INSNS (1), /* Store_unaligned. */
+ COSTS_N_INSNS (1), /* Loadv. */
+ COSTS_N_INSNS (1) /* Storev. */
},
{
/* FP SFmode */
@@ -5499,16 +5499,6 @@ aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED,
above this default. */
*cost = COSTS_N_INSNS (1);
- /* TODO: The cost infrastructure currently does not handle
- vector operations. Assume that all vector operations
- are equally expensive. */
- if (VECTOR_MODE_P (mode))
- {
- if (speed)
- *cost += extra_cost->vect.alu;
- return true;
- }
-
switch (code)
{
case SET:
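
[Note, not part of the patch: with the blanket vector handling deleted above, each opcode case below must now add extra_cost->vect.alu itself, on top of the COSTS_N_INSNS (1) baseline. For scale, the cost unit comes from rtl.h:

    #define COSTS_N_INSNS(N) ((N) * 4)

so on a tuning whose table sets vect.alu to COSTS_N_INSNS (1), a costed vector ALU op totals 8 units, i.e. two instructions' worth.]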
@@ -5523,7 +5513,9 @@ aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED,
if (speed)
{
rtx address = XEXP (op0, 0);
- if (GET_MODE_CLASS (mode) == MODE_INT)
+ if (VECTOR_MODE_P (mode))
+ *cost += extra_cost->ldst.storev;
+ else if (GET_MODE_CLASS (mode) == MODE_INT)
*cost += extra_cost->ldst.store;
else if (mode == SFmode)
*cost += extra_cost->ldst.storef;
@@ -5544,15 +5536,22 @@ aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED,
/* Fall through. */
case REG:
+ /* The cost is one per vector-register copied. */
+ if (VECTOR_MODE_P (GET_MODE (op0)) && REG_P (op1))
+ {
+ int n_minus_1 = (GET_MODE_SIZE (GET_MODE (op0)) - 1)
+ / GET_MODE_SIZE (V4SImode);
+ *cost = COSTS_N_INSNS (n_minus_1 + 1);
+ }
/* const0_rtx is in general free, but we will use an
instruction to set a register to 0. */
- if (REG_P (op1) || op1 == const0_rtx)
- {
- /* The cost is 1 per register copied. */
- int n_minus_1 = (GET_MODE_SIZE (GET_MODE (op0)) - 1)
+ else if (REG_P (op1) || op1 == const0_rtx)
+ {
+ /* The cost is 1 per register copied. */
+ int n_minus_1 = (GET_MODE_SIZE (GET_MODE (op0)) - 1)
/ UNITS_PER_WORD;
- *cost = COSTS_N_INSNS (n_minus_1 + 1);
- }
+ *cost = COSTS_N_INSNS (n_minus_1 + 1);
+ }
else
/* Cost is just the cost of the RHS of the set. */
*cost += rtx_cost (op1, SET, 1, speed);
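
[Note: checking the arithmetic in the new vector branch, assuming GET_MODE_SIZE (V4SImode) == 16 on AArch64 (illustrative values, not from the patch):

    /* V4SImode, 16 bytes: (16 - 1) / 16 = 0  ->  COSTS_N_INSNS (1), one Q-reg copy.  */
    /* OImode,   32 bytes: (32 - 1) / 16 = 1  ->  COSTS_N_INSNS (2), two Q-reg copies.  */

i.e. one instruction per 128-bit vector register moved, mirroring the UNITS_PER_WORD computation retained below for general-register copies.]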
@@ -5650,7 +5649,9 @@ aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED,
approximation for the additional cost of the addressing
mode. */
rtx address = XEXP (x, 0);
- if (GET_MODE_CLASS (mode) == MODE_INT)
+ if (VECTOR_MODE_P (mode))
+ *cost += extra_cost->ldst.loadv;
+ else if (GET_MODE_CLASS (mode) == MODE_INT)
*cost += extra_cost->ldst.load;
else if (mode == SFmode)
*cost += extra_cost->ldst.loadf;
@@ -5667,6 +5668,16 @@ aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED,
case NEG:
op0 = XEXP (x, 0);
+ if (VECTOR_MODE_P (mode))
+ {
+ if (speed)
+ {
+ /* FNEG. */
+ *cost += extra_cost->vect.alu;
+ }
+ return false;
+ }
+
if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
{
if (GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMPARE
@@ -5705,7 +5716,12 @@ aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED,
case CLRSB:
case CLZ:
if (speed)
- *cost += extra_cost->alu.clz;
+ {
+ if (VECTOR_MODE_P (mode))
+ *cost += extra_cost->vect.alu;
+ else
+ *cost += extra_cost->alu.clz;
+ }
return false;
@@ -5790,6 +5806,20 @@ aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED,
return false;
}
+ if (VECTOR_MODE_P (mode))
+ {
+ /* Vector compare. */
+ if (speed)
+ *cost += extra_cost->vect.alu;
+
+ if (aarch64_float_const_zero_rtx_p (op1))
+ {
+ /* Vector cm (eq|ge|gt|lt|le) supports constant 0.0 at no extra
+ cost. */
+ return true;
+ }
+ return false;
+ }
return false;
case MINUS:
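
[Note: the zero special case above corresponds to the SIMD compare-with-immediate-zero forms, e.g. (illustrative assembly, not emitted by this hunk):

    fcmeq v0.4s, v1.4s, #0.0    // constant 0.0 folded into the instruction

so when op1 is a floating-point zero the comparison is still a single instruction, and the function returns true rather than costing the operands separately.]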
@@ -5844,12 +5874,21 @@ cost_minus:
if (speed)
{
- if (GET_MODE_CLASS (mode) == MODE_INT)
- /* SUB(S). */
- *cost += extra_cost->alu.arith;
+ if (VECTOR_MODE_P (mode))
+ {
+ /* Vector SUB. */
+ *cost += extra_cost->vect.alu;
+ }
+ else if (GET_MODE_CLASS (mode) == MODE_INT)
+ {
+ /* SUB(S). */
+ *cost += extra_cost->alu.arith;
+ }
else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
- /* FSUB. */
- *cost += extra_cost->fp[mode == DFmode].addsub;
+ {
+ /* FSUB. */
+ *cost += extra_cost->fp[mode == DFmode].addsub;
+ }
}
return true;
}
@@ -5913,12 +5952,21 @@ cost_plus:
if (speed)
{
- if (GET_MODE_CLASS (mode) == MODE_INT)
- /* ADD. */
- *cost += extra_cost->alu.arith;
+ if (VECTOR_MODE_P (mode))
+ {
+ /* Vector ADD. */
+ *cost += extra_cost->vect.alu;
+ }
+ else if (GET_MODE_CLASS (mode) == MODE_INT)
+ {
+ /* ADD. */
+ *cost += extra_cost->alu.arith;
+ }
else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
- /* FADD. */
- *cost += extra_cost->fp[mode == DFmode].addsub;
+ {
+ /* FADD. */
+ *cost += extra_cost->fp[mode == DFmode].addsub;
+ }
}
return true;
}
@@ -5927,8 +5975,12 @@ cost_plus:
*cost = COSTS_N_INSNS (1);
if (speed)
- *cost += extra_cost->alu.rev;
-
+ {
+ if (VECTOR_MODE_P (mode))
+ *cost += extra_cost->vect.alu;
+ else
+ *cost += extra_cost->alu.rev;
+ }
return false;
case IOR:
@@ -5936,10 +5988,14 @@ cost_plus:
{
*cost = COSTS_N_INSNS (1);
- if (speed)
- *cost += extra_cost->alu.rev;
-
- return true;
+ if (speed)
+ {
+ if (VECTOR_MODE_P (mode))
+ *cost += extra_cost->vect.alu;
+ else
+ *cost += extra_cost->alu.rev;
+ }
+ return true;
}
/* Fall through. */
case XOR:
@@ -5948,6 +6004,13 @@ cost_plus:
op0 = XEXP (x, 0);
op1 = XEXP (x, 1);
+ if (VECTOR_MODE_P (mode))
+ {
+ if (speed)
+ *cost += extra_cost->vect.alu;
+ return true;
+ }
+
if (code == AND
&& GET_CODE (op0) == MULT
&& CONST_INT_P (XEXP (op0, 1))
@@ -6013,10 +6076,19 @@ cost_plus:
return false;
case NOT:
- /* MVN. */
if (speed)
- *cost += extra_cost->alu.logical;
-
+ {
+ if (VECTOR_MODE_P (mode))
+ {
+ /* Vector NOT. */
+ *cost += extra_cost->vect.alu;
+ }
+ else
+ {
+ /* MVN. */
+ *cost += extra_cost->alu.logical;
+ }
+ }
/* The logical instruction could have the shifted register form,
but the cost is the same if the shift is processed as a separate
instruction, so we don't bother with it here. */
@@ -6055,10 +6127,19 @@ cost_plus:
return true;
}
- /* UXTB/UXTH. */
if (speed)
- *cost += extra_cost->alu.extend;
-
+ {
+ if (VECTOR_MODE_P (mode))
+ {
+ /* UMOV. */
+ *cost += extra_cost->vect.alu;
+ }
+ else
+ {
+ /* UXTB/UXTH. */
+ *cost += extra_cost->alu.extend;
+ }
+ }
return false;
case SIGN_EXTEND:
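
[Note: the UMOV comment above reflects that a vector-mode zero_extend typically arises when a lane is moved out to a general register, e.g. (illustrative):

    umov w0, v0.h[3]    // lane extract, implicitly zero-extending

hence a single vect.alu charge instead of the scalar UXTB/UXTH extend cost.]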
@@ -6078,7 +6159,12 @@ cost_plus:
}
if (speed)
- *cost += extra_cost->alu.extend;
+ {
+ if (VECTOR_MODE_P (mode))
+ *cost += extra_cost->vect.alu;
+ else
+ *cost += extra_cost->alu.extend;
+ }
return false;
case ASHIFT:
@@ -6087,10 +6173,20 @@ cost_plus:
if (CONST_INT_P (op1))
{
- /* LSL (immediate), UBMF, UBFIZ and friends. These are all
- aliases. */
if (speed)
- *cost += extra_cost->alu.shift;
+ {
+ if (VECTOR_MODE_P (mode))
+ {
+ /* Vector shift (immediate). */
+ *cost += extra_cost->vect.alu;
+ }
+ else
+ {
+ /* LSL (immediate), UBFM, UBFIZ and friends. These are all
+ aliases. */
+ *cost += extra_cost->alu.shift;
+ }
+ }
/* We can incorporate zero/sign extend for free. */
if (GET_CODE (op0) == ZERO_EXTEND
@@ -6102,10 +6198,19 @@ cost_plus:
}
else
{
- /* LSLV. */
if (speed)
- *cost += extra_cost->alu.shift_reg;
-
+ {
+ if (VECTOR_MODE_P (mode))
+ {
+ /* Vector shift (register). */
+ *cost += extra_cost->vect.alu;
+ }
+ else
+ {
+ /* LSLV. */
+ *cost += extra_cost->alu.shift_reg;
+ }
+ }
return false; /* All arguments need to be in registers. */
}
@@ -6120,7 +6225,12 @@ cost_plus:
{
/* ASR (immediate) and friends. */
if (speed)
- *cost += extra_cost->alu.shift;
+ {
+ if (VECTOR_MODE_P (mode))
+ *cost += extra_cost->vect.alu;
+ else
+ *cost += extra_cost->alu.shift;
+ }
*cost += rtx_cost (op0, (enum rtx_code) code, 0, speed);
return true;
@@ -6130,8 +6240,12 @@ cost_plus:
/* ASR (register) and friends. */
if (speed)
- *cost += extra_cost->alu.shift_reg;
-
+ {
+ if (VECTOR_MODE_P (mode))
+ *cost += extra_cost->vect.alu;
+ else
+ *cost += extra_cost->alu.shift_reg;
+ }
return false; /* All arguments need to be in registers. */
}
@@ -6179,7 +6293,12 @@ cost_plus:
case SIGN_EXTRACT:
/* UBFX/SBFX. */
if (speed)
- *cost += extra_cost->alu.bfx;
+ {
+ if (VECTOR_MODE_P (mode))
+ *cost += extra_cost->vect.alu;
+ else
+ *cost += extra_cost->alu.bfx;
+ }
/* We can trust that the immediates used will be correct (there
are no by-register forms), so we need only cost op0. */
@@ -6196,7 +6315,9 @@ cost_plus:
case UMOD:
if (speed)
{
- if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
+ if (VECTOR_MODE_P (mode))
+ *cost += extra_cost->vect.alu;
+ else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
*cost += (extra_cost->mult[GET_MODE (x) == DImode].add
+ extra_cost->mult[GET_MODE (x) == DImode].idiv);
else if (GET_MODE (x) == DFmode)
@@ -6213,7 +6334,9 @@ cost_plus:
case SQRT:
if (speed)
{
- if (GET_MODE_CLASS (mode) == MODE_INT)
+ if (VECTOR_MODE_P (mode))
+ *cost += extra_cost->vect.alu;
+ else if (GET_MODE_CLASS (mode) == MODE_INT)
/* There is no integer SQRT, so only DIV and UDIV can get
here. */
*cost += extra_cost->mult[mode == DImode].idiv;
@@ -6245,7 +6368,12 @@ cost_plus:
op2 = XEXP (x, 2);
if (speed)
- *cost += extra_cost->fp[mode == DFmode].fma;
+ {
+ if (VECTOR_MODE_P (mode))
+ *cost += extra_cost->vect.alu;
+ else
+ *cost += extra_cost->fp[mode == DFmode].fma;
+ }
/* FMSUB, FNMADD, and FNMSUB are free. */
if (GET_CODE (op0) == NEG)
@@ -6285,12 +6413,28 @@ cost_plus:
case FLOAT_EXTEND:
if (speed)
- *cost += extra_cost->fp[mode == DFmode].widen;
+ {
+ if (VECTOR_MODE_P (mode))
+ {
+ /* Vector widening conversion. */
+ *cost += extra_cost->vect.alu;
+ }
+ else
+ *cost += extra_cost->fp[mode == DFmode].widen;
+ }
return false;
case FLOAT_TRUNCATE:
if (speed)
- *cost += extra_cost->fp[mode == DFmode].narrow;
+ {
+ if (VECTOR_MODE_P (mode))
+ {
+ /* Vector narrowing conversion. */
+ *cost += extra_cost->vect.alu;
+ }
+ else
+ *cost += extra_cost->fp[mode == DFmode].narrow;
+ }
return false;
case FIX:
@@ -6311,13 +6455,23 @@ cost_plus:
}
if (speed)
- *cost += extra_cost->fp[GET_MODE (x) == DFmode].toint;
-
+ {
+ if (VECTOR_MODE_P (mode))
+ *cost += extra_cost->vect.alu;
+ else
+ *cost += extra_cost->fp[GET_MODE (x) == DFmode].toint;
+ }
*cost += rtx_cost (x, (enum rtx_code) code, 0, speed);
return true;
case ABS:
- if (GET_MODE_CLASS (mode) == MODE_FLOAT)
+ if (VECTOR_MODE_P (mode))
+ {
+ /* ABS (vector). */
+ if (speed)
+ *cost += extra_cost->vect.alu;
+ }
+ else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
{
/* FABS and FNEG are analogous. */
if (speed)
@@ -6338,10 +6492,15 @@ cost_plus:
case SMIN:
if (speed)
{
- /* FMAXNM/FMINNM/FMAX/FMIN.
- TODO: This may not be accurate for all implementations, but
- we do not model this in the cost tables. */
- *cost += extra_cost->fp[mode == DFmode].addsub;
+ if (VECTOR_MODE_P (mode))
+ *cost += extra_cost->vect.alu;
+ else
+ {
+ /* FMAXNM/FMINNM/FMAX/FMIN.
+ TODO: This may not be accurate for all implementations, but
+ we do not model this in the cost tables. */
+ *cost += extra_cost->fp[mode == DFmode].addsub;
+ }
}
return false;
@@ -102,6 +102,8 @@ struct mem_cost_table
const int storef; /* SFmode. */
const int stored; /* DFmode. */
const int store_unaligned; /* Extra for unaligned stores. */
+ const int loadv; /* Vector load. */
+ const int storev; /* Vector store. */
};
struct fp_cost_table
@@ -81,7 +81,9 @@ const struct cpu_cost_table generic_extra_costs =
1, /* stm_regs_per_insn_subsequent. */
COSTS_N_INSNS (2), /* storef. */
COSTS_N_INSNS (3), /* stored. */
- COSTS_N_INSNS (1) /* store_unaligned. */
+ COSTS_N_INSNS (1), /* store_unaligned. */
+ COSTS_N_INSNS (1), /* loadv. */
+ COSTS_N_INSNS (1) /* storev. */
},
{
/* FP SFmode */
@@ -182,7 +184,9 @@ const struct cpu_cost_table cortexa53_extra_costs =
2, /* stm_regs_per_insn_subsequent. */
0, /* storef. */
0, /* stored. */
- COSTS_N_INSNS (1) /* store_unaligned. */
+ COSTS_N_INSNS (1), /* store_unaligned. */
+ COSTS_N_INSNS (1), /* loadv. */
+ COSTS_N_INSNS (1) /* storev. */
},
{
/* FP SFmode */
@@ -283,7 +287,9 @@ const struct cpu_cost_table cortexa57_extra_costs =
2, /* stm_regs_per_insn_subsequent. */
0, /* storef. */
0, /* stored. */
- COSTS_N_INSNS (1) /* store_unaligned. */
+ COSTS_N_INSNS (1), /* store_unaligned. */
+ COSTS_N_INSNS (1), /* loadv. */
+ COSTS_N_INSNS (1) /* storev. */
},
{
/* FP SFmode */
@@ -385,6 +391,8 @@ const struct cpu_cost_table xgene1_extra_costs =
0, /* storef. */
0, /* stored. */
0, /* store_unaligned. */
+ COSTS_N_INSNS (1), /* loadv. */
+ COSTS_N_INSNS (1) /* storev. */
},
{
/* FP SFmode */
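
[Note: a quick way to watch the new paths fire is to cost a vector expression by hand, e.g. from a plugin or under gdb on cc1. A sketch using the same four-operand rtx_cost signature the hunks above call; the register choice is arbitrary:

    /* Cost of a V4SImode register-register add on AArch64.  */
    rtx v0 = gen_rtx_REG (V4SImode, V0_REGNUM);
    rtx v1 = gen_rtx_REG (V4SImode, V0_REGNUM + 1);
    rtx sum = gen_rtx_PLUS (V4SImode, v0, v1);
    int c = rtx_cost (sum, SET, 1, /*speed=*/true);

With these changes, c should come out as COSTS_N_INSNS (1) plus the active tuning's vect.alu, instead of the old flat vector cost.]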