diff mbox

[ARM,2/2] Remove old rtx costs

Message ID 5818CD08.4090906@foss.arm.com
State New
Headers show

Commit Message

Kyrill Tkachov Nov. 1, 2016, 5:12 p.m. UTC
Hi all,

This is the big removal patch that removes the old costs functions, the function pointer
field in tune_params, and the transitional options -mold-rtx-costs and -mnew-generic-costs.
The diff stats come in at:
3 files changed, 61 insertions(+), 1275 deletions(-)

Bootstrapped and tested on arm-none-linux-gnueabihf.

Ok for trunk?

Thanks,
Kyrill

2016-11-01  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>

     * config/arm/arm.opt (mold-rtx-costs): Delete.
     (mnew-generic-costs): Delete.
     * config/arm/arm-protos.h (struct tune_params): Delete rtx_costs field.
     * config/arm/arm.c (arm_rtx_costs_1): Delete.
     (arm_size_rtx_costs): Likewise.
     (arm_slowmul_rtx_costs): Likewise.
     (arm_fastmul_rtx_costs): Likewise.
     (arm_xscale_rtx_costs): Likewise.
     (arm_9e_rtx_costs): Likewise.
     (arm_slowmul_tune, arm_fastmul_tune, arm_strongarm_tune,
     arm_xscale_tune, arm_9e_tune, arm_v6t2_tune, arm_cortex_tune,
     arm_cortex_a8_tune, arm_cortex_a7_tune, arm_cortex_a15_tune,
     arm_cortex_a53_tune, arm_cortex_a57_tune, arm_cortex_a9_tune,
     arm_cortex_a12_tune, arm_v7m_tune, arm_v6m_tune, arm_fa726te_tune
     arm_cortex_a5_tune, arm_xgene1_tune, arm_marvell_pj4_tune,
     arm_cortex_a35_tune, arm_exynosm1_tune, arm_cortex_a73_tune,
     arm_cortex_m7_tune):
     Delete rtx_costs field.
     (arm_new_rtx_costs): Rename to...
     (arm_rtx_costs_internal): ... This.
     (arm_rtx_costs): Remove old way of doing rtx costs.

Comments

Kyrill Tkachov Nov. 8, 2016, 9:26 a.m. UTC | #1
Ping.
https://gcc.gnu.org/ml/gcc-patches/2016-11/msg00064.html

Thanks,
Kyrill

On 01/11/16 17:12, Kyrill Tkachov wrote:
> Hi all,

>

> This is the big removal patch that removes the old costs functions, the function pointer

> field in tune_params, and the transitional options -mold-rtx-costs and -mnew-generic-costs.

> The diff stats come in at:

> 3 files changed, 61 insertions(+), 1275 deletions(-)

>

> Bootstrapped and tested on arm-none-linux-gnueabihf.

>

> Ok for trunk?

>

> Thanks,

> Kyrill

>

> 2016-11-01  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>

>

>     * config/arm/arm.opt (mold-rtx-costs): Delete.

>     (mnew-generic-costs): Delete.

>     * config/arm/arm-protos.h (struct tune_params): Delete rtx_costs field.

>     * config/arm/arm.c (arm_rtx_costs_1): Delete.

>     (arm_size_rtx_costs): Likewise.

>     (arm_slowmul_rtx_costs): Likewise.

>     (arm_fastmul_rtx_costs): Likewise.

>     (arm_xscale_rtx_costs): Likewise.

>     (arm_9e_rtx_costs): Likewise.

>     (arm_slowmul_tune, arm_fastmul_tune, arm_strongarm_tune,

>     arm_xscale_tune, arm_9e_tune, arm_v6t2_tune, arm_cortex_tune,

>     arm_cortex_a8_tune, arm_cortex_a7_tune, arm_cortex_a15_tune,

>     arm_cortex_a53_tune, arm_cortex_a57_tune, arm_cortex_a9_tune,

>     arm_cortex_a12_tune, arm_v7m_tune, arm_v6m_tune, arm_fa726te_tune

>     arm_cortex_a5_tune, arm_xgene1_tune, arm_marvell_pj4_tune,

>     arm_cortex_a35_tune, arm_exynosm1_tune, arm_cortex_a73_tune,

>     arm_cortex_m7_tune):

>     Delete rtx_costs field.

>     (arm_new_rtx_costs): Rename to...

>     (arm_rtx_costs_internal): ... This.

>     (arm_rtx_costs): Remove old way of doing rtx costs.
Richard Earnshaw (lists) Nov. 8, 2016, 11:23 a.m. UTC | #2
On 01/11/16 17:12, Kyrill Tkachov wrote:
> Hi all,

> 

> This is the big removal patch that removes the old costs functions, the

> function pointer

> field in tune_params, and the transitional options -mold-rtx-costs and

> -mnew-generic-costs.

> The diff stats come in at:

> 3 files changed, 61 insertions(+), 1275 deletions(-)

> 

> Bootstrapped and tested on arm-none-linux-gnueabihf.

> 

> Ok for trunk?

> 

> Thanks,

> Kyrill

> 

> 2016-11-01  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>

> 

>     * config/arm/arm.opt (mold-rtx-costs): Delete.

>     (mnew-generic-costs): Delete.

>     * config/arm/arm-protos.h (struct tune_params): Delete rtx_costs field.

>     * config/arm/arm.c (arm_rtx_costs_1): Delete.

>     (arm_size_rtx_costs): Likewise.

>     (arm_slowmul_rtx_costs): Likewise.

>     (arm_fastmul_rtx_costs): Likewise.

>     (arm_xscale_rtx_costs): Likewise.

>     (arm_9e_rtx_costs): Likewise.

>     (arm_slowmul_tune, arm_fastmul_tune, arm_strongarm_tune,

>     arm_xscale_tune, arm_9e_tune, arm_v6t2_tune, arm_cortex_tune,

>     arm_cortex_a8_tune, arm_cortex_a7_tune, arm_cortex_a15_tune,

>     arm_cortex_a53_tune, arm_cortex_a57_tune, arm_cortex_a9_tune,

>     arm_cortex_a12_tune, arm_v7m_tune, arm_v6m_tune, arm_fa726te_tune

>     arm_cortex_a5_tune, arm_xgene1_tune, arm_marvell_pj4_tune,

>     arm_cortex_a35_tune, arm_exynosm1_tune, arm_cortex_a73_tune,

>     arm_cortex_m7_tune):

>     Delete rtx_costs field.

>     (arm_new_rtx_costs): Rename to...

>     (arm_rtx_costs_internal): ... This.

>     (arm_rtx_costs): Remove old way of doing rtx costs.


OK.

R.
diff mbox

Patch

diff --git a/gcc/config/arm/arm-protos.h b/gcc/config/arm/arm-protos.h
index 374836cbdd52533237b81903c602117dc10ada22..9e0427720487549b99429c816e02295e72f8713a 100644
--- a/gcc/config/arm/arm-protos.h
+++ b/gcc/config/arm/arm-protos.h
@@ -260,7 +260,6 @@  struct cpu_cost_table;
 
 struct tune_params
 {
-  bool (*rtx_costs) (rtx, RTX_CODE, RTX_CODE, int *, bool);
   const struct cpu_cost_table *insn_extra_cost;
   bool (*sched_adjust_cost) (rtx_insn *, int, rtx_insn *, int *);
   int (*branch_cost) (bool, bool);
diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index 6bc588f7993979659ffe69646d8606f0f72cf608..f4a91e08c496f419742f3bd90ebf31b1b195c090 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -163,12 +163,6 @@  static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
 static bool arm_have_conditional_execution (void);
 static bool arm_cannot_force_const_mem (machine_mode, rtx);
 static bool arm_legitimate_constant_p (machine_mode, rtx);
-static bool arm_rtx_costs_1 (rtx, enum rtx_code, int*, bool);
-static bool arm_size_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *);
-static bool arm_slowmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
-static bool arm_fastmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
-static bool arm_xscale_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
-static bool arm_9e_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
 static bool arm_rtx_costs (rtx, machine_mode, int, int, int *, bool);
 static int arm_address_cost (rtx, machine_mode, addr_space_t, bool);
 static int arm_register_move_cost (machine_mode, reg_class_t, reg_class_t);
@@ -1681,7 +1675,6 @@  const struct cpu_cost_table v7m_extra_costs =
 
 const struct tune_params arm_slowmul_tune =
 {
-  arm_slowmul_rtx_costs,
   &generic_extra_costs,			/* Insn extra costs.  */
   NULL,					/* Sched adj cost.  */
   arm_default_branch_cost,
@@ -1704,7 +1697,6 @@  const struct tune_params arm_slowmul_tune =
 
 const struct tune_params arm_fastmul_tune =
 {
-  arm_fastmul_rtx_costs,
   &generic_extra_costs,			/* Insn extra costs.  */
   NULL,					/* Sched adj cost.  */
   arm_default_branch_cost,
@@ -1730,7 +1722,6 @@  const struct tune_params arm_fastmul_tune =
 
 const struct tune_params arm_strongarm_tune =
 {
-  arm_fastmul_rtx_costs,
   &generic_extra_costs,			/* Insn extra costs.  */
   NULL,					/* Sched adj cost.  */
   arm_default_branch_cost,
@@ -1753,7 +1744,6 @@  const struct tune_params arm_strongarm_tune =
 
 const struct tune_params arm_xscale_tune =
 {
-  arm_xscale_rtx_costs,
   &generic_extra_costs,			/* Insn extra costs.  */
   xscale_sched_adjust_cost,
   arm_default_branch_cost,
@@ -1776,7 +1766,6 @@  const struct tune_params arm_xscale_tune =
 
 const struct tune_params arm_9e_tune =
 {
-  arm_9e_rtx_costs,
   &generic_extra_costs,			/* Insn extra costs.  */
   NULL,					/* Sched adj cost.  */
   arm_default_branch_cost,
@@ -1799,7 +1788,6 @@  const struct tune_params arm_9e_tune =
 
 const struct tune_params arm_marvell_pj4_tune =
 {
-  arm_9e_rtx_costs,
   &generic_extra_costs,			/* Insn extra costs.  */
   NULL,					/* Sched adj cost.  */
   arm_default_branch_cost,
@@ -1822,7 +1810,6 @@  const struct tune_params arm_marvell_pj4_tune =
 
 const struct tune_params arm_v6t2_tune =
 {
-  arm_9e_rtx_costs,
   &generic_extra_costs,			/* Insn extra costs.  */
   NULL,					/* Sched adj cost.  */
   arm_default_branch_cost,
@@ -1847,7 +1834,6 @@  const struct tune_params arm_v6t2_tune =
 /* Generic Cortex tuning.  Use more specific tunings if appropriate.  */
 const struct tune_params arm_cortex_tune =
 {
-  arm_9e_rtx_costs,
   &generic_extra_costs,
   NULL,					/* Sched adj cost.  */
   arm_default_branch_cost,
@@ -1870,7 +1856,6 @@  const struct tune_params arm_cortex_tune =
 
 const struct tune_params arm_cortex_a8_tune =
 {
-  arm_9e_rtx_costs,
   &cortexa8_extra_costs,
   NULL,					/* Sched adj cost.  */
   arm_default_branch_cost,
@@ -1893,7 +1878,6 @@  const struct tune_params arm_cortex_a8_tune =
 
 const struct tune_params arm_cortex_a7_tune =
 {
-  arm_9e_rtx_costs,
   &cortexa7_extra_costs,
   NULL,					/* Sched adj cost.  */
   arm_default_branch_cost,
@@ -1916,7 +1900,6 @@  const struct tune_params arm_cortex_a7_tune =
 
 const struct tune_params arm_cortex_a15_tune =
 {
-  arm_9e_rtx_costs,
   &cortexa15_extra_costs,
   NULL,					/* Sched adj cost.  */
   arm_default_branch_cost,
@@ -1939,7 +1922,6 @@  const struct tune_params arm_cortex_a15_tune =
 
 const struct tune_params arm_cortex_a35_tune =
 {
-  arm_9e_rtx_costs,
   &cortexa53_extra_costs,
   NULL,					/* Sched adj cost.  */
   arm_default_branch_cost,
@@ -1962,7 +1944,6 @@  const struct tune_params arm_cortex_a35_tune =
 
 const struct tune_params arm_cortex_a53_tune =
 {
-  arm_9e_rtx_costs,
   &cortexa53_extra_costs,
   NULL,					/* Sched adj cost.  */
   arm_default_branch_cost,
@@ -1985,7 +1966,6 @@  const struct tune_params arm_cortex_a53_tune =
 
 const struct tune_params arm_cortex_a57_tune =
 {
-  arm_9e_rtx_costs,
   &cortexa57_extra_costs,
   NULL,					/* Sched adj cost.  */
   arm_default_branch_cost,
@@ -2008,7 +1988,6 @@  const struct tune_params arm_cortex_a57_tune =
 
 const struct tune_params arm_exynosm1_tune =
 {
-  arm_9e_rtx_costs,
   &exynosm1_extra_costs,
   NULL,						/* Sched adj cost.  */
   arm_default_branch_cost,
@@ -2031,7 +2010,6 @@  const struct tune_params arm_exynosm1_tune =
 
 const struct tune_params arm_xgene1_tune =
 {
-  arm_9e_rtx_costs,
   &xgene1_extra_costs,
   NULL,					/* Sched adj cost.  */
   arm_default_branch_cost,
@@ -2054,7 +2032,6 @@  const struct tune_params arm_xgene1_tune =
 
 const struct tune_params arm_qdf24xx_tune =
 {
-  arm_9e_rtx_costs,
   &qdf24xx_extra_costs,
   NULL,                                         /* Scheduler cost adjustment.  */
   arm_default_branch_cost,
@@ -2080,7 +2057,6 @@  const struct tune_params arm_qdf24xx_tune =
 
 const struct tune_params arm_cortex_a5_tune =
 {
-  arm_9e_rtx_costs,
   &cortexa5_extra_costs,
   NULL,					/* Sched adj cost.  */
   arm_cortex_a5_branch_cost,
@@ -2103,7 +2079,6 @@  const struct tune_params arm_cortex_a5_tune =
 
 const struct tune_params arm_cortex_a9_tune =
 {
-  arm_9e_rtx_costs,
   &cortexa9_extra_costs,
   cortex_a9_sched_adjust_cost,
   arm_default_branch_cost,
@@ -2126,7 +2101,6 @@  const struct tune_params arm_cortex_a9_tune =
 
 const struct tune_params arm_cortex_a12_tune =
 {
-  arm_9e_rtx_costs,
   &cortexa12_extra_costs,
   NULL,					/* Sched adj cost.  */
   arm_default_branch_cost,
@@ -2149,7 +2123,6 @@  const struct tune_params arm_cortex_a12_tune =
 
 const struct tune_params arm_cortex_a73_tune =
 {
-  arm_9e_rtx_costs,
   &cortexa57_extra_costs,
   NULL,						/* Sched adj cost.  */
   arm_default_branch_cost,
@@ -2179,7 +2152,6 @@  const struct tune_params arm_cortex_a73_tune =
 
 const struct tune_params arm_v7m_tune =
 {
-  arm_9e_rtx_costs,
   &v7m_extra_costs,
   NULL,					/* Sched adj cost.  */
   arm_cortex_m_branch_cost,
@@ -2204,7 +2176,6 @@  const struct tune_params arm_v7m_tune =
 
 const struct tune_params arm_cortex_m7_tune =
 {
-  arm_9e_rtx_costs,
   &v7m_extra_costs,
   NULL,					/* Sched adj cost.  */
   arm_cortex_m7_branch_cost,
@@ -2229,7 +2200,6 @@  const struct tune_params arm_cortex_m7_tune =
    arm_v6t2_tune. It is used for cortex-m0, cortex-m1 and cortex-m0plus.  */
 const struct tune_params arm_v6m_tune =
 {
-  arm_9e_rtx_costs,
   &generic_extra_costs,			/* Insn extra costs.  */
   NULL,					/* Sched adj cost.  */
   arm_default_branch_cost,
@@ -2252,7 +2222,6 @@  const struct tune_params arm_v6m_tune =
 
 const struct tune_params arm_fa726te_tune =
 {
-  arm_9e_rtx_costs,
   &generic_extra_costs,				/* Insn extra costs.  */
   fa726te_sched_adjust_cost,
   arm_default_branch_cost,
@@ -8456,621 +8425,6 @@  thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
     }
 }
 
-static inline bool
-arm_rtx_costs_1 (rtx x, enum rtx_code outer, int* total, bool speed)
-{
-  machine_mode mode = GET_MODE (x);
-  enum rtx_code subcode;
-  rtx operand;
-  enum rtx_code code = GET_CODE (x);
-  *total = 0;
-
-  switch (code)
-    {
-    case MEM:
-      /* Memory costs quite a lot for the first word, but subsequent words
-	 load at the equivalent of a single insn each.  */
-      *total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode));
-      return true;
-
-    case DIV:
-    case MOD:
-    case UDIV:
-    case UMOD:
-      if (TARGET_HARD_FLOAT && mode == SFmode)
-	*total = COSTS_N_INSNS (2);
-      else if (TARGET_HARD_FLOAT && mode == DFmode && !TARGET_VFP_SINGLE)
-	*total = COSTS_N_INSNS (4);
-      else
-	*total = COSTS_N_INSNS (20);
-      return false;
-
-    case ROTATE:
-      if (REG_P (XEXP (x, 1)))
-	*total = COSTS_N_INSNS (1); /* Need to subtract from 32 */
-      else if (!CONST_INT_P (XEXP (x, 1)))
-	*total = rtx_cost (XEXP (x, 1), mode, code, 1, speed);
-
-      /* Fall through */
-    case ROTATERT:
-      if (mode != SImode)
-	{
-	  *total += COSTS_N_INSNS (4);
-	  return true;
-	}
-
-      /* Fall through */
-    case ASHIFT: case LSHIFTRT: case ASHIFTRT:
-      *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed);
-      if (mode == DImode)
-	{
-	  *total += COSTS_N_INSNS (3);
-	  return true;
-	}
-
-      *total += COSTS_N_INSNS (1);
-      /* Increase the cost of complex shifts because they aren't any faster,
-         and reduce dual issue opportunities.  */
-      if (arm_tune_cortex_a9
-	  && outer != SET && !CONST_INT_P (XEXP (x, 1)))
-	++*total;
-
-      return true;
-
-    case MINUS:
-      if (mode == DImode)
-	{
-	  *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
-	  if (CONST_INT_P (XEXP (x, 0))
-	      && const_ok_for_arm (INTVAL (XEXP (x, 0))))
-	    {
-	      *total += rtx_cost (XEXP (x, 1), mode, code, 1, speed);
-	      return true;
-	    }
-
-	  if (CONST_INT_P (XEXP (x, 1))
-	      && const_ok_for_arm (INTVAL (XEXP (x, 1))))
-	    {
-	      *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed);
-	      return true;
-	    }
-
-	  return false;
-	}
-
-      if (GET_MODE_CLASS (mode) == MODE_FLOAT)
-	{
-	  if (TARGET_HARD_FLOAT
-	      && (mode == SFmode
-		  || (mode == DFmode && !TARGET_VFP_SINGLE)))
-	    {
-	      *total = COSTS_N_INSNS (1);
-	      if (CONST_DOUBLE_P (XEXP (x, 0))
-		  && arm_const_double_rtx (XEXP (x, 0)))
-		{
-		  *total += rtx_cost (XEXP (x, 1), mode, code, 1, speed);
-		  return true;
-		}
-
-	      if (CONST_DOUBLE_P (XEXP (x, 1))
-		  && arm_const_double_rtx (XEXP (x, 1)))
-		{
-		  *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed);
-		  return true;
-		}
-
-	      return false;
-	    }
-	  *total = COSTS_N_INSNS (20);
-	  return false;
-	}
-
-      *total = COSTS_N_INSNS (1);
-      if (CONST_INT_P (XEXP (x, 0))
-	  && const_ok_for_arm (INTVAL (XEXP (x, 0))))
-	{
-	  *total += rtx_cost (XEXP (x, 1), mode, code, 1, speed);
-	  return true;
-	}
-
-      subcode = GET_CODE (XEXP (x, 1));
-      if (subcode == ASHIFT || subcode == ASHIFTRT
-	  || subcode == LSHIFTRT
-	  || subcode == ROTATE || subcode == ROTATERT)
-	{
-	  *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed);
-	  *total += rtx_cost (XEXP (XEXP (x, 1), 0), mode, subcode, 0, speed);
-	  return true;
-	}
-
-      /* A shift as a part of RSB costs no more than RSB itself.  */
-      if (GET_CODE (XEXP (x, 0)) == MULT
-	  && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
-	{
-	  *total += rtx_cost (XEXP (XEXP (x, 0), 0), mode, code, 0, speed);
-	  *total += rtx_cost (XEXP (x, 1), mode, code, 1, speed);
-	  return true;
-	}
-
-      if (subcode == MULT
-	  && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode))
-	{
-	  *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed);
-	  *total += rtx_cost (XEXP (XEXP (x, 1), 0), mode, subcode, 0, speed);
-	  return true;
-	}
-
-      if (GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMPARE
-	  || GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMM_COMPARE)
-	{
-	  *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), mode, code,
-						 0, speed);
-	  if (REG_P (XEXP (XEXP (x, 1), 0))
-	      && REGNO (XEXP (XEXP (x, 1), 0)) != CC_REGNUM)
-	    *total += COSTS_N_INSNS (1);
-
-	  return true;
-	}
-
-      /* Fall through */
-
-    case PLUS:
-      if (code == PLUS && arm_arch6 && mode == SImode
-	  && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
-	      || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
-	{
-	  *total = COSTS_N_INSNS (1);
-	  *total += rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode,
-			      GET_CODE (XEXP (x, 0)), 0, speed);
-	  *total += rtx_cost (XEXP (x, 1), mode, code, 1, speed);
-	  return true;
-	}
-
-      /* MLA: All arguments must be registers.  We filter out
-	 multiplication by a power of two, so that we fall down into
-	 the code below.  */
-      if (GET_CODE (XEXP (x, 0)) == MULT
-	  && !power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
-	{
-	  /* The cost comes from the cost of the multiply.  */
-	  return false;
-	}
-
-      if (GET_MODE_CLASS (mode) == MODE_FLOAT)
-	{
-	  if (TARGET_HARD_FLOAT
-	      && (mode == SFmode
-		  || (mode == DFmode && !TARGET_VFP_SINGLE)))
-	    {
-	      *total = COSTS_N_INSNS (1);
-	      if (CONST_DOUBLE_P (XEXP (x, 1))
-		  && arm_const_double_rtx (XEXP (x, 1)))
-		{
-		  *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed);
-		  return true;
-		}
-
-	      return false;
-	    }
-
-	  *total = COSTS_N_INSNS (20);
-	  return false;
-	}
-
-      if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
-	  || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
-	{
-	  *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 1), mode, code,
-						 1, speed);
-	  if (REG_P (XEXP (XEXP (x, 0), 0))
-	      && REGNO (XEXP (XEXP (x, 0), 0)) != CC_REGNUM)
-	    *total += COSTS_N_INSNS (1);
-	  return true;
-	}
-
-      /* Fall through */
-
-    case AND: case XOR: case IOR:
-
-      /* Normally the frame registers will be spilt into reg+const during
-	 reload, so it is a bad idea to combine them with other instructions,
-	 since then they might not be moved outside of loops.  As a compromise
-	 we allow integration with ops that have a constant as their second
-	 operand.  */
-      if (REG_OR_SUBREG_REG (XEXP (x, 0))
-	  && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x, 0)))
-	  && !CONST_INT_P (XEXP (x, 1)))
-	*total = COSTS_N_INSNS (1);
-
-      if (mode == DImode)
-	{
-	  *total += COSTS_N_INSNS (2);
-	  if (CONST_INT_P (XEXP (x, 1))
-	      && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
-	    {
-	      *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed);
-	      return true;
-	    }
-
-	  return false;
-	}
-
-      *total += COSTS_N_INSNS (1);
-      if (CONST_INT_P (XEXP (x, 1))
-	  && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
-	{
-	  *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed);
-	  return true;
-	}
-      subcode = GET_CODE (XEXP (x, 0));
-      if (subcode == ASHIFT || subcode == ASHIFTRT
-	  || subcode == LSHIFTRT
-	  || subcode == ROTATE || subcode == ROTATERT)
-	{
-	  *total += rtx_cost (XEXP (x, 1), mode, code, 1, speed);
-	  *total += rtx_cost (XEXP (XEXP (x, 0), 0), mode, subcode, 0, speed);
-	  return true;
-	}
-
-      if (subcode == MULT
-	  && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
-	{
-	  *total += rtx_cost (XEXP (x, 1), mode, code, 1, speed);
-	  *total += rtx_cost (XEXP (XEXP (x, 0), 0), mode, subcode, 0, speed);
-	  return true;
-	}
-
-      if (subcode == UMIN || subcode == UMAX
-	  || subcode == SMIN || subcode == SMAX)
-	{
-	  *total = COSTS_N_INSNS (3);
-	  return true;
-	}
-
-      return false;
-
-    case MULT:
-      /* This should have been handled by the CPU specific routines.  */
-      gcc_unreachable ();
-
-    case TRUNCATE:
-      if (arm_arch3m && mode == SImode
-	  && GET_CODE (XEXP (x, 0)) == LSHIFTRT
-	  && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
-	  && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0))
-	      == GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)))
-	  && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
-	      || GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND))
-	{
-	  *total = rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, LSHIFTRT,
-			     0, speed);
-	  return true;
-	}
-      *total = COSTS_N_INSNS (2); /* Plus the cost of the MULT */
-      return false;
-
-    case NEG:
-      if (GET_MODE_CLASS (mode) == MODE_FLOAT)
-	{
-	  if (TARGET_HARD_FLOAT
-	      && (mode == SFmode
-		  || (mode == DFmode && !TARGET_VFP_SINGLE)))
-	    {
-	      *total = COSTS_N_INSNS (1);
-	      return false;
-	    }
-	  *total = COSTS_N_INSNS (2);
-	  return false;
-	}
-
-      /* Fall through */
-    case NOT:
-      *total = COSTS_N_INSNS (ARM_NUM_REGS(mode));
-      if (mode == SImode && code == NOT)
-	{
-	  subcode = GET_CODE (XEXP (x, 0));
-	  if (subcode == ASHIFT || subcode == ASHIFTRT
-	      || subcode == LSHIFTRT
-	      || subcode == ROTATE || subcode == ROTATERT
-	      || (subcode == MULT
-		  && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode)))
-	    {
-	      *total += rtx_cost (XEXP (XEXP (x, 0), 0), mode, subcode,
-				  0, speed);
-	      /* Register shifts cost an extra cycle.  */
-	      if (!CONST_INT_P (XEXP (XEXP (x, 0), 1)))
-		*total += COSTS_N_INSNS (1) + rtx_cost (XEXP (XEXP (x, 0), 1),
-							mode, subcode,
-							1, speed);
-	      return true;
-	    }
-	}
-
-      return false;
-
-    case IF_THEN_ELSE:
-      if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
-	{
-	  *total = COSTS_N_INSNS (4);
-	  return true;
-	}
-
-      operand = XEXP (x, 0);
-
-      if (!((GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMPARE
-	     || GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMM_COMPARE)
-	    && REG_P (XEXP (operand, 0))
-	    && REGNO (XEXP (operand, 0)) == CC_REGNUM))
-	*total += COSTS_N_INSNS (1);
-      *total += rtx_cost (XEXP (x, 1), VOIDmode, code, 1, speed);
-      *total += rtx_cost (XEXP (x, 2), VOIDmode, code, 2, speed);
-      return true;
-
-    case NE:
-      if (mode == SImode && XEXP (x, 1) == const0_rtx)
-	{
-	  *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), mode, code,
-						 0, speed);
-	  return true;
-	}
-      goto scc_insn;
-
-    case GE:
-      if ((!REG_P (XEXP (x, 0)) || REGNO (XEXP (x, 0)) != CC_REGNUM)
-	  && mode == SImode && XEXP (x, 1) == const0_rtx)
-	{
-	  *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), mode, code,
-						 0, speed);
-	  return true;
-	}
-      goto scc_insn;
-
-    case LT:
-      if ((!REG_P (XEXP (x, 0)) || REGNO (XEXP (x, 0)) != CC_REGNUM)
-	  && mode == SImode && XEXP (x, 1) == const0_rtx)
-	{
-	  *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), mode, code,
-						 0, speed);
-	  return true;
-	}
-      goto scc_insn;
-
-    case EQ:
-    case GT:
-    case LE:
-    case GEU:
-    case LTU:
-    case GTU:
-    case LEU:
-    case UNORDERED:
-    case ORDERED:
-    case UNEQ:
-    case UNGE:
-    case UNLT:
-    case UNGT:
-    case UNLE:
-    scc_insn:
-      /* SCC insns.  In the case where the comparison has already been
-	 performed, then they cost 2 instructions.  Otherwise they need
-	 an additional comparison before them.  */
-      *total = COSTS_N_INSNS (2);
-      if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM)
-	{
-	  return true;
-	}
-
-      /* Fall through */
-    case COMPARE:
-      if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM)
-	{
-	  *total = 0;
-	  return true;
-	}
-
-      *total += COSTS_N_INSNS (1);
-      if (CONST_INT_P (XEXP (x, 1))
-	  && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
-	{
-	  *total += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed);
-	  return true;
-	}
-
-      subcode = GET_CODE (XEXP (x, 0));
-      if (subcode == ASHIFT || subcode == ASHIFTRT
-	  || subcode == LSHIFTRT
-	  || subcode == ROTATE || subcode == ROTATERT)
-	{
-	  mode = GET_MODE (XEXP (x, 0));
-	  *total += rtx_cost (XEXP (x, 1), mode, code, 1, speed);
-	  *total += rtx_cost (XEXP (XEXP (x, 0), 0), mode, subcode, 0, speed);
-	  return true;
-	}
-
-      if (subcode == MULT
-	  && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
-	{
-	  mode = GET_MODE (XEXP (x, 0));
-	  *total += rtx_cost (XEXP (x, 1), mode, code, 1, speed);
-	  *total += rtx_cost (XEXP (XEXP (x, 0), 0), mode, subcode, 0, speed);
-	  return true;
-	}
-
-      return false;
-
-    case UMIN:
-    case UMAX:
-    case SMIN:
-    case SMAX:
-      *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), mode, code, 0, speed);
-      if (!CONST_INT_P (XEXP (x, 1))
-	  || !const_ok_for_arm (INTVAL (XEXP (x, 1))))
-	*total += rtx_cost (XEXP (x, 1), mode, code, 1, speed);
-      return true;
-
-    case ABS:
-      if (GET_MODE_CLASS (mode) == MODE_FLOAT)
-	{
-	  if (TARGET_HARD_FLOAT
-	      && (mode == SFmode
-		  || (mode == DFmode && !TARGET_VFP_SINGLE)))
-	    {
-	      *total = COSTS_N_INSNS (1);
-	      return false;
-	    }
-	  *total = COSTS_N_INSNS (20);
-	  return false;
-	}
-      *total = COSTS_N_INSNS (1);
-      if (mode == DImode)
-	*total += COSTS_N_INSNS (3);
-      return false;
-
-    case SIGN_EXTEND:
-    case ZERO_EXTEND:
-      *total = 0;
-      if (GET_MODE_CLASS (mode) == MODE_INT)
-	{
-	  rtx op = XEXP (x, 0);
-	  machine_mode opmode = GET_MODE (op);
-
-	  if (mode == DImode)
-	    *total += COSTS_N_INSNS (1);
-
-	  if (opmode != SImode)
-	    {
-	      if (MEM_P (op))
-		{
-		  /* If !arm_arch4, we use one of the extendhisi2_mem
-		     or movhi_bytes patterns for HImode.  For a QImode
-		     sign extension, we first zero-extend from memory
-		     and then perform a shift sequence.  */
-		  if (!arm_arch4 && (opmode != QImode || code == SIGN_EXTEND))
-		    *total += COSTS_N_INSNS (2);
-		}
-	      else if (arm_arch6)
-		*total += COSTS_N_INSNS (1);
-
-	      /* We don't have the necessary insn, so we need to perform some
-		 other operation.  */
-	      else if (TARGET_ARM && code == ZERO_EXTEND && mode == QImode)
-		/* An and with constant 255.  */
-		*total += COSTS_N_INSNS (1);
-	      else
-		/* A shift sequence.  Increase costs slightly to avoid
-		   combining two shifts into an extend operation.  */
-		*total += COSTS_N_INSNS (2) + 1;
-	    }
-
-	  return false;
-	}
-
-      switch (GET_MODE (XEXP (x, 0)))
-	{
-	case V8QImode:
-	case V4HImode:
-	case V2SImode:
-	case V4QImode:
-	case V2HImode:
-	  *total = COSTS_N_INSNS (1);
-	  return false;
-
-	default:
-	  gcc_unreachable ();
-	}
-      gcc_unreachable ();
-
-    case ZERO_EXTRACT:
-    case SIGN_EXTRACT:
-      mode = GET_MODE (XEXP (x, 0));
-      *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), mode, code, 0, speed);
-      return true;
-
-    case CONST_INT:
-      if (const_ok_for_arm (INTVAL (x))
-	  || const_ok_for_arm (~INTVAL (x)))
-	*total = COSTS_N_INSNS (1);
-      else
-	*total = COSTS_N_INSNS (arm_gen_constant (SET, mode, NULL_RTX,
-						  INTVAL (x), NULL_RTX,
-						  NULL_RTX, 0, 0));
-      return true;
-
-    case CONST:
-    case LABEL_REF:
-    case SYMBOL_REF:
-      *total = COSTS_N_INSNS (3);
-      return true;
-
-    case HIGH:
-      *total = COSTS_N_INSNS (1);
-      return true;
-
-    case LO_SUM:
-      *total = COSTS_N_INSNS (1);
-      *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed);
-      return true;
-
-    case CONST_DOUBLE:
-      if (TARGET_HARD_FLOAT && vfp3_const_double_rtx (x)
-	  && (mode == SFmode || !TARGET_VFP_SINGLE))
-	*total = COSTS_N_INSNS (1);
-      else
-	*total = COSTS_N_INSNS (4);
-      return true;
-
-    case SET:
-      /* The vec_extract patterns accept memory operands that require an
-	 address reload.  Account for the cost of that reload to give the
-	 auto-inc-dec pass an incentive to try to replace them.  */
-      if (TARGET_NEON && MEM_P (SET_DEST (x))
-	  && GET_CODE (SET_SRC (x)) == VEC_SELECT)
-	{
-	  mode = GET_MODE (SET_DEST (x));
-	  *total = rtx_cost (SET_DEST (x), mode, code, 0, speed);
-	  if (!neon_vector_mem_operand (SET_DEST (x), 2, true))
-	    *total += COSTS_N_INSNS (1);
-	  return true;
-	}
-      /* Likewise for the vec_set patterns.  */
-      if (TARGET_NEON && GET_CODE (SET_SRC (x)) == VEC_MERGE
-	  && GET_CODE (XEXP (SET_SRC (x), 0)) == VEC_DUPLICATE
-	  && MEM_P (XEXP (XEXP (SET_SRC (x), 0), 0)))
-	{
-	  rtx mem = XEXP (XEXP (SET_SRC (x), 0), 0);
-	  mode = GET_MODE (SET_DEST (x));
-	  *total = rtx_cost (mem, mode, code, 0, speed);
-	  if (!neon_vector_mem_operand (mem, 2, true))
-	    *total += COSTS_N_INSNS (1);
-	  return true;
-	}
-      return false;
-
-    case UNSPEC:
-      /* We cost this as high as our memory costs to allow this to
-	 be hoisted from loops.  */
-      if (XINT (x, 1) == UNSPEC_PIC_UNIFIED)
-	{
-	  *total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode));
-	}
-      return true;
-
-    case CONST_VECTOR:
-      if (TARGET_NEON
-	  && TARGET_HARD_FLOAT
-	  && outer == SET
-	  && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
-	  && neon_immediate_valid_for_move (x, mode, NULL, NULL))
-	*total = COSTS_N_INSNS (1);
-      else
-	*total = COSTS_N_INSNS (4);
-      return true;
-
-    default:
-      *total = COSTS_N_INSNS (4);
-      return false;
-    }
-}
-
 /* Estimates the size cost of thumb1 instructions.
    For now most of the code is copied from thumb1_rtx_costs. We need more
    fine grain tuning when we have more related test cases.  */
@@ -9164,308 +8518,69 @@  thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
           int i;
           /* This duplicates the tests in the andsi3 expander.  */
           for (i = 9; i <= 31; i++)
-            if ((HOST_WIDE_INT_1 << i) - 1 == INTVAL (x)
-                || (HOST_WIDE_INT_1 << i) - 1 == ~INTVAL (x))
-              return COSTS_N_INSNS (2);
-        }
-      else if (outer == ASHIFT || outer == ASHIFTRT
-               || outer == LSHIFTRT)
-        return 0;
-      return COSTS_N_INSNS (2);
-
-    case CONST:
-    case CONST_DOUBLE:
-    case LABEL_REF:
-    case SYMBOL_REF:
-      return COSTS_N_INSNS (3);
-
-    case UDIV:
-    case UMOD:
-    case DIV:
-    case MOD:
-      return 100;
-
-    case TRUNCATE:
-      return 99;
-
-    case AND:
-    case XOR:
-    case IOR:
-      return COSTS_N_INSNS (1);
-
-    case MEM:
-      return (COSTS_N_INSNS (1)
-	      + COSTS_N_INSNS (1)
-		* ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
-              + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
-                 ? COSTS_N_INSNS (1) : 0));
-
-    case IF_THEN_ELSE:
-      /* XXX a guess.  */
-      if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
-        return 14;
-      return 2;
-
-    case ZERO_EXTEND:
-      /* XXX still guessing.  */
-      switch (GET_MODE (XEXP (x, 0)))
-        {
-          case QImode:
-            return (1 + (mode == DImode ? 4 : 0)
-                    + (MEM_P (XEXP (x, 0)) ? 10 : 0));
-
-          case HImode:
-            return (4 + (mode == DImode ? 4 : 0)
-                    + (MEM_P (XEXP (x, 0)) ? 10 : 0));
-
-          case SImode:
-            return (1 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
-
-          default:
-            return 99;
-        }
-
-    default:
-      return 99;
-    }
-}
-
-/* RTX costs when optimizing for size.  */
-static bool
-arm_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
-		    int *total)
-{
-  machine_mode mode = GET_MODE (x);
-  if (TARGET_THUMB1)
-    {
-      *total = thumb1_size_rtx_costs (x, code, outer_code);
-      return true;
-    }
-
-  /* FIXME: This makes no attempt to prefer narrow Thumb-2 instructions.  */
-  switch (code)
-    {
-    case MEM:
-      /* A memory access costs 1 insn if the mode is small, or the address is
-	 a single register, otherwise it costs one insn per word.  */
-      if (REG_P (XEXP (x, 0)))
-	*total = COSTS_N_INSNS (1);
-      else if (flag_pic
-	       && GET_CODE (XEXP (x, 0)) == PLUS
-	       && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
-	/* This will be split into two instructions.
-	   See arm.md:calculate_pic_address.  */
-	*total = COSTS_N_INSNS (2);
-      else
-	*total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
-      return true;
-
-    case DIV:
-    case MOD:
-    case UDIV:
-    case UMOD:
-      /* Needs a libcall, so it costs about this.  */
-      *total = COSTS_N_INSNS (2);
-      return false;
-
-    case ROTATE:
-      if (mode == SImode && REG_P (XEXP (x, 1)))
-	{
-	  *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), mode, code,
-						 0, false);
-	  return true;
-	}
-      /* Fall through */
-    case ROTATERT:
-    case ASHIFT:
-    case LSHIFTRT:
-    case ASHIFTRT:
-      if (mode == DImode && CONST_INT_P (XEXP (x, 1)))
-	{
-	  *total = COSTS_N_INSNS (3) + rtx_cost (XEXP (x, 0), mode, code,
-						 0, false);
-	  return true;
-	}
-      else if (mode == SImode)
-	{
-	  *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), mode, code,
-						 0, false);
-	  /* Slightly disparage register shifts, but not by much.  */
-	  if (!CONST_INT_P (XEXP (x, 1)))
-	    *total += 1 + rtx_cost (XEXP (x, 1), mode, code, 1, false);
-	  return true;
-	}
-
-      /* Needs a libcall.  */
-      *total = COSTS_N_INSNS (2);
-      return false;
-
-    case MINUS:
-      if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
-	  && (mode == SFmode || !TARGET_VFP_SINGLE))
-	{
-	  *total = COSTS_N_INSNS (1);
-	  return false;
-	}
-
-      if (mode == SImode)
-	{
-	  enum rtx_code subcode0 = GET_CODE (XEXP (x, 0));
-	  enum rtx_code subcode1 = GET_CODE (XEXP (x, 1));
-
-	  if (subcode0 == ROTATE || subcode0 == ROTATERT || subcode0 == ASHIFT
-	      || subcode0 == LSHIFTRT || subcode0 == ASHIFTRT
-	      || subcode1 == ROTATE || subcode1 == ROTATERT
-	      || subcode1 == ASHIFT || subcode1 == LSHIFTRT
-	      || subcode1 == ASHIFTRT)
-	    {
-	      /* It's just the cost of the two operands.  */
-	      *total = 0;
-	      return false;
-	    }
-
-	  *total = COSTS_N_INSNS (1);
-	  return false;
-	}
-
-      *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
-      return false;
-
-    case PLUS:
-      if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
-	  && (mode == SFmode || !TARGET_VFP_SINGLE))
-	{
-	  *total = COSTS_N_INSNS (1);
-	  return false;
-	}
-
-      /* A shift as a part of ADD costs nothing.  */
-      if (GET_CODE (XEXP (x, 0)) == MULT
-	  && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
-	{
-	  *total = COSTS_N_INSNS (TARGET_THUMB2 ? 2 : 1);
-	  *total += rtx_cost (XEXP (XEXP (x, 0), 0), mode, code, 0, false);
-	  *total += rtx_cost (XEXP (x, 1), mode, code, 1, false);
-	  return true;
-	}
-
-      /* Fall through */
-    case AND: case XOR: case IOR:
-      if (mode == SImode)
-	{
-	  enum rtx_code subcode = GET_CODE (XEXP (x, 0));
-
-	  if (subcode == ROTATE || subcode == ROTATERT || subcode == ASHIFT
-	      || subcode == LSHIFTRT || subcode == ASHIFTRT
-	      || (code == AND && subcode == NOT))
-	    {
-	      /* It's just the cost of the two operands.  */
-	      *total = 0;
-	      return false;
-	    }
-	}
-
-      *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
-      return false;
-
-    case MULT:
-      *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
-      return false;
-
-    case NEG:
-      if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
-	  && (mode == SFmode || !TARGET_VFP_SINGLE))
-	{
-	  *total = COSTS_N_INSNS (1);
-	  return false;
-	}
-
-      /* Fall through */
-    case NOT:
-      *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
-
-      return false;
-
-    case IF_THEN_ELSE:
-      *total = 0;
-      return false;
-
-    case COMPARE:
-      if (cc_register (XEXP (x, 0), VOIDmode))
-	* total = 0;
-      else
-	*total = COSTS_N_INSNS (1);
-      return false;
-
-    case ABS:
-      if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
-	  && (mode == SFmode || !TARGET_VFP_SINGLE))
-	*total = COSTS_N_INSNS (1);
-      else
-	*total = COSTS_N_INSNS (1 + ARM_NUM_REGS (mode));
-      return false;
-
-    case SIGN_EXTEND:
-    case ZERO_EXTEND:
-      return arm_rtx_costs_1 (x, outer_code, total, 0);
-
-    case CONST_INT:
-      if (const_ok_for_arm (INTVAL (x)))
-	/* A multiplication by a constant requires another instruction
-	   to load the constant to a register.  */
-	*total = COSTS_N_INSNS ((outer_code == SET || outer_code == MULT)
-				? 1 : 0);
-      else if (const_ok_for_arm (~INTVAL (x)))
-	*total = COSTS_N_INSNS (outer_code == AND ? 0 : 1);
-      else if (const_ok_for_arm (-INTVAL (x)))
-	{
-	  if (outer_code == COMPARE || outer_code == PLUS
-	      || outer_code == MINUS)
-	    *total = 0;
-	  else
-	    *total = COSTS_N_INSNS (1);
-	}
-      else
-	*total = COSTS_N_INSNS (2);
-      return true;
+            if ((HOST_WIDE_INT_1 << i) - 1 == INTVAL (x)
+                || (HOST_WIDE_INT_1 << i) - 1 == ~INTVAL (x))
+              return COSTS_N_INSNS (2);
+        }
+      else if (outer == ASHIFT || outer == ASHIFTRT
+               || outer == LSHIFTRT)
+        return 0;
+      return COSTS_N_INSNS (2);
 
     case CONST:
+    case CONST_DOUBLE:
     case LABEL_REF:
     case SYMBOL_REF:
-      *total = COSTS_N_INSNS (2);
-      return true;
+      return COSTS_N_INSNS (3);
 
-    case CONST_DOUBLE:
-      *total = COSTS_N_INSNS (4);
-      return true;
+    case UDIV:
+    case UMOD:
+    case DIV:
+    case MOD:
+      return 100;
 
-    case CONST_VECTOR:
-      if (TARGET_NEON
-	  && TARGET_HARD_FLOAT
-	  && outer_code == SET
-	  && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
-	  && neon_immediate_valid_for_move (x, mode, NULL, NULL))
-	*total = COSTS_N_INSNS (1);
-      else
-	*total = COSTS_N_INSNS (4);
-      return true;
+    case TRUNCATE:
+      return 99;
 
-    case HIGH:
-    case LO_SUM:
-      /* We prefer constant pool entries to MOVW/MOVT pairs, so bump the
-	 cost of these slightly.  */
-      *total = COSTS_N_INSNS (1) + 1;
-      return true;
+    case AND:
+    case XOR:
+    case IOR:
+      return COSTS_N_INSNS (1);
 
-    case SET:
-      return false;
+    case MEM:
+      return (COSTS_N_INSNS (1)
+	      + COSTS_N_INSNS (1)
+		* ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
+              + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
+                 ? COSTS_N_INSNS (1) : 0));
+
+    case IF_THEN_ELSE:
+      /* XXX a guess.  */
+      if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
+        return 14;
+      return 2;
+
+    case ZERO_EXTEND:
+      /* XXX still guessing.  */
+      switch (GET_MODE (XEXP (x, 0)))
+        {
+          case QImode:
+            return (1 + (mode == DImode ? 4 : 0)
+                    + (MEM_P (XEXP (x, 0)) ? 10 : 0));
+
+          case HImode:
+            return (4 + (mode == DImode ? 4 : 0)
+                    + (MEM_P (XEXP (x, 0)) ? 10 : 0));
+
+          case SImode:
+            return (1 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
+
+          default:
+            return 99;
+        }
 
     default:
-      if (mode != VOIDmode)
-	*total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
-      else
-	*total = COSTS_N_INSNS (4); /* How knows?  */
-      return false;
+      return 99;
     }
 }
 
@@ -9592,7 +8707,7 @@  arm_unspec_cost (rtx x, enum rtx_code /* outer_code */, bool speed_p, int *cost)
    flags are live or not, and thus no realistic way to determine what
    the size will eventually be.  */
 static bool
-arm_new_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
+arm_rtx_costs_internal (rtx x, enum rtx_code code, enum rtx_code outer_code,
 		   const struct cpu_cost_table *extra_cost,
 		   int *cost, bool speed_p)
 {
@@ -11275,41 +10390,20 @@  arm_new_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
 
 #undef HANDLE_NARROW_SHIFT_ARITH
 
-/* RTX costs when optimizing for size.  */
+/* RTX costs entry point.  */
+
 static bool
 arm_rtx_costs (rtx x, machine_mode mode ATTRIBUTE_UNUSED, int outer_code,
 	       int opno ATTRIBUTE_UNUSED, int *total, bool speed)
 {
   bool result;
   int code = GET_CODE (x);
+  gcc_assert (current_tune->insn_extra_cost);
 
-  if (TARGET_OLD_RTX_COSTS
-      || (!current_tune->insn_extra_cost && !TARGET_NEW_GENERIC_COSTS))
-    {
-      /* Old way.  (Deprecated.)  */
-      if (!speed)
-	result = arm_size_rtx_costs (x, (enum rtx_code) code,
-				     (enum rtx_code) outer_code, total);
-      else
-	result = current_tune->rtx_costs (x,  (enum rtx_code) code,
-					  (enum rtx_code) outer_code, total,
-					  speed);
-    }
-  else
-    {
-    /* New way.  */
-      if (current_tune->insn_extra_cost)
-        result =  arm_new_rtx_costs (x, (enum rtx_code) code,
-				     (enum rtx_code) outer_code,
-				     current_tune->insn_extra_cost,
-				     total, speed);
-    /* TARGET_NEW_GENERIC_COSTS && !TARGET_OLD_RTX_COSTS
-       && current_tune->insn_extra_cost != NULL  */
-      else
-        result =  arm_new_rtx_costs (x, (enum rtx_code) code,
-				    (enum rtx_code) outer_code,
-				    &generic_extra_costs, total, speed);
-    }
+  result =  arm_rtx_costs_internal (x, (enum rtx_code) code,
+				(enum rtx_code) outer_code,
+				current_tune->insn_extra_cost,
+				total, speed);
 
   if (dump_file && (dump_flags & TDF_DETAILS))
     {
@@ -11320,305 +10414,6 @@  arm_rtx_costs (rtx x, machine_mode mode ATTRIBUTE_UNUSED, int outer_code,
   return result;
 }
 
-/* RTX costs for cores with a slow MUL implementation.  Thumb-2 is not
-   supported on any "slowmul" cores, so it can be ignored.  */
-
-static bool
-arm_slowmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
-		       int *total, bool speed)
-{
-  machine_mode mode = GET_MODE (x);
-
-  if (TARGET_THUMB)
-    {
-      *total = thumb1_rtx_costs (x, code, outer_code);
-      return true;
-    }
-
-  switch (code)
-    {
-    case MULT:
-      if (GET_MODE_CLASS (mode) == MODE_FLOAT
-	  || mode == DImode)
-	{
-	  *total = COSTS_N_INSNS (20);
-	  return false;
-	}
-
-      if (CONST_INT_P (XEXP (x, 1)))
-	{
-	  unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
-				      & (unsigned HOST_WIDE_INT) 0xffffffff);
-	  int cost, const_ok = const_ok_for_arm (i);
-	  int j, booth_unit_size;
-
-	  /* Tune as appropriate.  */
-	  cost = const_ok ? 4 : 8;
-	  booth_unit_size = 2;
-	  for (j = 0; i && j < 32; j += booth_unit_size)
-	    {
-	      i >>= booth_unit_size;
-	      cost++;
-	    }
-
-	  *total = COSTS_N_INSNS (cost);
-	  *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed);
-	  return true;
-	}
-
-      *total = COSTS_N_INSNS (20);
-      return false;
-
-    default:
-      return arm_rtx_costs_1 (x, outer_code, total, speed);;
-    }
-}
-
-
-/* RTX cost for cores with a fast multiply unit (M variants).  */
-
-static bool
-arm_fastmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
-		       int *total, bool speed)
-{
-  machine_mode mode = GET_MODE (x);
-
-  if (TARGET_THUMB1)
-    {
-      *total = thumb1_rtx_costs (x, code, outer_code);
-      return true;
-    }
-
-  /* ??? should thumb2 use different costs?  */
-  switch (code)
-    {
-    case MULT:
-      /* There is no point basing this on the tuning, since it is always the
-	 fast variant if it exists at all.  */
-      if (mode == DImode
-	  && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
-	  && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
-	      || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
-	{
-	  *total = COSTS_N_INSNS(2);
-	  return false;
-	}
-
-
-      if (mode == DImode)
-	{
-	  *total = COSTS_N_INSNS (5);
-	  return false;
-	}
-
-      if (CONST_INT_P (XEXP (x, 1)))
-	{
-	  unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
-				      & (unsigned HOST_WIDE_INT) 0xffffffff);
-	  int cost, const_ok = const_ok_for_arm (i);
-	  int j, booth_unit_size;
-
-	  /* Tune as appropriate.  */
-	  cost = const_ok ? 4 : 8;
-	  booth_unit_size = 8;
-	  for (j = 0; i && j < 32; j += booth_unit_size)
-	    {
-	      i >>= booth_unit_size;
-	      cost++;
-	    }
-
-	  *total = COSTS_N_INSNS(cost);
-	  return false;
-	}
-
-      if (mode == SImode)
-	{
-	  *total = COSTS_N_INSNS (4);
-	  return false;
-	}
-
-      if (GET_MODE_CLASS (mode) == MODE_FLOAT)
-	{
-	  if (TARGET_HARD_FLOAT
-	      && (mode == SFmode
-		  || (mode == DFmode && !TARGET_VFP_SINGLE)))
-	    {
-	      *total = COSTS_N_INSNS (1);
-	      return false;
-	    }
-	}
-
-      /* Requires a lib call */
-      *total = COSTS_N_INSNS (20);
-      return false;
-
-    default:
-      return arm_rtx_costs_1 (x, outer_code, total, speed);
-    }
-}
-
-
-/* RTX cost for XScale CPUs.  Thumb-2 is not supported on any xscale cores,
-   so it can be ignored.  */
-
-static bool
-arm_xscale_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
-		      int *total, bool speed)
-{
-  machine_mode mode = GET_MODE (x);
-
-  if (TARGET_THUMB)
-    {
-      *total = thumb1_rtx_costs (x, code, outer_code);
-      return true;
-    }
-
-  switch (code)
-    {
-    case COMPARE:
-      if (GET_CODE (XEXP (x, 0)) != MULT)
-	return arm_rtx_costs_1 (x, outer_code, total, speed);
-
-      /* A COMPARE of a MULT is slow on XScale; the muls instruction
-	 will stall until the multiplication is complete.  */
-      *total = COSTS_N_INSNS (3);
-      return false;
-
-    case MULT:
-      /* There is no point basing this on the tuning, since it is always the
-	 fast variant if it exists at all.  */
-      if (mode == DImode
-	  && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
-	  && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
-	      || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
-	{
-	  *total = COSTS_N_INSNS (2);
-	  return false;
-	}
-
-
-      if (mode == DImode)
-	{
-	  *total = COSTS_N_INSNS (5);
-	  return false;
-	}
-
-      if (CONST_INT_P (XEXP (x, 1)))
-	{
-	  /* If operand 1 is a constant we can more accurately
-	     calculate the cost of the multiply.  The multiplier can
-	     retire 15 bits on the first cycle and a further 12 on the
-	     second.  We do, of course, have to load the constant into
-	     a register first.  */
-	  unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
-	  /* There's a general overhead of one cycle.  */
-	  int cost = 1;
-	  unsigned HOST_WIDE_INT masked_const;
-
-	  if (i & 0x80000000)
-	    i = ~i;
-
-	  i &= (unsigned HOST_WIDE_INT) 0xffffffff;
-
-	  masked_const = i & 0xffff8000;
-	  if (masked_const != 0)
-	    {
-	      cost++;
-	      masked_const = i & 0xf8000000;
-	      if (masked_const != 0)
-		cost++;
-	    }
-	  *total = COSTS_N_INSNS (cost);
-	  return false;
-	}
-
-      if (mode == SImode)
-	{
-	  *total = COSTS_N_INSNS (3);
-	  return false;
-	}
-
-      /* Requires a lib call */
-      *total = COSTS_N_INSNS (20);
-      return false;
-
-    default:
-      return arm_rtx_costs_1 (x, outer_code, total, speed);
-    }
-}
-
-
-/* RTX costs for 9e (and later) cores.  */
-
-static bool
-arm_9e_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
-		  int *total, bool speed)
-{
-  machine_mode mode = GET_MODE (x);
-
-  if (TARGET_THUMB1)
-    {
-      switch (code)
-	{
-	case MULT:
-	  /* Small multiply: 32 cycles for an integer multiply inst.  */
-	  if (arm_arch6m && arm_m_profile_small_mul)
-	    *total = COSTS_N_INSNS (32);
-	  else
-	    *total = COSTS_N_INSNS (3);
-	  return true;
-
-	default:
-	  *total = thumb1_rtx_costs (x, code, outer_code);
-	  return true;
-	}
-    }
-
-  switch (code)
-    {
-    case MULT:
-      /* There is no point basing this on the tuning, since it is always the
-	 fast variant if it exists at all.  */
-      if (mode == DImode
-	  && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
-	  && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
-	      || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
-	{
-	  *total = COSTS_N_INSNS (2);
-	  return false;
-	}
-
-
-      if (mode == DImode)
-	{
-	  *total = COSTS_N_INSNS (5);
-	  return false;
-	}
-
-      if (mode == SImode)
-	{
-	  *total = COSTS_N_INSNS (2);
-	  return false;
-	}
-
-      if (GET_MODE_CLASS (mode) == MODE_FLOAT)
-	{
-	  if (TARGET_HARD_FLOAT
-	      && (mode == SFmode
-		  || (mode == DFmode && !TARGET_VFP_SINGLE)))
-	    {
-	      *total = COSTS_N_INSNS (1);
-	      return false;
-	    }
-	}
-
-      *total = COSTS_N_INSNS (20);
-      return false;
-
-    default:
-      return arm_rtx_costs_1 (x, outer_code, total, speed);
-    }
-}
 /* All address computations that can be done are free, but rtx cost returns
    the same for practically all of them.  So we weight the different types
    of address here in the order (most pref first):
diff --git a/gcc/config/arm/arm.opt b/gcc/config/arm/arm.opt
index 0ebe0174390167b79a64583c35a3f8fb018f6538..63cf342dbacc3ec8c6964fa593de0df9bb87e08f 100644
--- a/gcc/config/arm/arm.opt
+++ b/gcc/config/arm/arm.opt
@@ -253,14 +253,6 @@  mrestrict-it
 Target Report Var(arm_restrict_it) Init(2) Save
 Generate IT blocks appropriate for ARMv8.
 
-mold-rtx-costs
-Target Report Mask(OLD_RTX_COSTS)
-Use the old RTX costing tables (transitional).
-
-mnew-generic-costs
-Target Report Mask(NEW_GENERIC_COSTS)
-Use the new generic RTX cost tables if new core-specific cost table not available (transitional).
-
 mfix-cortex-m3-ldrd
 Target Report Var(fix_cm3_ldrd) Init(2)
 Avoid overlapping destination and address registers on LDRD instructions