Message ID | 1487696064-3233-3-git-send-email-charles.baylis@linaro.org |
---|---|
State | New |
Headers | show |
Series | PR61551 addressing mode costs | expand |
On 21/02/17 16:54, charles.baylis@linaro.org wrote: > From: Charles Baylis <charles.baylis@linaro.org> > > This patch adds support for modelling the varying costs of > different addressing modes. The generic cost table treats > all addressing modes as having equal cost. The cost table > for Cortex-A57 is derived from http://infocenter.arm.com/help/topic/com.arm.doc.uan0015b/Cortex_A57_Software_Optimization_Guide_external.pdf > and treats addressing modes with write-back as having a > cost equal to one additional instruction. > > gcc/ChangeLog: > > <date> Charles Baylis <charles.baylis@linaro.org> > > * config/arm/aarch-common-protos.h (enum arm_addr_mode_op): New. > (struct addr_mode_cost_table): New. > (struct cpu_cost_table): Add pointer to an addr_mode_cost_table. > * config/arm/aarch-cost-tables.h: (generic_addr_mode_costs): New. > (generic_extra_costs) Initialise aarch32_addr_mode. > (cortexa53_extra_costs) Likewise. > (addr_mode_costs_cortexa57) New. > (cortexa57_extra_costs) Initialise aarch32_addr_mode. > (exynosm1_extra_costs) Likewise. > (xgene1_extra_costs) Likewise. > (qdf24xx_extra_costs) Likewise. > * config/arm/arm.c (cortexa9_extra_costs) Initialise aarch32_addr_mode. > (cortexa9_extra_costs) Likewise. > (cortexa8_extra_costs) Likewise. > (cortexa5_extra_costs) Likewise. > (cortexa7_extra_costs) Likewise. > (cortexa12_extra_costs) Likewise. > (cortexv7m_extra_costs) Likewise. > (arm_mem_costs): Use table lookup to calculate cost of addressing > mode. > > Change-Id: If71bd7c4f4bb876c5ed82dc28791130efb8bf89e > --- > gcc/config/arm/aarch-common-protos.h | 16 +++++++++++ > gcc/config/arm/aarch-cost-tables.h | 54 ++++++++++++++++++++++++++++++---- > gcc/config/arm/arm.c | 56 ++++++++++++++++++++++++++++++------ > 3 files changed, 113 insertions(+), 13 deletions(-) > > diff --git a/gcc/config/arm/aarch-common-protos.h b/gcc/config/arm/aarch-common-protos.h > index 7c2bb4c..f6fcc94 100644 > --- a/gcc/config/arm/aarch-common-protos.h > +++ b/gcc/config/arm/aarch-common-protos.h > @@ -130,6 +130,21 @@ struct vector_cost_table > const int alu; > }; > > +enum arm_addr_mode_op > +{ > + AMO_DEFAULT, > + AMO_NO_WB, > + AMO_WB, > + AMO_MAX /* for array size */ Comment style: Capital letter at start, full stop and two spaces at the end. The enum and structure below should have some comments explaining what they are for. > +}; > + > +struct addr_mode_cost_table > +{ > + const int integer[AMO_MAX]; > + const int fp[AMO_MAX]; > + const int vector[AMO_MAX]; > +}; > + > struct cpu_cost_table > { > const struct alu_cost_table alu; > @@ -137,6 +152,7 @@ struct cpu_cost_table > const struct mem_cost_table ldst; > const struct fp_cost_table fp[2]; /* SFmode and DFmode. */ > const struct vector_cost_table vect; > + const struct addr_mode_cost_table *aarch32_addr_mode; > }; > > > diff --git a/gcc/config/arm/aarch-cost-tables.h b/gcc/config/arm/aarch-cost-tables.h > index 68f84b0..e3d257f 100644 > --- a/gcc/config/arm/aarch-cost-tables.h > +++ b/gcc/config/arm/aarch-cost-tables.h > @@ -22,6 +22,16 @@ > #ifndef GCC_AARCH_COST_TABLES_H > #define GCC_AARCH_COST_TABLES_H > > +const struct addr_mode_cost_table generic_addr_mode_costs = > +{ > + /* int */ > + { 0, 0, 0 }, Elements need to be commented, otherwise minor changes to the contents of the arrays is hard to update and maintain. > + /* float */ > + { 0, 0, 0 }, > + /* vector */ > + { 0, 0, 0 } > +}; > + > const struct cpu_cost_table generic_extra_costs = > { > /* ALU */ > @@ -122,7 +132,9 @@ const struct cpu_cost_table generic_extra_costs = > /* Vector */ > { > COSTS_N_INSNS (1) /* alu. */ > - } > + }, > + /* Addressing mode */ Comment style. > + &generic_addr_mode_costs > }; > > const struct cpu_cost_table cortexa53_extra_costs = > @@ -225,6 +237,30 @@ const struct cpu_cost_table cortexa53_extra_costs = > /* Vector */ > { > COSTS_N_INSNS (1) /* alu. */ > + }, > + /* Addressing mode */ > + &generic_addr_mode_costs > +}; > + > +const struct addr_mode_cost_table addr_mode_costs_cortexa57 = > +{ > + /* int */ > + { > + 0, > + 0, > + COSTS_N_INSNS (1), > + }, > + /* float */ > + { > + 0, > + 0, > + COSTS_N_INSNS (1), > + }, > + /* vector */ > + { > + 0, > + 0, > + COSTS_N_INSNS (1), > } > }; > > @@ -328,7 +364,9 @@ const struct cpu_cost_table cortexa57_extra_costs = > /* Vector */ > { > COSTS_N_INSNS (1) /* alu. */ > - } > + }, > + /* Addressing mode */ > + &addr_mode_costs_cortexa57 > }; > > const struct cpu_cost_table exynosm1_extra_costs = > @@ -431,7 +469,9 @@ const struct cpu_cost_table exynosm1_extra_costs = > /* Vector */ > { > COSTS_N_INSNS (0) /* alu. */ > - } > + }, > + /* Addressing mode */ > + &generic_addr_mode_costs > }; > > const struct cpu_cost_table xgene1_extra_costs = > @@ -534,7 +574,9 @@ const struct cpu_cost_table xgene1_extra_costs = > /* Vector */ > { > COSTS_N_INSNS (2) /* alu. */ > - } > + }, > + /* Addressing mode */ > + &generic_addr_mode_costs > }; > > const struct cpu_cost_table qdf24xx_extra_costs = > @@ -637,7 +679,9 @@ const struct cpu_cost_table qdf24xx_extra_costs = > /* Vector */ > { > COSTS_N_INSNS (1) /* alu. */ > - } > + }, > + /* Addressing mode */ > + &generic_addr_mode_costs > }; > > #endif /* GCC_AARCH_COST_TABLES_H */ > diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c > index 7f002f1..fe4cd73 100644 > --- a/gcc/config/arm/arm.c > +++ b/gcc/config/arm/arm.c > @@ -1099,7 +1099,9 @@ const struct cpu_cost_table cortexa9_extra_costs = > /* Vector */ > { > COSTS_N_INSNS (1) /* alu. */ > - } > + }, > + /* Addressing mode */ > + &generic_addr_mode_costs > }; > > const struct cpu_cost_table cortexa8_extra_costs = > @@ -1202,7 +1204,9 @@ const struct cpu_cost_table cortexa8_extra_costs = > /* Vector */ > { > COSTS_N_INSNS (1) /* alu. */ > - } > + }, > + /* Addressing mode */ > + &generic_addr_mode_costs > }; > > const struct cpu_cost_table cortexa5_extra_costs = > @@ -1306,7 +1310,9 @@ const struct cpu_cost_table cortexa5_extra_costs = > /* Vector */ > { > COSTS_N_INSNS (1) /* alu. */ > - } > + }, > + /* Addressing mode */ > + &generic_addr_mode_costs > }; > > > @@ -1411,7 +1417,9 @@ const struct cpu_cost_table cortexa7_extra_costs = > /* Vector */ > { > COSTS_N_INSNS (1) /* alu. */ > - } > + }, > + /* Addressing mode */ > + &generic_addr_mode_costs > }; > > const struct cpu_cost_table cortexa12_extra_costs = > @@ -1514,7 +1522,9 @@ const struct cpu_cost_table cortexa12_extra_costs = > /* Vector */ > { > COSTS_N_INSNS (1) /* alu. */ > - } > + }, > + /* Addressing mode */ > + &generic_addr_mode_costs > }; > > const struct cpu_cost_table cortexa15_extra_costs = > @@ -1617,7 +1627,9 @@ const struct cpu_cost_table cortexa15_extra_costs = > /* Vector */ > { > COSTS_N_INSNS (1) /* alu. */ > - } > + }, > + /* Addressing mode */ > + &generic_addr_mode_costs > }; > > const struct cpu_cost_table v7m_extra_costs = > @@ -1720,7 +1732,9 @@ const struct cpu_cost_table v7m_extra_costs = > /* Vector */ > { > COSTS_N_INSNS (1) /* alu. */ > - } > + }, > + /* Addressing mode */ > + &generic_addr_mode_costs > }; > > const struct tune_params arm_slowmul_tune = > @@ -9093,7 +9107,33 @@ arm_mem_costs (rtx x, const struct cpu_cost_table *extra_cost, > /* Calculate cost of the addressing mode. */ > if (speed_p) > { > - /* TODO: Add table-driven costs for addressing modes. */ > + arm_addr_mode_op op_type; > + switch (GET_CODE(XEXP (x, 0))) > + { > + case REG: > + default: > + op_type = AMO_DEFAULT; Why would REG and default share the same cost? Presumably default is too complex to recognize, but then it's unlikely to be cheap. > + break; > + case PLUS: > + case MINUS: > + op_type = AMO_NO_WB; GCC doesn't support MINUS in addresses, even though the architecture could in theory handle this. Also, I think you need a different cost for scaled offset addressing, and possibly even for different scaling factors and types of scaling. > + break; > + case PRE_INC: > + case PRE_DEC: > + case POST_INC: > + case POST_DEC: > + case PRE_MODIFY: > + case POST_MODIFY: > + op_type = AMO_WB; Pre and post might also need separate entries (plus further entries for scaling). A post operation might happen in parallel with the data fetch, while a pre operation must happen before the address can be sent to the load/store pipe. > + break; > + } blank line between the switch block and the subsequent statements. Also, the following needs to laid out in GNU style. > + if (VECTOR_MODE_P (mode)) { > + *cost += extra_cost->aarch32_addr_mode->vector[op_type]; > + } else if (FLOAT_MODE_P (mode)) { > + *cost += extra_cost->aarch32_addr_mode->fp[op_type]; > + } else { > + *cost += extra_cost->aarch32_addr_mode->integer[op_type]; > + } > } > > /* cost of memory access */ >
On 9 June 2017 at 15:13, Richard Earnshaw (lists) <Richard.Earnshaw@arm.com> wrote: > On 21/02/17 16:54, charles.baylis@linaro.org wrote: >> From: Charles Baylis <charles.baylis@linaro.org> >> >> This patch adds support for modelling the varying costs of >> different addressing modes. The generic cost table treats >> all addressing modes as having equal cost. The cost table >> for Cortex-A57 is derived from http://infocenter.arm.com/help/topic/com.arm.doc.uan0015b/Cortex_A57_Software_Optimization_Guide_external.pdf >> and treats addressing modes with write-back as having a >> cost equal to one additional instruction. >> >> gcc/ChangeLog: >> >> <date> Charles Baylis <charles.baylis@linaro.org> >> >> * config/arm/aarch-common-protos.h (enum arm_addr_mode_op): New. >> (struct addr_mode_cost_table): New. >> (struct cpu_cost_table): Add pointer to an addr_mode_cost_table. >> * config/arm/aarch-cost-tables.h: (generic_addr_mode_costs): New. >> (generic_extra_costs) Initialise aarch32_addr_mode. >> (cortexa53_extra_costs) Likewise. >> (addr_mode_costs_cortexa57) New. >> (cortexa57_extra_costs) Initialise aarch32_addr_mode. >> (exynosm1_extra_costs) Likewise. >> (xgene1_extra_costs) Likewise. >> (qdf24xx_extra_costs) Likewise. >> * config/arm/arm.c (cortexa9_extra_costs) Initialise aarch32_addr_mode. >> (cortexa9_extra_costs) Likewise. >> (cortexa8_extra_costs) Likewise. >> (cortexa5_extra_costs) Likewise. >> (cortexa7_extra_costs) Likewise. >> (cortexa12_extra_costs) Likewise. >> (cortexv7m_extra_costs) Likewise. >> (arm_mem_costs): Use table lookup to calculate cost of addressing >> mode. >> >> Change-Id: If71bd7c4f4bb876c5ed82dc28791130efb8bf89e >> --- >> gcc/config/arm/aarch-common-protos.h | 16 +++++++++++ >> gcc/config/arm/aarch-cost-tables.h | 54 ++++++++++++++++++++++++++++++---- >> gcc/config/arm/arm.c | 56 ++++++++++++++++++++++++++++++------ >> 3 files changed, 113 insertions(+), 13 deletions(-) >> >> diff --git a/gcc/config/arm/aarch-common-protos.h b/gcc/config/arm/aarch-common-protos.h >> index 7c2bb4c..f6fcc94 100644 >> --- a/gcc/config/arm/aarch-common-protos.h >> +++ b/gcc/config/arm/aarch-common-protos.h >> @@ -130,6 +130,21 @@ struct vector_cost_table >> const int alu; >> }; >> >> +enum arm_addr_mode_op >> +{ >> + AMO_DEFAULT, >> + AMO_NO_WB, >> + AMO_WB, >> + AMO_MAX /* for array size */ > > Comment style: Capital letter at start, full stop and two spaces at the end. Done. > The enum and structure below should have some comments explaining what > they are for. Done. >> +const struct addr_mode_cost_table generic_addr_mode_costs = >> +{ >> + /* int */ >> + { 0, 0, 0 }, > > Elements need to be commented, otherwise minor changes to the contents > of the arrays is hard to update and maintain. Done. >> + /* Addressing mode */ > > Comment style. Done. >> - /* TODO: Add table-driven costs for addressing modes. */ >> + arm_addr_mode_op op_type; >> + switch (GET_CODE(XEXP (x, 0))) >> + { >> + case REG: >> + default: >> + op_type = AMO_DEFAULT; > > Why would REG and default share the same cost? Presumably default is > too complex to recognize, but then it's unlikely to be cheap. Default covers literals in various forms of RTL, for which the cost is the same as regular, and PIC, which is handled in the original code above this section. >> + break; >> + case PLUS: >> + case MINUS: >> + op_type = AMO_NO_WB; > > GCC doesn't support MINUS in addresses, even though the architecture > could in theory handle this. I've noted that in a comment, but kept the "case MINUS:" in place. > Also, I think you need a different cost for scaled offset addressing, > and possibly even for different scaling factors and types of scaling. ... see below: >> + break; >> + case PRE_INC: >> + case PRE_DEC: >> + case POST_INC: >> + case POST_DEC: >> + case PRE_MODIFY: >> + case POST_MODIFY: >> + op_type = AMO_WB; > > Pre and post might also need separate entries (plus further entries for > scaling). A post operation might happen in parallel with the data > fetch, while a pre operation must happen before the address can be sent > to the load/store pipe. The {DEFAULT, NO_WB, WB} range is also Ramana's requested design. I think this is OK because it is sufficient to describe the currently publicly documented microarchitectures, and the structure is readily extensible if future cores require more detailed modelling. Expanding the range of AMO_* entries in the array makes the tables more unwieldy for no immediate benefit. >> + break; >> + } > > blank line between the switch block and the subsequent statements. > Also, the following needs to laid out in GNU style. Done. This version of the patch also moves the Cortex-A57 tuning (which was present in the earlier version) into a separate patch. Discussion of that in a follow-up post. Updated patch attached From b6968804f37fa999c090d7316ab59f40b14b5f49 Mon Sep 17 00:00:00 2001 From: Charles Baylis <charles.baylis@linaro.org> Date: Wed, 8 Feb 2017 16:52:40 +0000 Subject: [PATCH 2/3] [ARM] Add table of costs for AAarch32 addressing modes. This patch adds support for modelling the varying costs of different addressing modes. The generic cost table treats all addressing modes as having equal cost. gcc/ChangeLog: <date> Charles Baylis <charles.baylis@linaro.org> * config/arm/aarch-common-protos.h (enum arm_addr_mode_op): New. (struct addr_mode_cost_table): New. (struct cpu_cost_table): Add pointer to an addr_mode_cost_table. * config/arm/aarch-cost-tables.h: (generic_addr_mode_costs): New. (generic_extra_costs) Initialise aarch32_addr_mode. (cortexa53_extra_costs) Likewise. (cortexa57_extra_costs) Likewise. (exynosm1_extra_costs) Likewise. (xgene1_extra_costs) Likewise. (qdf24xx_extra_costs) Likewise. * config/arm/arm.c (cortexa9_extra_costs) Initialise aarch32_addr_mode. (cortexa9_extra_costs) Likewise. (cortexa8_extra_costs) Likewise. (cortexa5_extra_costs) Likewise. (cortexa7_extra_costs) Likewise. (cortexa12_extra_costs) Likewise. (cortexv7m_extra_costs) Likewise. (arm_mem_costs): Use table lookup to calculate cost of addressing mode. Change-Id: If71bd7c4f4bb876c5ed82dc28791130efb8bf89e --- gcc/config/arm/aarch-common-protos.h | 20 +++++++++++++ gcc/config/arm/aarch-cost-tables.h | 38 +++++++++++++++++++---- gcc/config/arm/arm.c | 58 +++++++++++++++++++++++++++++++----- 3 files changed, 103 insertions(+), 13 deletions(-) diff --git a/gcc/config/arm/aarch-common-protos.h b/gcc/config/arm/aarch-common-protos.h index a511211..7887c92 100644 --- a/gcc/config/arm/aarch-common-protos.h +++ b/gcc/config/arm/aarch-common-protos.h @@ -131,6 +131,25 @@ struct vector_cost_table const int alu; }; +/* Addressing mode operations. Used to index tables in struct + addr_mode_cost_table. */ +enum arm_addr_mode_op +{ + AMO_DEFAULT, + AMO_NO_WB, /* Offset with no writeback. */ + AMO_WB, /* Offset with writeback. */ + AMO_MAX /* For array size. */ +}; + +/* Table of additional costs when using addressing modes for each + access type. */ +struct addr_mode_cost_table +{ + const int integer[AMO_MAX]; + const int fp[AMO_MAX]; + const int vector[AMO_MAX]; +}; + struct cpu_cost_table { const struct alu_cost_table alu; @@ -138,6 +157,7 @@ struct cpu_cost_table const struct mem_cost_table ldst; const struct fp_cost_table fp[2]; /* SFmode and DFmode. */ const struct vector_cost_table vect; + const struct addr_mode_cost_table *aarch32_addr_mode; }; diff --git a/gcc/config/arm/aarch-cost-tables.h b/gcc/config/arm/aarch-cost-tables.h index dc67faf..b16a95c 100644 --- a/gcc/config/arm/aarch-cost-tables.h +++ b/gcc/config/arm/aarch-cost-tables.h @@ -22,6 +22,29 @@ #ifndef GCC_AARCH_COST_TABLES_H #define GCC_AARCH_COST_TABLES_H +const struct addr_mode_cost_table generic_addr_mode_costs = +{ + /* int. */ + { + 0, /* AMO_DEFAULT. */ + 0, /* AMO_NO_WB. */ + 0 /* AMO_WB. */ + }, + /* float. */ + { + 0, /* AMO_DEFAULT. */ + 0, /* AMO_NO_WB. */ + 0 /* AMO_WB. */ + }, + /* vector. */ + { + 0, /* AMO_DEFAULT. */ + 0, /* AMO_NO_WB. */ + 0 /* AMO_WB. */ + } +}; + + const struct cpu_cost_table generic_extra_costs = { /* ALU */ @@ -122,7 +145,8 @@ const struct cpu_cost_table generic_extra_costs = /* Vector */ { COSTS_N_INSNS (1) /* alu. */ - } + }, + &generic_addr_mode_costs /* aarch32_addr_mode. */ }; const struct cpu_cost_table cortexa53_extra_costs = @@ -225,7 +249,8 @@ const struct cpu_cost_table cortexa53_extra_costs = /* Vector */ { COSTS_N_INSNS (1) /* alu. */ - } + }, + &generic_addr_mode_costs /* aarch32_addr_mode. */ }; const struct cpu_cost_table cortexa57_extra_costs = @@ -328,7 +353,8 @@ const struct cpu_cost_table cortexa57_extra_costs = /* Vector */ { COSTS_N_INSNS (1) /* alu. */ - } + }, + &generic_addr_mode_costs /* aarch32_addr_mode. */ }; const struct cpu_cost_table exynosm1_extra_costs = @@ -431,7 +457,8 @@ const struct cpu_cost_table exynosm1_extra_costs = /* Vector */ { COSTS_N_INSNS (0) /* alu. */ - } + }, + &generic_addr_mode_costs /* aarch32_addr_mode. */ }; const struct cpu_cost_table xgene1_extra_costs = @@ -534,7 +561,8 @@ const struct cpu_cost_table xgene1_extra_costs = /* Vector */ { COSTS_N_INSNS (2) /* alu. */ - } + }, + &generic_addr_mode_costs /* aarch32_addr_mode. */ }; #endif /* GCC_AARCH_COST_TABLES_H */ diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c index 13cd421..ae5140c 100644 --- a/gcc/config/arm/arm.c +++ b/gcc/config/arm/arm.c @@ -1117,7 +1117,8 @@ const struct cpu_cost_table cortexa9_extra_costs = /* Vector */ { COSTS_N_INSNS (1) /* alu. */ - } + }, + &generic_addr_mode_costs /* aarch32_addr_mode. */ }; const struct cpu_cost_table cortexa8_extra_costs = @@ -1220,7 +1221,8 @@ const struct cpu_cost_table cortexa8_extra_costs = /* Vector */ { COSTS_N_INSNS (1) /* alu. */ - } + }, + &generic_addr_mode_costs /* aarch32_addr_mode. */ }; const struct cpu_cost_table cortexa5_extra_costs = @@ -1324,7 +1326,8 @@ const struct cpu_cost_table cortexa5_extra_costs = /* Vector */ { COSTS_N_INSNS (1) /* alu. */ - } + }, + &generic_addr_mode_costs /* aarch32_addr_mode. */ }; @@ -1429,7 +1432,8 @@ const struct cpu_cost_table cortexa7_extra_costs = /* Vector */ { COSTS_N_INSNS (1) /* alu. */ - } + }, + &generic_addr_mode_costs /* aarch32_addr_mode. */ }; const struct cpu_cost_table cortexa12_extra_costs = @@ -1532,7 +1536,8 @@ const struct cpu_cost_table cortexa12_extra_costs = /* Vector */ { COSTS_N_INSNS (1) /* alu. */ - } + }, + &generic_addr_mode_costs /* aarch32_addr_mode. */ }; const struct cpu_cost_table cortexa15_extra_costs = @@ -1635,7 +1640,8 @@ const struct cpu_cost_table cortexa15_extra_costs = /* Vector */ { COSTS_N_INSNS (1) /* alu. */ - } + }, + &generic_addr_mode_costs /* aarch32_addr_mode. */ }; const struct cpu_cost_table v7m_extra_costs = @@ -1738,7 +1744,8 @@ const struct cpu_cost_table v7m_extra_costs = /* Vector */ { COSTS_N_INSNS (1) /* alu. */ - } + }, + &generic_addr_mode_costs /* aarch32_addr_mode. */ }; const struct tune_params arm_slowmul_tune = @@ -9219,7 +9226,42 @@ arm_mem_costs (rtx x, const struct cpu_cost_table *extra_cost, /* Calculate cost of the addressing mode. */ if (speed_p) { - /* TODO: Add table-driven costs for addressing modes. (See patch 2) */ + arm_addr_mode_op op_type; + switch (GET_CODE (XEXP (x, 0))) + { + default: + case REG: + op_type = AMO_DEFAULT; + break; + case MINUS: + /* MINUS does not appear in RTL, but the architecture supports it, + so handle this case defensively. */ + /* fall through */ + case PLUS: + op_type = AMO_NO_WB; + break; + case PRE_INC: + case PRE_DEC: + case POST_INC: + case POST_DEC: + case PRE_MODIFY: + case POST_MODIFY: + op_type = AMO_WB; + break; + } + + if (VECTOR_MODE_P (mode)) + { + *cost += extra_cost->aarch32_addr_mode->vector[op_type]; + } + else if (FLOAT_MODE_P (mode)) + { + *cost += extra_cost->aarch32_addr_mode->fp[op_type]; + } + else + { + *cost += extra_cost->aarch32_addr_mode->integer[op_type]; + } } /* Calculate cost of memory access. */
On Fri, Aug 25, 2017 at 10:43 AM, Charles Baylis <charles.baylis@linaro.org> wrote: > On 9 June 2017 at 15:13, Richard Earnshaw (lists) > <Richard.Earnshaw@arm.com> wrote: >> On 21/02/17 16:54, charles.baylis@linaro.org wrote: >>> From: Charles Baylis <charles.baylis@linaro.org> >>> >>> This patch adds support for modelling the varying costs of >>> different addressing modes. The generic cost table treats >>> all addressing modes as having equal cost. The cost table >>> for Cortex-A57 is derived from http://infocenter.arm.com/help/topic/com.arm.doc.uan0015b/Cortex_A57_Software_Optimization_Guide_external.pdf >>> and treats addressing modes with write-back as having a >>> cost equal to one additional instruction. >>> >>> gcc/ChangeLog: >>> >>> <date> Charles Baylis <charles.baylis@linaro.org> >>> >>> * config/arm/aarch-common-protos.h (enum arm_addr_mode_op): New. >>> (struct addr_mode_cost_table): New. >>> (struct cpu_cost_table): Add pointer to an addr_mode_cost_table. >>> * config/arm/aarch-cost-tables.h: (generic_addr_mode_costs): New. >>> (generic_extra_costs) Initialise aarch32_addr_mode. >>> (cortexa53_extra_costs) Likewise. >>> (addr_mode_costs_cortexa57) New. >>> (cortexa57_extra_costs) Initialise aarch32_addr_mode. >>> (exynosm1_extra_costs) Likewise. >>> (xgene1_extra_costs) Likewise. >>> (qdf24xx_extra_costs) Likewise. >>> * config/arm/arm.c (cortexa9_extra_costs) Initialise aarch32_addr_mode. >>> (cortexa9_extra_costs) Likewise. >>> (cortexa8_extra_costs) Likewise. >>> (cortexa5_extra_costs) Likewise. >>> (cortexa7_extra_costs) Likewise. >>> (cortexa12_extra_costs) Likewise. >>> (cortexv7m_extra_costs) Likewise. >>> (arm_mem_costs): Use table lookup to calculate cost of addressing >>> mode. >>> >>> Change-Id: If71bd7c4f4bb876c5ed82dc28791130efb8bf89e >>> --- >>> gcc/config/arm/aarch-common-protos.h | 16 +++++++++++ >>> gcc/config/arm/aarch-cost-tables.h | 54 ++++++++++++++++++++++++++++++---- >>> gcc/config/arm/arm.c | 56 ++++++++++++++++++++++++++++++------ >>> 3 files changed, 113 insertions(+), 13 deletions(-) >>> >>> diff --git a/gcc/config/arm/aarch-common-protos.h b/gcc/config/arm/aarch-common-protos.h >>> index 7c2bb4c..f6fcc94 100644 >>> --- a/gcc/config/arm/aarch-common-protos.h >>> +++ b/gcc/config/arm/aarch-common-protos.h >>> @@ -130,6 +130,21 @@ struct vector_cost_table >>> const int alu; >>> }; >>> >>> +enum arm_addr_mode_op >>> +{ >>> + AMO_DEFAULT, >>> + AMO_NO_WB, >>> + AMO_WB, >>> + AMO_MAX /* for array size */ >> >> Comment style: Capital letter at start, full stop and two spaces at the end. > > Done. > >> The enum and structure below should have some comments explaining what >> they are for. > > Done. > >>> +const struct addr_mode_cost_table generic_addr_mode_costs = >>> +{ >>> + /* int */ >>> + { 0, 0, 0 }, >> >> Elements need to be commented, otherwise minor changes to the contents >> of the arrays is hard to update and maintain. > > Done. > > >>> + /* Addressing mode */ >> >> Comment style. > > Done. > >>> - /* TODO: Add table-driven costs for addressing modes. */ >>> + arm_addr_mode_op op_type; >>> + switch (GET_CODE(XEXP (x, 0))) >>> + { >>> + case REG: >>> + default: >>> + op_type = AMO_DEFAULT; >> >> Why would REG and default share the same cost? Presumably default is >> too complex to recognize, but then it's unlikely to be cheap. > > Default covers literals in various forms of RTL, for which the cost is > the same as regular, and PIC, which is handled in the original code > above this section. > >>> + break; >>> + case PLUS: >>> + case MINUS: >>> + op_type = AMO_NO_WB; >> >> GCC doesn't support MINUS in addresses, even though the architecture >> could in theory handle this. > > I've noted that in a comment, but kept the "case MINUS:" in place. > >> Also, I think you need a different cost for scaled offset addressing, >> and possibly even for different scaling factors and types of scaling. > > ... see below: > >>> + break; >>> + case PRE_INC: >>> + case PRE_DEC: >>> + case POST_INC: >>> + case POST_DEC: >>> + case PRE_MODIFY: >>> + case POST_MODIFY: >>> + op_type = AMO_WB; >> >> Pre and post might also need separate entries (plus further entries for >> scaling). A post operation might happen in parallel with the data >> fetch, while a pre operation must happen before the address can be sent >> to the load/store pipe. > > The {DEFAULT, NO_WB, WB} range is also Ramana's requested design. I > think this is OK because it is sufficient to describe the currently > publicly documented microarchitectures, and the structure is readily > extensible if future cores require more detailed modelling. > > Expanding the range of AMO_* entries in the array makes the tables > more unwieldy for no immediate benefit. > >>> + break; >>> + } >> >> blank line between the switch block and the subsequent statements. >> Also, the following needs to laid out in GNU style. > > Done. > > This version of the patch also moves the Cortex-A57 tuning (which was > present in the earlier version) into a separate patch. Discussion of > that in a follow-up post. This will break aarch64 and the cores which are aarch64 only. Also your changelog is incorrect here: (qdf24xx_extra_costs) Likewise. Because qdf24xx_extra_costs has been moved to aarch64/aarch64-cost-tables.h with the other 2 aarch64 only processors (thunderx and thunderxt99). Are you planing on moving cpu_addrcost_table in aarch64 to the same model as you are using arm? I would say you are losing information if you do that as cpu_addrcost_table in aarch64 describes more than what is described in your new table on the arm side. Thanks, Andrew > > Updated patch attached
diff --git a/gcc/config/arm/aarch-common-protos.h b/gcc/config/arm/aarch-common-protos.h index 7c2bb4c..f6fcc94 100644 --- a/gcc/config/arm/aarch-common-protos.h +++ b/gcc/config/arm/aarch-common-protos.h @@ -130,6 +130,21 @@ struct vector_cost_table const int alu; }; +enum arm_addr_mode_op +{ + AMO_DEFAULT, + AMO_NO_WB, + AMO_WB, + AMO_MAX /* for array size */ +}; + +struct addr_mode_cost_table +{ + const int integer[AMO_MAX]; + const int fp[AMO_MAX]; + const int vector[AMO_MAX]; +}; + struct cpu_cost_table { const struct alu_cost_table alu; @@ -137,6 +152,7 @@ struct cpu_cost_table const struct mem_cost_table ldst; const struct fp_cost_table fp[2]; /* SFmode and DFmode. */ const struct vector_cost_table vect; + const struct addr_mode_cost_table *aarch32_addr_mode; }; diff --git a/gcc/config/arm/aarch-cost-tables.h b/gcc/config/arm/aarch-cost-tables.h index 68f84b0..e3d257f 100644 --- a/gcc/config/arm/aarch-cost-tables.h +++ b/gcc/config/arm/aarch-cost-tables.h @@ -22,6 +22,16 @@ #ifndef GCC_AARCH_COST_TABLES_H #define GCC_AARCH_COST_TABLES_H +const struct addr_mode_cost_table generic_addr_mode_costs = +{ + /* int */ + { 0, 0, 0 }, + /* float */ + { 0, 0, 0 }, + /* vector */ + { 0, 0, 0 } +}; + const struct cpu_cost_table generic_extra_costs = { /* ALU */ @@ -122,7 +132,9 @@ const struct cpu_cost_table generic_extra_costs = /* Vector */ { COSTS_N_INSNS (1) /* alu. */ - } + }, + /* Addressing mode */ + &generic_addr_mode_costs }; const struct cpu_cost_table cortexa53_extra_costs = @@ -225,6 +237,30 @@ const struct cpu_cost_table cortexa53_extra_costs = /* Vector */ { COSTS_N_INSNS (1) /* alu. */ + }, + /* Addressing mode */ + &generic_addr_mode_costs +}; + +const struct addr_mode_cost_table addr_mode_costs_cortexa57 = +{ + /* int */ + { + 0, + 0, + COSTS_N_INSNS (1), + }, + /* float */ + { + 0, + 0, + COSTS_N_INSNS (1), + }, + /* vector */ + { + 0, + 0, + COSTS_N_INSNS (1), } }; @@ -328,7 +364,9 @@ const struct cpu_cost_table cortexa57_extra_costs = /* Vector */ { COSTS_N_INSNS (1) /* alu. */ - } + }, + /* Addressing mode */ + &addr_mode_costs_cortexa57 }; const struct cpu_cost_table exynosm1_extra_costs = @@ -431,7 +469,9 @@ const struct cpu_cost_table exynosm1_extra_costs = /* Vector */ { COSTS_N_INSNS (0) /* alu. */ - } + }, + /* Addressing mode */ + &generic_addr_mode_costs }; const struct cpu_cost_table xgene1_extra_costs = @@ -534,7 +574,9 @@ const struct cpu_cost_table xgene1_extra_costs = /* Vector */ { COSTS_N_INSNS (2) /* alu. */ - } + }, + /* Addressing mode */ + &generic_addr_mode_costs }; const struct cpu_cost_table qdf24xx_extra_costs = @@ -637,7 +679,9 @@ const struct cpu_cost_table qdf24xx_extra_costs = /* Vector */ { COSTS_N_INSNS (1) /* alu. */ - } + }, + /* Addressing mode */ + &generic_addr_mode_costs }; #endif /* GCC_AARCH_COST_TABLES_H */ diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c index 7f002f1..fe4cd73 100644 --- a/gcc/config/arm/arm.c +++ b/gcc/config/arm/arm.c @@ -1099,7 +1099,9 @@ const struct cpu_cost_table cortexa9_extra_costs = /* Vector */ { COSTS_N_INSNS (1) /* alu. */ - } + }, + /* Addressing mode */ + &generic_addr_mode_costs }; const struct cpu_cost_table cortexa8_extra_costs = @@ -1202,7 +1204,9 @@ const struct cpu_cost_table cortexa8_extra_costs = /* Vector */ { COSTS_N_INSNS (1) /* alu. */ - } + }, + /* Addressing mode */ + &generic_addr_mode_costs }; const struct cpu_cost_table cortexa5_extra_costs = @@ -1306,7 +1310,9 @@ const struct cpu_cost_table cortexa5_extra_costs = /* Vector */ { COSTS_N_INSNS (1) /* alu. */ - } + }, + /* Addressing mode */ + &generic_addr_mode_costs }; @@ -1411,7 +1417,9 @@ const struct cpu_cost_table cortexa7_extra_costs = /* Vector */ { COSTS_N_INSNS (1) /* alu. */ - } + }, + /* Addressing mode */ + &generic_addr_mode_costs }; const struct cpu_cost_table cortexa12_extra_costs = @@ -1514,7 +1522,9 @@ const struct cpu_cost_table cortexa12_extra_costs = /* Vector */ { COSTS_N_INSNS (1) /* alu. */ - } + }, + /* Addressing mode */ + &generic_addr_mode_costs }; const struct cpu_cost_table cortexa15_extra_costs = @@ -1617,7 +1627,9 @@ const struct cpu_cost_table cortexa15_extra_costs = /* Vector */ { COSTS_N_INSNS (1) /* alu. */ - } + }, + /* Addressing mode */ + &generic_addr_mode_costs }; const struct cpu_cost_table v7m_extra_costs = @@ -1720,7 +1732,9 @@ const struct cpu_cost_table v7m_extra_costs = /* Vector */ { COSTS_N_INSNS (1) /* alu. */ - } + }, + /* Addressing mode */ + &generic_addr_mode_costs }; const struct tune_params arm_slowmul_tune = @@ -9093,7 +9107,33 @@ arm_mem_costs (rtx x, const struct cpu_cost_table *extra_cost, /* Calculate cost of the addressing mode. */ if (speed_p) { - /* TODO: Add table-driven costs for addressing modes. */ + arm_addr_mode_op op_type; + switch (GET_CODE(XEXP (x, 0))) + { + case REG: + default: + op_type = AMO_DEFAULT; + break; + case PLUS: + case MINUS: + op_type = AMO_NO_WB; + break; + case PRE_INC: + case PRE_DEC: + case POST_INC: + case POST_DEC: + case PRE_MODIFY: + case POST_MODIFY: + op_type = AMO_WB; + break; + } + if (VECTOR_MODE_P (mode)) { + *cost += extra_cost->aarch32_addr_mode->vector[op_type]; + } else if (FLOAT_MODE_P (mode)) { + *cost += extra_cost->aarch32_addr_mode->fp[op_type]; + } else { + *cost += extra_cost->aarch32_addr_mode->integer[op_type]; + } } /* cost of memory access */
From: Charles Baylis <charles.baylis@linaro.org> This patch adds support for modelling the varying costs of different addressing modes. The generic cost table treats all addressing modes as having equal cost. The cost table for Cortex-A57 is derived from http://infocenter.arm.com/help/topic/com.arm.doc.uan0015b/Cortex_A57_Software_Optimization_Guide_external.pdf and treats addressing modes with write-back as having a cost equal to one additional instruction. gcc/ChangeLog: <date> Charles Baylis <charles.baylis@linaro.org> * config/arm/aarch-common-protos.h (enum arm_addr_mode_op): New. (struct addr_mode_cost_table): New. (struct cpu_cost_table): Add pointer to an addr_mode_cost_table. * config/arm/aarch-cost-tables.h: (generic_addr_mode_costs): New. (generic_extra_costs) Initialise aarch32_addr_mode. (cortexa53_extra_costs) Likewise. (addr_mode_costs_cortexa57) New. (cortexa57_extra_costs) Initialise aarch32_addr_mode. (exynosm1_extra_costs) Likewise. (xgene1_extra_costs) Likewise. (qdf24xx_extra_costs) Likewise. * config/arm/arm.c (cortexa9_extra_costs) Initialise aarch32_addr_mode. (cortexa9_extra_costs) Likewise. (cortexa8_extra_costs) Likewise. (cortexa5_extra_costs) Likewise. (cortexa7_extra_costs) Likewise. (cortexa12_extra_costs) Likewise. (cortexv7m_extra_costs) Likewise. (arm_mem_costs): Use table lookup to calculate cost of addressing mode. Change-Id: If71bd7c4f4bb876c5ed82dc28791130efb8bf89e --- gcc/config/arm/aarch-common-protos.h | 16 +++++++++++ gcc/config/arm/aarch-cost-tables.h | 54 ++++++++++++++++++++++++++++++---- gcc/config/arm/arm.c | 56 ++++++++++++++++++++++++++++++------ 3 files changed, 113 insertions(+), 13 deletions(-) -- 2.7.4