diff mbox series

[1/3,ARM] Add bus_width_bits to tune_params

Message ID 1505205277-26276-2-git-send-email-charles.baylis@linaro.org
State New
Headers show
Series Addressing mode costs v3 | expand

Commit Message

Charles Baylis Sept. 12, 2017, 8:34 a.m. UTC
From: Charles Baylis <charles.baylis@linaro.org>


Add bus widths. These use the approximation that v7 and later cores have
64bit data bus width, and earlier cores have 32 bit bus width, with the
exception of v7m.

<date>  Charles Baylis  <charles.baylis@linaro.org>

	* config/arm/arm-protos.h (struct tune_params): New field
	bus_width.
	* config/arm/arm.c (arm_slowmul_tune): Initialise bus_width field.
	(arm_fastmul_tune): Likewise.
	(arm_strongarm_tune): Likewise.
	(arm_xscale_tune): Likewise.
	(arm_9e_tune): Likewise.
	(arm_marvell_pj4_tune): Likewise.
	(arm_v6t2_tune): Likewise.
	(arm_cortex_tune): Likewise.
	(arm_cortex_a8_tune): Likewise.
	(arm_cortex_a7_tune): Likewise.
	(arm_cortex_a15_tune): Likewise.
	(arm_cortex_a35_tune): Likewise.
	(arm_cortex_a53_tune): Likewise.
	(arm_cortex_a57_tune): Likewise.
	(arm_exynosm1_tune): Likewise.
	(arm_xgene1_tune): Likewise.
	(arm_cortex_a5_tune): Likewise.
	(arm_cortex_a9_tune): Likewise.
	(arm_cortex_a12_tune): Likewise.
	(arm_cortex_a73_tune): Likewise.
	(arm_v7m_tune): Likewise.
	(arm_cortex_m7_tune): Likewise.
	(arm_v6m_tune): Likewise.
	(arm_fa726te_tune): Likewise.

Change-Id: I613e876db93ffd6f8c1e72ba483be2efc0b56d66
---
 gcc/config/arm/arm-protos.h |  2 ++
 gcc/config/arm/arm.c        | 24 ++++++++++++++++++++++++
 2 files changed, 26 insertions(+)

-- 
2.7.4

Comments

Kyrill Tkachov Sept. 13, 2017, 9:02 a.m. UTC | #1
Hi Charles,

On 12/09/17 09:34, charles.baylis@linaro.org wrote:
> From: Charles Baylis <charles.baylis@linaro.org>

>

> Add bus widths. These use the approximation that v7 and later cores have

> 64bit data bus width, and earlier cores have 32 bit bus width, with the

> exception of v7m.

>


Given the way this field is used in patch 2 does it affect the 
addressing mode generation
in the tests you added depending on the -mtune option given?
If so, we'll get testsuite failures when people test with particular 
default CPU configurations.

Could you expand on the benefits we get from this extra bus_width 
information?
I get that we increase the cost of memory accesses if the size of the 
mode we load is larger than the
bus width, but it's not as if there is ever an alternative in this 
regard, such as loading less memory,
so what pass can make different decisions thanks to this field?

Thanks,
Kyrill

> <date>  Charles Baylis <charles.baylis@linaro.org>

>

>         * config/arm/arm-protos.h (struct tune_params): New field

>         bus_width.

>         * config/arm/arm.c (arm_slowmul_tune): Initialise bus_width field.

>         (arm_fastmul_tune): Likewise.

>         (arm_strongarm_tune): Likewise.

>         (arm_xscale_tune): Likewise.

>         (arm_9e_tune): Likewise.

>         (arm_marvell_pj4_tune): Likewise.

>         (arm_v6t2_tune): Likewise.

>         (arm_cortex_tune): Likewise.

>         (arm_cortex_a8_tune): Likewise.

>         (arm_cortex_a7_tune): Likewise.

>         (arm_cortex_a15_tune): Likewise.

>         (arm_cortex_a35_tune): Likewise.

>         (arm_cortex_a53_tune): Likewise.

>         (arm_cortex_a57_tune): Likewise.

>         (arm_exynosm1_tune): Likewise.

>         (arm_xgene1_tune): Likewise.

>         (arm_cortex_a5_tune): Likewise.

>         (arm_cortex_a9_tune): Likewise.

>         (arm_cortex_a12_tune): Likewise.

>         (arm_cortex_a73_tune): Likewise.

>         (arm_v7m_tune): Likewise.

>         (arm_cortex_m7_tune): Likewise.

>         (arm_v6m_tune): Likewise.

>         (arm_fa726te_tune): Likewise.

>

> Change-Id: I613e876db93ffd6f8c1e72ba483be2efc0b56d66

> ---

>  gcc/config/arm/arm-protos.h |  2 ++

>  gcc/config/arm/arm.c        | 24 ++++++++++++++++++++++++

>  2 files changed, 26 insertions(+)

>

> diff --git a/gcc/config/arm/arm-protos.h b/gcc/config/arm/arm-protos.h

> index 4538078..47a85cc 100644

> --- a/gcc/config/arm/arm-protos.h

> +++ b/gcc/config/arm/arm-protos.h

> @@ -278,6 +278,8 @@ struct tune_params

>    int max_insns_inline_memset;

>    /* Issue rate of the processor.  */

>    unsigned int issue_rate;

> +  /* Bus width (bits).  */

> +  unsigned int bus_width;

>    /* Explicit prefetch data.  */

>    struct

>      {

> diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c

> index bca8a34..32001e5 100644

> --- a/gcc/config/arm/arm.c

> +++ b/gcc/config/arm/arm.c

> @@ -1761,6 +1761,7 @@ const struct tune_params arm_slowmul_tune =

>    5,                                           /* Max cond insns.  */

>    8,                                           /* Memset max inline.  */

>    1,                                           /* Issue rate.  */

> +  32,                                          /* Bus width.  */

>    ARM_PREFETCH_NOT_BENEFICIAL,

>    tune_params::PREF_CONST_POOL_TRUE,

>    tune_params::PREF_LDRD_FALSE,

> @@ -1783,6 +1784,7 @@ const struct tune_params arm_fastmul_tune =

>    5,                                           /* Max cond insns.  */

>    8,                                           /* Memset max inline.  */

>    1,                                           /* Issue rate.  */

> +  32,                                          /* Bus width.  */

>    ARM_PREFETCH_NOT_BENEFICIAL,

>    tune_params::PREF_CONST_POOL_TRUE,

>    tune_params::PREF_LDRD_FALSE,

> @@ -1808,6 +1810,7 @@ const struct tune_params arm_strongarm_tune =

>    3,                                           /* Max cond insns.  */

>    8,                                           /* Memset max inline.  */

>    1,                                           /* Issue rate.  */

> +  32,                                          /* Bus width.  */

>    ARM_PREFETCH_NOT_BENEFICIAL,

>    tune_params::PREF_CONST_POOL_TRUE,

>    tune_params::PREF_LDRD_FALSE,

> @@ -1830,6 +1833,7 @@ const struct tune_params arm_xscale_tune =

>    3,                                           /* Max cond insns.  */

>    8,                                           /* Memset max inline.  */

>    1,                                           /* Issue rate.  */

> +  32,                                          /* Bus width.  */

>    ARM_PREFETCH_NOT_BENEFICIAL,

>    tune_params::PREF_CONST_POOL_TRUE,

>    tune_params::PREF_LDRD_FALSE,

> @@ -1852,6 +1856,7 @@ const struct tune_params arm_9e_tune =

>    5,                                           /* Max cond insns.  */

>    8,                                           /* Memset max inline.  */

>    1,                                           /* Issue rate.  */

> +  32,                                          /* Bus width.  */

>    ARM_PREFETCH_NOT_BENEFICIAL,

>    tune_params::PREF_CONST_POOL_TRUE,

>    tune_params::PREF_LDRD_FALSE,

> @@ -1874,6 +1879,7 @@ const struct tune_params arm_marvell_pj4_tune =

>    5,                                           /* Max cond insns.  */

>    8,                                           /* Memset max inline.  */

>    2,                                           /* Issue rate.  */

> +  32,                                          /* Bus width.  */

>    ARM_PREFETCH_NOT_BENEFICIAL,

>    tune_params::PREF_CONST_POOL_TRUE,

>    tune_params::PREF_LDRD_FALSE,

> @@ -1896,6 +1902,7 @@ const struct tune_params arm_v6t2_tune =

>    5,                                           /* Max cond insns.  */

>    8,                                           /* Memset max inline.  */

>    1,                                           /* Issue rate.  */

> +  32,                                          /* Bus width.  */

>    ARM_PREFETCH_NOT_BENEFICIAL,

>    tune_params::PREF_CONST_POOL_FALSE,

>    tune_params::PREF_LDRD_FALSE,

> @@ -1920,6 +1927,7 @@ const struct tune_params arm_cortex_tune =

>    5,                                           /* Max cond insns.  */

>    8,                                           /* Memset max inline.  */

>    2,                                           /* Issue rate.  */

> +  64,                                          /* Bus width.  */

>    ARM_PREFETCH_NOT_BENEFICIAL,

>    tune_params::PREF_CONST_POOL_FALSE,

>    tune_params::PREF_LDRD_FALSE,

> @@ -1942,6 +1950,7 @@ const struct tune_params arm_cortex_a8_tune =

>    5,                                           /* Max cond insns.  */

>    8,                                           /* Memset max inline.  */

>    2,                                           /* Issue rate.  */

> +  64,                                          /* Bus width.  */

>    ARM_PREFETCH_NOT_BENEFICIAL,

>    tune_params::PREF_CONST_POOL_FALSE,

>    tune_params::PREF_LDRD_FALSE,

> @@ -1964,6 +1973,7 @@ const struct tune_params arm_cortex_a7_tune =

>    5,                                           /* Max cond insns.  */

>    8,                                           /* Memset max inline.  */

>    2,                                           /* Issue rate.  */

> +  64,                                          /* Bus width.  */

>    ARM_PREFETCH_NOT_BENEFICIAL,

>    tune_params::PREF_CONST_POOL_FALSE,

>    tune_params::PREF_LDRD_FALSE,

> @@ -1986,6 +1996,7 @@ const struct tune_params arm_cortex_a15_tune =

>    2,                                           /* Max cond insns.  */

>    8,                                           /* Memset max inline.  */

>    3,                                           /* Issue rate.  */

> +  64,                                          /* Bus width.  */

>    ARM_PREFETCH_NOT_BENEFICIAL,

>    tune_params::PREF_CONST_POOL_FALSE,

>    tune_params::PREF_LDRD_TRUE,

> @@ -2008,6 +2019,7 @@ const struct tune_params arm_cortex_a35_tune =

>    5,                                           /* Max cond insns.  */

>    8,                                           /* Memset max inline.  */

>    1,                                           /* Issue rate.  */

> +  64,                                          /* Bus width.  */

>    ARM_PREFETCH_NOT_BENEFICIAL,

>    tune_params::PREF_CONST_POOL_FALSE,

>    tune_params::PREF_LDRD_FALSE,

> @@ -2030,6 +2042,7 @@ const struct tune_params arm_cortex_a53_tune =

>    5,                                           /* Max cond insns.  */

>    8,                                           /* Memset max inline.  */

>    2,                                           /* Issue rate.  */

> +  64,                                          /* Bus width.  */

>    ARM_PREFETCH_NOT_BENEFICIAL,

>    tune_params::PREF_CONST_POOL_FALSE,

>    tune_params::PREF_LDRD_FALSE,

> @@ -2052,6 +2065,7 @@ const struct tune_params arm_cortex_a57_tune =

>    2,                                           /* Max cond insns.  */

>    8,                                           /* Memset max inline.  */

>    3,                                           /* Issue rate.  */

> +  64,                                          /* Bus width.  */

>    ARM_PREFETCH_NOT_BENEFICIAL,

>    tune_params::PREF_CONST_POOL_FALSE,

>    tune_params::PREF_LDRD_TRUE,

> @@ -2074,6 +2088,7 @@ const struct tune_params arm_exynosm1_tune =

>    2,                                           /* Max cond insns.  */

>    8,                                           /* Memset max inline.  */

>    3,                                           /* Issue rate.  */

> +  64,                                          /* Bus width.  */

>    ARM_PREFETCH_NOT_BENEFICIAL,

>    tune_params::PREF_CONST_POOL_FALSE,

>    tune_params::PREF_LDRD_TRUE,

> @@ -2096,6 +2111,7 @@ const struct tune_params arm_xgene1_tune =

>    2,                                           /* Max cond insns.  */

>    32,                                          /* Memset max inline.  */

>    4,                                           /* Issue rate.  */

> +  64,                                          /* Bus width.  */

>    ARM_PREFETCH_NOT_BENEFICIAL,

>    tune_params::PREF_CONST_POOL_FALSE,

>    tune_params::PREF_LDRD_TRUE,

> @@ -2121,6 +2137,7 @@ const struct tune_params arm_cortex_a5_tune =

>    1,                                           /* Max cond insns.  */

>    8,                                           /* Memset max inline.  */

>    2,                                           /* Issue rate.  */

> +  64,                                          /* Bus width.  */

>    ARM_PREFETCH_NOT_BENEFICIAL,

>    tune_params::PREF_CONST_POOL_FALSE,

>    tune_params::PREF_LDRD_FALSE,

> @@ -2143,6 +2160,7 @@ const struct tune_params arm_cortex_a9_tune =

>    5,                                           /* Max cond insns.  */

>    8,                                           /* Memset max inline.  */

>    2,                                           /* Issue rate.  */

> +  64,                                          /* Bus width.  */

>    ARM_PREFETCH_BENEFICIAL(4,32,32),

>    tune_params::PREF_CONST_POOL_FALSE,

>    tune_params::PREF_LDRD_FALSE,

> @@ -2165,6 +2183,7 @@ const struct tune_params arm_cortex_a12_tune =

>    2,                                           /* Max cond insns.  */

>    8,                                           /* Memset max inline.  */

>    2,                                           /* Issue rate.  */

> +  64,                                          /* Bus width.  */

>    ARM_PREFETCH_NOT_BENEFICIAL,

>    tune_params::PREF_CONST_POOL_FALSE,

>    tune_params::PREF_LDRD_TRUE,

> @@ -2187,6 +2206,7 @@ const struct tune_params arm_cortex_a73_tune =

>    2,                                           /* Max cond insns.  */

>    8,                                           /* Memset max inline.  */

>    2,                                           /* Issue rate.  */

> +  64,                                          /* Bus width.  */

>    ARM_PREFETCH_NOT_BENEFICIAL,

>    tune_params::PREF_CONST_POOL_FALSE,

>    tune_params::PREF_LDRD_TRUE,

> @@ -2216,6 +2236,7 @@ const struct tune_params arm_v7m_tune =

>    2,                                           /* Max cond insns.  */

>    8,                                           /* Memset max inline.  */

>    1,                                           /* Issue rate.  */

> +  32,                                          /* Bus width.  */

>    ARM_PREFETCH_NOT_BENEFICIAL,

>    tune_params::PREF_CONST_POOL_TRUE,

>    tune_params::PREF_LDRD_FALSE,

> @@ -2240,6 +2261,7 @@ const struct tune_params arm_cortex_m7_tune =

>    1,                                           /* Max cond insns.  */

>    8,                                           /* Memset max inline.  */

>    2,                                           /* Issue rate.  */

> +  64,                                          /* Bus width.  */

>    ARM_PREFETCH_NOT_BENEFICIAL,

>    tune_params::PREF_CONST_POOL_TRUE,

>    tune_params::PREF_LDRD_FALSE,

> @@ -2265,6 +2287,7 @@ const struct tune_params arm_v6m_tune =

>    5,                                           /* Max cond insns.  */

>    8,                                           /* Memset max inline.  */

>    1,                                           /* Issue rate.  */

> +  32,                                          /* Bus width.  */

>    ARM_PREFETCH_NOT_BENEFICIAL,

>    tune_params::PREF_CONST_POOL_FALSE,

>    tune_params::PREF_LDRD_FALSE,

> @@ -2287,6 +2310,7 @@ const struct tune_params arm_fa726te_tune =

>    5,                                           /* Max cond insns.  */

>    8,                                           /* Memset max inline.  */

>    2,                                           /* Issue rate.  */

> +  32,                                          /* Bus width.  */

>    ARM_PREFETCH_NOT_BENEFICIAL,

>    tune_params::PREF_CONST_POOL_TRUE,

>    tune_params::PREF_LDRD_FALSE,

> -- 

> 2.7.4

>
Charles Baylis Sept. 15, 2017, 3:38 p.m. UTC | #2
On 13 September 2017 at 10:02, Kyrill  Tkachov
<kyrylo.tkachov@foss.arm.com> wrote:
> Hi Charles,

>

> On 12/09/17 09:34, charles.baylis@linaro.org wrote:

>>

>> From: Charles Baylis <charles.baylis@linaro.org>

>>

>> Add bus widths. These use the approximation that v7 and later cores have

>> 64bit data bus width, and earlier cores have 32 bit bus width, with the

>> exception of v7m.

>>

>

> Given the way this field is used in patch 2 does it affect the addressing

> mode generation

> in the tests you added depending on the -mtune option given?

> If so, we'll get testsuite failures when people test with particular default

> CPU configurations.


No, because the auto_inc_dec phase compares the cost of two different
MEMs which differ only by addressing mode. The part of the calculation
which depends on the bus_width is the same both times, so it is
cancelled out.

> Could you expand on the benefits we get from this extra bus_width

> information?

> I get that we increase the cost of memory accesses if the size of the mode

> we load is larger than the

> bus width, but it's not as if there is ever an alternative in this regard,

> such as loading less memory,

> so what pass can make different decisions thanks to this field?


As far as this patch series is concerned, it doesn't matter. It is
there to encapsulate the notion that a larger transfer results in
rtx_costs() returning a larger cost, but I don't know of any part of
the compiler which is sensitive to that difference. It's done this way
because Ramana and Richard wanted it done that way
(https://gcc.gnu.org/ml/gcc-patches/2017-06/msg00652.html).
From b7bec2e4f7ca0335e0e5bd84c297215a3a7fb8c7 Mon Sep 17 00:00:00 2001
From: Charles Baylis <charles.baylis@linaro.org>
Date: Fri, 8 Sep 2017 12:53:50 +0100
Subject: [PATCH 1/3] [ARM] Add bus_width_bits to tune_params

Add bus widths. These use the approximation that v7 and later cores have
64bit data bus width, and earlier cores have 32 bit bus width, with the
exception of v7m.

<date>  Charles Baylis  <charles.baylis@linaro.org>

	* config/arm/arm-protos.h (struct tune_params): New field
	bus_width.
	* config/arm/arm.c (arm_slowmul_tune): Initialise bus_width field.
	(arm_fastmul_tune): Likewise.
	(arm_strongarm_tune): Likewise.
	(arm_xscale_tune): Likewise.
	(arm_9e_tune): Likewise.
	(arm_marvell_pj4_tune): Likewise.
	(arm_v6t2_tune): Likewise.
	(arm_cortex_tune): Likewise.
	(arm_cortex_a8_tune): Likewise.
	(arm_cortex_a7_tune): Likewise.
	(arm_cortex_a15_tune): Likewise.
	(arm_cortex_a35_tune): Likewise.
	(arm_cortex_a53_tune): Likewise.
	(arm_cortex_a57_tune): Likewise.
	(arm_exynosm1_tune): Likewise.
	(arm_xgene1_tune): Likewise.
	(arm_cortex_a5_tune): Likewise.
	(arm_cortex_a9_tune): Likewise.
	(arm_cortex_a12_tune): Likewise.
	(arm_cortex_a73_tune): Likewise.
	(arm_v7m_tune): Likewise.
	(arm_cortex_m7_tune): Likewise.
	(arm_v6m_tune): Likewise.
	(arm_fa726te_tune): Likewise.

Change-Id: I613e876db93ffd6f8c1e72ba483be2efc0b56d66
---
 gcc/config/arm/arm-protos.h |  2 ++
 gcc/config/arm/arm.c        | 24 ++++++++++++++++++++++++
 2 files changed, 26 insertions(+)

diff --git a/gcc/config/arm/arm-protos.h b/gcc/config/arm/arm-protos.h
index 4538078..47a85cc 100644
--- a/gcc/config/arm/arm-protos.h
+++ b/gcc/config/arm/arm-protos.h
@@ -278,6 +278,8 @@ struct tune_params
   int max_insns_inline_memset;
   /* Issue rate of the processor.  */
   unsigned int issue_rate;
+  /* Bus width (bits).  */
+  unsigned int bus_width;
   /* Explicit prefetch data.  */
   struct
     {
diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index bca8a34..32001e5 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -1761,6 +1761,7 @@ const struct tune_params arm_slowmul_tune =
   5,						/* Max cond insns.  */
   8,						/* Memset max inline.  */
   1,						/* Issue rate.  */
+  32,						/* Bus width.  */
   ARM_PREFETCH_NOT_BENEFICIAL,
   tune_params::PREF_CONST_POOL_TRUE,
   tune_params::PREF_LDRD_FALSE,
@@ -1783,6 +1784,7 @@ const struct tune_params arm_fastmul_tune =
   5,						/* Max cond insns.  */
   8,						/* Memset max inline.  */
   1,						/* Issue rate.  */
+  32,						/* Bus width.  */
   ARM_PREFETCH_NOT_BENEFICIAL,
   tune_params::PREF_CONST_POOL_TRUE,
   tune_params::PREF_LDRD_FALSE,
@@ -1808,6 +1810,7 @@ const struct tune_params arm_strongarm_tune =
   3,						/* Max cond insns.  */
   8,						/* Memset max inline.  */
   1,						/* Issue rate.  */
+  32,						/* Bus width.  */
   ARM_PREFETCH_NOT_BENEFICIAL,
   tune_params::PREF_CONST_POOL_TRUE,
   tune_params::PREF_LDRD_FALSE,
@@ -1830,6 +1833,7 @@ const struct tune_params arm_xscale_tune =
   3,						/* Max cond insns.  */
   8,						/* Memset max inline.  */
   1,						/* Issue rate.  */
+  32,						/* Bus width.  */
   ARM_PREFETCH_NOT_BENEFICIAL,
   tune_params::PREF_CONST_POOL_TRUE,
   tune_params::PREF_LDRD_FALSE,
@@ -1852,6 +1856,7 @@ const struct tune_params arm_9e_tune =
   5,						/* Max cond insns.  */
   8,						/* Memset max inline.  */
   1,						/* Issue rate.  */
+  32,						/* Bus width.  */
   ARM_PREFETCH_NOT_BENEFICIAL,
   tune_params::PREF_CONST_POOL_TRUE,
   tune_params::PREF_LDRD_FALSE,
@@ -1874,6 +1879,7 @@ const struct tune_params arm_marvell_pj4_tune =
   5,						/* Max cond insns.  */
   8,						/* Memset max inline.  */
   2,						/* Issue rate.  */
+  32,						/* Bus width.  */
   ARM_PREFETCH_NOT_BENEFICIAL,
   tune_params::PREF_CONST_POOL_TRUE,
   tune_params::PREF_LDRD_FALSE,
@@ -1896,6 +1902,7 @@ const struct tune_params arm_v6t2_tune =
   5,						/* Max cond insns.  */
   8,						/* Memset max inline.  */
   1,						/* Issue rate.  */
+  32,						/* Bus width.  */
   ARM_PREFETCH_NOT_BENEFICIAL,
   tune_params::PREF_CONST_POOL_FALSE,
   tune_params::PREF_LDRD_FALSE,
@@ -1920,6 +1927,7 @@ const struct tune_params arm_cortex_tune =
   5,						/* Max cond insns.  */
   8,						/* Memset max inline.  */
   2,						/* Issue rate.  */
+  64,						/* Bus width.  */
   ARM_PREFETCH_NOT_BENEFICIAL,
   tune_params::PREF_CONST_POOL_FALSE,
   tune_params::PREF_LDRD_FALSE,
@@ -1942,6 +1950,7 @@ const struct tune_params arm_cortex_a8_tune =
   5,						/* Max cond insns.  */
   8,						/* Memset max inline.  */
   2,						/* Issue rate.  */
+  64,						/* Bus width.  */
   ARM_PREFETCH_NOT_BENEFICIAL,
   tune_params::PREF_CONST_POOL_FALSE,
   tune_params::PREF_LDRD_FALSE,
@@ -1964,6 +1973,7 @@ const struct tune_params arm_cortex_a7_tune =
   5,						/* Max cond insns.  */
   8,						/* Memset max inline.  */
   2,						/* Issue rate.  */
+  64,						/* Bus width.  */
   ARM_PREFETCH_NOT_BENEFICIAL,
   tune_params::PREF_CONST_POOL_FALSE,
   tune_params::PREF_LDRD_FALSE,
@@ -1986,6 +1996,7 @@ const struct tune_params arm_cortex_a15_tune =
   2,						/* Max cond insns.  */
   8,						/* Memset max inline.  */
   3,						/* Issue rate.  */
+  64,						/* Bus width.  */
   ARM_PREFETCH_NOT_BENEFICIAL,
   tune_params::PREF_CONST_POOL_FALSE,
   tune_params::PREF_LDRD_TRUE,
@@ -2008,6 +2019,7 @@ const struct tune_params arm_cortex_a35_tune =
   5,						/* Max cond insns.  */
   8,						/* Memset max inline.  */
   1,						/* Issue rate.  */
+  64,						/* Bus width.  */
   ARM_PREFETCH_NOT_BENEFICIAL,
   tune_params::PREF_CONST_POOL_FALSE,
   tune_params::PREF_LDRD_FALSE,
@@ -2030,6 +2042,7 @@ const struct tune_params arm_cortex_a53_tune =
   5,						/* Max cond insns.  */
   8,						/* Memset max inline.  */
   2,						/* Issue rate.  */
+  64,						/* Bus width.  */
   ARM_PREFETCH_NOT_BENEFICIAL,
   tune_params::PREF_CONST_POOL_FALSE,
   tune_params::PREF_LDRD_FALSE,
@@ -2052,6 +2065,7 @@ const struct tune_params arm_cortex_a57_tune =
   2,						/* Max cond insns.  */
   8,						/* Memset max inline.  */
   3,						/* Issue rate.  */
+  64,						/* Bus width.  */
   ARM_PREFETCH_NOT_BENEFICIAL,
   tune_params::PREF_CONST_POOL_FALSE,
   tune_params::PREF_LDRD_TRUE,
@@ -2074,6 +2088,7 @@ const struct tune_params arm_exynosm1_tune =
   2,						/* Max cond insns.  */
   8,						/* Memset max inline.  */
   3,						/* Issue rate.  */
+  64,						/* Bus width.  */
   ARM_PREFETCH_NOT_BENEFICIAL,
   tune_params::PREF_CONST_POOL_FALSE,
   tune_params::PREF_LDRD_TRUE,
@@ -2096,6 +2111,7 @@ const struct tune_params arm_xgene1_tune =
   2,						/* Max cond insns.  */
   32,						/* Memset max inline.  */
   4,						/* Issue rate.  */
+  64,						/* Bus width.  */
   ARM_PREFETCH_NOT_BENEFICIAL,
   tune_params::PREF_CONST_POOL_FALSE,
   tune_params::PREF_LDRD_TRUE,
@@ -2121,6 +2137,7 @@ const struct tune_params arm_cortex_a5_tune =
   1,						/* Max cond insns.  */
   8,						/* Memset max inline.  */
   2,						/* Issue rate.  */
+  64,						/* Bus width.  */
   ARM_PREFETCH_NOT_BENEFICIAL,
   tune_params::PREF_CONST_POOL_FALSE,
   tune_params::PREF_LDRD_FALSE,
@@ -2143,6 +2160,7 @@ const struct tune_params arm_cortex_a9_tune =
   5,						/* Max cond insns.  */
   8,						/* Memset max inline.  */
   2,						/* Issue rate.  */
+  64,						/* Bus width.  */
   ARM_PREFETCH_BENEFICIAL(4,32,32),
   tune_params::PREF_CONST_POOL_FALSE,
   tune_params::PREF_LDRD_FALSE,
@@ -2165,6 +2183,7 @@ const struct tune_params arm_cortex_a12_tune =
   2,						/* Max cond insns.  */
   8,						/* Memset max inline.  */
   2,						/* Issue rate.  */
+  64,						/* Bus width.  */
   ARM_PREFETCH_NOT_BENEFICIAL,
   tune_params::PREF_CONST_POOL_FALSE,
   tune_params::PREF_LDRD_TRUE,
@@ -2187,6 +2206,7 @@ const struct tune_params arm_cortex_a73_tune =
   2,						/* Max cond insns.  */
   8,						/* Memset max inline.  */
   2,						/* Issue rate.  */
+  64,						/* Bus width.  */
   ARM_PREFETCH_NOT_BENEFICIAL,
   tune_params::PREF_CONST_POOL_FALSE,
   tune_params::PREF_LDRD_TRUE,
@@ -2216,6 +2236,7 @@ const struct tune_params arm_v7m_tune =
   2,						/* Max cond insns.  */
   8,						/* Memset max inline.  */
   1,						/* Issue rate.  */
+  32,						/* Bus width.  */
   ARM_PREFETCH_NOT_BENEFICIAL,
   tune_params::PREF_CONST_POOL_TRUE,
   tune_params::PREF_LDRD_FALSE,
@@ -2240,6 +2261,7 @@ const struct tune_params arm_cortex_m7_tune =
   1,						/* Max cond insns.  */
   8,						/* Memset max inline.  */
   2,						/* Issue rate.  */
+  64,						/* Bus width.  */
   ARM_PREFETCH_NOT_BENEFICIAL,
   tune_params::PREF_CONST_POOL_TRUE,
   tune_params::PREF_LDRD_FALSE,
@@ -2265,6 +2287,7 @@ const struct tune_params arm_v6m_tune =
   5,						/* Max cond insns.  */
   8,						/* Memset max inline.  */
   1,						/* Issue rate.  */
+  32,						/* Bus width.  */
   ARM_PREFETCH_NOT_BENEFICIAL,
   tune_params::PREF_CONST_POOL_FALSE,
   tune_params::PREF_LDRD_FALSE,
@@ -2287,6 +2310,7 @@ const struct tune_params arm_fa726te_tune =
   5,						/* Max cond insns.  */
   8,						/* Memset max inline.  */
   2,						/* Issue rate.  */
+  32,						/* Bus width.  */
   ARM_PREFETCH_NOT_BENEFICIAL,
   tune_params::PREF_CONST_POOL_TRUE,
   tune_params::PREF_LDRD_FALSE,
Kyrill Tkachov Sept. 15, 2017, 5:01 p.m. UTC | #3
On 15/09/17 16:38, Charles Baylis wrote:
> On 13 September 2017 at 10:02, Kyrill  Tkachov

> <kyrylo.tkachov@foss.arm.com> wrote:

>> Hi Charles,

>>

>> On 12/09/17 09:34, charles.baylis@linaro.org wrote:

>>> From: Charles Baylis <charles.baylis@linaro.org>

>>>

>>> Add bus widths. These use the approximation that v7 and later cores have

>>> 64bit data bus width, and earlier cores have 32 bit bus width, with the

>>> exception of v7m.

>>>

>> Given the way this field is used in patch 2 does it affect the addressing

>> mode generation

>> in the tests you added depending on the -mtune option given?

>> If so, we'll get testsuite failures when people test with particular default

>> CPU configurations.

> No, because the auto_inc_dec phase compares the cost of two different

> MEMs which differ only by addressing mode. The part of the calculation

> which depends on the bus_width is the same both times, so it is

> cancelled out.

>

>> Could you expand on the benefits we get from this extra bus_width

>> information?

>> I get that we increase the cost of memory accesses if the size of the mode

>> we load is larger than the

>> bus width, but it's not as if there is ever an alternative in this regard,

>> such as loading less memory,

>> so what pass can make different decisions thanks to this field?

> As far as this patch series is concerned, it doesn't matter. It is

> there to encapsulate the notion that a larger transfer results in

> rtx_costs() returning a larger cost, but I don't know of any part of

> the compiler which is sensitive to that difference. It's done this way

> because Ramana and Richard wanted it done that way

> (https://gcc.gnu.org/ml/gcc-patches/2017-06/msg00652.html).


 From what I can tell Ramana and Richard preferred to encode this 
attribute as
a tuning struct property rather than an inline conditional based on 
arm_arch7.
I agree that if we want to use that information, it should be encoded 
this way.
What I'm not convinced about is whether we do want this parameter in the 
first place.

The cost tables already encode information about the costs of different 
sized loads/stores.
In patch 2, for example, you add the cost for extra_cost->ldst.load 
which is nominally just
the cost of a normal 32-bit ldr. But we also have costs for ldst.ldrd 
which is the 64-bit two-register load
which should reflect any extra cost due to a narrower bus in it. We also 
have costs for ldst.loadf (for 32-bit
VFP loads) and ldst.loadd (for 64-bit VFP D-register loads). So I think 
we should use those cost fields
depending on the mode class and size instead of using ldst.load 
unconditionally and adding a new bus_size parameter.

So I think the way forward is to drop this patch and modify patch 2/3 to 
use the extra_cost->ldst fields as described above.

Sorry for the back-and-forth. I think this is the best approach because 
it uses the existing fields more naturally and
doesn't add new parameters that partly duplicate the information encoded 
in the existing fields.
Ramana, Richard: if you prefer the bus_width approach I won't block it, 
but could you clarify your preference?
If we do end up adding the bus_width parameter then this patch and patch 
2/3 look ok.
Thanks,
Kyrill

P.S. I'm going on a 4-week holiday from today, so I won't be able to do 
any further review in that timeframe.
As I said, if we go with the bus_size approach then these patches are 
ok. If we go with my suggestion, this would
be dropped and patch 2 would be extended to select the appropriate 
extra_cost->ldst field depending on mode.
Charles Baylis Nov. 20, 2017, 9:10 p.m. UTC | #4
On 15 September 2017 at 18:01, Kyrill  Tkachov
<kyrylo.tkachov@foss.arm.com> wrote:

> From what I can tell Ramana and Richard preferred to encode this attribute

> as

> a tuning struct property rather than an inline conditional based on

> arm_arch7.

> I agree that if we want to use that information, it should be encoded this

> way.

> What I'm not convinced about is whether we do want this parameter in the

> first place.

>

> The cost tables already encode information about the costs of different

> sized loads/stores.

> In patch 2, for example, you add the cost for extra_cost->ldst.load which is

> nominally just

> the cost of a normal 32-bit ldr. But we also have costs for ldst.ldrd which

> is the 64-bit two-register load

> which should reflect any extra cost due to a narrower bus in it. We also

> have costs for ldst.loadf (for 32-bit

> VFP loads) and ldst.loadd (for 64-bit VFP D-register loads). So I think we

> should use those cost fields

> depending on the mode class and size instead of using ldst.load

> unconditionally and adding a new bus_size parameter.

>

> So I think the way forward is to drop this patch and modify patch 2/3 to use

> the extra_cost->ldst fields as described above.

>

> Sorry for the back-and-forth. I think this is the best approach because it

> uses the existing fields more naturally and

> doesn't add new parameters that partly duplicate the information encoded in

> the existing fields.

> Ramana, Richard: if you prefer the bus_width approach I won't block it, but

> could you clarify your preference?

> If we do end up adding the bus_width parameter then this patch and patch 2/3

> look ok.

> Thanks,

> Kyrill

>

> P.S. I'm going on a 4-week holiday from today, so I won't be able to do any

> further review in that timeframe.

> As I said, if we go with the bus_size approach then these patches are ok. If

> we go with my suggestion, this would

> be dropped and patch 2 would be extended to select the appropriate

> extra_cost->ldst field depending on mode.


OK, I agree with dropping this patch. I have posted an updated patch 2
which does not require it.
diff mbox series

Patch

diff --git a/gcc/config/arm/arm-protos.h b/gcc/config/arm/arm-protos.h
index 4538078..47a85cc 100644
--- a/gcc/config/arm/arm-protos.h
+++ b/gcc/config/arm/arm-protos.h
@@ -278,6 +278,8 @@  struct tune_params
   int max_insns_inline_memset;
   /* Issue rate of the processor.  */
   unsigned int issue_rate;
+  /* Bus width (bits).  */
+  unsigned int bus_width;
   /* Explicit prefetch data.  */
   struct
     {
diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index bca8a34..32001e5 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -1761,6 +1761,7 @@  const struct tune_params arm_slowmul_tune =
   5,						/* Max cond insns.  */
   8,						/* Memset max inline.  */
   1,						/* Issue rate.  */
+  32,						/* Bus width.  */
   ARM_PREFETCH_NOT_BENEFICIAL,
   tune_params::PREF_CONST_POOL_TRUE,
   tune_params::PREF_LDRD_FALSE,
@@ -1783,6 +1784,7 @@  const struct tune_params arm_fastmul_tune =
   5,						/* Max cond insns.  */
   8,						/* Memset max inline.  */
   1,						/* Issue rate.  */
+  32,						/* Bus width.  */
   ARM_PREFETCH_NOT_BENEFICIAL,
   tune_params::PREF_CONST_POOL_TRUE,
   tune_params::PREF_LDRD_FALSE,
@@ -1808,6 +1810,7 @@  const struct tune_params arm_strongarm_tune =
   3,						/* Max cond insns.  */
   8,						/* Memset max inline.  */
   1,						/* Issue rate.  */
+  32,						/* Bus width.  */
   ARM_PREFETCH_NOT_BENEFICIAL,
   tune_params::PREF_CONST_POOL_TRUE,
   tune_params::PREF_LDRD_FALSE,
@@ -1830,6 +1833,7 @@  const struct tune_params arm_xscale_tune =
   3,						/* Max cond insns.  */
   8,						/* Memset max inline.  */
   1,						/* Issue rate.  */
+  32,						/* Bus width.  */
   ARM_PREFETCH_NOT_BENEFICIAL,
   tune_params::PREF_CONST_POOL_TRUE,
   tune_params::PREF_LDRD_FALSE,
@@ -1852,6 +1856,7 @@  const struct tune_params arm_9e_tune =
   5,						/* Max cond insns.  */
   8,						/* Memset max inline.  */
   1,						/* Issue rate.  */
+  32,						/* Bus width.  */
   ARM_PREFETCH_NOT_BENEFICIAL,
   tune_params::PREF_CONST_POOL_TRUE,
   tune_params::PREF_LDRD_FALSE,
@@ -1874,6 +1879,7 @@  const struct tune_params arm_marvell_pj4_tune =
   5,						/* Max cond insns.  */
   8,						/* Memset max inline.  */
   2,						/* Issue rate.  */
+  32,						/* Bus width.  */
   ARM_PREFETCH_NOT_BENEFICIAL,
   tune_params::PREF_CONST_POOL_TRUE,
   tune_params::PREF_LDRD_FALSE,
@@ -1896,6 +1902,7 @@  const struct tune_params arm_v6t2_tune =
   5,						/* Max cond insns.  */
   8,						/* Memset max inline.  */
   1,						/* Issue rate.  */
+  32,						/* Bus width.  */
   ARM_PREFETCH_NOT_BENEFICIAL,
   tune_params::PREF_CONST_POOL_FALSE,
   tune_params::PREF_LDRD_FALSE,
@@ -1920,6 +1927,7 @@  const struct tune_params arm_cortex_tune =
   5,						/* Max cond insns.  */
   8,						/* Memset max inline.  */
   2,						/* Issue rate.  */
+  64,						/* Bus width.  */
   ARM_PREFETCH_NOT_BENEFICIAL,
   tune_params::PREF_CONST_POOL_FALSE,
   tune_params::PREF_LDRD_FALSE,
@@ -1942,6 +1950,7 @@  const struct tune_params arm_cortex_a8_tune =
   5,						/* Max cond insns.  */
   8,						/* Memset max inline.  */
   2,						/* Issue rate.  */
+  64,						/* Bus width.  */
   ARM_PREFETCH_NOT_BENEFICIAL,
   tune_params::PREF_CONST_POOL_FALSE,
   tune_params::PREF_LDRD_FALSE,
@@ -1964,6 +1973,7 @@  const struct tune_params arm_cortex_a7_tune =
   5,						/* Max cond insns.  */
   8,						/* Memset max inline.  */
   2,						/* Issue rate.  */
+  64,						/* Bus width.  */
   ARM_PREFETCH_NOT_BENEFICIAL,
   tune_params::PREF_CONST_POOL_FALSE,
   tune_params::PREF_LDRD_FALSE,
@@ -1986,6 +1996,7 @@  const struct tune_params arm_cortex_a15_tune =
   2,						/* Max cond insns.  */
   8,						/* Memset max inline.  */
   3,						/* Issue rate.  */
+  64,						/* Bus width.  */
   ARM_PREFETCH_NOT_BENEFICIAL,
   tune_params::PREF_CONST_POOL_FALSE,
   tune_params::PREF_LDRD_TRUE,
@@ -2008,6 +2019,7 @@  const struct tune_params arm_cortex_a35_tune =
   5,						/* Max cond insns.  */
   8,						/* Memset max inline.  */
   1,						/* Issue rate.  */
+  64,						/* Bus width.  */
   ARM_PREFETCH_NOT_BENEFICIAL,
   tune_params::PREF_CONST_POOL_FALSE,
   tune_params::PREF_LDRD_FALSE,
@@ -2030,6 +2042,7 @@  const struct tune_params arm_cortex_a53_tune =
   5,						/* Max cond insns.  */
   8,						/* Memset max inline.  */
   2,						/* Issue rate.  */
+  64,						/* Bus width.  */
   ARM_PREFETCH_NOT_BENEFICIAL,
   tune_params::PREF_CONST_POOL_FALSE,
   tune_params::PREF_LDRD_FALSE,
@@ -2052,6 +2065,7 @@  const struct tune_params arm_cortex_a57_tune =
   2,						/* Max cond insns.  */
   8,						/* Memset max inline.  */
   3,						/* Issue rate.  */
+  64,						/* Bus width.  */
   ARM_PREFETCH_NOT_BENEFICIAL,
   tune_params::PREF_CONST_POOL_FALSE,
   tune_params::PREF_LDRD_TRUE,
@@ -2074,6 +2088,7 @@  const struct tune_params arm_exynosm1_tune =
   2,						/* Max cond insns.  */
   8,						/* Memset max inline.  */
   3,						/* Issue rate.  */
+  64,						/* Bus width.  */
   ARM_PREFETCH_NOT_BENEFICIAL,
   tune_params::PREF_CONST_POOL_FALSE,
   tune_params::PREF_LDRD_TRUE,
@@ -2096,6 +2111,7 @@  const struct tune_params arm_xgene1_tune =
   2,						/* Max cond insns.  */
   32,						/* Memset max inline.  */
   4,						/* Issue rate.  */
+  64,						/* Bus width.  */
   ARM_PREFETCH_NOT_BENEFICIAL,
   tune_params::PREF_CONST_POOL_FALSE,
   tune_params::PREF_LDRD_TRUE,
@@ -2121,6 +2137,7 @@  const struct tune_params arm_cortex_a5_tune =
   1,						/* Max cond insns.  */
   8,						/* Memset max inline.  */
   2,						/* Issue rate.  */
+  64,						/* Bus width.  */
   ARM_PREFETCH_NOT_BENEFICIAL,
   tune_params::PREF_CONST_POOL_FALSE,
   tune_params::PREF_LDRD_FALSE,
@@ -2143,6 +2160,7 @@  const struct tune_params arm_cortex_a9_tune =
   5,						/* Max cond insns.  */
   8,						/* Memset max inline.  */
   2,						/* Issue rate.  */
+  64,						/* Bus width.  */
   ARM_PREFETCH_BENEFICIAL(4,32,32),
   tune_params::PREF_CONST_POOL_FALSE,
   tune_params::PREF_LDRD_FALSE,
@@ -2165,6 +2183,7 @@  const struct tune_params arm_cortex_a12_tune =
   2,						/* Max cond insns.  */
   8,						/* Memset max inline.  */
   2,						/* Issue rate.  */
+  64,						/* Bus width.  */
   ARM_PREFETCH_NOT_BENEFICIAL,
   tune_params::PREF_CONST_POOL_FALSE,
   tune_params::PREF_LDRD_TRUE,
@@ -2187,6 +2206,7 @@  const struct tune_params arm_cortex_a73_tune =
   2,						/* Max cond insns.  */
   8,						/* Memset max inline.  */
   2,						/* Issue rate.  */
+  64,						/* Bus width.  */
   ARM_PREFETCH_NOT_BENEFICIAL,
   tune_params::PREF_CONST_POOL_FALSE,
   tune_params::PREF_LDRD_TRUE,
@@ -2216,6 +2236,7 @@  const struct tune_params arm_v7m_tune =
   2,						/* Max cond insns.  */
   8,						/* Memset max inline.  */
   1,						/* Issue rate.  */
+  32,						/* Bus width.  */
   ARM_PREFETCH_NOT_BENEFICIAL,
   tune_params::PREF_CONST_POOL_TRUE,
   tune_params::PREF_LDRD_FALSE,
@@ -2240,6 +2261,7 @@  const struct tune_params arm_cortex_m7_tune =
   1,						/* Max cond insns.  */
   8,						/* Memset max inline.  */
   2,						/* Issue rate.  */
+  64,						/* Bus width.  */
   ARM_PREFETCH_NOT_BENEFICIAL,
   tune_params::PREF_CONST_POOL_TRUE,
   tune_params::PREF_LDRD_FALSE,
@@ -2265,6 +2287,7 @@  const struct tune_params arm_v6m_tune =
   5,						/* Max cond insns.  */
   8,						/* Memset max inline.  */
   1,						/* Issue rate.  */
+  32,						/* Bus width.  */
   ARM_PREFETCH_NOT_BENEFICIAL,
   tune_params::PREF_CONST_POOL_FALSE,
   tune_params::PREF_LDRD_FALSE,
@@ -2287,6 +2310,7 @@  const struct tune_params arm_fa726te_tune =
   5,						/* Max cond insns.  */
   8,						/* Memset max inline.  */
   2,						/* Issue rate.  */
+  32,						/* Bus width.  */
   ARM_PREFETCH_NOT_BENEFICIAL,
   tune_params::PREF_CONST_POOL_TRUE,
   tune_params::PREF_LDRD_FALSE,