[ARM] Generic tuning

Message ID 4E57C610.5030201@codesourcery.com
State New
Headers show

Commit Message

Andrew Stubbs Aug. 26, 2011, 4:13 p.m.
Hi all,

This patch is step 1 towards having generic (best-blend) tuning on ARM.

The patch adds an option '-mtune=generic-armv7-a' but does not actually 
do any tuning tweaks yet - those are for follow up patches.

x86 has simply '-mtune=generic', and from that (the documentation 
suggests) the compiler selects the most common architecture/cpu variants 
to tune for. I don't think that translates well to the ARM world, so I 
have chosen to make it generic within the architecture family.

My intention is to make this the default tuning whenever the use 
specifies '-march=armv7-a', but that will have to wait until it does 
something meaningful.




Joseph Myers Aug. 26, 2011, 4:18 p.m. | #1
Again, arm-tables.opt is generated - so the log entry should just be

	* config/arm/arm-tables.opt: Regenerate.

and the file should be what you get from regeneration.


2011-08-26  Andrew Stubbs  <ams@codesourcery.com>

	* config/arm/arm-cores.def (generic-armv7-a): New architecture.
	* config/arm/arm-tables.opt: Add generic-armv7-a tune/cpu type.
	* config/arm/arm-tune.md: Regenerate.
	* config/arm/arm.c (arm_file_start): Output .arch directive when
	user passes -mcpu=generic-*.
	(arm_issue_rate): Add genericv7a support.
	* config/arm/arm.h (EXTRA_SPECS): Add asm_cpu_spec.
	(ASM_CPU_SPEC): New define.
	* config/arm/elf.h (ASM_SPEC): Use %(asm_cpu_spec).
	* config/arm/semi.h (ASM_SPEC): Likewise.
	* doc/invoke.texi (ARM Options): Document -mcpu=generic-*
	and -mtune=generic-*.

--- a/gcc/config/arm/arm-cores.def
+++ b/gcc/config/arm/arm-cores.def
@@ -124,6 +124,7 @@  ARM_CORE("mpcorenovfp",	  mpcorenovfp,	6K,				 FL_LDSCHED, 9e)
 ARM_CORE("mpcore",	  mpcore,	6K,				 FL_LDSCHED | FL_VFPV2, 9e)
 ARM_CORE("arm1156t2-s",	  arm1156t2s,	6T2,				 FL_LDSCHED, v6t2)
 ARM_CORE("arm1156t2f-s",  arm1156t2fs,  6T2,				 FL_LDSCHED | FL_VFPV2, v6t2)
+ARM_CORE("generic-armv7-a", genericv7a,	7A,				 FL_LDSCHED, cortex)
 ARM_CORE("cortex-a5",	  cortexa5,	7A,				 FL_LDSCHED, cortex_a5)
 ARM_CORE("cortex-a8",	  cortexa8,	7A,				 FL_LDSCHED, cortex)
 ARM_CORE("cortex-a9",	  cortexa9,	7A,				 FL_LDSCHED, cortex_a9)
@@ -135,3 +136,4 @@  ARM_CORE("cortex-m4",	  cortexm4,	7EM,				 FL_LDSCHED, cortex)
 ARM_CORE("cortex-m3",	  cortexm3,	7M,				 FL_LDSCHED, cortex)
 ARM_CORE("cortex-m1",	  cortexm1,	6M,				 FL_LDSCHED, cortex)
 ARM_CORE("cortex-m0",	  cortexm0,	6M,				 FL_LDSCHED, cortex)
--- a/gcc/config/arm/arm-tables.opt
+++ b/gcc/config/arm/arm-tables.opt
@@ -235,6 +235,9 @@  EnumValue
 Enum(processor_type) String(arm1156t2f-s) Value(arm1156t2fs)
+Enum(processor_type) String(generic-armv7-a) Value(genericv7a)
 Enum(processor_type) String(cortex-a5) Value(cortexa5)
--- a/gcc/config/arm/arm-tune.md
+++ b/gcc/config/arm/arm-tune.md
@@ -1,5 +1,5 @@ 
 ;; -*- buffer-read-only: t -*-
 ;; Generated automatically by gentune.sh from arm-cores.def
 (define_attr "tune"
-	"arm2,arm250,arm3,arm6,arm60,arm600,arm610,arm620,arm7,arm7d,arm7di,arm70,arm700,arm700i,arm710,arm720,arm710c,arm7100,arm7500,arm7500fe,arm7m,arm7dm,arm7dmi,arm8,arm810,strongarm,strongarm110,strongarm1100,strongarm1110,fa526,fa626,arm7tdmi,arm7tdmis,arm710t,arm720t,arm740t,arm9,arm9tdmi,arm920,arm920t,arm922t,arm940t,ep9312,arm10tdmi,arm1020t,arm9e,arm946es,arm966es,arm968es,arm10e,arm1020e,arm1022e,xscale,iwmmxt,iwmmxt2,fa606te,fa626te,fmp626,fa726te,arm926ejs,arm1026ejs,arm1136js,arm1136jfs,arm1176jzs,arm1176jzfs,mpcorenovfp,mpcore,arm1156t2s,arm1156t2fs,cortexa5,cortexa8,cortexa9,cortexa15,cortexr4,cortexr4f,cortexr5,cortexm4,cortexm3,cortexm1,cortexm0"
+	"arm2,arm250,arm3,arm6,arm60,arm600,arm610,arm620,arm7,arm7d,arm7di,arm70,arm700,arm700i,arm710,arm720,arm710c,arm7100,arm7500,arm7500fe,arm7m,arm7dm,arm7dmi,arm8,arm810,strongarm,strongarm110,strongarm1100,strongarm1110,fa526,fa626,arm7tdmi,arm7tdmis,arm710t,arm720t,arm740t,arm9,arm9tdmi,arm920,arm920t,arm922t,arm940t,ep9312,arm10tdmi,arm1020t,arm9e,arm946es,arm966es,arm968es,arm10e,arm1020e,arm1022e,xscale,iwmmxt,iwmmxt2,fa606te,fa626te,fmp626,fa726te,arm926ejs,arm1026ejs,arm1136js,arm1136jfs,arm1176jzs,arm1176jzfs,mpcorenovfp,mpcore,arm1156t2s,arm1156t2fs,genericv7a,cortexa5,cortexa8,cortexa9,cortexa15,cortexr4,cortexr4f,cortexr5,cortexm4,cortexm3,cortexm1,cortexm0"
 	(const (symbol_ref "((enum attr_tune) arm_tune)")))
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -22195,6 +22195,8 @@  arm_file_start (void)
       const char *fpu_name;
       if (arm_selected_arch)
 	asm_fprintf (asm_out_file, "\t.arch %s\n", arm_selected_arch->name);
+      else if (strncmp (arm_selected_cpu->name, "generic", 7) == 0)
+	asm_fprintf (asm_out_file, "\t.arch %s\n", arm_selected_cpu->name + 8);
 	asm_fprintf (asm_out_file, "\t.cpu %s\n", arm_selected_cpu->name);
@@ -23719,6 +23721,7 @@  arm_issue_rate (void)
     case cortexr4:
     case cortexr4f:
     case cortexr5:
+    case genericv7a:
     case cortexa5:
     case cortexa8:
     case cortexa9:
--- a/gcc/config/arm/arm.h
+++ b/gcc/config/arm/arm.h
@@ -189,6 +189,7 @@  extern void (*arm_lang_output_object_attributes_hook)(void);
    Do not define this macro if it does not need to do anything.  */
 #define EXTRA_SPECS						\
   { "subtarget_cpp_spec",	SUBTARGET_CPP_SPEC },           \
+  { "asm_cpu_spec",		ASM_CPU_SPEC },			\
@@ -2240,4 +2241,8 @@  extern const char *host_detect_local_cpu (int argc, const char **argv);
+#define ASM_CPU_SPEC \
+   " %{mcpu=generic-*:-march=%*;"				\
+   "   :%{mcpu=*:-mcpu=%*} %{march=*:-march=%*}}"
 #endif /* ! GCC_ARM_H */
--- a/gcc/config/arm/elf.h
+++ b/gcc/config/arm/elf.h
@@ -56,8 +56,7 @@ 
 #define ASM_SPEC "\
 %{mbig-endian:-EB} \
 %{mlittle-endian:-EL} \
-%{mcpu=*:-mcpu=%*} \
-%{march=*:-march=%*} \
+%(asm_cpu_spec) \
 %{mapcs-*:-mapcs-%*} \
 %(subtarget_asm_float_spec) \
 %{mthumb-interwork:-mthumb-interwork} \
--- a/gcc/config/arm/semi.h
+++ b/gcc/config/arm/semi.h
@@ -61,8 +61,7 @@ 
 #define ASM_SPEC "\
 %{fpic|fpie: -k} %{fPIC|fPIE: -k} \
 %{mbig-endian:-EB} \
-%{mcpu=*:-mcpu=%*} \
-%{march=*:-march=%*} \
+%(arm_cpu_spec) \
 %{mapcs-float:-mfloat} \
 %{mfloat-abi=*} %{mfpu=*} \
 %{mthumb-interwork:-mthumb-interwork} \
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -10318,6 +10318,10 @@  assembly code.  Permissible names are: @samp{arm2}, @samp{arm250},
 @samp{fa526}, @samp{fa626},
 @samp{fa606te}, @samp{fa626te}, @samp{fmp626}, @samp{fa726te}.
+@option{-mcpu=generic-@var{arch}} is also permissible, and is
+equivalent to @option{-march=@var{arch} -mtune=generic-@var{arch}}.
+See @option{-mtune} for more information.
 @option{-mcpu=native} causes the compiler to auto-detect the CPU
 of the build computer.  At present, this feature is only supported on
 Linux, and not all architectures are recognised.  If the auto-detect is
@@ -10334,6 +10338,13 @@  will generate based on the CPU specified by a @option{-mcpu=} option.
 For some ARM implementations better performance can be obtained by using
 this option.
+@option{-mtune=generic-@var{arch}} specifies that GCC should tune the
+performance for a blend of processors within architecture @var{arch}.
+The aim is to generate code that run well on the current most popular
+processors, balancing between optimizations that benefit some CPUs in the
+range, and avoiding performance pitfalls of other CPUs.  The effects of
+this option may change in future GCC versions as CPU models come and go.
 @option{-mtune=native} causes the compiler to auto-detect the CPU
 of the build computer.  At present, this feature is only supported on
 Linux, and not all architectures are recognised.  If the auto-detect is