From patchwork Sun Mar 13 16:31:48 2011 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Andrew Stubbs X-Patchwork-Id: 541 Return-Path: Delivered-To: unknown Received: from imap.gmail.com (74.125.159.109) by localhost6.localdomain6 with IMAP4-SSL; 08 Jun 2011 14:43:29 -0000 Delivered-To: patches@linaro.org Received: by 10.224.45.75 with SMTP id d11cs29653qaf; Sun, 13 Mar 2011 09:31:55 -0700 (PDT) Received: by 10.101.16.14 with SMTP id t14mr2744986ani.109.1300033915127; Sun, 13 Mar 2011 09:31:55 -0700 (PDT) Received: from mail.codesourcery.com (mail.codesourcery.com [38.113.113.100]) by mx.google.com with ESMTPS id c14si4162368anc.85.2011.03.13.09.31.53 (version=TLSv1/SSLv3 cipher=OTHER); Sun, 13 Mar 2011 09:31:54 -0700 (PDT) Received-SPF: pass (google.com: domain of ams@codesourcery.com designates 38.113.113.100 as permitted sender) client-ip=38.113.113.100; Authentication-Results: mx.google.com; spf=pass (google.com: domain of ams@codesourcery.com designates 38.113.113.100 as permitted sender) smtp.mail=ams@codesourcery.com Received: (qmail 23046 invoked from network); 13 Mar 2011 16:31:51 -0000 Received: from unknown (HELO ?192.168.0.104?) (ams@127.0.0.2) by mail.codesourcery.com with ESMTPA; 13 Mar 2011 16:31:51 -0000 Message-ID: <4D7CF174.1090608@codesourcery.com> Date: Sun, 13 Mar 2011 16:31:48 +0000 From: Andrew Stubbs Organization: CodeSourcery User-Agent: Mozilla/5.0 (X11; U; Linux x86_64; en-US; rv:1.9.2.14) Gecko/20110223 Lightning/1.0b2 Thunderbird/3.1.8 MIME-Version: 1.0 To: gcc-patches@gcc.gnu.org CC: patches@linaro.org Subject: [PATCH][ARM] Discourage use of NEON on Cortex-A8 This patch discourages the use of NEON for integer operations on ARM Cortex-A8. The problem is that transferring data from NEON/VFP registers to core registers is prohibitively expensive on A8. This should not affect Cortex-A9 in the same way. 
This change gives a 6% increase in performance on SPEC2000 crafty, on an imx51 board. An older version of the patch has been used for some time in the CodeSourcery and Linaro toolchains, so it's fairly well tested. OK (for stage 1)? Andrew 2011-03-13 Bernd Schmidt Andrew Stubbs gcc/ * config/arm/vfp.md (arm_movdi_vfp): Enable only when not tuning for Cortex-A8. (arm_movdi_vfp_cortexa8): New pattern. * config/arm/neon.md (adddi3_neon, subdi3_neon, anddi3_neon, iordi3_neon, xordi3_neon): Add alternatives to discourage Neon instructions when tuning for Cortex-A8. Set attribute "arch". * config/arm/arm.md: Move include arm-tune.md up a bit. (define_attr "arch"): Add "onlya8" and "nota8" values. (define_attr "arch_enabled"): Handle "onlya8" and "nota8". --- a/gcc/config/arm/arm.md +++ b/gcc/config/arm/arm.md @@ -149,6 +149,9 @@ ;;--------------------------------------------------------------------------- ;; Attributes +;; Processor type. This is created automatically from arm-cores.def. +(include "arm-tune.md") + ; IS_THUMB is set to 'yes' when we are generating Thumb code, and 'no' when ; generating ARM code. This is used to control the length of some insn ; patterns that share the same RTL in both ARM and Thumb code. @@ -192,7 +195,7 @@ ; for ARM or Thumb-2 with arm_arch6, and nov6 for ARM without ; arm_arch6. This attribute is used to compute attribute "enabled", ; use type "any" to enable an alternative in all cases. 
-(define_attr "arch" "any,a,t,32,t1,t2,v6,nov6" +(define_attr "arch" "any,a,t,32,t1,t2,v6,nov6,onlya8,nota8" (const_string "any")) (define_attr "arch_enabled" "no,yes" @@ -225,6 +228,14 @@ (and (eq_attr "arch" "nov6") (ne (symbol_ref "(TARGET_32BIT && !arm_arch6)") (const_int 0))) + (const_string "yes") + + (and (eq_attr "arch" "onlya8") + (eq_attr "tune" "cortexa8")) + (const_string "yes") + + (and (eq_attr "arch" "nota8") + (not (eq_attr "tune" "cortexa8"))) (const_string "yes")] (const_string "no"))) @@ -485,9 +496,6 @@ ;;--------------------------------------------------------------------------- ;; Pipeline descriptions -;; Processor type. This is created automatically from arm-cores.def. -(include "arm-tune.md") - (define_attr "tune_cortexr4" "yes,no" (const (if_then_else (eq_attr "tune" "cortexr4,cortexr4f") --- a/gcc/config/arm/neon.md +++ b/gcc/config/arm/neon.md @@ -583,23 +583,25 @@ ) (define_insn "adddi3_neon" - [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r") - (plus:DI (match_operand:DI 1 "s_register_operand" "%w,0,0") - (match_operand:DI 2 "s_register_operand" "w,r,0"))) + [(set (match_operand:DI 0 "s_register_operand" "=w,?w,?&r,?&r") + (plus:DI (match_operand:DI 1 "s_register_operand" "%w,w,0,0") + (match_operand:DI 2 "s_register_operand" "w,w,r,0"))) (clobber (reg:CC CC_REGNUM))] "TARGET_NEON" { switch (which_alternative) { - case 0: return "vadd.i64\t%P0, %P1, %P2"; - case 1: return "#"; + case 0: /* fall through */ + case 1: return "vadd.i64\t%P0, %P1, %P2"; case 2: return "#"; + case 3: return "#"; default: gcc_unreachable (); } } - [(set_attr "neon_type" "neon_int_1,*,*") - (set_attr "conds" "*,clob,clob") - (set_attr "length" "*,8,8")] + [(set_attr "neon_type" "neon_int_1,neon_int_1,*,*") + (set_attr "conds" "*,*,clob,clob") + (set_attr "length" "*,*,8,8") + (set_attr "arch" "nota8,onlya8,*,*")] ) (define_insn "*sub<mode>3_neon" @@ -617,24 +619,26 @@ ) (define_insn "subdi3_neon" - [(set (match_operand:DI 0 "s_register_operand"
"=w,?&r,?&r,?&r") - (minus:DI (match_operand:DI 1 "s_register_operand" "w,0,r,0") - (match_operand:DI 2 "s_register_operand" "w,r,0,0"))) + [(set (match_operand:DI 0 "s_register_operand" "=w,?w,?&r,?&r,?&r") + (minus:DI (match_operand:DI 1 "s_register_operand" "w,w,0,r,0") + (match_operand:DI 2 "s_register_operand" "w,w,r,0,0"))) (clobber (reg:CC CC_REGNUM))] "TARGET_NEON" { switch (which_alternative) { - case 0: return "vsub.i64\t%P0, %P1, %P2"; - case 1: /* fall through */ - case 2: /* fall through */ - case 3: return "subs\\t%Q0, %Q1, %Q2\;sbc\\t%R0, %R1, %R2"; + case 0: /* fall through */ + case 1: return "vsub.i64\t%P0, %P1, %P2"; + case 2: /* fall through */ + case 3: /* fall through */ + case 4: return "subs\\t%Q0, %Q1, %Q2\;sbc\\t%R0, %R1, %R2"; default: gcc_unreachable (); } } - [(set_attr "neon_type" "neon_int_2,*,*,*") - (set_attr "conds" "*,clob,clob,clob") - (set_attr "length" "*,8,8,8")] + [(set_attr "neon_type" "neon_int_2,neon_int_2,*,*,*") + (set_attr "conds" "*,*,clob,clob,clob") + (set_attr "length" "*,*,8,8,8") + (set_attr "arch" "nota8,onlya8,*,*,*")] ) (define_insn "*mul<mode>3_neon" @@ -720,23 +724,26 @@ ) (define_insn "iordi3_neon" - [(set (match_operand:DI 0 "s_register_operand" "=w,w,?&r,?&r") - (ior:DI (match_operand:DI 1 "s_register_operand" "%w,0,0,r") - (match_operand:DI 2 "neon_logic_op2" "w,Dl,r,r")))] + [(set (match_operand:DI 0 "s_register_operand" "=w,?w,w,?w,?&r,?&r") + (ior:DI (match_operand:DI 1 "s_register_operand" "%w,w,0,0,0,r") + (match_operand:DI 2 "neon_logic_op2" "w,w,Dl,Dl,r,r")))] "TARGET_NEON" { switch (which_alternative) { - case 0: return "vorr\t%P0, %P1, %P2"; - case 1: return neon_output_logic_immediate ("vorr", &operands[2], + case 0: /* fall through */ + case 1: return "vorr\t%P0, %P1, %P2"; + case 2: /* fall through */ + case 3: return neon_output_logic_immediate ("vorr", &operands[2], DImode, 0, VALID_NEON_QREG_MODE (DImode)); - case 2: return "#"; - case 3: return "#"; + case 4: return "#"; + case 5: return "#";
default: gcc_unreachable (); } } - [(set_attr "neon_type" "neon_int_1,neon_int_1,*,*") - (set_attr "length" "*,*,8,8")] + [(set_attr "neon_type" "neon_int_1,neon_int_1,neon_int_1,neon_int_1,*,*") + (set_attr "length" "*,*,*,*,8,8") + (set_attr "arch" "nota8,onlya8,nota8,onlya8,*,*")] ) ;; The concrete forms of the Neon immediate-logic instructions are vbic and @@ -762,23 +769,26 @@ ) (define_insn "anddi3_neon" - [(set (match_operand:DI 0 "s_register_operand" "=w,w,?&r,?&r") - (and:DI (match_operand:DI 1 "s_register_operand" "%w,0,0,r") - (match_operand:DI 2 "neon_inv_logic_op2" "w,DL,r,r")))] + [(set (match_operand:DI 0 "s_register_operand" "=w,?w,w,?w,?&r,?&r") + (and:DI (match_operand:DI 1 "s_register_operand" "%w,w,0,0,0,r") + (match_operand:DI 2 "neon_inv_logic_op2" "w,w,DL,DL,r,r")))] "TARGET_NEON" { switch (which_alternative) { - case 0: return "vand\t%P0, %P1, %P2"; - case 1: return neon_output_logic_immediate ("vand", &operands[2], + case 0: /* fall through */ + case 1: return "vand\t%P0, %P1, %P2"; + case 2: /* fall through */ + case 3: return neon_output_logic_immediate ("vand", &operands[2], DImode, 1, VALID_NEON_QREG_MODE (DImode)); - case 2: return "#"; - case 3: return "#"; + case 4: return "#"; + case 5: return "#"; default: gcc_unreachable (); } } - [(set_attr "neon_type" "neon_int_1,neon_int_1,*,*") - (set_attr "length" "*,*,8,8")] + [(set_attr "neon_type" "neon_int_1,neon_int_1,neon_int_1,neon_int_1,*,*") + (set_attr "length" "*,*,*,*,8,8") + (set_attr "arch" "nota8,onlya8,nota8,onlya8,*,*")] ) (define_insn "orn<mode>3_neon" @@ -836,16 +846,18 @@ ) (define_insn "xordi3_neon" - [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r") - (xor:DI (match_operand:DI 1 "s_register_operand" "%w,0,r") - (match_operand:DI 2 "s_register_operand" "w,r,r")))] + [(set (match_operand:DI 0 "s_register_operand" "=w,?w,?&r,?&r") + (xor:DI (match_operand:DI 1 "s_register_operand" "%w,w,0,r") + (match_operand:DI 2 "s_register_operand" "w,w,r,r")))] "TARGET_NEON" "@
veor\t%P0, %P1, %P2 + veor\t%P0, %P1, %P2 # #" - [(set_attr "neon_type" "neon_int_1,*,*") - (set_attr "length" "*,8,8")] + [(set_attr "neon_type" "neon_int_1,neon_int_1,*,*") + (set_attr "length" "*,*,8,8") + (set_attr "arch" "nota8,onlya8,*,*")] ) (define_insn "one_cmpl<mode>2" --- a/gcc/config/arm/vfp.md +++ b/gcc/config/arm/vfp.md @@ -134,9 +134,51 @@ ;; DImode moves (define_insn "*arm_movdi_vfp" - [(set (match_operand:DI 0 "nonimmediate_di_operand" "=r, r,m,w,r,w,w, Uv") + [(set (match_operand:DI 0 "nonimmediate_di_operand" "=r, r, m,w,r,w,w, Uv") (match_operand:DI 1 "di_operand" "rIK,mi,r,r,w,w,Uvi,w"))] - "TARGET_ARM && TARGET_HARD_FLOAT && TARGET_VFP + "TARGET_ARM && TARGET_HARD_FLOAT && TARGET_VFP && arm_tune != cortexa8 + && ( register_operand (operands[0], DImode) + || register_operand (operands[1], DImode))" + "* + switch (which_alternative) + { + case 0: + return \"#\"; + case 1: + case 2: + return output_move_double (operands); + case 3: + return \"fmdrr%?\\t%P0, %Q1, %R1\\t%@ int\"; + case 4: + return \"fmrrd%?\\t%Q0, %R0, %P1\\t%@ int\"; + case 5: + if (TARGET_VFP_SINGLE) + return \"fcpys%?\\t%0, %1\\t%@ int\;fcpys%?\\t%p0, %p1\\t%@ int\"; + else + return \"fcpyd%?\\t%P0, %P1\\t%@ int\"; + case 6: case 7: + return output_move_vfp (operands); + default: + gcc_unreachable (); + } + " + [(set_attr "type" "*,load2,store2,r_2_f,f_2_r,ffarithd,f_loadd,f_stored") + (set_attr "neon_type" "*,*,*,neon_mcr_2_mcrr,neon_mrrc,neon_vmov,*,*") + (set (attr "length") (cond [(eq_attr "alternative" "0,1,2") (const_int 8) + (eq_attr "alternative" "5") + (if_then_else + (eq (symbol_ref "TARGET_VFP_SINGLE") (const_int 1)) + (const_int 8) + (const_int 4))] + (const_int 4))) + (set_attr "pool_range" "*,1020,*,*,*,*,1020,*") + (set_attr "neg_pool_range" "*,1008,*,*,*,*,1008,*")] +) + +(define_insn "*arm_movdi_vfp_cortexa8" + [(set (match_operand:DI 0 "nonimmediate_di_operand" "=r, r,m,w,!r,w,w, Uv") + (match_operand:DI 1 "di_operand" "rIK,mi,r,r,w,w,Uvi,w"))] + "TARGET_ARM &&
TARGET_HARD_FLOAT && TARGET_VFP && arm_tune == cortexa8 && ( register_operand (operands[0], DImode) || register_operand (operands[1], DImode))" "*