2011-02-22 Andrew Stubbs <ams@codesourcery.com>
Mark Shinwell <shinwell@codesourcery.com>
gcc/
* config/arm/vfp.md (*arm_movsi_vfp, *thumb2_movsi_vfp)
(*arm_movdi_vfp, *thumb2_movdi_vfp, *movsf_vfp, *thumb2_movsf_vfp)
(*movdf_vfp, *thumb2_movdf_vfp, *movsfcc_vfp, *thumb2_movsfcc_vfp)
(*movdfcc_vfp, *thumb2_movdfcc_vfp): Add neon_type.
* config/arm/arm.md (neon_type): Update comment.
@@ -337,8 +337,6 @@
(define_attr "ldsched" "no,yes" (const (symbol_ref "arm_ld_sched")))
;; Classification of NEON instructions for scheduling purposes.
-;; Do not set this attribute and the "type" attribute together in
-;; any one instruction pattern.
(define_attr "neon_type"
"neon_int_1,\
neon_int_2,\
@@ -83,6 +83,7 @@
"
[(set_attr "predicable" "yes")
(set_attr "type" "*,*,*,*,load1,store1,r_2_f,f_2_r,fcpys,f_loads,f_stores")
+ (set_attr "neon_type" "*,*,*,*,*,*,neon_mcr,neon_mrc,neon_vmov,*,*")
(set_attr "insn" "mov,mov,mvn,mov,*,*,*,*,*,*,*")
(set_attr "pool_range" "*,*,*,*,4096,*,*,*,*,1020,*")
(set_attr "neg_pool_range" "*,*,*,*,4084,*,*,*,*,1008,*")]
@@ -125,6 +126,7 @@
"
[(set_attr "predicable" "yes")
(set_attr "type" "*,*,*,*,load1,load1,store1,store1,r_2_f,f_2_r,fcpys,f_loads,f_stores")
+ (set_attr "neon_type" "*,*,*,*,*,*,*,*,neon_mcr,neon_mrc,neon_vmov,*,*")
(set_attr "insn" "mov,mov,mvn,mov,*,*,*,*,*,*,*,*,*")
(set_attr "pool_range" "*,*,*,*,1020,4096,*,*,*,*,*,1020,*")
(set_attr "neg_pool_range" "*,*,*,*, 0, 0,*,*,*,*,*,1008,*")]
@@ -163,6 +165,7 @@
}
"
[(set_attr "type" "*,load2,store2,r_2_f,f_2_r,ffarithd,f_loadd,f_stored")
+ (set_attr "neon_type" "*,*,*,neon_mcr_2_mcrr,neon_mrrc,neon_vmov,*,*")
(set (attr "length") (cond [(eq_attr "alternative" "0,1,2") (const_int 8)
(eq_attr "alternative" "5")
(if_then_else
@@ -201,6 +204,7 @@
}
"
[(set_attr "type" "*,load2,store2,r_2_f,f_2_r,ffarithd,f_loadd,f_stored")
+ (set_attr "neon_type" "*,*,*,neon_mcr_2_mcrr,neon_mrrc,neon_vmov,*,*")
(set (attr "length") (cond [(eq_attr "alternative" "0,1,2") (const_int 8)
(eq_attr "alternative" "5")
(if_then_else
@@ -355,6 +359,7 @@
[(set_attr "predicable" "yes")
(set_attr "type"
"r_2_f,f_2_r,fconsts,f_loads,f_stores,load1,store1,fcpys,*")
+ (set_attr "neon_type" "neon_mcr,neon_mrc,*,*,*,*,*,neon_vmov,*")
(set_attr "insn" "*,*,*,*,*,*,*,*,mov")
(set_attr "pool_range" "*,*,*,1020,*,4096,*,*,*")
(set_attr "neg_pool_range" "*,*,*,1008,*,4080,*,*,*")]
@@ -392,6 +397,7 @@
[(set_attr "predicable" "yes")
(set_attr "type"
"r_2_f,f_2_r,fconsts,f_loads,f_stores,load1,store1,fcpys,*")
+ (set_attr "neon_type" "neon_mcr,neon_mrc,*,*,*,*,*,neon_vmov,*")
(set_attr "insn" "*,*,*,*,*,*,*,*,mov")
(set_attr "pool_range" "*,*,*,1020,*,4092,*,*,*")
(set_attr "neg_pool_range" "*,*,*,1008,*,0,*,*,*")]
@@ -435,6 +441,7 @@
"
[(set_attr "type"
"r_2_f,f_2_r,fconstd,f_loadd,f_stored,load2,store2,ffarithd,*")
+ (set_attr "neon_type" "neon_mcr_2_mcrr,neon_mrrc,*,*,*,*,*,neon_vmov,*")
(set (attr "length") (cond [(eq_attr "alternative" "3,4,8") (const_int 8)
(eq_attr "alternative" "7")
(if_then_else
@@ -479,6 +486,7 @@
"
[(set_attr "type"
"r_2_f,f_2_r,fconstd,load2,store2,f_loadd,f_stored,ffarithd,*")
+ (set_attr "neon_type" "neon_mcr_2_mcrr,neon_mrrc,*,*,*,*,*,neon_vmov,*")
(set (attr "length") (cond [(eq_attr "alternative" "3,4,8") (const_int 8)
(eq_attr "alternative" "7")
(if_then_else
@@ -514,7 +522,8 @@
fmrs%D3\\t%0, %2\;fmrs%d3\\t%0, %1"
[(set_attr "conds" "use")
(set_attr "length" "4,4,8,4,4,8,4,4,8")
- (set_attr "type" "fcpys,fcpys,fcpys,r_2_f,r_2_f,r_2_f,f_2_r,f_2_r,f_2_r")]
+ (set_attr "type" "fcpys,fcpys,fcpys,r_2_f,r_2_f,r_2_f,f_2_r,f_2_r,f_2_r")
+ (set_attr "neon_type" "neon_vmov,neon_vmov,neon_vmov,neon_mcr,neon_mcr,neon_mcr,neon_mrc,neon_mrc,neon_mrc")]
)
(define_insn "*thumb2_movsfcc_vfp"
@@ -537,7 +546,8 @@
ite\\t%D3\;fmrs%D3\\t%0, %2\;fmrs%d3\\t%0, %1"
[(set_attr "conds" "use")
(set_attr "length" "6,6,10,6,6,10,6,6,10")
- (set_attr "type" "fcpys,fcpys,fcpys,r_2_f,r_2_f,r_2_f,f_2_r,f_2_r,f_2_r")]
+ (set_attr "type" "fcpys,fcpys,fcpys,r_2_f,r_2_f,r_2_f,f_2_r,f_2_r,f_2_r")
+ (set_attr "neon_type" "neon_vmov,neon_vmov,neon_vmov,neon_mcr,neon_mcr,neon_mcr,neon_mrc,neon_mrc,neon_mrc")]
)
(define_insn "*movdfcc_vfp"
@@ -560,7 +570,8 @@
fmrrd%D3\\t%Q0, %R0, %P2\;fmrrd%d3\\t%Q0, %R0, %P1"
[(set_attr "conds" "use")
(set_attr "length" "4,4,8,4,4,8,4,4,8")
- (set_attr "type" "ffarithd,ffarithd,ffarithd,r_2_f,r_2_f,r_2_f,f_2_r,f_2_r,f_2_r")]
+ (set_attr "type" "ffarithd,ffarithd,ffarithd,r_2_f,r_2_f,r_2_f,f_2_r,f_2_r,f_2_r")
+ (set_attr "neon_type" "neon_vmov,neon_vmov,neon_vmov,neon_mcr_2_mcrr,neon_mcr_2_mcrr,neon_mcr_2_mcrr,neon_mrrc,neon_mrrc,neon_mrrc")]
)
(define_insn "*thumb2_movdfcc_vfp"
@@ -583,7 +594,8 @@
ite\\t%D3\;fmrrd%D3\\t%Q0, %R0, %P2\;fmrrd%d3\\t%Q0, %R0, %P1"
[(set_attr "conds" "use")
(set_attr "length" "6,6,10,6,6,10,6,6,10")
- (set_attr "type" "ffarithd,ffarithd,ffarithd,r_2_f,r_2_f,r_2_f,f_2_r,f_2_r,f_2_r")]
+ (set_attr "type" "ffarithd,ffarithd,ffarithd,r_2_f,r_2_f,r_2_f,f_2_r,f_2_r,f_2_r")
+ (set_attr "neon_type" "neon_vmov,neon_vmov,neon_vmov,neon_mcr_2_mcrr,neon_mcr_2_mcrr,neon_mcr_2_mcrr,neon_mrrc,neon_mrrc,neon_mrrc")]
)
-------- Original Message -------- Subject: [PATCH][ARM] NEON scheduling tweak Date: Tue, 22 Feb 2011 12:27:08 +0000 From: Andrew Stubbs <ams@codesourcery.com> Organization: CodeSourcery To: gcc-patches@gcc.gnu.org The attached is an old patch, originally by Mark Shinwell, that has been used in CodeSourcery toolchains for some time. I've forward ported it to GCC 4.6, and benchmarked it. I find that it gives >2% (geomean) speed improvement in a popular benchmark running on Cortex-A9 NEON (Panda board). OK for stage 1? Andrew