diff mbox

[ARM] core -> NEON extend

Message ID 4F46B257.3000505@codesourcery.com
State New
Headers show

Commit Message

Andrew Stubbs Feb. 23, 2012, 9:40 p.m. UTC
Hi All,

This patch changes how SImode-to-DImode extends are done when the value 
also has to be moved from core registers to VFP/NEON registers.

Currently, the compiler does the extend in core registers first, and then 
does the move. This adds to register pressure, which I would imagine to 
be a bad thing. If the value is not in a properly aligned register (the 
first parameter to a function never is) then the compiler also has to 
move it around.

With my patch, the compiler first moves the SImode value into the NEON 
register, and then extends it there, which uses no extra registers.

Zero extend, before and after (assuming the value is passed in r0):

         mov     r2, r0       | vdup.32   d16, r0
         movs    r3, #0       | vshr.u64  d16, d16, #32
         fmdrr   d16, r2, r3  |

Sign extend:

         mov     r2, r0       | vdup.32   d16, r0
         asrs    r3, r0, #31  | vshr.s64  d16, d16, #32
         fmdrr   d16, r2, r3  |

OK for 4.8?

Andrew


P.S.

I have experimented with doing zero-extends using something like:

         vmov.i64  d7, #0
         fmsr      s14, r0

But somehow the immediate load doesn't seem to work, and this approach 
limits the target register to VFP_LO_REGS. It's also not possible to load 
into only s15, so I'm not sure there's any advantage.
diff mbox

Patch

2012-02-23  Andrew Stubbs  <ams@codesourcery.com>

	gcc/
	* config/arm/arm.md (zero_extend<mode>di2): Add extra alternatives
	for NEON registers.
	(extend<mode>di2): Likewise.
	Prevent extend splitters doing NEON alternatives.
	* config/arm/iterators.md (qhs_extenddi_cstr, qhs_zextenddi_cstr):
	Adjust constraints to add new alternatives.
	* config/arm/neon.md: Add splitters for zero- and sign-extend.

	gcc/testsuite/
	* gcc.target/arm/neon-extend-1.c: New file.
	* gcc.target/arm/neon-extend-2.c: New file.

---
 gcc/config/arm/arm.md                        |   26 +++++++++++++++-----------
 gcc/config/arm/iterators.md                  |    4 ++--
 gcc/config/arm/neon.md                       |   22 ++++++++++++++++++++++
 gcc/testsuite/gcc.target/arm/neon-extend-1.c |   13 +++++++++++++
 gcc/testsuite/gcc.target/arm/neon-extend-2.c |   13 +++++++++++++
 5 files changed, 65 insertions(+), 13 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/arm/neon-extend-1.c
 create mode 100644 gcc/testsuite/gcc.target/arm/neon-extend-2.c

diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md
index 182c52a..35bf688 100644
--- a/gcc/config/arm/arm.md
+++ b/gcc/config/arm/arm.md
@@ -4479,33 +4479,35 @@ 
 ;; Zero and sign extension instructions.
 
 (define_insn "zero_extend<mode>di2"
-  [(set (match_operand:DI 0 "s_register_operand" "=r")
+  [(set (match_operand:DI 0 "s_register_operand" "=w, r")
         (zero_extend:DI (match_operand:QHSI 1 "<qhs_zextenddi_op>"
 					    "<qhs_zextenddi_cstr>")))]
   "TARGET_32BIT <qhs_zextenddi_cond>"
   "#"
-  [(set_attr "length" "8")
-   (set_attr "ce_count" "2")
-   (set_attr "predicable" "yes")]
+  [(set_attr "length" "8,8")
+   (set_attr "ce_count" "2,2")
+   (set_attr "predicable" "yes,yes")]
 )
 
 (define_insn "extend<mode>di2"
-  [(set (match_operand:DI 0 "s_register_operand" "=r")
+  [(set (match_operand:DI 0 "s_register_operand" "=w,r")
         (sign_extend:DI (match_operand:QHSI 1 "<qhs_extenddi_op>"
 					    "<qhs_extenddi_cstr>")))]
   "TARGET_32BIT <qhs_sextenddi_cond>"
   "#"
-  [(set_attr "length" "8")
-   (set_attr "ce_count" "2")
-   (set_attr "shift" "1")
-   (set_attr "predicable" "yes")]
+  [(set_attr "length" "8,8")
+   (set_attr "ce_count" "2,2")
+   (set_attr "shift" "1,1")
+   (set_attr "predicable" "yes,yes")]
 )
 
 ;; Splits for all extensions to DImode
 (define_split
   [(set (match_operand:DI 0 "s_register_operand" "")
         (zero_extend:DI (match_operand 1 "nonimmediate_operand" "")))]
-  "TARGET_32BIT"
+  "TARGET_32BIT && (!TARGET_NEON
+		    || (reload_completed
+			&& !(IS_VFP_REGNUM (REGNO (operands[0])))))"
   [(set (match_dup 0) (match_dup 1))]
 {
   rtx lo_part = gen_lowpart (SImode, operands[0]);
@@ -4531,7 +4533,9 @@ 
 (define_split
   [(set (match_operand:DI 0 "s_register_operand" "")
         (sign_extend:DI (match_operand 1 "nonimmediate_operand" "")))]
-  "TARGET_32BIT"
+  "TARGET_32BIT && (!TARGET_NEON
+		    || (reload_completed
+			&& !(IS_VFP_REGNUM (REGNO (operands[0])))))"
   [(set (match_dup 0) (ashiftrt:SI (match_dup 1) (const_int 31)))]
 {
   rtx lo_part = gen_lowpart (SImode, operands[0]);
diff --git a/gcc/config/arm/iterators.md b/gcc/config/arm/iterators.md
index 1567264..07ac5da 100644
--- a/gcc/config/arm/iterators.md
+++ b/gcc/config/arm/iterators.md
@@ -409,8 +409,8 @@ 
 (define_mode_attr qhs_extenddi_op [(SI "s_register_operand")
 				   (HI "nonimmediate_operand")
 				   (QI "arm_reg_or_extendqisi_mem_op")])
-(define_mode_attr qhs_extenddi_cstr [(SI "r") (HI "rm") (QI "rUq")])
-(define_mode_attr qhs_zextenddi_cstr [(SI "r") (HI "rm") (QI "rm")])
+(define_mode_attr qhs_extenddi_cstr [(SI "r,r") (HI "r,rm") (QI "r,rUq")])
+(define_mode_attr qhs_zextenddi_cstr [(SI "r,r") (HI "r,rm") (QI "r,rm")])
 
 ;; Mode attributes used for fixed-point support.
 (define_mode_attr qaddsub_suf [(V4UQQ "8") (V2UHQ "16") (UQQ "8") (UHQ "16")
diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md
index 6492721..618d59d 100644
--- a/gcc/config/arm/neon.md
+++ b/gcc/config/arm/neon.md
@@ -5879,3 +5879,25 @@ 
                                    (const_string "neon_fp_vadd_qqq_vabs_qq"))
                      (const_string "neon_int_5")))]
 )
+
+;; Copy from core-to-neon regs, then extend, not vice-versa
+
+(define_split
+  [(set (match_operand:DI 0 "s_register_operand" "")
+	(sign_extend:DI (match_operand:SI 1 "s_register_operand" "")))]
+  "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
+  [(set (match_dup 2) (vec_duplicate:V2SI (match_dup 1)))
+   (set (match_dup 0) (ashiftrt:DI (match_dup 0) (const_int 32)))]
+  {
+    operands[2] = gen_rtx_REG (V2SImode, REGNO (operands[0]));
+  })
+
+(define_split
+  [(set (match_operand:DI 0 "s_register_operand" "")
+	(zero_extend:DI (match_operand:SI 1 "s_register_operand" "")))]
+  "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
+  [(set (match_dup 2) (vec_duplicate:V2SI (match_dup 1)))
+   (set (match_dup 0) (lshiftrt:DI (match_dup 0) (const_int 32)))]
+  {
+    operands[2] = gen_rtx_REG (V2SImode, REGNO (operands[0]));
+  })
diff --git a/gcc/testsuite/gcc.target/arm/neon-extend-1.c b/gcc/testsuite/gcc.target/arm/neon-extend-1.c
new file mode 100644
index 0000000..cfe83ce
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/neon-extend-1.c
@@ -0,0 +1,13 @@ 
+/* { dg-require-effective-target arm_neon_hw } */
+/* { dg-options "-O2" } */
+/* { dg-add-options arm_neon } */
+
+void
+f (unsigned int a)
+{
+  unsigned long long b = a;
+  asm volatile ("@ extended to %0" : : "w" (b));
+}
+
+/* { dg-final { scan-assembler "vdup.32" } } */
+/* { dg-final { scan-assembler "vshr.u64" } } */
diff --git a/gcc/testsuite/gcc.target/arm/neon-extend-2.c b/gcc/testsuite/gcc.target/arm/neon-extend-2.c
new file mode 100644
index 0000000..1c5a17e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/neon-extend-2.c
@@ -0,0 +1,13 @@ 
+/* { dg-require-effective-target arm_neon_hw } */
+/* { dg-options "-O2" } */
+/* { dg-add-options arm_neon } */
+
+void
+f (int a)
+{
+  long long b = a;
+  asm volatile ("@ extended to %0" : : "w" (b));
+}
+
+/* { dg-final { scan-assembler "vdup.32" } } */
+/* { dg-final { scan-assembler "vshr.s64" } } */