2012-05-18 Andrew Stubbs <ams@codesourcery.com>
gcc/
* config/arm/arm.c (arm_print_operand): Add new 'E' format code.
* config/arm/arm.h (enum reg_class): Add VFP_LO_REGS_EVEN.
(REG_CLASS_NAMES, REG_CLASS_CONTENTS, IS_VFP_CLASS): Likewise.
* config/arm/arm.md (opt, opt_enabled): New attributes.
(enabled): Use opt_enabled.
(ashldi3, ashrdi3, lshrdi3): Add TARGET_NEON case.
* config/arm/constraints.md (T): New register constraint.
(Pf, PF, P1, Pg, Ph): New constraints.
* config/arm/neon.md (signed_shift_di3_neon, unsigned_shift_di3_neon,
ashldi3_neon_noclobber, ashldi3_neon, ashrdi3_neon_imm_noclobber,
ashrdi3_neon_imm, ashrdi3_neon_reg, ashrdi3_neon,
lshrdi3_neon_imm_noclobber, lshrdi3_neon_imm, lshrdi3_neon_reg,
lshrdi3_neon): New patterns.
* config/arm/predicates.md (int_1_to_64): New predicate.
@@ -17973,6 +17973,24 @@ arm_print_operand (FILE *stream, rtx x, int code)
}
return;
+ /* Print the VFP/Neon double precision register name that overlaps the
+ given single-precision register. */
+ case 'E':
+ {
+ int mode = GET_MODE (x);
+
+ if (GET_MODE_SIZE (mode) != 4
+ || GET_CODE (x) != REG
+ || !IS_VFP_REGNUM (REGNO (x)))
+ {
+ output_operand_lossage ("invalid operand for code '%c'", code);
+ return;
+ }
+
+ fprintf (stream, "d%d", (REGNO (x) - FIRST_VFP_REGNUM) >> 1);
+ }
+ return;
+
/* These two codes print the low/high doubleword register of a Neon quad
register, respectively. For pair-structure types, can also print
low/high quadword registers. */
@@ -1043,6 +1043,7 @@ enum reg_class
CIRRUS_REGS,
VFP_D0_D7_REGS,
VFP_LO_REGS,
+ VFP_LO_REGS_EVEN,
VFP_HI_REGS,
VFP_REGS,
IWMMXT_GR_REGS,
@@ -1069,6 +1070,7 @@ enum reg_class
"CIRRUS_REGS", \
"VFP_D0_D7_REGS", \
"VFP_LO_REGS", \
+ "VFP_LO_REGS_EVEN", \
"VFP_HI_REGS", \
"VFP_REGS", \
"IWMMXT_GR_REGS", \
@@ -1094,6 +1096,7 @@ enum reg_class
{ 0xF8000000, 0x000007FF, 0x00000000, 0x00000000 }, /* CIRRUS_REGS */ \
{ 0x00000000, 0x80000000, 0x00007FFF, 0x00000000 }, /* VFP_D0_D7_REGS */ \
{ 0x00000000, 0x80000000, 0x7FFFFFFF, 0x00000000 }, /* VFP_LO_REGS */ \
+ { 0x00000000, 0x80000000, 0x2AAAAAAA, 0x00000000 }, /* VFP_LO_REGS_EVEN */ \
{ 0x00000000, 0x00000000, 0x80000000, 0x7FFFFFFF }, /* VFP_HI_REGS */ \
{ 0x00000000, 0x80000000, 0xFFFFFFFF, 0x7FFFFFFF }, /* VFP_REGS */ \
{ 0x00000000, 0x00007800, 0x00000000, 0x00000000 }, /* IWMMXT_GR_REGS */ \
@@ -1111,7 +1114,7 @@ enum reg_class
/* Any of the VFP register classes. */
#define IS_VFP_CLASS(X) \
- ((X) == VFP_D0_D7_REGS || (X) == VFP_LO_REGS \
+ ((X) == VFP_D0_D7_REGS || (X) == VFP_LO_REGS || (X) == VFP_LO_REGS_EVEN \
|| (X) == VFP_HI_REGS || (X) == VFP_REGS)
/* The same information, inverted:
@@ -251,6 +251,22 @@
(const_string "yes")]
(const_string "no")))
+(define_attr "opt" "any,speed,size" ; optimization goal under which an alternative may be used
+ (const_string "any")) ; default: no speed/size restriction
+
+(define_attr "opt_enabled" "no,yes" ; "yes" when the "opt" attribute agrees with how cfun is being optimized
+ (cond [(eq_attr "opt" "any") ; unrestricted alternatives are always allowed
+ (const_string "yes")
+
+ (and (eq_attr "opt" "speed") ; speed-only alternative: needs optimize_function_for_speed_p
+ (match_test "optimize_function_for_speed_p (cfun)"))
+ (const_string "yes")
+
+ (and (eq_attr "opt" "size") ; size-only alternative: needs optimize_function_for_size_p
+ (match_test "optimize_function_for_size_p (cfun)"))
+ (const_string "yes")]
+ (const_string "no"))) ; restriction not satisfied
+
; Allows an insn to disable certain alternatives for reasons other than
; arch support.
(define_attr "insn_enabled" "no,yes"
@@ -258,11 +274,15 @@
; Enable all alternatives that are both arch_enabled and insn_enabled.
(define_attr "enabled" "no,yes"
- (if_then_else (eq_attr "insn_enabled" "yes")
- (if_then_else (eq_attr "arch_enabled" "yes")
- (const_string "yes")
- (const_string "no"))
- (const_string "no")))
+ (cond [(eq_attr "insn_enabled" "no") ; each test below can veto the alternative on its own
+ (const_string "no")
+
+ (eq_attr "arch_enabled" "no")
+ (const_string "no")
+
+ (eq_attr "opt_enabled" "no") ; speed/size restriction taken from the "opt" attribute
+ (const_string "no")]
+ (const_string "yes"))) ; enabled only when none of the three attributes said "no"
; POOL_RANGE is how far away from a constant pool entry that this insn
; can be placed. If the distance is zero, then this insn will never
@@ -3520,8 +3540,15 @@
(match_operand:SI 2 "reg_or_int_operand" "")))]
"TARGET_32BIT"
"
- if (!CONST_INT_P (operands[2])
- && (TARGET_REALLY_IWMMXT || (TARGET_HARD_FLOAT && TARGET_MAVERICK)))
+ if (TARGET_NEON)
+ {
+ /* Delay the decision whether to use NEON or core-regs until
+ register allocation. */
+ emit_insn (gen_ashldi3_neon (operands[0], operands[1], operands[2]));
+ DONE;
+ }
+ else if (!CONST_INT_P (operands[2])
+ && (TARGET_REALLY_IWMMXT || (TARGET_HARD_FLOAT && TARGET_MAVERICK)))
; /* No special preparation statements; expand pattern as above. */
else
{
@@ -3595,8 +3622,15 @@
(match_operand:SI 2 "reg_or_int_operand" "")))]
"TARGET_32BIT"
"
- if (!CONST_INT_P (operands[2])
- && (TARGET_REALLY_IWMMXT || (TARGET_HARD_FLOAT && TARGET_MAVERICK)))
+ if (TARGET_NEON)
+ {
+ /* Delay the decision whether to use NEON or core-regs until
+ register allocation. */
+ emit_insn (gen_ashrdi3_neon (operands[0], operands[1], operands[2]));
+ DONE;
+ }
+ else if (!CONST_INT_P (operands[2])
+ && (TARGET_REALLY_IWMMXT || (TARGET_HARD_FLOAT && TARGET_MAVERICK)))
; /* No special preparation statements; expand pattern as above. */
else
{
@@ -3668,8 +3702,15 @@
(match_operand:SI 2 "reg_or_int_operand" "")))]
"TARGET_32BIT"
"
- if (!CONST_INT_P (operands[2])
- && (TARGET_REALLY_IWMMXT || (TARGET_HARD_FLOAT && TARGET_MAVERICK)))
+ if (TARGET_NEON)
+ {
+ /* Delay the decision whether to use NEON or core-regs until
+ register allocation. */
+ emit_insn (gen_lshrdi3_neon (operands[0], operands[1], operands[2]));
+ DONE;
+ }
+ else if (!CONST_INT_P (operands[2])
+ && (TARGET_REALLY_IWMMXT || (TARGET_HARD_FLOAT && TARGET_MAVERICK)))
; /* No special preparation statements; expand pattern as above. */
else
{
@@ -19,7 +19,7 @@
;; <http://www.gnu.org/licenses/>.
;; The following register constraints have been used:
-;; - in ARM/Thumb-2 state: f, t, v, w, x, y, z
+;; - in ARM/Thumb-2 state: f, t, T, v, w, x, y, z
;; - in Thumb state: h, b
;; - in both states: l, c, k
;; In ARM state, 'l' is an alias for 'r'
@@ -29,7 +29,8 @@
;; in Thumb-1 state: I, J, K, L, M, N, O
;; The following multi-letter normal constraints have been used:
-;; in ARM/Thumb-2 state: Da, Db, Dc, Dn, Dl, DL, Dv, Dy, Di, Dt, Dz
+;; in ARM/Thumb-2 state: Da, Db, Dc, Dn, Dl, DL, Dv, Dy, Di, Dt, Dz, Pf, PF,
+;; Pg, Ph, P1
;; in Thumb-1 state: Pa, Pb, Pc, Pd, Pe
;; in Thumb-2 state: Pj, PJ, Ps, Pt, Pu, Pv, Pw, Px, Py
@@ -45,6 +46,9 @@
(define_register_constraint "t" "TARGET_32BIT ? VFP_LO_REGS : NO_REGS"
"The VFP registers @code{s0}-@code{s31}.")
+(define_register_constraint "T" "TARGET_32BIT ? VFP_LO_REGS_EVEN : NO_REGS" ; NO_REGS outside ARM/Thumb-2 state
+ "The even numbered VFP registers @code{s0}-@code{s31}.") ; used for SImode operands printed with %E as their overlapping D register
+
(define_register_constraint "v" "TARGET_ARM ? CIRRUS_REGS : NO_REGS"
"The Cirrus Maverick co-processor registers.")
@@ -177,6 +181,32 @@
(and (match_code "const_int")
(match_test "TARGET_THUMB1 && ival >= 256 && ival <= 510")))
+(define_constraint "Pf" ; constant left-shift counts; used by the ashldi3_neon patterns
+ "@internal In ARM/Thumb-2 state, a constant in the range 0 to 63"
+ (and (match_code "const_int")
+ (match_test "TARGET_32BIT && ival >= 0 && ival < 64")))
+
+(define_constraint "PF" ; constant right-shift counts; used by the ashrdi3/lshrdi3 NEON immediate patterns
+ "@internal In ARM/Thumb-2 state, a constant in the range 1 to 64"
+ (and (match_code "const_int")
+ (match_test "TARGET_32BIT && ival > 0 && ival <= 64")))
+
+(define_constraint "P1" ; marks the core-register alternatives that the splitters lower to a *_1bit insn
+ "@internal In ARM/Thumb2 state, a constant of 1"
+ (and (match_code "const_int")
+ (match_test "TARGET_32BIT && ival == 1")))
+
+(define_constraint "Pg" ; together with Ph this splits constant shift counts by instruction-set state
+ "@internal In ARM state, a constant in the range 0 to 63, and in thumb-2 state, 32 to 63"
+ (and (match_code "const_int")
+ (match_test "(TARGET_ARM && ival >= 0 && ival < 64)
+ || (TARGET_THUMB2 && ival >= 32 && ival < 64)")))
+
+(define_constraint "Ph" ; Thumb-2 counts not covered by Pg; the right-shift immediate patterns give this alternative a core scratch
+ "@internal In Thumb-2 state, a constant in the range 0 to 31"
+ (and (match_code "const_int")
+ (match_test "TARGET_THUMB2 && ival >= 0 && ival <= 31")))
+
(define_constraint "Ps"
"@internal In Thumb-2 state a constant in the range -255 to +255"
(and (match_code "const_int")
@@ -1133,6 +1133,266 @@
DONE;
})
+;; 64-bit shifts
+
+; Signed 64-bit NEON shift by a register amount (vshl.s64); callers negate the amount to get a right shift.
+(define_insn "signed_shift_di3_neon"
+ [(set (match_operand:DI 0 "s_register_operand" "=w")
+ (unspec:DI [(match_operand:DI 1 "s_register_operand" " w")
+ (match_operand:SI 2 "s_register_operand" " T")] ; even S register, so %E2 can name the D register it overlaps
+ UNSPEC_ASHIFT_SIGNED))]
+ "TARGET_NEON && reload_completed" ; matches only after reload; emitted by the ashrdi3_neon_reg split
+ "vshl.s64\t%P0, %P1, %E2 @ ashr %P0, %P1, %2"
+ [(set_attr "neon_type" "neon_vshl_ddd")]
+)
+
+; Unsigned 64-bit NEON shift by a register amount (vshl.u64); callers negate the amount to get a right shift.
+(define_insn "unsigned_shift_di3_neon"
+ [(set (match_operand:DI 0 "s_register_operand" "=w")
+ (unspec:DI [(match_operand:DI 1 "s_register_operand" " w")
+ (match_operand:SI 2 "s_register_operand" " T")] ; even S register, so %E2 can name the D register it overlaps
+ UNSPEC_ASHIFT_UNSIGNED))]
+ "TARGET_NEON && reload_completed" ; matches only after reload; emitted by the lshrdi3_neon_reg split
+ "vshl.u64\t%P0, %P1, %E2 @ lshr %P0, %P1, %2"
+ [(set_attr "neon_type" "neon_vshl_ddd")]
+)
+
+(define_insn "ashldi3_neon_noclobber" ; NEON-only 64-bit left shift without clobbers; emitted by the ashldi3_neon split
+ [(set (match_operand:DI 0 "s_register_operand" "=w,w")
+ (ashift:DI (match_operand:DI 1 "s_register_operand" " w,w")
+ (match_operand:SI 2 "reg_or_int_operand" "Pf,T")))] ; alt 0: constant 0..63, alt 1: amount in an even S register
+ "TARGET_NEON && reload_completed"
+ "@
+ vshl.u64\t%P0, %P1, %2
+ vshl.u64\t%P0, %P1, %E2 @ ashl %P0, %P1, %2"
+ [(set_attr "neon_type" "neon_vshl_ddd,neon_vshl_ddd")]
+)
+
+(define_insn_and_split "ashldi3_neon" ; 64-bit left shift; the register allocator picks NEON (w) or core-register (r) alternatives
+ [(set (match_operand:DI 0 "s_register_operand" "=w,w,?&r,?&r,?r,?r,?r,?w,?w")
+ (ashift:DI (match_operand:DI 1 "s_register_operand" " w,w, 0, r, r, r, r, w, w")
+ (match_operand:SI 2 "reg_or_int_operand" " T,i, r, r,P1,Pf,Pg, T, i")))
+ (clobber (match_scratch:SI 3 "=X,X, &r, &r, X, X,&r, X, X"))
+ (clobber (match_scratch:SI 4 "=X,X, &r, &r, X, X, X, X, X"))
+ (clobber (reg:CC_C CC_REGNUM))]
+ "TARGET_NEON"
+ "#" ; never output directly: always split once reload has chosen the registers
+ "TARGET_NEON && reload_completed"
+ [(const_int 0)]
+ "
+ {
+ if (IS_VFP_REGNUM (REGNO (operands[0])))
+ {
+ if (CONST_INT_P (operands[2]))
+ {
+ if (INTVAL (operands[2]) < 1)
+ {
+ emit_insn (gen_movdi (operands[0], operands[1]));
+ DONE;
+ }
+ else if (INTVAL (operands[2]) > 63)
+ operands[2] = gen_rtx_CONST_INT (VOIDmode, 63);
+ }
+
+ /* NEON destination: ditch the unnecessary clobbers. */
+ emit_insn (gen_ashldi3_neon_noclobber (operands[0], operands[1],
+ operands[2]));
+ }
+ else if (CONST_INT_P (operands[2]) && INTVAL (operands[2]) == 1)
+ /* Core-register LEFT shift by one bit. This clobbers CC. */
+ emit_insn (gen_arm_ashldi3_1bit (operands[0], operands[1]));
+ else
+ arm_emit_coreregs_64bit_shift (ASHIFT, operands[0], operands[1],
+ operands[2], operands[3], operands[4]);
+ DONE;
+ }"
+ [(set_attr "length" "*,*,24,24,8,12,12,*,*")
+ (set_attr "arch" "nota8,nota8,*,*,*,*,*,onlya8,onlya8")
+ (set_attr "opt" "*,*,speed,speed,speed,speed,speed,*,*")]
+)
+
+(define_insn "ashrdi3_neon_imm_noclobber" ; NEON-only arithmetic right shift by a constant; emitted by the ashrdi3_neon_imm split
+ [(set (match_operand:DI 0 "s_register_operand" "=w")
+ (ashiftrt:DI (match_operand:DI 1 "s_register_operand" " w")
+ (match_operand:SI 2 "int_1_to_64" "PF")))] ; constant in the range 1..64
+ "TARGET_NEON && reload_completed"
+ "vshr.s64\t%P0, %P1, %2"
+ [(set_attr "neon_type" "neon_vshl_ddd")]
+)
+
+(define_insn_and_split "ashrdi3_neon_imm" ; 64-bit arithmetic right shift by a constant, in NEON (w) or core (r) registers
+ [(set (match_operand:DI 0 "s_register_operand" "=w,?r,?r,?r,?w")
+ (ashiftrt:DI (match_operand:DI 1 "s_register_operand" " w, r, r, r, w")
+ (match_operand:SI 2 "int_1_to_64" "PF,P1,Pg,Ph,PF")))
+ (clobber (match_scratch:SI 3 "=X, X, X,&r, X")) ; only the Ph alternative asks for a core scratch
+ (clobber (reg:CC_C CC_REGNUM))]
+ "TARGET_NEON"
+ "#" ; never output directly: always split after reload
+ "TARGET_NEON && reload_completed"
+ [(const_int 0)]
+ "
+ {
+ if (IS_VFP_REGNUM (REGNO (operands[0])))
+ /* NEON destination: ditch the unnecessary clobbers. */
+ emit_insn (gen_ashrdi3_neon_imm_noclobber (operands[0], operands[1],
+ operands[2]));
+ else if (INTVAL (operands[2]) == 1)
+ /* Core-register shift by one bit. This clobbers CC. */
+ emit_insn (gen_arm_ashrdi3_1bit (operands[0], operands[1]));
+ else
+ arm_emit_coreregs_64bit_shift (ASHIFTRT, operands[0], operands[1],
+ operands[2], operands[3], NULL);
+ DONE;
+ }"
+ [(set_attr "length" "*,8,12,12,*")
+ (set_attr "arch" "nota8,*,*,*,onlya8")
+ (set_attr "opt" "*,speed,speed,speed,*")] ; core-register alternatives are enabled only when optimizing for speed
+)
+
+(define_insn_and_split "ashrdi3_neon_reg" ; 64-bit arithmetic right shift by a register amount, in NEON (w) or core (r) registers
+ [(set (match_operand:DI 0 "s_register_operand" "= w, w,?&r,?&r,?w,?w")
+ (ashiftrt:DI (match_operand:DI 1 "s_register_operand" " w, w, 0, r, w, w")
+ (match_operand:SI 2 "s_register_operand" " r, r, r, r, r, r")))
+ (clobber (match_scratch:SI 3 "= 2,&r, &r, &r, 2,&r")) ; core scratch; "2" lets it share operand 2's register
+ (clobber (match_scratch:SI 4 "=&T,&T, &r, &r,&T,&T")) ; even S register for the NEON alternatives, core scratch otherwise
+ (clobber (reg:CC CC_REGNUM))]
+ "TARGET_NEON"
+ "#" ; never output directly: always split after reload
+ "TARGET_NEON && reload_completed"
+ [(const_int 0)]
+ "
+ {
+ if (IS_VFP_REGNUM (REGNO (operands[0])))
+ {
+ emit_insn (gen_negsi2 (operands[3], operands[2])); /* negated amount turns vshl into a right shift */
+ emit_insn (gen_rtx_SET (SImode, operands[4], operands[3])); /* copy it into the even S register */
+ emit_insn (gen_signed_shift_di3_neon (operands[0], operands[1],
+ operands[4]));
+ }
+ else
+ /* This clobbers CC (ASHIFTRT only). */
+ arm_emit_coreregs_64bit_shift (ASHIFTRT, operands[0], operands[1],
+ operands[2], operands[3], operands[4]);
+ DONE;
+ }"
+ [(set_attr "length" "12,12,24,24,12,12")
+ (set_attr "arch" "nota8,nota8,*,*,onlya8,onlya8")
+ (set_attr "opt" "*,*,speed,speed,*,*")] ; core-register alternatives are enabled only when optimizing for speed
+)
+
+(define_expand "ashrdi3_neon" ; entry point used by the ashrdi3 expander when TARGET_NEON
+ [(match_operand:DI 0 "s_register_operand" "")
+ (match_operand:DI 1 "s_register_operand" "")
+ (match_operand:SI 2 "reg_or_int_operand" "")]
+ "TARGET_NEON"
+{
+ if (CONST_INT_P (operands[2]))
+ {
+ if (INTVAL (operands[2]) < 1) /* counts below 1 degrade to a plain move */
+ {
+ emit_insn (gen_movdi (operands[0], operands[1]));
+ DONE;
+ }
+ else if (INTVAL (operands[2]) > 64) /* clamp so the count satisfies int_1_to_64 */
+ operands[2] = gen_rtx_CONST_INT (VOIDmode, 64);
+
+ emit_insn (gen_ashrdi3_neon_imm (operands[0], operands[1], operands[2]));
+ }
+ else
+ emit_insn (gen_ashrdi3_neon_reg (operands[0], operands[1], operands[2])); /* variable shift amount */
+ DONE;
+})
+
+(define_insn "lshrdi3_neon_imm_noclobber" ; NEON-only logical right shift by a constant; emitted by the lshrdi3_neon_imm split
+ [(set (match_operand:DI 0 "s_register_operand" "=w")
+ (lshiftrt:DI (match_operand:DI 1 "s_register_operand" " w")
+ (match_operand:SI 2 "int_1_to_64" "PF")))] ; constant in the range 1..64
+ "TARGET_NEON && reload_completed"
+ "vshr.u64\t%P0, %P1, %2"
+ [(set_attr "neon_type" "neon_vshl_ddd")]
+)
+
+(define_insn_and_split "lshrdi3_neon_imm" ; 64-bit logical right shift by a constant, in NEON (w) or core (r) registers
+ [(set (match_operand:DI 0 "s_register_operand" "=w,?r,?r,?r,?w")
+ (lshiftrt:DI (match_operand:DI 1 "s_register_operand" " w, r, r, r, w")
+ (match_operand:SI 2 "int_1_to_64" "PF,P1,Pg,Ph,PF")))
+ (clobber (match_scratch:SI 3 "=X, X, X,&r, X")) ; only the Ph alternative asks for a core scratch
+ (clobber (reg:CC_C CC_REGNUM))]
+ "TARGET_NEON"
+ "#" ; never output directly: always split after reload
+ "TARGET_NEON && reload_completed"
+ [(const_int 0)]
+ "
+ {
+ if (IS_VFP_REGNUM (REGNO (operands[0])))
+ /* NEON destination: ditch the unnecessary clobbers. */
+ emit_insn (gen_lshrdi3_neon_imm_noclobber (operands[0], operands[1],
+ operands[2]));
+ else if (INTVAL (operands[2]) == 1)
+ /* Core-register shift by one bit. This clobbers CC. */
+ emit_insn (gen_arm_lshrdi3_1bit (operands[0], operands[1]));
+ else
+ arm_emit_coreregs_64bit_shift (LSHIFTRT, operands[0], operands[1],
+ operands[2], operands[3], NULL);
+ DONE;
+ }"
+ [(set_attr "length" "4,8,12,12,4")
+ (set_attr "arch" "nota8,*,*,*,onlya8")
+ (set_attr "opt" "*,speed,speed,speed,*")] ; core-register alternatives are enabled only when optimizing for speed
+)
+
+(define_insn_and_split "lshrdi3_neon_reg" ; 64-bit logical right shift by a register amount, in NEON (w) or core (r) registers
+ [(set (match_operand:DI 0 "s_register_operand" "= w, w,?&r,?&r,?w,?w")
+ (lshiftrt:DI (match_operand:DI 1 "s_register_operand" " w, w, 0, r, w, w")
+ (match_operand:SI 2 "s_register_operand" " r, r, r, r, r, r")))
+ (clobber (match_scratch:SI 3 "= 2,&r, &r, &r, 2,&r")) ; core scratch; "2" lets it share operand 2's register
+ (clobber (match_scratch:SI 4 "=&T,&T, &r, &r,&T,&T"))] ; even S register for the NEON alternatives, core scratch otherwise
+ "TARGET_NEON"
+ "#" ; never output directly: always split after reload
+ "TARGET_NEON && reload_completed"
+ [(const_int 0)]
+ "
+ {
+ if (IS_VFP_REGNUM (REGNO (operands[0])))
+ {
+ emit_insn (gen_negsi2 (operands[3], operands[2])); /* negated amount turns vshl into a right shift */
+ emit_insn (gen_rtx_SET (SImode, operands[4], operands[3])); /* copy it into the even S register */
+ emit_insn (gen_unsigned_shift_di3_neon (operands[0], operands[1],
+ operands[4]));
+ }
+ else
+ arm_emit_coreregs_64bit_shift (LSHIFTRT, operands[0], operands[1],
+ operands[2], operands[3], operands[4]);
+ DONE;
+ }"
+ [(set_attr "length" "12,12,24,24,12,12")
+ (set_attr "arch" "nota8,nota8,*,*,onlya8,onlya8")
+ (set_attr "opt" "*,*,speed,speed,*,*")] ; core-register alternatives are enabled only when optimizing for speed
+)
+
+(define_expand "lshrdi3_neon" ; entry point used by the lshrdi3 expander when TARGET_NEON
+ [(match_operand:DI 0 "s_register_operand" "")
+ (match_operand:DI 1 "s_register_operand" "")
+ (match_operand:SI 2 "reg_or_int_operand" "")]
+ "TARGET_NEON"
+{
+ if (CONST_INT_P (operands[2]))
+ {
+ if (INTVAL (operands[2]) < 1) /* counts below 1 degrade to a plain move */
+ {
+ emit_insn (gen_movdi (operands[0], operands[1]));
+ DONE;
+ }
+ else if (INTVAL (operands[2]) > 64) /* clamp so the count satisfies int_1_to_64 */
+ operands[2] = gen_rtx_CONST_INT (VOIDmode, 64);
+
+ emit_insn (gen_lshrdi3_neon_imm (operands[0], operands[1], operands[2]));
+ }
+ else
+ emit_insn (gen_lshrdi3_neon_reg (operands[0], operands[1], operands[2])); /* variable shift amount */
+ DONE;
+})
+
;; Widening operations
(define_insn "widen_ssum<mode>3"
@@ -644,6 +644,11 @@
(define_special_predicate "add_operator"
(match_code "plus"))
+
(define_predicate "mem_noofs_operand"
(and (match_code "mem")
(match_code "reg" "0")))
+
+(define_predicate "int_1_to_64" ; constant shift counts accepted by the NEON right-shift immediate patterns
+ (and (match_code "const_int")
+ (match_test "IN_RANGE (INTVAL (op), 1, 64)"))) ; same 1..64 range as the PF constraint